diff --git a/segmentation_1_4_0_fp32_combined.onnx b/segmentation_1_4_0_fp32_combined.onnx new file mode 100644 index 0000000000000000000000000000000000000000..cbc506eae973a2b0fa9b42e512b9d8aec7effa83 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined.onnx @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a73e7b69981004bd3df16a7d8494b40a757c422c999f310feee416e6e58b3d81 +size 14975310 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_128.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_128.bin new file mode 100644 index 0000000000000000000000000000000000000000..02b5086de27074f1e09d68452f5ef6d0c8efed1b --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_128.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:801429b6a86c59d0a139acc086a4c0b19dea4fcf010750b8af911795779b3ef7 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_129.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_129.bin new file mode 100644 index 0000000000000000000000000000000000000000..59a5e7819e4620aab3c985b622fd7fbbfe7a3c27 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_129.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1545a2de7f8e377ec48bc84db8572b4a4733a86ba5f59ba34b08237b130101c4 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_130.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_130.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d1b27655b96f291b538e43f096322323c210379 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_130.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8eb16cc09bc51217dbd78f0fa4ff8c297513bb3ab6176fd7c20784771e643ea4 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_131.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_131.bin new file mode 100644 index 0000000000000000000000000000000000000000..25db846185e1ac68a773db91bc828da19d3686cd --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_131.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8986a453226b3ab1a63a3f018f4e1368dc71e078b43de4a7b0812f2355fbf6e +size 376 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_136.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_136.bin new file mode 100644 index 0000000000000000000000000000000000000000..25db846185e1ac68a773db91bc828da19d3686cd --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_136.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8986a453226b3ab1a63a3f018f4e1368dc71e078b43de4a7b0812f2355fbf6e +size 376 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_250.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_250.bin new file mode 100644 index 0000000000000000000000000000000000000000..c0f00d81c21c3f0c742f871c55db69cb6655aa74 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_250.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9deb3e2e988c1fb7596f08c77c56782705cd603ff0f4db22b6f730ce92a3d9da +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_37.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..07f454b5d1c091d1588089dd5fbe4708ef4843ef --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a98dcc2951bc9192d34bfb6307858452851a99bccf04a0e6f0751da03ee9aed +size 384 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_38.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..2de9b2d1af393de3d39925d433edf51853b67321 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3f5788d2f47a2ff92b6afba037dfc1cb27150a5d686b3a890ab7396dc16171 +size 384 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_4.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..220883bb1ce24ea066d47abfb1cab77933d3db76 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f901ed3e0570f6938244bfcab71f035049bb7eb40aca251115b78c1c6230eb7b +size 384 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_40.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..a230765297efaa452fa83c41de6517c38df214c3 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10fe86e880716f51f9b88c3bfd1f1a3a2a6bf5d80aa35a23fe02afc67a0bf8ce +size 384 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_41.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..b37fa99dec5425846e81b12a7ad7cbd83a12fb25 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8be2ecd7a3db1c78360a3a8fdbe93c3f05a70ce09b8a87c58877275bceef18b9 +size 384 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_42.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e4dcec3c39992cd6c499f0fb89ea67024f84788 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8fec3a09bf7510cd8ce1ab86c293d7b466805391ace22f6b975a073c96e6e03 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_43.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..aee78aacd7295f4203f9b7edd9ae96ae9b801e09 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:541b329c484a094eeac656452e18f0be0816bb2438bdfd19f8ad1e0a9b3b9cc3 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_44.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..a46306b7b954a0c04b68581daf882a90cd19a738 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6fb9253ded0289c5b796164e40201bd8f5d660b2373d91b5b58eb314375af7e7 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_45.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..c28a10e4854f0a442ab6aa8d2e165576197153f6 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ae43a8dcc0cd99130f42249d4f2a9f1fc91c3c99699db65bb98ad360a326ef6 +size 376 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_46.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..13a911d7d3267aa197d7c394e5f5519dbdec1964 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f160f47a8817f836489ba77f94fd815c17526fb3448a8ce9f2c7e6a14630ebe2 +size 516 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_56.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..a707c71102a670f6b49ffaca1b20988fcf1bdced --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94f047a402bfc8e5e365a3a8d8d97d3513b9df28c496a07106a7e4c18f8ba3e +size 376 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_57.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..a91b885fcf2933f093b1cdf0bb917eb8c00e455a --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be5bafa37310e0c9e5f314af01061c87eaa59c6b7c6b8940be41b557e434fa49 +size 384 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_58.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..e54ef067f6c36216f50e6c932a799549d16278cd --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb7e05ff34424a0c9fe31a13c78e1ed7edff1daf5f7a36ef57e9dce32b8bf80a +size 384 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_59.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..22a757510051813c931a2c83f542103f9004c5a1 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9a09a34c4d7ce81628e63bd5fa99c91004ce4c80d282cbd1e5a6931a1d97cc +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_6.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..cc78895426f0932bfcb2c9a86500529a165c7261 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ef1a050a26e3498db0b30095a869f974c7c8b96c8c5824b4f6f1d81c25a597e +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_60.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..76b2482fbca904ce7ae3a5a04f33a4aa87343e1c --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e9022fcedc09886b890a1051e3db46a07cfe8e11f911a04af20c132431970bc +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_61.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea1174c1a2a3dfc04184b154a98020fc6fdd9e44 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d103c8f0b6bab9ffe3211caa0f368d276d23b6b251daba563d2537fac9a0dfb9 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_62.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..a707c71102a670f6b49ffaca1b20988fcf1bdced --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94f047a402bfc8e5e365a3a8d8d97d3513b9df28c496a07106a7e4c18f8ba3e +size 376 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_63.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..fc7340e1ded71750550ce1df888672bc334d6f28 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f448e8669da6d20a453fb22041d00e74696063efae178dd0d9a366bcd5e11182 +size 384 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_64.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..22a757510051813c931a2c83f542103f9004c5a1 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac9a09a34c4d7ce81628e63bd5fa99c91004ce4c80d282cbd1e5a6931a1d97cc +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_65.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..76b2482fbca904ce7ae3a5a04f33a4aa87343e1c --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e9022fcedc09886b890a1051e3db46a07cfe8e11f911a04af20c132431970bc +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_66.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..ea1174c1a2a3dfc04184b154a98020fc6fdd9e44 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d103c8f0b6bab9ffe3211caa0f368d276d23b6b251daba563d2537fac9a0dfb9 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_67.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..a707c71102a670f6b49ffaca1b20988fcf1bdced --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b94f047a402bfc8e5e365a3a8d8d97d3513b9df28c496a07106a7e4c18f8ba3e +size 376 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_68.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..8731f5411f6c501c883399f896e90b21b3ec9203 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:002cf5d23c8929cc8c27573ba3129673938dd0c2fcd7a017f3bff7800e5134ef +size 516 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_69.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a5da4a1ebc53ea233d7ac88634687613da13148 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93e19fd229d0a4bcb70568ea1e79e2309157429f8aa5e82cf98d41f3462ba35f +size 384 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_7.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e05aba4faccef5b8c60acb57f49e84058f4c620 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f728557041508f674178e958e5df414570f923862e516e9c49f57290a915720 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_70.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..15ffa46426360ffcf0cbd0f668b39e0d3386afbf --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d68d94744795723d9de33f9801a38c386d18ee297c760856c06a0937f9a4bc0 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_71.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..3ccaa2c1bf1a9922346d7a356a885dc6e94f1579 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c53dfc2035a1838cfbc626818b0648aa57434b654f90e9509b786589ae076c1 +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_72.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..39c1b24a349dddd4e84aed294ee569926b973bc7 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:414db66a66f7ecb2350828a73e9496c75a63bd6657996d697cb372292a535d1b +size 296 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_73.bin b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..932f2a4195e3744cc7c001cb3663bfa425c2650a --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/lcp/0_0_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a5b30355951604c93a16f54d1605fa81e54ed33661924f5348d80270534c3c4 +size 376 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable4/src/0_2_reloadable4.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable4/src/0_2_reloadable4.cc new file mode 100644 index 0000000000000000000000000000000000000000..7b211124072bdc08c2e3d113228cd9b65f8857a3 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable4/src/0_2_reloadable4.cc @@ -0,0 +1,41 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void superkernel_reduce_mean_c8(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); + +// Declare Kernel objects and external arrays + + +void _b961_wrapper(void* args[]) +{ + superkernel_reduce_mean_c8( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[1] = { + _b961_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + idx += numAsyncIn; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + idx += numAsyncIn; +} diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.calltree new file mode 100644 index 0000000000000000000000000000000000000000..a9aa937024e08d6db65ac17b5f174a0a1241e359 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.calltree @@ -0,0 +1,108 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:20 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z13_b896_wrapperPPv (referenced text) + _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + _Z13_b901_wrapperPPv (referenced text) + _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _Z13_b906_wrapperPPv (referenced text) + _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + _Z13_b881_wrapperPPv (referenced text) + _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + _Z13_b891_wrapperPPv (referenced text) + _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + _Z13_b924_wrapperPPv (referenced text) + _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE + _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (*) + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (*) + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*) + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (*) + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (*) + _Z13_b919_wrapperPPv (referenced text) + _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 320 0 0 390 13150 _Z13kernelWrapperPPvjjjj + 0 192 1 1 36 4714 _Z13_b896_wrapperPPv + 64 192 1 2 568 4678 _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 0 0 3 4 270 270 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + 0 192 1 1 32 1252 _Z13_b901_wrapperPPv + 64 192 1 2 488 1220 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 128 2 3 62 304 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + 64 64 3 4 162 186 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + 128 128 2 3 114 428 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + 0 0 3 4 314 314 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 0 64 1 1 32 862 _Z13_b906_wrapperPPv + 64 64 1 2 488 830 _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 0 0 2 3 100 100 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + 0 0 2 3 242 242 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + 0 256 1 1 32 1394 _Z13_b881_wrapperPPv + 64 256 1 2 488 1362 _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 74 190 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + 64 192 2 3 150 684 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + 128 128 3 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 128 1 1 36 1092 _Z13_b891_wrapperPPv + 64 128 1 2 602 1056 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 192 1 1 40 6494 _Z13_b924_wrapperPPv + 64 192 1 2 1126 6454 _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE + 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (*) + 64 64 2 3 98 214 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*) + 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 128 2 3 16 550 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 128 128 2 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 192 1 1 36 2050 _Z13_b919_wrapperPPv + 128 192 1 2 478 2014 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 672 814 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 0 2 3 722 722 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + +Maximum call level : 5 +Maximum stack level: 4 +Maximum stack size : 320 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..cc24263e196c609ab062129e37812e382b48d43f --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.cmic2 @@ -0,0 +1,19187 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable5.cc" 94 first +.src_ref 0 "0_0_reloadable5.cc" 96 60 first +.src_ref 0 "0_0_reloadable5.cc" 96 110 +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.function_start + 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00100001" // /* MW 4 */ + 2355 "11010001" // /* MW 3 */ + 2356 "11000110" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 94 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ + 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "11010000" // /* MW 6 */ + 2367 "00101011" // /* MW 5 */ + 2368 "00000000" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "11110011" // /* MW 2 */ + 2371 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 96 110 +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2373 "01110000" // /* MW 7 */ + 2374 "10010000" // /* MW 6 */ + 2375 "11101000" // /* MW 5 */ + 2376 "00000001" // /* MW 4 */ + 2377 "10110000" // /* MW 3 */ + 2378 "10000111" // /* MW 2 */ + 2379 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 96 110 first + 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2381 "11100000" // /* MW 5 */ + 2382 "11000001" // /* MW 4 */ + 2383 "10110111" // /* MW 3 */ + 2384 "00000110" // /* MW 2 */ + 2385 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2387 "00100000" // /* MW 3 */ + 2388 "10011000" // /* MW 2 */ + 2389 "00011110" // /* MW 1 */ + 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2391 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2393 "10000010" // /* MW 3 */ + 2394 "01101000" // /* MW 2 */ + 2395 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2397 "00110110" // /* MW 3 */ + 2398 "00011110" // /* MW 2 */ + 2399 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2401 "01110110" // /* MW 3 */ + 2402 "00111110" // /* MW 2 */ + 2403 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2405 "01010110" // /* MW 3 */ + 2406 "11101110" // /* MW 2 */ + 2407 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2409 "01110110" // /* MW 3 */ + 2410 "00000111" // /* MW 2 */ + 2411 "00000111" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ + 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2423 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2425 "00110010" // /* MW 3 */ + 2426 "01100011" // /* MW 2 */ + 2427 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2429 "00110001" // /* MW 3 */ + 2430 "11010110" // /* MW 2 */ + 2431 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 + 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2433 "11111101" // /* MW 3 */ + 2434 "11100010" // /* MW 2 */ + 2435 "00010111" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ + 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2441 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2443 "00011000" // /* MW 3 */ + 2444 "10010111" // /* MW 2 */ + 2445 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2447 "00001001" // /* MW 3 */ + 2448 "00100100" // /* MW 2 */ + 2449 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 first + 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00101101" // /* MW 3 */ + 2452 "00101001" // /* MW 2 */ + 2453 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "00100000" // /* MW 3 */ + 2456 "10001010" // /* MW 2 */ + 2457 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2459 "10001011" // /* MW 5 */ + 2460 "11011000" // /* MW 4 */ + 2461 "11011111" // /* MW 3 */ + 2462 "01001110" // /* MW 2 */ + 2463 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2465 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2467 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2469 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2471 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2473 "00000101" // /* MW 3 */ + 2474 "00100110" // /* MW 2 */ + 2475 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2477 "11111100" // /* MW 3 */ + 2478 "11110100" // /* MW 2 */ + 2479 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2481 "00000000" // /* MW 7 */ + 2482 "11000001" // /* MW 6 */ + 2483 "10110100" // /* MW 5 */ + 2484 "00000011" // /* MW 4 */ + 2485 "10110000" // /* MW 3 */ + 2486 "01101010" // /* MW 2 */ + 2487 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2489 "01110110" // /* MW 3 */ + 2490 "00011110" // /* MW 2 */ + 2491 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2493 "10110110" // /* MW 3 */ + 2494 "00111110" // /* MW 2 */ + 2495 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2497 "10010110" // /* MW 3 */ + 2498 "11101110" // /* MW 2 */ + 2499 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2501 "01110110" // /* MW 3 */ + 2502 "00000111" // /* MW 2 */ + 2503 "00000111" // /* MW 1 */ + 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2505 "00000000" // /* MW 1 */ + 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2507 "00000000" // /* MW 1 */ + 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2509 "00000000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2517 "01010010" // /* MW 3 */ + 2518 "11100111" // /* MW 2 */ + 2519 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "01110001" // /* MW 3 */ + 2522 "11010110" // /* MW 2 */ + 2523 "00001111" // /* MW 1 */ + 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2525 "00000000" // /* MW 1 */ + 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2527 "00000000" // /* MW 1 */ + 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2529 "00000000" // /* MW 1 */ + 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2531 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2533 "00011000" // /* MW 3 */ + 2534 "00010111" // /* MW 2 */ + 2535 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 first + 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2537 "00101101" // /* MW 3 */ + 2538 "00100011" // /* MW 2 */ + 2539 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "10100000" // /* MW 3 */ + 2542 "10001000" // /* MW 2 */ + 2543 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000000" // /* MW 5 */ + 2546 "11001001" // /* MW 4 */ + 2547 "11001110" // /* MW 3 */ + 2548 "00000111" // /* MW 2 */ + 2549 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2551 "00101011" // /* MW 5 */ + 2552 "11010100" // /* MW 4 */ + 2553 "11011111" // /* MW 3 */ + 2554 "00010011" // /* MW 2 */ + 2555 "11100000" // /* MW 1 */ + 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2557 "00000000" // /* MW 1 */ + 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2559 "00000000" // /* MW 1 */ + 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2561 "00000000" // /* MW 1 */ + 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2563 "00000000" // /* MW 1 */ + 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2565 "00000000" // /* MW 1 */ + 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2567 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 4 +.no_stack_arguments + 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2569 "01000000" // /* MW 3 */ + 2570 "00110000" // /* MW 2 */ + 2571 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 105 60 +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.delay_slot + 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "11000000" // /* MW 3 */ + 2574 "01100000" // /* MW 2 */ + 2575 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2583 "01111110" // /* MW 9 */ + 2584 "10100101" // /* MW 8 */ + 2585 "00000001" // /* MW 7 */ + 2586 "00000000" // /* MW 6 */ + 2587 "00010000" // /* MW 5 */ + 2588 "00000000" // /* MW 4 */ + 2589 "11110000" // /* MW 3 */ + 2590 "00101100" // /* MW 2 */ + 2591 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 105 60 first +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.return_address + 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2593 "00001010" // /* MW 5 */ + 2594 "01000000" // /* MW 4 */ + 2595 "11010000" // /* MW 3 */ + 2596 "11000110" // /* MW 2 */ + 2597 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2599 "01010001" // /* MW 3 */ + 2600 "11101011" // /* MW 2 */ + 2601 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 + 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2603 "01000001" // /* MW 3 */ + 2604 "11101100" // /* MW 2 */ + 2605 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2607 "00101001" // /* MW 3 */ + 2608 "11110000" // /* MW 2 */ + 2609 "00000111" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ + 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2613 "00000000" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "10001000" // /* MW 3 */ + 2618 "01101000" // /* MW 2 */ + 2619 "00011001" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00110110" // /* MW 3 */ + 2622 "00000110" // /* MW 2 */ + 2623 "00000001" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ + 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2629 "00000000" // /* MW 1 */ + 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "00011100" // /* MW 3 */ + 2636 "10100000" // /* MW 2 */ + 2637 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "00001000" // /* MW 3 */ + 2640 "01010101" // /* MW 2 */ + 2641 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2643 "01000001" // /* MW 5 */ + 2644 "10101111" // /* MW 4 */ + 2645 "11011101" // /* MW 3 */ + 2646 "11000110" // /* MW 2 */ + 2647 "00111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 first + 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2649 "01010110" // /* MW 3 */ + 2650 "00000010" // /* MW 2 */ + 2651 "00000111" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ + 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2655 "00000000" // /* MW 1 */ + 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2657 "00000000" // /* MW 1 */ + 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2659 "00000000" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2663 "00010001" // /* MW 3 */ + 2664 "00100111" // /* MW 2 */ + 2665 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2667 "00010000" // /* MW 5 */ + 2668 "11010010" // /* MW 4 */ + 2669 "01000000" // /* MW 3 */ + 2670 "01100110" // /* MW 2 */ + 2671 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2673 "01100011" // /* MW 5 */ + 2674 "11101100" // /* MW 4 */ + 2675 "11010011" // /* MW 3 */ + 2676 "11000110" // /* MW 2 */ + 2677 "00000000" // /* MW 1 */ + 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2679 "00000000" // /* MW 1 */ + 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2681 "00000000" // /* MW 1 */ + 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2683 "00000000" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2691 "00001000" // /* MW 3 */ + 2692 "01010101" // /* MW 2 */ + 2693 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 + 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2695 "00111001" // /* MW 3 */ + 2696 "11111100" // /* MW 2 */ + 2697 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2699 "00110110" // /* MW 3 */ + 2700 "11110110" // /* MW 2 */ + 2701 "00000000" // /* MW 1 */ + 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "10011001" // /* MW 3 */ + 2704 "11110111" // /* MW 2 */ + 2705 "00000111" // /* MW 1 */ + 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11110001" // /* MW 3 */ + 2708 "11111001" // /* MW 2 */ + 2709 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 first + 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2711 "00000001" // /* MW 5 */ + 2712 "00000000" // /* MW 4 */ + 2713 "00000000" // /* MW 3 */ + 2714 "11111000" // /* MW 2 */ + 2715 "11111111" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ + 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2719 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 + 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2721 "00000000" // /* MW 3 */ + 2722 "00101000" // /* MW 2 */ + 2723 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "00011100" // /* MW 3 */ + 2726 "11100000" // /* MW 2 */ + 2727 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "00010001" // /* MW 3 */ + 2730 "00100001" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2733 "00000010" // /* MW 3 */ + 2734 "01100001" // /* MW 2 */ + 2735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2737 "00010001" // /* MW 3 */ + 2738 "11110110" // /* MW 2 */ + 2739 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2741 "00000000" // /* MW 1 */ +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 432 first +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.function_start + 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2753 "01111000" // /* MW 9 */ + 2754 "01100000" // /* MW 8 */ + 2755 "01001001" // /* MW 7 */ + 2756 "10001000" // /* MW 6 */ + 2757 "01000000" // /* MW 5 */ + 2758 "00000000" // /* MW 4 */ + 2759 "11010000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2763 "01001000" // /* MW 9 */ + 2764 "10000010" // /* MW 8 */ + 2765 "00110000" // /* MW 7 */ + 2766 "11101001" // /* MW 6 */ + 2767 "01010111" // /* MW 5 */ + 2768 "00111110" // /* MW 4 */ + 2769 "11010000" // /* MW 3 */ + 2770 "10000001" // /* MW 2 */ + 2771 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 432 +.src_ref 2 "conv2d_bf16_params.h" 444 52 + 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2773 "01110000" // /* MW 9 */ + 2774 "00000000" // /* MW 8 */ + 2775 "00000000" // /* MW 7 */ + 2776 "00000000" // /* MW 6 */ + 2777 "00000010" // /* MW 5 */ + 2778 "00000000" // /* MW 4 */ + 2779 "00000000" // /* MW 3 */ + 2780 "10000001" // /* MW 2 */ + 2781 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 30 +.src_ref 2 "conv2d_bf16_params.h" 458 30 + 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2783 "01011000" // /* MW 11 */ + 2784 "00010000" // /* MW 10 */ + 2785 "00000000" // /* MW 9 */ + 2786 "00101000" // /* MW 8 */ + 2787 "00000000" // /* MW 7 */ + 2788 "10000001" // /* MW 6 */ + 2789 "10110101" // /* MW 5 */ + 2790 "11111101" // /* MW 4 */ + 2791 "00000111" // /* MW 3 */ + 2792 "10000110" // /* MW 2 */ + 2793 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2795 "01011000" // /* MW 11 */ + 2796 "00001111" // /* MW 10 */ + 2797 "10001000" // /* MW 9 */ + 2798 "10101010" // /* MW 8 */ + 2799 "01010111" // /* MW 7 */ + 2800 "10111111" // /* MW 6 */ + 2801 "11010101" // /* MW 5 */ + 2802 "11111001" // /* MW 4 */ + 2803 "00000111" // /* MW 3 */ + 2804 "01100011" // /* MW 2 */ + 2805 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "00000010" // /* MW 5 */ + 2808 "01100000" // /* MW 4 */ + 2809 "10110000" // /* MW 3 */ + 2810 "10111110" // /* MW 2 */ + 2811 "11111110" // /* MW 1 */ + 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2815 "00101001" // /* MW 3 */ + 2816 "00011100" // /* MW 2 */ + 2817 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2819 "00001001" // /* MW 3 */ + 2820 "00011100" // /* MW 2 */ + 2821 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00101110" // /* MW 3 */ + 2824 "00011100" // /* MW 2 */ + 2825 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "00001110" // /* MW 3 */ + 2828 "00011100" // /* MW 2 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2841 "00101001" // /* MW 3 */ + 2842 "00011100" // /* MW 2 */ + 2843 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "00001001" // /* MW 3 */ + 2846 "00011100" // /* MW 2 */ + 2847 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00101110" // /* MW 3 */ + 2850 "00011100" // /* MW 2 */ + 2851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00001110" // /* MW 3 */ + 2854 "00011100" // /* MW 2 */ + 2855 "00000000" // /* MW 1 */ + 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2857 "00000000" // /* MW 1 */ + 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2859 "00000000" // /* MW 1 */ + 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2861 "00000000" // /* MW 1 */ + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2867 "00101001" // /* MW 3 */ + 2868 "00011100" // /* MW 2 */ + 2869 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001001" // /* MW 3 */ + 2872 "00011100" // /* MW 2 */ + 2873 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00001110" // /* MW 3 */ + 2876 "00000100" // /* MW 2 */ + 2877 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101110" // /* MW 3 */ + 2880 "00010100" // /* MW 2 */ + 2881 "00000000" // /* MW 1 */ + 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2883 "00000000" // /* MW 1 */ + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2893 "00001001" // /* MW 3 */ + 2894 "00000100" // /* MW 2 */ + 2895 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2897 "00101001" // /* MW 3 */ + 2898 "00010100" // /* MW 2 */ + 2899 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 40 first + 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "10101010" // /* MW 3 */ + 2902 "11011101" // /* MW 2 */ + 2903 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 447 34 first + 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2905 "00101010" // /* MW 3 */ + 2906 "00011110" // /* MW 2 */ + 2907 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 448 34 first + 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2909 "11001010" // /* MW 3 */ + 2910 "10111101" // /* MW 2 */ + 2911 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "11111010" // /* MW 3 */ + 2914 "11111101" // /* MW 2 */ + 2915 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first + 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "01101010" // /* MW 3 */ + 2918 "00001010" // /* MW 2 */ + 2919 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 20 first + 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2921 "11101010" // /* MW 3 */ + 2922 "10101100" // /* MW 2 */ + 2923 "00000010" // /* MW 1 */ + 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2925 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first + 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2927 "00011101" // /* MW 3 */ + 2928 "01000010" // /* MW 2 */ + 2929 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first +.src_ref 2 "conv2d_bf16_params.h" 462 7 first + 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2931 "00000001" // /* MW 5 */ + 2932 "00110001" // /* MW 4 */ + 2933 "11111001" // /* MW 3 */ + 2934 "00100000" // /* MW 2 */ + 2935 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "01011101" // /* MW 3 */ + 2938 "10100100" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2941 "01000111" // /* MW 3 */ + 2942 "11110110" // /* MW 2 */ + 2943 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2945 "00111001" // /* MW 5 */ + 2946 "10110111" // /* MW 4 */ + 2947 "01000000" // /* MW 3 */ + 2948 "01001010" // /* MW 2 */ + 2949 "11000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2951 "00100010" // /* MW 3 */ + 2952 "01111011" // /* MW 2 */ + 2953 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first + 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2955 "01100111" // /* MW 3 */ + 2956 "11001100" // /* MW 2 */ + 2957 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 + 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00000100" // /* MW 3 */ + 2960 "10110111" // /* MW 2 */ + 2961 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 first + 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2963 "01000001" // /* MW 5 */ + 2964 "10111011" // /* MW 4 */ + 2965 "10111100" // /* MW 3 */ + 2966 "11101011" // /* MW 2 */ + 2967 "01111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first + 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2969 "00000100" // /* MW 5 */ + 2970 "10011011" // /* MW 4 */ + 2971 "10110011" // /* MW 3 */ + 2972 "10111110" // /* MW 2 */ + 2973 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first + 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 2975 "00000001" // /* MW 5 */ + 2976 "01000000" // /* MW 4 */ + 2977 "11111000" // /* MW 3 */ + 2978 "00000101" // /* MW 2 */ + 2979 "11001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first +.delay_slot + 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2981 "01000111" // /* MW 3 */ + 2982 "10110110" // /* MW 2 */ + 2983 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first +.delay_slot + 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2985 "01000100" // /* MW 3 */ + 2986 "01110001" // /* MW 2 */ + 2987 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.delay_slot + 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "01011101" // /* MW 3 */ + 2990 "11111100" // /* MW 2 */ + 2991 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 first +.delay_slot + 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2993 "01001101" // /* MW 3 */ + 2994 "11101000" // /* MW 2 */ + 2995 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.delay_slot + 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2997 "00110010" // /* MW 3 */ + 2998 "10001100" // /* MW 2 */ + 2999 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 + 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 3001 "00000001" // /* MW 5 */ + 3002 "01000000" // /* MW 4 */ + 3003 "11111000" // /* MW 3 */ + 3004 "00000101" // /* MW 2 */ + 3005 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */ + 3017 "00100000" // /* MW 9 */ + 3018 "00000000" // /* MW 8 */ + 3019 "00000000" // /* MW 7 */ + 3020 "10000100" // /* MW 6 */ + 3021 "00000001" // /* MW 5 */ + 3022 "00000000" // /* MW 4 */ + 3023 "00000000" // /* MW 3 */ + 3024 "00101111" // /* MW 2 */ + 3025 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3027 "01011000" // /* MW 9 */ + 3028 "00001100" // /* MW 8 */ + 3029 "10001000" // /* MW 7 */ + 3030 "10101011" // /* MW 6 */ + 3031 "01010111" // /* MW 5 */ + 3032 "00111110" // /* MW 4 */ + 3033 "00000000" // /* MW 3 */ + 3034 "00011010" // /* MW 2 */ + 3035 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3037 "01000001" // /* MW 5 */ + 3038 "00100000" // /* MW 4 */ + 3039 "00100001" // /* MW 3 */ + 3040 "01000010" // /* MW 2 */ + 3041 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.delay_slot + 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "00001101" // /* MW 3 */ + 3044 "00011010" // /* MW 2 */ + 3045 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.delay_slot + 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3047 "00111101" // /* MW 3 */ + 3048 "00001110" // /* MW 2 */ + 3049 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3051 "11100010" // /* MW 5 */ + 3052 "10010001" // /* MW 4 */ + 3053 "11111111" // /* MW 3 */ + 3054 "00101100" // /* MW 2 */ + 3055 "00000000" // /* MW 1 */ +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3057 "01011000" // /* MW 11 */ + 3058 "11111100" // /* MW 10 */ + 3059 "10001111" // /* MW 9 */ + 3060 "10001000" // /* MW 8 */ + 3061 "01010000" // /* MW 7 */ + 3062 "00000001" // /* MW 6 */ + 3063 "00001011" // /* MW 5 */ + 3064 "10000010" // /* MW 4 */ + 3065 "10000001" // /* MW 3 */ + 3066 "00000010" // /* MW 2 */ + 3067 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3069 "01011000" // /* MW 9 */ + 3070 "00001100" // /* MW 8 */ + 3071 "10001000" // /* MW 7 */ + 3072 "00001011" // /* MW 6 */ + 3073 "10100000" // /* MW 5 */ + 3074 "00000001" // /* MW 4 */ + 3075 "11100000" // /* MW 3 */ + 3076 "00011000" // /* MW 2 */ + 3077 "00100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3079 "01011000" // /* MW 9 */ + 3080 "00000001" // /* MW 8 */ + 3081 "11101000" // /* MW 7 */ + 3082 "10101001" // /* MW 6 */ + 3083 "01010111" // /* MW 5 */ + 3084 "00111110" // /* MW 4 */ + 3085 "00000000" // /* MW 3 */ + 3086 "00000010" // /* MW 2 */ + 3087 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 87 + 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3089 "00000000" // /* MW 15 */ + 3090 "00000000" // /* MW 14 */ + 3091 "01011000" // /* MW 13 */ + 3092 "00000011" // /* MW 12 */ + 3093 "10101000" // /* MW 11 */ + 3094 "11101001" // /* MW 10 */ + 3095 "01110001" // /* MW 9 */ + 3096 "00000000" // /* MW 8 */ + 3097 "01011011" // /* MW 7 */ + 3098 "00000001" // /* MW 6 */ + 3099 "00100000" // /* MW 5 */ + 3100 "00000000" // /* MW 4 */ + 3101 "11110000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.src_ref 2 "conv2d_bf16_params.h" 495 68 first +.src_ref 2 "conv2d_bf16_params.h" 495 112 +.src_ref 2 "conv2d_bf16_params.h" 682 38 + 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00111100" // /* MW 8 */ + 3107 "00000000" // /* MW 7 */ + 3108 "00111100" // /* MW 6 */ + 3109 "10110011" // /* MW 5 */ + 3110 "00011011" // /* MW 4 */ + 3111 "01010000" // /* MW 3 */ + 3112 "11000101" // /* MW 2 */ + 3113 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 first +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 495 112 + 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01011000" // /* MW 9 */ + 3116 "11001101" // /* MW 8 */ + 3117 "10000111" // /* MW 7 */ + 3118 "00010010" // /* MW 6 */ + 3119 "00101101" // /* MW 5 */ + 3120 "00000011" // /* MW 4 */ + 3121 "01010000" // /* MW 3 */ + 3122 "00000101" // /* MW 2 */ + 3123 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 first +.src_ref 2 "conv2d_bf16_params.h" 496 68 +.src_ref 2 "conv2d_bf16_params.h" 504 35 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 578 47 + 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3125 "01011000" // /* MW 9 */ + 3126 "00110111" // /* MW 8 */ + 3127 "10000000" // /* MW 7 */ + 3128 "10010001" // /* MW 6 */ + 3129 "11011010" // /* MW 5 */ + 3130 "00111011" // /* MW 4 */ + 3131 "00000000" // /* MW 3 */ + 3132 "01010111" // /* MW 2 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.src_ref 2 "conv2d_bf16_params.h" 504 45 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 +.src_ref 2 "conv2d_bf16_params.h" 519 42 +.src_ref 2 "conv2d_bf16_params.h" 700 34 + 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3135 "01011000" // /* MW 9 */ + 3136 "10111100" // /* MW 8 */ + 3137 "00000111" // /* MW 7 */ + 3138 "00111101" // /* MW 6 */ + 3139 "10110000" // /* MW 5 */ + 3140 "00101011" // /* MW 4 */ + 3141 "00000000" // /* MW 3 */ + 3142 "00000011" // /* MW 2 */ + 3143 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 492 25 first +.src_ref 2 "conv2d_bf16_params.h" 497 46 +.src_ref 2 "conv2d_bf16_params.h" 509 50 + 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3145 "01011000" // /* MW 9 */ + 3146 "01110000" // /* MW 8 */ + 3147 "10000000" // /* MW 7 */ + 3148 "01101100" // /* MW 6 */ + 3149 "01101100" // /* MW 5 */ + 3150 "00011111" // /* MW 4 */ + 3151 "00000000" // /* MW 3 */ + 3152 "00010000" // /* MW 2 */ + 3153 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 520 34 first + 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3155 "01011101" // /* MW 5 */ + 3156 "00011110" // /* MW 4 */ + 3157 "00001000" // /* MW 3 */ + 3158 "10010010" // /* MW 2 */ + 3159 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 +.src_ref 2 "conv2d_bf16_params.h" 520 48 + 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3161 "01011001" // /* MW 9 */ + 3162 "00110001" // /* MW 8 */ + 3163 "10000000" // /* MW 7 */ + 3164 "01101111" // /* MW 6 */ + 3165 "01100001" // /* MW 5 */ + 3166 "00101101" // /* MW 4 */ + 3167 "10110000" // /* MW 3 */ + 3168 "01011010" // /* MW 2 */ + 3169 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 +.src_ref 2 "conv2d_bf16_params.h" 507 42 first + 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3171 "00000101" // /* MW 5 */ + 3172 "00011111" // /* MW 4 */ + 3173 "00111100" // /* MW 3 */ + 3174 "10111010" // /* MW 2 */ + 3175 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 99 first + 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3177 "00010001" // /* MW 3 */ + 3178 "11000010" // /* MW 2 */ + 3179 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 610 64 +.src_ref 2 "conv2d_bf16_params.h" 709 96 + 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3181 "00011101" // /* MW 5 */ + 3182 "10100000" // /* MW 4 */ + 3183 "11110000" // /* MW 3 */ + 3184 "11000011" // /* MW 2 */ + 3185 "10001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first + 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3187 "00100001" // /* MW 3 */ + 3188 "10100011" // /* MW 2 */ + 3189 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 first + 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00011101" // /* MW 3 */ + 3192 "11111110" // /* MW 2 */ + 3193 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 506 48 +.src_ref 2 "conv2d_bf16_params.h" 519 42 first + 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3195 "01011001" // /* MW 9 */ + 3196 "01010111" // /* MW 8 */ + 3197 "10000000" // /* MW 7 */ + 3198 "11101110" // /* MW 6 */ + 3199 "11110001" // /* MW 5 */ + 3200 "00111011" // /* MW 4 */ + 3201 "00110000" // /* MW 3 */ + 3202 "01111110" // /* MW 2 */ + 3203 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 68 first +.src_ref 2 "conv2d_bf16_params.h" 504 35 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 + 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3205 "01011000" // /* MW 9 */ + 3206 "10110010" // /* MW 8 */ + 3207 "10000111" // /* MW 7 */ + 3208 "00111101" // /* MW 6 */ + 3209 "00110000" // /* MW 5 */ + 3210 "00101111" // /* MW 4 */ + 3211 "01010000" // /* MW 3 */ + 3212 "01010101" // /* MW 2 */ + 3213 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3215 "01111011" // /* MW 5 */ + 3216 "11001100" // /* MW 4 */ + 3217 "10111001" // /* MW 3 */ + 3218 "01001110" // /* MW 2 */ + 3219 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 first +.src_ref 2 "conv2d_bf16_params.h" 520 19 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3221 "01011000" // /* MW 9 */ + 3222 "11110110" // /* MW 8 */ + 3223 "00000000" // /* MW 7 */ + 3224 "00101101" // /* MW 6 */ + 3225 "01101011" // /* MW 5 */ + 3226 "00111111" // /* MW 4 */ + 3227 "11100000" // /* MW 3 */ + 3228 "01010100" // /* MW 2 */ + 3229 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 46 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3231 "01011000" // /* MW 9 */ + 3232 "01010000" // /* MW 8 */ + 3233 "10000111" // /* MW 7 */ + 3234 "00010000" // /* MW 6 */ + 3235 "00111000" // /* MW 5 */ + 3236 "00100111" // /* MW 4 */ + 3237 "01010000" // /* MW 3 */ + 3238 "01000011" // /* MW 2 */ + 3239 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3241 "01100111" // /* MW 3 */ + 3242 "11111110" // /* MW 2 */ + 3243 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "01100111" // /* MW 3 */ + 3246 "11100000" // /* MW 2 */ + 3247 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "00000101" // /* MW 3 */ + 3250 "11110111" // /* MW 2 */ + 3251 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3253 "01010100" // /* MW 3 */ + 3254 "11101011" // /* MW 2 */ + 3255 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3257 "01100001" // /* MW 5 */ + 3258 "10100000" // /* MW 4 */ + 3259 "11011000" // /* MW 3 */ + 3260 "10100011" // /* MW 2 */ + 3261 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 first +.src_ref 2 "conv2d_bf16_params.h" 507 34 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3263 "01001001" // /* MW 9 */ + 3264 "10000000" // /* MW 8 */ + 3265 "11001111" // /* MW 7 */ + 3266 "01101111" // /* MW 6 */ + 3267 "00101001" // /* MW 5 */ + 3268 "00011111" // /* MW 4 */ + 3269 "10110000" // /* MW 3 */ + 3270 "01000010" // /* MW 2 */ + 3271 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 first + 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3273 "00111011" // /* MW 5 */ + 3274 "01000110" // /* MW 4 */ + 3275 "00111111" // /* MW 3 */ + 3276 "11101010" // /* MW 2 */ + 3277 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3279 "01010000" // /* MW 7 */ + 3280 "10101000" // /* MW 6 */ + 3281 "00000000" // /* MW 5 */ + 3282 "00000010" // /* MW 4 */ + 3283 "00110000" // /* MW 3 */ + 3284 "01101010" // /* MW 2 */ + 3285 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 first +.src_ref 2 "conv2d_bf16_params.h" 509 19 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3287 "01111000" // /* MW 11 */ + 3288 "11001110" // /* MW 10 */ + 3289 "00001101" // /* MW 9 */ + 3290 "00101100" // /* MW 8 */ + 3291 "10110000" // /* MW 7 */ + 3292 "10100111" // /* MW 6 */ + 3293 "11110101" // /* MW 5 */ + 3294 "11100111" // /* MW 4 */ + 3295 "01010111" // /* MW 3 */ + 3296 "01001001" // /* MW 2 */ + 3297 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 19 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3299 "00010101" // /* MW 3 */ + 3300 "11100011" // /* MW 2 */ + 3301 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3303 "10000001" // /* MW 3 */ + 3304 "10110111" // /* MW 2 */ + 3305 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 47 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3307 "10010000" // /* MW 3 */ + 3308 "10110000" // /* MW 2 */ + 3309 "00010100" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 57 first + 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3317 "00100001" // /* MW 3 */ + 3318 "11100101" // /* MW 2 */ + 3319 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 + 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3321 "01010001" // /* MW 3 */ + 3322 "11001010" // /* MW 2 */ + 3323 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 48 first + 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3325 "01001010" // /* MW 3 */ + 3326 "10101010" // /* MW 2 */ + 3327 "00000010" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 62 + 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3341 "11100001" // /* MW 3 */ + 3342 "10100100" // /* MW 2 */ + 3343 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 + 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3345 "10111110" // /* MW 3 */ + 3346 "10100101" // /* MW 2 */ + 3347 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 first + 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3349 "00101101" // /* MW 3 */ + 3350 "10100100" // /* MW 2 */ + 3351 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3353 "00000000" // /* MW 5 */ + 3354 "10100000" // /* MW 4 */ + 3355 "00001101" // /* MW 3 */ + 3356 "00000001" // /* MW 2 */ + 3357 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3359 "00100000" // /* MW 3 */ + 3360 "11100101" // /* MW 2 */ + 3361 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3363 "00000000" // /* MW 5 */ + 3364 "10100000" // /* MW 4 */ + 3365 "00001101" // /* MW 3 */ + 3366 "11111111" // /* MW 2 */ + 3367 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 642 99 + 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3369 "11000001" // /* MW 5 */ + 3370 "00111111" // /* MW 4 */ + 3371 "10011001" // /* MW 3 */ + 3372 "11100100" // /* MW 2 */ + 3373 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 19 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "11100001" // /* MW 5 */ + 3376 "10111111" // /* MW 4 */ + 3377 "10111000" // /* MW 3 */ + 3378 "11100010" // /* MW 2 */ + 3379 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 512 64 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 first + 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3381 "00111011" // /* MW 5 */ + 3382 "11001110" // /* MW 4 */ + 3383 "00111001" // /* MW 3 */ + 3384 "11101110" // /* MW 2 */ + 3385 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3387 "00110001" // /* MW 3 */ + 3388 "10110101" // /* MW 2 */ + 3389 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3391 "10101101" // /* MW 3 */ + 3392 "00101001" // /* MW 2 */ + 3393 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 520 19 first + 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3395 "01100101" // /* MW 3 */ + 3396 "10110101" // /* MW 2 */ + 3397 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 36 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 first + 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3399 "00100000" // /* MW 5 */ + 3400 "01101001" // /* MW 4 */ + 3401 "00111111" // /* MW 3 */ + 3402 "01101010" // /* MW 2 */ + 3403 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 65 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 first +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3405 "10101000" // /* MW 9 */ + 3406 "10101000" // /* MW 8 */ + 3407 "11001110" // /* MW 7 */ + 3408 "01101111" // /* MW 6 */ + 3409 "01001001" // /* MW 5 */ + 3410 "00110111" // /* MW 4 */ + 3411 "01010000" // /* MW 3 */ + 3412 "01100101" // /* MW 2 */ + 3413 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3415 "11111001" // /* MW 5 */ + 3416 "10100011" // /* MW 4 */ + 3417 "10111000" // /* MW 3 */ + 3418 "10100011" // /* MW 2 */ + 3419 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 45 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3421 "00011111" // /* MW 5 */ + 3422 "01101011" // /* MW 4 */ + 3423 "11101101" // /* MW 3 */ + 3424 "01100100" // /* MW 2 */ + 3425 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3427 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3429 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 48 first +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3435 "11001010" // /* MW 5 */ + 3436 "10110101" // /* MW 4 */ + 3437 "10111101" // /* MW 3 */ + 3438 "01011111" // /* MW 2 */ + 3439 "10000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3441 "00000001" // /* MW 5 */ + 3442 "01000000" // /* MW 4 */ + 3443 "11111000" // /* MW 3 */ + 3444 "00000110" // /* MW 2 */ + 3445 "11111000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 76 first +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3447 "11110010" // /* MW 5 */ + 3448 "10111011" // /* MW 4 */ + 3449 "11101101" // /* MW 3 */ + 3450 "01000001" // /* MW 2 */ + 3451 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3453 "01011101" // /* MW 3 */ + 3454 "11101011" // /* MW 2 */ + 3455 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 first +.delay_slot + 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3457 "00010100" // /* MW 3 */ + 3458 "01100011" // /* MW 2 */ + 3459 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.src_ref 2 "conv2d_bf16_params.h" 539 139 first +.src_ref 2 "conv2d_bf16_params.h" 555 59 +.src_ref 2 "conv2d_bf16_params.h" 559 59 +.src_ref 2 "conv2d_bf16_params.h" 700 17 +.delay_slot + 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3461 "01011001" // /* MW 9 */ + 3462 "00000001" // /* MW 8 */ + 3463 "00101000" // /* MW 7 */ + 3464 "00111110" // /* MW 6 */ + 3465 "10111110" // /* MW 5 */ + 3466 "00001101" // /* MW 4 */ + 3467 "00110000" // /* MW 3 */ + 3468 "01000110" // /* MW 2 */ + 3469 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3471 "10011100" // /* MW 3 */ + 3472 "10011011" // /* MW 2 */ + 3473 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3475 "10010001" // /* MW 3 */ + 3476 "11100011" // /* MW 2 */ + 3477 "00000111" // /* MW 1 */ + 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3479 "00000000" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3491 "00000001" // /* MW 5 */ + 3492 "01000000" // /* MW 4 */ + 3493 "11111000" // /* MW 3 */ + 3494 "00000110" // /* MW 2 */ + 3495 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3507 "01011000" // /* MW 9 */ + 3508 "01000000" // /* MW 8 */ + 3509 "00101000" // /* MW 7 */ + 3510 "10001011" // /* MW 6 */ + 3511 "00010000" // /* MW 5 */ + 3512 "00000001" // /* MW 4 */ + 3513 "00000000" // /* MW 3 */ + 3514 "10111100" // /* MW 2 */ + 3515 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "11010010" // /* MW 3 */ + 3518 "01111110" // /* MW 2 */ + 3519 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3521 "01100111" // /* MW 3 */ + 3522 "01110110" // /* MW 2 */ + 3523 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3525 "00000001" // /* MW 5 */ + 3526 "10100000" // /* MW 4 */ + 3527 "01001111" // /* MW 3 */ + 3528 "00111000" // /* MW 2 */ + 3529 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 46 + 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3531 "01010000" // /* MW 3 */ + 3532 "00110010" // /* MW 2 */ + 3533 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 44 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3535 "11101111" // /* MW 3 */ + 3536 "01111101" // /* MW 2 */ + 3537 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3539 "00111001" // /* MW 5 */ + 3540 "11000100" // /* MW 4 */ + 3541 "01011101" // /* MW 3 */ + 3542 "11100011" // /* MW 2 */ + 3543 "11001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3545 "10000010" // /* MW 3 */ + 3546 "11100011" // /* MW 2 */ + 3547 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 79 + 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3549 "11101111" // /* MW 3 */ + 3550 "01100011" // /* MW 2 */ + 3551 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "11000001" // /* MW 3 */ + 3554 "11111001" // /* MW 2 */ + 3555 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3557 "11001110" // /* MW 3 */ + 3558 "01100011" // /* MW 2 */ + 3559 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 55 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3561 "00011100" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00000000" // /* MW 5 */ + 3564 "10000001" // /* MW 4 */ + 3565 "00010100" // /* MW 3 */ + 3566 "00100011" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01111000" // /* MW 9 */ + 3570 "00001110" // /* MW 8 */ + 3571 "01110000" // /* MW 7 */ + 3572 "11101011" // /* MW 6 */ + 3573 "11000111" // /* MW 5 */ + 3574 "00111111" // /* MW 4 */ + 3575 "00000000" // /* MW 3 */ + 3576 "00011001" // /* MW 2 */ + 3577 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3579 "11000010" // /* MW 3 */ + 3580 "01111111" // /* MW 2 */ + 3581 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 34 first +.src_ref 2 "conv2d_bf16_params.h" 641 32 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3583 "10101000" // /* MW 9 */ + 3584 "01110100" // /* MW 8 */ + 3585 "01001111" // /* MW 7 */ + 3586 "10000011" // /* MW 6 */ + 3587 "00000100" // /* MW 5 */ + 3588 "00100001" // /* MW 4 */ + 3589 "00100000" // /* MW 3 */ + 3590 "01101110" // /* MW 2 */ + 3591 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 61 first +.src_ref 2 "conv2d_bf16_params.h" 640 16 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3593 "01011000" // /* MW 9 */ + 3594 "00001001" // /* MW 8 */ + 3595 "10101000" // /* MW 7 */ + 3596 "10000011" // /* MW 6 */ + 3597 "01000100" // /* MW 5 */ + 3598 "00101001" // /* MW 4 */ + 3599 "00000000" // /* MW 3 */ + 3600 "00011110" // /* MW 2 */ + 3601 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3603 "11100010" // /* MW 3 */ + 3604 "01110011" // /* MW 2 */ + 3605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 47 first + 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3607 "10001000" // /* MW 3 */ + 3608 "11111001" // /* MW 2 */ + 3609 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 640 16 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00111101" // /* MW 3 */ + 3612 "01111011" // /* MW 2 */ + 3613 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3615 "00010000" // /* MW 9 */ + 3616 "00000100" // /* MW 8 */ + 3617 "00001010" // /* MW 7 */ + 3618 "00000011" // /* MW 6 */ + 3619 "00000000" // /* MW 5 */ + 3620 "00000000" // /* MW 4 */ + 3621 "00100000" // /* MW 3 */ + 3622 "11011110" // /* MW 2 */ + 3623 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 641 44 first +.src_ref 2 "conv2d_bf16_params.h" 642 45 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "11111111" // /* MW 5 */ + 3626 "00111010" // /* MW 4 */ + 3627 "10111111" // /* MW 3 */ + 3628 "11100111" // /* MW 2 */ + 3629 "11001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "11100110" // /* MW 3 */ + 3632 "11001111" // /* MW 2 */ + 3633 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 55 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3635 "00101001" // /* MW 5 */ + 3636 "10101000" // /* MW 4 */ + 3637 "00001011" // /* MW 3 */ + 3638 "11010010" // /* MW 2 */ + 3639 "10110100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3641 "00000001" // /* MW 5 */ + 3642 "00100001" // /* MW 4 */ + 3643 "01001101" // /* MW 3 */ + 3644 "10110000" // /* MW 2 */ + 3645 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3647 "00111001" // /* MW 5 */ + 3648 "11000010" // /* MW 4 */ + 3649 "00011101" // /* MW 3 */ + 3650 "10110101" // /* MW 2 */ + 3651 "00110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 99 first + 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3653 "00100100" // /* MW 3 */ + 3654 "11001111" // /* MW 2 */ + 3655 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3657 "01000001" // /* MW 5 */ + 3658 "10100110" // /* MW 4 */ + 3659 "01001101" // /* MW 3 */ + 3660 "11011110" // /* MW 2 */ + 3661 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3663 "01111101" // /* MW 5 */ + 3664 "00100000" // /* MW 4 */ + 3665 "01001001" // /* MW 3 */ + 3666 "00001000" // /* MW 2 */ + 3667 "00101001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 first + 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3669 "00100100" // /* MW 3 */ + 3670 "11101111" // /* MW 2 */ + 3671 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 15 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3673 "01011000" // /* MW 9 */ + 3674 "01110000" // /* MW 8 */ + 3675 "01001111" // /* MW 7 */ + 3676 "01101110" // /* MW 6 */ + 3677 "01000010" // /* MW 5 */ + 3678 "00100000" // /* MW 4 */ + 3679 "00000000" // /* MW 3 */ + 3680 "00011110" // /* MW 2 */ + 3681 "11011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3683 "00100010" // /* MW 3 */ + 3684 "10111101" // /* MW 2 */ + 3685 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 85 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3687 "01011000" // /* MW 9 */ + 3688 "00100000" // /* MW 8 */ + 3689 "00001001" // /* MW 7 */ + 3690 "11111110" // /* MW 6 */ + 3691 "10101001" // /* MW 5 */ + 3692 "00101111" // /* MW 4 */ + 3693 "00000000" // /* MW 3 */ + 3694 "00000101" // /* MW 2 */ + 3695 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3697 "01010010" // /* MW 3 */ + 3698 "00100000" // /* MW 2 */ + 3699 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 559 59 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3701 "11110010" // /* MW 5 */ + 3702 "10111101" // /* MW 4 */ + 3703 "11111101" // /* MW 3 */ + 3704 "00001001" // /* MW 2 */ + 3705 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 41 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3707 "00111001" // /* MW 5 */ + 3708 "11000100" // /* MW 4 */ + 3709 "10111101" // /* MW 3 */ + 3710 "00111111" // /* MW 2 */ + 3711 "10000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 117 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3713 "01011111" // /* MW 5 */ + 3714 "01101011" // /* MW 4 */ + 3715 "10110111" // /* MW 3 */ + 3716 "11101110" // /* MW 2 */ + 3717 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 700 34 first + 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3719 "00110010" // /* MW 3 */ + 3720 "10000100" // /* MW 2 */ + 3721 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 52 first + 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3723 "00001100" // /* MW 3 */ + 3724 "01111110" // /* MW 2 */ + 3725 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 92 first + 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3727 "10001111" // /* MW 3 */ + 3728 "00110001" // /* MW 2 */ + 3729 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 36 first + 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3731 "11000101" // /* MW 3 */ + 3732 "11110111" // /* MW 2 */ + 3733 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 64 first +.src_ref 2 "conv2d_bf16_params.h" 611 47 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 629 82 + 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3735 "01011000" // /* MW 11 */ + 3736 "00000000" // /* MW 10 */ + 3737 "10001001" // /* MW 9 */ + 3738 "11101110" // /* MW 8 */ + 3739 "11000000" // /* MW 7 */ + 3740 "10110111" // /* MW 6 */ + 3741 "10010101" // /* MW 5 */ + 3742 "11101110" // /* MW 4 */ + 3743 "00000111" // /* MW 3 */ + 3744 "00000011" // /* MW 2 */ + 3745 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 649 41 + 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3747 "00111001" // /* MW 5 */ + 3748 "10110111" // /* MW 4 */ + 3749 "01000000" // /* MW 3 */ + 3750 "00101000" // /* MW 2 */ + 3751 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3753 "00001100" // /* MW 5 */ + 3754 "10101100" // /* MW 4 */ + 3755 "00001111" // /* MW 3 */ + 3756 "00000000" // /* MW 2 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 60 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first + 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3759 "11001001" // /* MW 9 */ + 3760 "00111111" // /* MW 8 */ + 3761 "10001001" // /* MW 7 */ + 3762 "00111100" // /* MW 6 */ + 3763 "10110000" // /* MW 5 */ + 3764 "00011111" // /* MW 4 */ + 3765 "10110000" // /* MW 3 */ + 3766 "00010010" // /* MW 2 */ + 3767 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 53 +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 555 59 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3769 "11001000" // /* MW 11 */ + 3770 "01111111" // /* MW 10 */ + 3771 "11001100" // /* MW 9 */ + 3772 "10010010" // /* MW 8 */ + 3773 "11111111" // /* MW 7 */ + 3774 "10101101" // /* MW 6 */ + 3775 "10010001" // /* MW 5 */ + 3776 "00011100" // /* MW 4 */ + 3777 "10000010" // /* MW 3 */ + 3778 "10001100" // /* MW 2 */ + 3779 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 240 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3781 "01111001" // /* MW 9 */ + 3782 "10001110" // /* MW 8 */ + 3783 "01110000" // /* MW 7 */ + 3784 "11101111" // /* MW 6 */ + 3785 "01010111" // /* MW 5 */ + 3786 "00101011" // /* MW 4 */ + 3787 "00110000" // /* MW 3 */ + 3788 "01011010" // /* MW 2 */ + 3789 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 53 first +.src_ref 2 "conv2d_bf16_params.h" 559 53 +.src_ref 2 "conv2d_bf16_params.h" 621 140 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3791 "01011000" // /* MW 11 */ + 3792 "01011000" // /* MW 10 */ + 3793 "00000000" // /* MW 9 */ + 3794 "00001110" // /* MW 8 */ + 3795 "01001110" // /* MW 7 */ + 3796 "10101001" // /* MW 6 */ + 3797 "01010001" // /* MW 5 */ + 3798 "00011111" // /* MW 4 */ + 3799 "00000010" // /* MW 3 */ + 3800 "11011001" // /* MW 2 */ + 3801 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 53 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3803 "00100100" // /* MW 5 */ + 3804 "11100011" // /* MW 4 */ + 3805 "00111111" // /* MW 3 */ + 3806 "01100010" // /* MW 2 */ + 3807 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 47 first +.src_ref 2 "conv2d_bf16_params.h" 621 222 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3809 "01111000" // /* MW 11 */ + 3810 "10010000" // /* MW 10 */ + 3811 "01101001" // /* MW 9 */ + 3812 "00001111" // /* MW 8 */ + 3813 "11001110" // /* MW 7 */ + 3814 "10101011" // /* MW 6 */ + 3815 "10010001" // /* MW 5 */ + 3816 "11101111" // /* MW 4 */ + 3817 "00100010" // /* MW 3 */ + 3818 "01101110" // /* MW 2 */ + 3819 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 661 61 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3821 "11001000" // /* MW 9 */ + 3822 "11111111" // /* MW 8 */ + 3823 "10001100" // /* MW 7 */ + 3824 "00010010" // /* MW 6 */ + 3825 "11001110" // /* MW 5 */ + 3826 "00101001" // /* MW 4 */ + 3827 "00000000" // /* MW 3 */ + 3828 "11110011" // /* MW 2 */ + 3829 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 710 60 +.src_ref 2 "conv2d_bf16_params.h" 710 65 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3831 "01111000" // /* MW 9 */ + 3832 "10001110" // /* MW 8 */ + 3833 "01110000" // /* MW 7 */ + 3834 "01110011" // /* MW 6 */ + 3835 "11101010" // /* MW 5 */ + 3836 "00111011" // /* MW 4 */ + 3837 "00000000" // /* MW 3 */ + 3838 "00011101" // /* MW 2 */ + 3839 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000100" // /* MW 5 */ + 3842 "11001010" // /* MW 4 */ + 3843 "00101110" // /* MW 3 */ + 3844 "11101110" // /* MW 2 */ + 3845 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 649 41 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111000" // /* MW 9 */ + 3848 "10010000" // /* MW 8 */ + 3849 "01101001" // /* MW 7 */ + 3850 "10010011" // /* MW 6 */ + 3851 "00111001" // /* MW 5 */ + 3852 "00111111" // /* MW 4 */ + 3853 "00000000" // /* MW 3 */ + 3854 "00011111" // /* MW 2 */ + 3855 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00100010" // /* MW 3 */ + 3858 "11000100" // /* MW 2 */ + 3859 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 82 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3861 "01010001" // /* MW 3 */ + 3862 "11101011" // /* MW 2 */ + 3863 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 611 47 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3865 "01011001" // /* MW 9 */ + 3866 "11000000" // /* MW 8 */ + 3867 "01101111" // /* MW 7 */ + 3868 "10010000" // /* MW 6 */ + 3869 "00100111" // /* MW 5 */ + 3870 "00000100" // /* MW 4 */ + 3871 "00110000" // /* MW 3 */ + 3872 "10001110" // /* MW 2 */ + 3873 "01000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3875 "00110010" // /* MW 3 */ + 3876 "00111000" // /* MW 2 */ + 3877 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 643 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3879 "01111111" // /* MW 3 */ + 3880 "11111110" // /* MW 2 */ + 3881 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3883 "01100100" // /* MW 5 */ + 3884 "00001100" // /* MW 4 */ + 3885 "00101110" // /* MW 3 */ + 3886 "11000110" // /* MW 2 */ + 3887 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 47 first +.src_ref 2 "conv2d_bf16_params.h" 629 45 +.src_ref 2 "conv2d_bf16_params.h" 684 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3889 "01011001" // /* MW 9 */ + 3890 "00101000" // /* MW 8 */ + 3891 "10000000" // /* MW 7 */ + 3892 "01111100" // /* MW 6 */ + 3893 "00101001" // /* MW 5 */ + 3894 "00110101" // /* MW 4 */ + 3895 "00110000" // /* MW 3 */ + 3896 "10001110" // /* MW 2 */ + 3897 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 45 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3899 "11100100" // /* MW 5 */ + 3900 "00001101" // /* MW 4 */ + 3901 "00110001" // /* MW 3 */ + 3902 "01010110" // /* MW 2 */ + 3903 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 644 22 +.src_ref 2 "conv2d_bf16_params.h" 700 17 first +.src_ref 2 "conv2d_bf16_params.h" 705 50 +.src_ref 2 "conv2d_bf16_params.h" 705 61 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "10101000" // /* MW 9 */ + 3906 "11111100" // /* MW 8 */ + 3907 "10101001" // /* MW 7 */ + 3908 "11111110" // /* MW 6 */ + 3909 "00111000" // /* MW 5 */ + 3910 "00000110" // /* MW 4 */ + 3911 "00100000" // /* MW 3 */ + 3912 "00000010" // /* MW 2 */ + 3913 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 38 first +.src_ref 2 "conv2d_bf16_params.h" 700 111 +.src_ref 2 "conv2d_bf16_params.h" 700 149 +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "00000110" // /* MW 9 */ + 3916 "00000110" // /* MW 8 */ + 3917 "00000101" // /* MW 7 */ + 3918 "10000000" // /* MW 6 */ + 3919 "00010001" // /* MW 5 */ + 3920 "00011111" // /* MW 4 */ + 3921 "00100010" // /* MW 3 */ + 3922 "11000110" // /* MW 2 */ + 3923 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 +.src_ref 2 "conv2d_bf16_params.h" 649 38 first +.src_ref 2 "conv2d_bf16_params.h" 674 24 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3925 "00010001" // /* MW 9 */ + 3926 "11111000" // /* MW 8 */ + 3927 "01101111" // /* MW 7 */ + 3928 "00111110" // /* MW 6 */ + 3929 "00000000" // /* MW 5 */ + 3930 "00000000" // /* MW 4 */ + 3931 "00110000" // /* MW 3 */ + 3932 "11001110" // /* MW 2 */ + 3933 "01001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 first +.src_ref 2 "conv2d_bf16_params.h" 662 61 +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3935 "11001001" // /* MW 9 */ + 3936 "10111111" // /* MW 8 */ + 3937 "01001011" // /* MW 7 */ + 3938 "10100100" // /* MW 6 */ + 3939 "01001001" // /* MW 5 */ + 3940 "00111111" // /* MW 4 */ + 3941 "00110000" // /* MW 3 */ + 3942 "11010010" // /* MW 2 */ + 3943 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 663 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3945 "10011100" // /* MW 5 */ + 3946 "01010110" // /* MW 4 */ + 3947 "00110001" // /* MW 3 */ + 3948 "11000110" // /* MW 2 */ + 3949 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first + 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3951 "10000001" // /* MW 5 */ + 3952 "01111010" // /* MW 4 */ + 3953 "00111111" // /* MW 3 */ + 3954 "10001010" // /* MW 2 */ + 3955 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3957 "11100011" // /* MW 5 */ + 3958 "01110011" // /* MW 4 */ + 3959 "00111000" // /* MW 3 */ + 3960 "11111010" // /* MW 2 */ + 3961 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3963 "01011001" // /* MW 9 */ + 3964 "00000000" // /* MW 8 */ + 3965 "01100000" // /* MW 7 */ + 3966 "00110000" // /* MW 6 */ + 3967 "11111000" // /* MW 5 */ + 3968 "00101101" // /* MW 4 */ + 3969 "00110000" // /* MW 3 */ + 3970 "11010110" // /* MW 2 */ + 3971 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3973 "11001001" // /* MW 9 */ + 3974 "01111111" // /* MW 8 */ + 3975 "00101100" // /* MW 7 */ + 3976 "01111110" // /* MW 6 */ + 3977 "00100000" // /* MW 5 */ + 3978 "00111110" // /* MW 4 */ + 3979 "00110000" // /* MW 3 */ + 3980 "10001100" // /* MW 2 */ + 3981 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 705 50 first +.src_ref 2 "conv2d_bf16_params.h" 705 61 first + 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3983 "00001100" // /* MW 5 */ + 3984 "10111000" // /* MW 4 */ + 3985 "00111000" // /* MW 3 */ + 3986 "10001100" // /* MW 2 */ + 3987 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 +.src_ref 2 "conv2d_bf16_params.h" 674 24 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 682 38 +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.src_ref 2 "conv2d_bf16_params.h" 720 50 + 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3989 "01011001" // /* MW 9 */ + 3990 "00000000" // /* MW 8 */ + 3991 "01001000" // /* MW 7 */ + 3992 "00100100" // /* MW 6 */ + 3993 "00000001" // /* MW 5 */ + 3994 "00100111" // /* MW 4 */ + 3995 "00110000" // /* MW 3 */ + 3996 "11011010" // /* MW 2 */ + 3997 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3999 "01111001" // /* MW 9 */ + 4000 "00001110" // /* MW 8 */ + 4001 "01110000" // /* MW 7 */ + 4002 "10001111" // /* MW 6 */ + 4003 "00011111" // /* MW 5 */ + 4004 "00000101" // /* MW 4 */ + 4005 "00110000" // /* MW 3 */ + 4006 "11110010" // /* MW 2 */ + 4007 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 707 61 first + 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4009 "11011111" // /* MW 5 */ + 4010 "10111001" // /* MW 4 */ + 4011 "00111011" // /* MW 3 */ + 4012 "10010010" // /* MW 2 */ + 4013 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 674 22 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4015 "01011001" // /* MW 9 */ + 4016 "00000110" // /* MW 8 */ + 4017 "00001000" // /* MW 7 */ + 4018 "10001100" // /* MW 6 */ + 4019 "00001111" // /* MW 5 */ + 4020 "00100001" // /* MW 4 */ + 4021 "00110000" // /* MW 3 */ + 4022 "11000110" // /* MW 2 */ + 4023 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4025 "01111000" // /* MW 11 */ + 4026 "10010000" // /* MW 10 */ + 4027 "01101001" // /* MW 9 */ + 4028 "00010011" // /* MW 8 */ + 4029 "00000000" // /* MW 7 */ + 4030 "10011011" // /* MW 6 */ + 4031 "00010001" // /* MW 5 */ + 4032 "00011110" // /* MW 4 */ + 4033 "00000010" // /* MW 3 */ + 4034 "00000000" // /* MW 2 */ + 4035 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4037 "10100100" // /* MW 5 */ + 4038 "00010100" // /* MW 4 */ + 4039 "00100000" // /* MW 3 */ + 4040 "00010110" // /* MW 2 */ + 4041 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 691 56 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4043 "10101111" // /* MW 3 */ + 4044 "01100011" // /* MW 2 */ + 4045 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 709 71 first + 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4047 "01011001" // /* MW 9 */ + 4048 "11001000" // /* MW 8 */ + 4049 "00000111" // /* MW 7 */ + 4050 "01101101" // /* MW 6 */ + 4051 "00001000" // /* MW 5 */ + 4052 "00000111" // /* MW 4 */ + 4053 "00110000" // /* MW 3 */ + 4054 "10001100" // /* MW 2 */ + 4055 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 706 23 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4057 "11001000" // /* MW 11 */ + 4058 "11000001" // /* MW 10 */ + 4059 "10101000" // /* MW 9 */ + 4060 "11101101" // /* MW 8 */ + 4061 "11110111" // /* MW 7 */ + 4062 "10100000" // /* MW 6 */ + 4063 "01100001" // /* MW 5 */ + 4064 "01001000" // /* MW 4 */ + 4065 "00000010" // /* MW 3 */ + 4066 "01100011" // /* MW 2 */ + 4067 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 682 38 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4069 "01111011" // /* MW 5 */ + 4070 "11000000" // /* MW 4 */ + 4071 "00110110" // /* MW 3 */ + 4072 "00001010" // /* MW 2 */ + 4073 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 first + 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4075 "01000001" // /* MW 5 */ + 4076 "10001110" // /* MW 4 */ + 4077 "00111000" // /* MW 3 */ + 4078 "11011010" // /* MW 2 */ + 4079 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 + 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4081 "10011100" // /* MW 5 */ + 4082 "11001000" // /* MW 4 */ + 4083 "00111000" // /* MW 3 */ + 4084 "11001010" // /* MW 2 */ + 4085 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4087 "11011011" // /* MW 5 */ + 4088 "10010100" // /* MW 4 */ + 4089 "00110010" // /* MW 3 */ + 4090 "10010010" // /* MW 2 */ + 4091 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 706 28 first + 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4093 "01011001" // /* MW 9 */ + 4094 "11111101" // /* MW 8 */ + 4095 "00001111" // /* MW 7 */ + 4096 "00000100" // /* MW 6 */ + 4097 "00111000" // /* MW 5 */ + 4098 "00011010" // /* MW 4 */ + 4099 "00110000" // /* MW 3 */ + 4100 "10001110" // /* MW 2 */ + 4101 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4103 "00001110" // /* MW 3 */ + 4104 "11000000" // /* MW 2 */ + 4105 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 707 66 first + 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4107 "00011111" // /* MW 5 */ + 4108 "00010000" // /* MW 4 */ + 4109 "00110111" // /* MW 3 */ + 4110 "11001010" // /* MW 2 */ + 4111 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 709 96 first + 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4113 "00111011" // /* MW 5 */ + 4114 "00001100" // /* MW 4 */ + 4115 "00110000" // /* MW 3 */ + 4116 "10001100" // /* MW 2 */ + 4117 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 709 90 + 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4119 "00110001" // /* MW 9 */ + 4120 "11000110" // /* MW 8 */ + 4121 "00000011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "01100001" // /* MW 5 */ + 4124 "00011100" // /* MW 4 */ + 4125 "00100010" // /* MW 3 */ + 4126 "10110110" // /* MW 2 */ + 4127 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 707 50 first +.src_ref 2 "conv2d_bf16_params.h" 708 59 +.src_ref 2 "conv2d_bf16_params.h" 710 60 first +.src_ref 2 "conv2d_bf16_params.h" 710 65 first + 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4129 "11001000" // /* MW 11 */ + 4130 "00111111" // /* MW 10 */ + 4131 "00101000" // /* MW 9 */ + 4132 "00110000" // /* MW 8 */ + 4133 "01110000" // /* MW 7 */ + 4134 "10111010" // /* MW 6 */ + 4135 "10010001" // /* MW 5 */ + 4136 "00011100" // /* MW 4 */ + 4137 "00100010" // /* MW 3 */ + 4138 "00111010" // /* MW 2 */ + 4139 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 708 48 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4141 "10101111" // /* MW 9 */ + 4142 "01000001" // /* MW 8 */ + 4143 "00000001" // /* MW 7 */ + 4144 "10000000" // /* MW 6 */ + 4145 "00110001" // /* MW 5 */ + 4146 "00011100" // /* MW 4 */ + 4147 "00100010" // /* MW 3 */ + 4148 "10111110" // /* MW 2 */ + 4149 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 709 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first + 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4151 "00000000" // /* MW 5 */ + 4152 "01010000" // /* MW 4 */ + 4153 "00110000" // /* MW 3 */ + 4154 "10001110" // /* MW 2 */ + 4155 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 710 50 first +.delay_slot + 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4157 "11110001" // /* MW 3 */ + 4158 "01011100" // /* MW 2 */ + 4159 "00001010" // /* MW 1 */ +.delay_slot + 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4161 "00010001" // /* MW 3 */ + 4162 "00011100" // /* MW 2 */ + 4163 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 first +.delay_slot + 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4165 "01010001" // /* MW 3 */ + 4166 "00011100" // /* MW 2 */ + 4167 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.delay_slot + 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "01010001" // /* MW 3 */ + 4170 "00000100" // /* MW 2 */ + 4171 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 720 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first +.delay_slot + 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4173 "01110001" // /* MW 9 */ + 4174 "00000000" // /* MW 8 */ + 4175 "00000000" // /* MW 7 */ + 4176 "00000000" // /* MW 6 */ + 4177 "11111110" // /* MW 5 */ + 4178 "00111111" // /* MW 4 */ + 4179 "00110000" // /* MW 3 */ + 4180 "10001010" // /* MW 2 */ +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + 4181 "01000010" // /* MW 1 */ +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 689 first +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 704 12 +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.function_start + 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4193 "01111000" // /* MW 11 */ + 4194 "01100000" // /* MW 10 */ + 4195 "00001010" // /* MW 9 */ + 4196 "00001000" // /* MW 8 */ + 4197 "10000000" // /* MW 7 */ + 4198 "00000001" // /* MW 6 */ + 4199 "10001011" // /* MW 5 */ + 4200 "10000100" // /* MW 4 */ + 4201 "10000010" // /* MW 3 */ + 4202 "00000011" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 526 11 +.src_ref 2 "conv2d_bf16.h" 698 28 first +.src_ref 2 "conv2d_bf16.h" 704 12 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 + 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4205 "01100000" // /* MW 13 */ + 4206 "00001001" // /* MW 12 */ + 4207 "00100000" // /* MW 11 */ + 4208 "00100001" // /* MW 10 */ + 4209 "00000000" // /* MW 9 */ + 4210 "00110110" // /* MW 8 */ + 4211 "00000001" // /* MW 7 */ + 4212 "00110100" // /* MW 6 */ + 4213 "00101000" // /* MW 5 */ + 4214 "00101000" // /* MW 4 */ + 4215 "10001000" // /* MW 3 */ + 4216 "00000110" // /* MW 2 */ + 4217 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 702 37 + 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4219 "00010000" // /* MW 9 */ + 4220 "00110100" // /* MW 8 */ + 4221 "00110010" // /* MW 7 */ + 4222 "11110010" // /* MW 6 */ + 4223 "00000001" // /* MW 5 */ + 4224 "00000000" // /* MW 4 */ + 4225 "11010000" // /* MW 3 */ + 4226 "10010100" // /* MW 2 */ + 4227 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 43 +.src_ref 2 "conv2d_bf16.h" 702 4 first + 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4229 "00010000" // /* MW 9 */ + 4230 "01111000" // /* MW 8 */ + 4231 "01111000" // /* MW 7 */ + 4232 "00000100" // /* MW 6 */ + 4233 "00000000" // /* MW 5 */ + 4234 "00000000" // /* MW 4 */ + 4235 "11010000" // /* MW 3 */ + 4236 "10010000" // /* MW 2 */ + 4237 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 699 43 first +.src_ref 2 "conv2d_bf16.h" 702 4 + 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4239 "00010000" // /* MW 9 */ + 4240 "10010000" // /* MW 8 */ + 4241 "10111000" // /* MW 7 */ + 4242 "00000101" // /* MW 6 */ + 4243 "00000000" // /* MW 5 */ + 4244 "00000000" // /* MW 4 */ + 4245 "11010000" // /* MW 3 */ + 4246 "10000000" // /* MW 2 */ + 4247 "01100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 702 37 first + 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4249 "00000001" // /* MW 5 */ + 4250 "00000000" // /* MW 4 */ + 4251 "11010001" // /* MW 3 */ + 4252 "10000010" // /* MW 2 */ + 4253 "01111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 37 + 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4255 "00100010" // /* MW 3 */ + 4256 "00000100" // /* MW 2 */ + 4257 "00000100" // /* MW 1 */ + 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4259 "00000000" // /* MW 1 */ + 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4261 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 705 66 first + 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4263 "00000001" // /* MW 5 */ + 4264 "10000101" // /* MW 4 */ + 4265 "10000000" // /* MW 3 */ + 4266 "00001010" // /* MW 2 */ + 4267 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first + 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4269 "00010100" // /* MW 3 */ + 4270 "00110000" // /* MW 2 */ + 4271 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "00010100" // /* MW 3 */ + 4274 "00010100" // /* MW 2 */ + 4275 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 4 first +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4277 "11111101" // /* MW 5 */ + 4278 "11100000" // /* MW 4 */ + 4279 "10001010" // /* MW 3 */ + 4280 "00001010" // /* MW 2 */ + 4281 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4283 "00000000" // /* MW 5 */ + 4284 "11110101" // /* MW 4 */ + 4285 "10000000" // /* MW 3 */ + 4286 "00000010" // /* MW 2 */ + 4287 "11000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4289 "00010100" // /* MW 3 */ + 4290 "00010100" // /* MW 2 */ + 4291 "00111100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4293 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4295 "01111110" // /* MW 9 */ + 4296 "10100101" // /* MW 8 */ + 4297 "00000001" // /* MW 7 */ + 4298 "00000000" // /* MW 6 */ + 4299 "01010100" // /* MW 5 */ + 4300 "00000000" // /* MW 4 */ + 4301 "11110000" // /* MW 3 */ + 4302 "00101100" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "11000101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00101000" // /* MW 5 */ + 4316 "01100000" // /* MW 4 */ + 4317 "11111100" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "11000101" // /* MW 12 */ + 4325 "01000000" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.src_ref 2 "conv2d_bf16.h" 704 12 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00101000" // /* MW 5 */ + 4348 "00101000" // /* MW 4 */ + 4349 "11111000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "00000011" // /* MW 7 */ + 4362 "10000000" // /* MW 6 */ + 4363 "10101101" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "11000101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "00000011" // /* MW 7 */ + 4378 "00000000" // /* MW 6 */ + 4379 "00101001" // /* MW 5 */ + 4380 "01100000" // /* MW 4 */ + 4381 "11111100" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 3 "utils.h" 531 4 first +.src_ref 2 "conv2d_bf16.h" 706 18 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "11000101" // /* MW 12 */ + 4389 "01000000" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "00000011" // /* MW 7 */ + 4394 "00000000" // /* MW 6 */ + 4395 "00100011" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4401 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4403 "00000011" // /* MW 3 */ + 4404 "10000000" // /* MW 2 */ + 4405 "00001101" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4407 "01110000" // /* MW 7 */ + 4408 "11000101" // /* MW 6 */ + 4409 "00000001" // /* MW 5 */ + 4410 "00000000" // /* MW 4 */ + 4411 "01100000" // /* MW 3 */ + 4412 "00000000" // /* MW 2 */ + 4413 "00100000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4415 "10001010" // /* MW 3 */ + 4416 "10000001" // /* MW 2 */ + 4417 "00011000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4419 "00000011" // /* MW 3 */ + 4420 "00000000" // /* MW 2 */ + 4421 "00001011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first + 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4423 "01110000" // /* MW 7 */ + 4424 "11000101" // /* MW 6 */ + 4425 "00000001" // /* MW 5 */ + 4426 "00000000" // /* MW 4 */ + 4427 "01100000" // /* MW 3 */ + 4428 "00000000" // /* MW 2 */ + 4429 "10110000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.src_ref 2 "conv2d_bf16.h" 708 12 first + 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4431 "01110000" // /* MW 7 */ + 4432 "11000101" // /* MW 6 */ + 4433 "01000000" // /* MW 5 */ + 4434 "00000000" // /* MW 4 */ + 4435 "01100000" // /* MW 3 */ + 4436 "00000000" // /* MW 2 */ + 4437 "00100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4439 "00000011" // /* MW 3 */ + 4440 "00000000" // /* MW 2 */ + 4441 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.src_ref 2 "conv2d_bf16.h" 723 first + 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4443 "00000000" // /* MW 5 */ + 4444 "01010000" // /* MW 4 */ + 4445 "01100000" // /* MW 3 */ + 4446 "00000000" // /* MW 2 */ + 4447 "10110000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 708 12 first +.delay_slot + 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "00000011" // /* MW 3 */ + 4450 "00000000" // /* MW 2 */ + 4451 "00001001" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first +.delay_slot + 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4453 "00000011" // /* MW 3 */ + 4454 "00000000" // /* MW 2 */ + 4455 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + 4461 "00000000" // /* MW 1 */ +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.function_start + 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4465 "01100000" // /* MW 13 */ + 4466 "00010001" // /* MW 12 */ + 4467 "10010001" // /* MW 11 */ + 4468 "00001110" // /* MW 10 */ + 4469 "00000000" // /* MW 9 */ + 4470 "00000000" // /* MW 8 */ + 4471 "10000000" // /* MW 7 */ + 4472 "00000000" // /* MW 6 */ + 4473 "00100000" // /* MW 5 */ + 4474 "00111111" // /* MW 4 */ + 4475 "10000110" // /* MW 3 */ + 4476 "11100000" // /* MW 2 */ + 4477 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 241 95 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4479 "01111000" // /* MW 11 */ + 4480 "01100000" // /* MW 10 */ + 4481 "00101011" // /* MW 9 */ + 4482 "00001010" // /* MW 8 */ + 4483 "11000101" // /* MW 7 */ + 4484 "10111111" // /* MW 6 */ + 4485 "10010101" // /* MW 5 */ + 4486 "11110001" // /* MW 4 */ + 4487 "00000111" // /* MW 3 */ + 4488 "01110011" // /* MW 2 */ + 4489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 94 +.src_ref 2 "conv2d_bf16_params.h" 242 100 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 245 28 +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4491 "00001000" // /* MW 11 */ + 4492 "01000111" // /* MW 10 */ + 4493 "00110100" // /* MW 9 */ + 4494 "00101001" // /* MW 8 */ + 4495 "00010000" // /* MW 7 */ + 4496 "10000001" // /* MW 6 */ + 4497 "00110101" // /* MW 5 */ + 4498 "11011010" // /* MW 4 */ + 4499 "00000111" // /* MW 3 */ + 4500 "00011001" // /* MW 2 */ + 4501 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 240 68 first + 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4503 "00010000" // /* MW 11 */ + 4504 "00000000" // /* MW 10 */ + 4505 "10101000" // /* MW 9 */ + 4506 "00000011" // /* MW 8 */ + 4507 "01000000" // /* MW 7 */ + 4508 "10000000" // /* MW 6 */ + 4509 "00110101" // /* MW 5 */ + 4510 "11110101" // /* MW 4 */ + 4511 "11010111" // /* MW 3 */ + 4512 "11001010" // /* MW 2 */ + 4513 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.src_ref 2 "conv2d_bf16_params.h" 245 20 + 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4515 "10010000" // /* MW 11 */ + 4516 "11111111" // /* MW 10 */ + 4517 "11101111" // /* MW 9 */ + 4518 "11111111" // /* MW 8 */ + 4519 "01111111" // /* MW 7 */ + 4520 "10000000" // /* MW 6 */ + 4521 "11010101" // /* MW 5 */ + 4522 "11111101" // /* MW 4 */ + 4523 "10000111" // /* MW 3 */ + 4524 "00011000" // /* MW 2 */ + 4525 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4527 "01011000" // /* MW 11 */ + 4528 "11101100" // /* MW 10 */ + 4529 "00000111" // /* MW 9 */ + 4530 "00001010" // /* MW 8 */ + 4531 "01100001" // /* MW 7 */ + 4532 "10000001" // /* MW 6 */ + 4533 "10110101" // /* MW 5 */ + 4534 "11100001" // /* MW 4 */ + 4535 "00000111" // /* MW 3 */ + 4536 "10110100" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 39 +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.src_ref 2 "conv2d_bf16_params.h" 250 71 + 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4539 "01011000" // /* MW 11 */ + 4540 "11000100" // /* MW 10 */ + 4541 "10000111" // /* MW 9 */ + 4542 "11001010" // /* MW 8 */ + 4543 "01110111" // /* MW 7 */ + 4544 "10000111" // /* MW 6 */ + 4545 "11110101" // /* MW 5 */ + 4546 "11101101" // /* MW 4 */ + 4547 "00000111" // /* MW 3 */ + 4548 "10010101" // /* MW 2 */ + 4549 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 + 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4551 "01010000" // /* MW 7 */ + 4552 "01000000" // /* MW 6 */ + 4553 "10000000" // /* MW 5 */ + 4554 "00000011" // /* MW 4 */ + 4555 "10110000" // /* MW 3 */ + 4556 "01110011" // /* MW 2 */ + 4557 "11111111" // /* MW 1 */ + 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4559 "00111101" // /* MW 3 */ + 4560 "11100100" // /* MW 2 */ + 4561 "00001111" // /* MW 1 */ + 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4563 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 + 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "00100000" // /* MW 3 */ + 4566 "01011001" // /* MW 2 */ + 4567 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 241 95 first + 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "10011011" // /* MW 5 */ + 4570 "01110111" // /* MW 4 */ + 4571 "00110110" // /* MW 3 */ + 4572 "00110010" // /* MW 2 */ + 4573 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 first +.src_ref 2 "conv2d_bf16_params.h" 242 94 first + 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4575 "00101111" // /* MW 5 */ + 4576 "11110010" // /* MW 4 */ + 4577 "01011110" // /* MW 3 */ + 4578 "11111001" // /* MW 2 */ + 4579 "01011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 20 first + 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4581 "00101010" // /* MW 3 */ + 4582 "11001001" // /* MW 2 */ + 4583 "00000010" // /* MW 1 */ + 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4585 "00000000" // /* MW 1 */ + 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4587 "00000000" // /* MW 1 */ + 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4589 "00000000" // /* MW 1 */ + 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4591 "00000000" // /* MW 1 */ + 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4593 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 174 first + 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4595 "11101100" // /* MW 3 */ + 4596 "01110111" // /* MW 2 */ + 4597 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 + 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4599 "00110010" // /* MW 3 */ + 4600 "01011101" // /* MW 2 */ + 4601 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4603 "11001100" // /* MW 3 */ + 4604 "11110110" // /* MW 2 */ + 4605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 241 95 first +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4607 "11001111" // /* MW 5 */ + 4608 "10110111" // /* MW 4 */ + 4609 "11101110" // /* MW 3 */ + 4610 "01110000" // /* MW 2 */ + 4611 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 100 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4613 "00011101" // /* MW 3 */ + 4614 "01111111" // /* MW 2 */ + 4615 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4617 "11100010" // /* MW 3 */ + 4618 "01011000" // /* MW 2 */ + 4619 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 98 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4621 "11000101" // /* MW 3 */ + 4622 "11111001" // /* MW 2 */ + 4623 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 151 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "01100010" // /* MW 5 */ + 4626 "00111100" // /* MW 4 */ + 4627 "10011110" // /* MW 3 */ + 4628 "11111101" // /* MW 2 */ + 4629 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "11000010" // /* MW 3 */ + 4632 "01111001" // /* MW 2 */ + 4633 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 80 first + 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11001100" // /* MW 3 */ + 4636 "01111111" // /* MW 2 */ + 4637 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 117 first +.src_ref 2 "conv2d_bf16_params.h" 243 39 + 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4639 "11010001" // /* MW 5 */ + 4640 "11110111" // /* MW 4 */ + 4641 "00111110" // /* MW 3 */ + 4642 "01111110" // /* MW 2 */ + 4643 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 first +.src_ref 2 "conv2d_bf16_params.h" 245 28 first + 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4645 "00110001" // /* MW 5 */ + 4646 "10110010" // /* MW 4 */ + 4647 "01010100" // /* MW 3 */ + 4648 "01111001" // /* MW 2 */ + 4649 "01011101" // /* MW 1 */ + 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4651 "00000000" // /* MW 1 */ + 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4653 "00000000" // /* MW 1 */ + 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4655 "00000000" // /* MW 1 */ + 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4657 "00000000" // /* MW 1 */ + 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4659 "00000000" // /* MW 1 */ + 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4661 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 87 + 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4663 "11111100" // /* MW 5 */ + 4664 "10111110" // /* MW 4 */ + 4665 "00011111" // /* MW 3 */ + 4666 "10101101" // /* MW 2 */ + 4667 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4669 "00000001" // /* MW 5 */ + 4670 "01000000" // /* MW 4 */ + 4671 "01000000" // /* MW 3 */ + 4672 "00001001" // /* MW 2 */ + 4673 "01100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.delay_slot + 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4675 "01001000" // /* MW 3 */ + 4676 "10010011" // /* MW 2 */ + 4677 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4679 "10010000" // /* MW 3 */ + 4680 "11111110" // /* MW 2 */ + 4681 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4683 "01100100" // /* MW 3 */ + 4684 "01101101" // /* MW 2 */ + 4685 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4687 "01111100" // /* MW 3 */ + 4688 "11101111" // /* MW 2 */ + 4689 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 132 +.delay_slot + 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4691 "01100100" // /* MW 3 */ + 4692 "11100001" // /* MW 2 */ + 4693 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4695 "00000001" // /* MW 5 */ + 4696 "01000000" // /* MW 4 */ + 4697 "01000000" // /* MW 3 */ + 4698 "00001001" // /* MW 2 */ + 4699 "11101000" // /* MW 1 */ +.delay_slot + 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4701 "00011101" // /* MW 3 */ + 4702 "11101011" // /* MW 2 */ + 4703 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4711 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 + 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */ + 4713 "00100000" // /* MW 9 */ + 4714 "00000000" // /* MW 8 */ + 4715 "00000000" // /* MW 7 */ + 4716 "01010110" // /* MW 6 */ + 4717 "00000010" // /* MW 5 */ + 4718 "00000000" // /* MW 4 */ + 4719 "00000000" // /* MW 3 */ + 4720 "00111011" // /* MW 2 */ + 4721 "00000000" // /* MW 1 */ +.delay_slot + 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4723 "10011100" // /* MW 3 */ + 4724 "00011001" // /* MW 2 */ + 4725 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1849 12 +.delay_slot + 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4727 "00000101" // /* MW 3 */ + 4728 "00100110" // /* MW 2 */ + 4729 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4735 "00000000" // /* MW 1 */ +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 2 "conv2d_bf16_params.h" 250 71 first +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4737 "01111000" // /* MW 11 */ + 4738 "11001110" // /* MW 10 */ + 4739 "00001100" // /* MW 9 */ + 4740 "00111100" // /* MW 8 */ + 4741 "10111111" // /* MW 7 */ + 4742 "10101011" // /* MW 6 */ + 4743 "00011101" // /* MW 5 */ + 4744 "11101011" // /* MW 4 */ + 4745 "00000111" // /* MW 3 */ + 4746 "10010101" // /* MW 2 */ + 4747 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4749 "01011101" // /* MW 3 */ + 4750 "10101011" // /* MW 2 */ + 4751 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 + 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4753 "10010010" // /* MW 3 */ + 4754 "01101011" // /* MW 2 */ + 4755 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4757 "11100111" // /* MW 3 */ + 4758 "11110111" // /* MW 2 */ + 4759 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4761 "01000001" // /* MW 5 */ + 4762 "10110000" // /* MW 4 */ + 4763 "01001101" // /* MW 3 */ + 4764 "11110010" // /* MW 2 */ + 4765 "10101100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4767 "00110010" // /* MW 3 */ + 4768 "01100111" // /* MW 2 */ + 4769 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 first + 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4771 "01000100" // /* MW 3 */ + 4772 "00101001" // /* MW 2 */ + 4773 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4775 "11110000" // /* MW 3 */ + 4776 "00110110" // /* MW 2 */ + 4777 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 152 first + 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4779 "10001011" // /* MW 5 */ + 4780 "11001111" // /* MW 4 */ + 4781 "11111001" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 +.src_ref 2 "conv2d_bf16_params.h" 258 8 first + 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4785 "01100000" // /* MW 11 */ + 4786 "00000000" // /* MW 10 */ + 4787 "00010000" // /* MW 9 */ + 4788 "01011100" // /* MW 8 */ + 4789 "00000010" // /* MW 7 */ + 4790 "10111010" // /* MW 6 */ + 4791 "01110001" // /* MW 5 */ + 4792 "01101111" // /* MW 4 */ + 4793 "10000010" // /* MW 3 */ + 4794 "10010000" // /* MW 2 */ + 4795 "00000001" // /* MW 1 */ +.delay_slot + 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100111" // /* MW 3 */ + 4798 "10001010" // /* MW 2 */ + 4799 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4807 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 + 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4809 "11111110" // /* MW 5 */ + 4810 "00111111" // /* MW 4 */ + 4811 "11111010" // /* MW 3 */ + 4812 "11111111" // /* MW 2 */ + 4813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 first + 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "01000100" // /* MW 3 */ + 4816 "10100101" // /* MW 2 */ + 4817 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4819 "00011100" // /* MW 13 */ + 4820 "00000000" // /* MW 12 */ + 4821 "00000000" // /* MW 11 */ + 4822 "01010111" // /* MW 10 */ + 4823 "00011010" // /* MW 9 */ + 4824 "01000000" // /* MW 8 */ + 4825 "00000000" // /* MW 7 */ + 4826 "00000000" // /* MW 6 */ + 4827 "10100011" // /* MW 5 */ + 4828 "11101100" // /* MW 4 */ + 4829 "11110110" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.src_ref 2 "conv2d_bf16.h" 1841 65 first +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16.h" 1849 12 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4833 "01011000" // /* MW 9 */ + 4834 "11111101" // /* MW 8 */ + 4835 "11001111" // /* MW 7 */ + 4836 "10000010" // /* MW 6 */ + 4837 "01000100" // /* MW 5 */ + 4838 "00100111" // /* MW 4 */ + 4839 "11010000" // /* MW 3 */ + 4840 "11010010" // /* MW 2 */ + 4841 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1841 34 +.src_ref 2 "conv2d_bf16.h" 1842 36 +.src_ref 2 "conv2d_bf16.h" 1842 67 +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4843 "01011000" // /* MW 9 */ + 4844 "00100100" // /* MW 8 */ + 4845 "00000000" // /* MW 7 */ + 4846 "11111010" // /* MW 6 */ + 4847 "01011111" // /* MW 5 */ + 4848 "00101001" // /* MW 4 */ + 4849 "00000000" // /* MW 3 */ + 4850 "01010010" // /* MW 2 */ + 4851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 67 first +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4853 "01011000" // /* MW 11 */ + 4854 "11001100" // /* MW 10 */ + 4855 "00000111" // /* MW 9 */ + 4856 "00100110" // /* MW 8 */ + 4857 "01101011" // /* MW 7 */ + 4858 "10101011" // /* MW 6 */ + 4859 "00101101" // /* MW 5 */ + 4860 "11010000" // /* MW 4 */ + 4861 "11010111" // /* MW 3 */ + 4862 "01011010" // /* MW 2 */ + 4863 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1845 80 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4865 "01011000" // /* MW 11 */ + 4866 "11000100" // /* MW 10 */ + 4867 "00000000" // /* MW 9 */ + 4868 "11101010" // /* MW 8 */ + 4869 "00110111" // /* MW 7 */ + 4870 "10111111" // /* MW 6 */ + 4871 "11010101" // /* MW 5 */ + 4872 "11011110" // /* MW 4 */ + 4873 "11010111" // /* MW 3 */ + 4874 "01011110" // /* MW 2 */ + 4875 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 63 first + 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4877 "10110110" // /* MW 3 */ + 4878 "11111111" // /* MW 2 */ + 4879 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 first + 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4881 "11110110" // /* MW 3 */ + 4882 "10001011" // /* MW 2 */ + 4883 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4885 "10110110" // /* MW 3 */ + 4886 "00000110" // /* MW 2 */ + 4887 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1841 34 first + 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4889 "01011011" // /* MW 5 */ + 4890 "00100110" // /* MW 4 */ + 4891 "11011010" // /* MW 3 */ + 4892 "11010010" // /* MW 2 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4895 "11010110" // /* MW 3 */ + 4896 "00000111" // /* MW 2 */ + 4897 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first + 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4899 "00101101" // /* MW 3 */ + 4900 "10101101" // /* MW 2 */ + 4901 "00010101" // /* MW 1 */ + 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4903 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 80 first + 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00111110" // /* MW 3 */ + 4906 "01100111" // /* MW 2 */ + 4907 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 first + 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4909 "00011000" // /* MW 3 */ + 4910 "11100011" // /* MW 2 */ + 4911 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 12 + 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */ + 4913 "00000001" // /* MW 5 */ + 4914 "01000000" // /* MW 4 */ + 4915 "11010000" // /* MW 3 */ + 4916 "00001001" // /* MW 2 */ + 4917 "10001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first +.src_ref 2 "conv2d_bf16.h" 1842 75 first +.delay_slot + 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4919 "10110010" // /* MW 5 */ + 4920 "10110101" // /* MW 4 */ + 4921 "10111010" // /* MW 3 */ + 4922 "10100101" // /* MW 2 */ + 4923 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4925 "10110010" // /* MW 5 */ + 4926 "10010101" // /* MW 4 */ + 4927 "10110000" // /* MW 3 */ + 4928 "01100101" // /* MW 2 */ + 4929 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1841 34 first +.delay_slot + 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4931 "10100000" // /* MW 7 */ + 4932 "01101000" // /* MW 6 */ + 4933 "11001010" // /* MW 5 */ + 4934 "00000001" // /* MW 4 */ + 4935 "10110000" // /* MW 3 */ + 4936 "10000100" // /* MW 2 */ + 4937 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4939 "10000000" // /* MW 3 */ + 4940 "11010000" // /* MW 2 */ + 4941 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4943 "11111001" // /* MW 3 */ + 4944 "01101010" // /* MW 2 */ + 4945 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4947 "11010000" // /* MW 5 */ + 4948 "11001000" // /* MW 4 */ + 4949 "11001110" // /* MW 3 */ + 4950 "00000111" // /* MW 2 */ + 4951 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 first + 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4953 "10000000" // /* MW 5 */ + 4954 "10110100" // /* MW 4 */ + 4955 "01010000" // /* MW 3 */ + 4956 "11000100" // /* MW 2 */ + 4957 "11100000" // /* MW 1 */ + 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4961 "00000000" // /* MW 5 */ + 4962 "00100000" // /* MW 4 */ + 4963 "00001010" // /* MW 3 */ + 4964 "01111111" // /* MW 2 */ + 4965 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4967 "10010001" // /* MW 3 */ + 4968 "00000010" // /* MW 2 */ + 4969 "00011000" // /* MW 1 */ + 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4971 "11100000" // /* MW 3 */ + 4972 "00010101" // /* MW 2 */ + 4973 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4975 "01011111" // /* MW 3 */ + 4976 "01101010" // /* MW 2 */ + 4977 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4979 "00100101" // /* MW 5 */ + 4980 "00000001" // /* MW 4 */ + 4981 "11100000" // /* MW 3 */ + 4982 "11000110" // /* MW 2 */ + 4983 "11100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4985 "10000000" // /* MW 3 */ + 4986 "01111010" // /* MW 2 */ + 4987 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4989 "00010110" // /* MW 3 */ + 4990 "01000000" // /* MW 2 */ + 4991 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4993 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4995 "00000001" // /* MW 3 */ + 4996 "01000001" // /* MW 2 */ + 4997 "00011100" // /* MW 1 */ + 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4999 "00000000" // /* MW 1 */ + 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5001 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5003 "00110010" // /* MW 3 */ + 5004 "00000110" // /* MW 2 */ + 5005 "00000111" // /* MW 1 */ + 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5007 "00000000" // /* MW 1 */ + 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5009 "00000000" // /* MW 1 */ + 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5011 "00000000" // /* MW 1 */ + 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5013 "00000000" // /* MW 1 */ + 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5015 "00000000" // /* MW 1 */ + 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5017 "00000000" // /* MW 1 */ + 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "01101011" // /* MW 5 */ + 5020 "10100100" // /* MW 4 */ + 5021 "11111111" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.src_ref 2 "conv2d_bf16.h" 881 76 +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5025 "00010000" // /* MW 11 */ + 5026 "00110100" // /* MW 10 */ + 5027 "10110010" // /* MW 9 */ + 5028 "11110001" // /* MW 8 */ + 5029 "00000001" // /* MW 7 */ + 5030 "00000000" // /* MW 6 */ + 5031 "00001011" // /* MW 5 */ + 5032 "10001110" // /* MW 4 */ + 5033 "10000001" // /* MW 3 */ + 5034 "10010000" // /* MW 2 */ + 5035 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.src_ref 2 "conv2d_bf16.h" 876 51 first +.src_ref 2 "conv2d_bf16.h" 881 76 first +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5037 "01011000" // /* MW 11 */ + 5038 "00001011" // /* MW 10 */ + 5039 "01101000" // /* MW 9 */ + 5040 "10010010" // /* MW 8 */ + 5041 "00011001" // /* MW 7 */ + 5042 "00110011" // /* MW 6 */ + 5043 "10001011" // /* MW 5 */ + 5044 "10000100" // /* MW 4 */ + 5045 "01010000" // /* MW 3 */ + 5046 "01000101" // /* MW 2 */ + 5047 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5049 "01111000" // /* MW 9 */ + 5050 "01100000" // /* MW 8 */ + 5051 "10101010" // /* MW 7 */ + 5052 "01100101" // /* MW 6 */ + 5053 "10111001" // /* MW 5 */ + 5054 "00111001" // /* MW 4 */ + 5055 "00000000" // /* MW 3 */ + 5056 "10010110" // /* MW 2 */ + 5057 "01100001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 883 4 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5059 "01100111" // /* MW 3 */ + 5060 "00000110" // /* MW 2 */ + 5061 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5063 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 884 4 first +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5065 "00000001" // /* MW 5 */ + 5066 "00000000" // /* MW 4 */ + 5067 "00110000" // /* MW 3 */ + 5068 "00001000" // /* MW 2 */ + 5069 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5071 "00101101" // /* MW 3 */ + 5072 "01101011" // /* MW 2 */ + 5073 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.delay_slot + 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5075 "11111001" // /* MW 3 */ + 5076 "01101010" // /* MW 2 */ + 5077 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 first +.delay_slot + 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5079 "00010001" // /* MW 3 */ + 5080 "01100011" // /* MW 2 */ + 5081 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.delay_slot + 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5083 "00110101" // /* MW 5 */ + 5084 "00101100" // /* MW 4 */ + 5085 "10111010" // /* MW 3 */ + 5086 "01100101" // /* MW 2 */ + 5087 "10001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.delay_slot + 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5089 "00000000" // /* MW 15 */ + 5090 "00000000" // /* MW 14 */ + 5091 "10101000" // /* MW 13 */ + 5092 "11100010" // /* MW 12 */ + 5093 "10001011" // /* MW 11 */ + 5094 "00010001" // /* MW 10 */ + 5095 "10011010" // /* MW 9 */ + 5096 "00101100" // /* MW 8 */ + 5097 "01011011" // /* MW 7 */ + 5098 "00000001" // /* MW 6 */ + 5099 "00100000" // /* MW 5 */ + 5100 "00000000" // /* MW 4 */ + 5101 "11110000" // /* MW 3 */ + 5102 "00101100" // /* MW 2 */ + 5103 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.return_address + 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5105 "10011001" // /* MW 3 */ + 5106 "11010100" // /* MW 2 */ + 5107 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 first +.no_stack_arguments + 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5109 "00000001" // /* MW 5 */ + 5110 "00000000" // /* MW 4 */ + 5111 "00110000" // /* MW 3 */ + 5112 "00001000" // /* MW 2 */ + 5113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5117 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.delay_slot + 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5119 "10010000" // /* MW 3 */ + 5120 "01010110" // /* MW 2 */ + 5121 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5123 "10100000" // /* MW 3 */ + 5124 "01100110" // /* MW 2 */ + 5125 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5127 "00000000" // /* MW 9 */ + 5128 "00000000" // /* MW 8 */ + 5129 "00000000" // /* MW 7 */ + 5130 "00000000" // /* MW 6 */ + 5131 "00001011" // /* MW 5 */ + 5132 "10001111" // /* MW 4 */ + 5133 "11110000" // /* MW 3 */ + 5134 "00101100" // /* MW 2 */ + 5135 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.return_address + 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5137 "00001000" // /* MW 9 */ + 5138 "01100011" // /* MW 8 */ + 5139 "00110011" // /* MW 7 */ + 5140 "11101010" // /* MW 6 */ + 5141 "00110111" // /* MW 5 */ + 5142 "00000001" // /* MW 4 */ + 5143 "10000000" // /* MW 3 */ + 5144 "10011010" // /* MW 2 */ + 5145 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 886 4 +.src_ref 2 "conv2d_bf16.h" 896 23 first +.src_ref 2 "conv2d_bf16.h" 1123 71 + 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5147 "01100010" // /* MW 5 */ + 5148 "00110100" // /* MW 4 */ + 5149 "11010000" // /* MW 3 */ + 5150 "10000100" // /* MW 2 */ + 5151 "10000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "01000110" // /* MW 3 */ + 5154 "00011100" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5157 "00100110" // /* MW 3 */ + 5158 "00011110" // /* MW 2 */ + 5159 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "01000110" // /* MW 3 */ + 5162 "00011110" // /* MW 2 */ + 5163 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5165 "00000110" // /* MW 3 */ + 5166 "00011100" // /* MW 2 */ + 5167 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5169 "01100110" // /* MW 3 */ + 5170 "00011100" // /* MW 2 */ + 5171 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5173 "01100110" // /* MW 3 */ + 5174 "00011110" // /* MW 2 */ + 5175 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 first + 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5177 "11010110" // /* MW 3 */ + 5178 "00011110" // /* MW 2 */ + 5179 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5181 "00110110" // /* MW 3 */ + 5182 "00011110" // /* MW 2 */ + 5183 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5185 "10010110" // /* MW 3 */ + 5186 "00011111" // /* MW 2 */ + 5187 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "10110110" // /* MW 3 */ + 5190 "00011110" // /* MW 2 */ + 5191 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "11110110" // /* MW 3 */ + 5194 "00011110" // /* MW 2 */ + 5195 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "10011110" // /* MW 3 */ + 5198 "00011101" // /* MW 2 */ + 5199 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5201 "00100110" // /* MW 3 */ + 5202 "00011101" // /* MW 2 */ + 5203 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5205 "10100110" // /* MW 3 */ + 5206 "00011100" // /* MW 2 */ + 5207 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5209 "11000110" // /* MW 3 */ + 5210 "00011100" // /* MW 2 */ + 5211 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5213 "10100110" // /* MW 3 */ + 5214 "00011110" // /* MW 2 */ + 5215 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5217 "11010110" // /* MW 3 */ + 5218 "00011111" // /* MW 2 */ + 5219 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5221 "10110110" // /* MW 3 */ + 5222 "00011111" // /* MW 2 */ + 5223 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5225 "11100110" // /* MW 3 */ + 5226 "00011100" // /* MW 2 */ + 5227 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5229 "01001010" // /* MW 3 */ + 5230 "11000010" // /* MW 2 */ + 5231 "00000100" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 + 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5233 "10010001" // /* MW 3 */ + 5234 "11010010" // /* MW 2 */ + 5235 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5237 "01010110" // /* MW 3 */ + 5238 "00000100" // /* MW 2 */ + 5239 "00000100" // /* MW 1 */ + 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5241 "00000000" // /* MW 1 */ + 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5243 "00000000" // /* MW 1 */ + 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5245 "00000000" // /* MW 1 */ + 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5247 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5249 "00101100" // /* MW 3 */ + 5250 "11100111" // /* MW 2 */ + 5251 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 12 + 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */ + 5253 "00000001" // /* MW 5 */ + 5254 "01000000" // /* MW 4 */ + 5255 "00010000" // /* MW 3 */ + 5256 "00001100" // /* MW 2 */ + 5257 "10011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 +.delay_slot + 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5259 "11010000" // /* MW 5 */ + 5260 "11001000" // /* MW 4 */ + 5261 "11000100" // /* MW 3 */ + 5262 "00000111" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 first +.delay_slot + 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "10100111" // /* MW 3 */ + 5266 "00000101" // /* MW 2 */ + 5267 "00000010" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5269 "01110010" // /* MW 3 */ + 5270 "11010001" // /* MW 2 */ + 5271 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 71 first + 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5277 "01011000" // /* MW 9 */ + 5278 "10000100" // /* MW 8 */ + 5279 "10000000" // /* MW 7 */ + 5280 "00111111" // /* MW 6 */ + 5281 "10111001" // /* MW 5 */ + 5282 "00011011" // /* MW 4 */ + 5283 "00100000" // /* MW 3 */ + 5284 "01000011" // /* MW 2 */ + 5285 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 +.src_ref 2 "conv2d_bf16.h" 1154 80 + 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5287 "01011000" // /* MW 9 */ + 5288 "00111100" // /* MW 8 */ + 5289 "00000000" // /* MW 7 */ + 5290 "00001010" // /* MW 6 */ + 5291 "00100000" // /* MW 5 */ + 5292 "00111101" // /* MW 4 */ + 5293 "00000000" // /* MW 3 */ + 5294 "00010011" // /* MW 2 */ + 5295 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5297 "01111000" // /* MW 9 */ + 5298 "11010000" // /* MW 8 */ + 5299 "11100100" // /* MW 7 */ + 5300 "00001011" // /* MW 6 */ + 5301 "10100000" // /* MW 5 */ + 5302 "00000001" // /* MW 4 */ + 5303 "10000000" // /* MW 3 */ + 5304 "00010100" // /* MW 2 */ + 5305 "11111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 746 83 + 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5307 "01111000" // /* MW 11 */ + 5308 "11000000" // /* MW 10 */ + 5309 "10100111" // /* MW 9 */ + 5310 "00000001" // /* MW 8 */ + 5311 "11010100" // /* MW 7 */ + 5312 "00011011" // /* MW 6 */ + 5313 "01001011" // /* MW 5 */ + 5314 "00011100" // /* MW 4 */ + 5315 "10000010" // /* MW 3 */ + 5316 "10011000" // /* MW 2 */ + 5317 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.src_ref 2 "conv2d_bf16.h" 1199 26 +.src_ref 2 "conv2d_bf16.h" 1200 26 +.src_ref 2 "conv2d_bf16.h" 1201 26 +.src_ref 2 "conv2d_bf16.h" 1202 26 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5319 "01011000" // /* MW 11 */ + 5320 "00000111" // /* MW 10 */ + 5321 "11101000" // /* MW 9 */ + 5322 "10001001" // /* MW 8 */ + 5323 "11110111" // /* MW 7 */ + 5324 "00000001" // /* MW 6 */ + 5325 "01001011" // /* MW 5 */ + 5326 "00011100" // /* MW 4 */ + 5327 "00100110" // /* MW 3 */ + 5328 "10010110" // /* MW 2 */ + 5329 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 12 +.src_ref 2 "conv2d_bf16.h" 1218 20 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5331 "00010000" // /* MW 9 */ + 5332 "10100000" // /* MW 8 */ + 5333 "00110010" // /* MW 7 */ + 5334 "00000101" // /* MW 6 */ + 5335 "00000000" // /* MW 5 */ + 5336 "00000000" // /* MW 4 */ + 5337 "00100000" // /* MW 3 */ + 5338 "11001010" // /* MW 2 */ + 5339 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 749 26 +.src_ref 2 "conv2d_bf16.h" 750 26 +.src_ref 2 "conv2d_bf16.h" 751 26 +.src_ref 2 "conv2d_bf16.h" 752 26 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5341 "01011000" // /* MW 9 */ + 5342 "00001100" // /* MW 8 */ + 5343 "10001011" // /* MW 7 */ + 5344 "00010010" // /* MW 6 */ + 5345 "01101001" // /* MW 5 */ + 5346 "00110100" // /* MW 4 */ + 5347 "00100000" // /* MW 3 */ + 5348 "00110110" // /* MW 2 */ + 5349 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1873 + 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5351 "01011000" // /* MW 11 */ + 5352 "00000000" // /* MW 10 */ + 5353 "00001000" // /* MW 9 */ + 5354 "00001011" // /* MW 8 */ + 5355 "10010000" // /* MW 7 */ + 5356 "00000001" // /* MW 6 */ + 5357 "00100000" // /* MW 5 */ + 5358 "11010111" // /* MW 4 */ + 5359 "00101001" // /* MW 3 */ + 5360 "10000111" // /* MW 2 */ + 5361 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5363 "00010110" // /* MW 3 */ + 5364 "10001000" // /* MW 2 */ + 5365 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5367 "00100110" // /* MW 3 */ + 5368 "10101011" // /* MW 2 */ + 5369 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "01110110" // /* MW 3 */ + 5372 "00101111" // /* MW 2 */ + 5373 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 80 first + 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "10000110" // /* MW 3 */ + 5376 "00011110" // /* MW 2 */ + 5377 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 80 first + 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "11000110" // /* MW 3 */ + 5380 "10001010" // /* MW 2 */ + 5381 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 87 first + 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5383 "00000110" // /* MW 3 */ + 5384 "10011110" // /* MW 2 */ + 5385 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 83 first + 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5387 "00110110" // /* MW 3 */ + 5388 "00011100" // /* MW 2 */ + 5389 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 83 first +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5391 "00000010" // /* MW 5 */ + 5392 "00000110" // /* MW 4 */ + 5393 "11011101" // /* MW 3 */ + 5394 "00000010" // /* MW 2 */ + 5395 "10011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 66 first + 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "01110110" // /* MW 3 */ + 5398 "00010100" // /* MW 2 */ + 5399 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1206 63 first + 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5401 "10010110" // /* MW 3 */ + 5402 "00000100" // /* MW 2 */ + 5403 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 + 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5405 "00000000" // /* MW 3 */ + 5406 "11011010" // /* MW 2 */ + 5407 "00011001" // /* MW 1 */ + 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "10011001" // /* MW 3 */ + 5410 "10000011" // /* MW 2 */ + 5411 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 + 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5413 "00000000" // /* MW 3 */ + 5414 "00011011" // /* MW 2 */ + 5415 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5417 "10011001" // /* MW 3 */ + 5418 "00001101" // /* MW 2 */ + 5419 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 first + 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "11100000" // /* MW 3 */ + 5422 "00000011" // /* MW 2 */ + 5423 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 first + 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5425 "11000000" // /* MW 5 */ + 5426 "00010000" // /* MW 4 */ + 5427 "11101110" // /* MW 3 */ + 5428 "11111111" // /* MW 2 */ + 5429 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5431 "01111110" // /* MW 9 */ + 5432 "10000000" // /* MW 8 */ + 5433 "10000010" // /* MW 7 */ + 5434 "00000000" // /* MW 6 */ + 5435 "00010000" // /* MW 5 */ + 5436 "00000000" // /* MW 4 */ + 5437 "11110000" // /* MW 3 */ + 5438 "00101100" // /* MW 2 */ + 5439 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1147 31 first +.src_ref 2 "conv2d_bf16.h" 1187 40 first +.loop_nesting 1 + 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5441 "01111000" // /* MW 11 */ + 5442 "10010000" // /* MW 10 */ + 5443 "00110011" // /* MW 9 */ + 5444 "11101100" // /* MW 8 */ + 5445 "11100111" // /* MW 7 */ + 5446 "00000100" // /* MW 6 */ + 5447 "00001011" // /* MW 5 */ + 5448 "10000101" // /* MW 4 */ + 5449 "01110001" // /* MW 3 */ + 5450 "10000101" // /* MW 2 */ + 5451 "11000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1188 50 first + 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5453 "10100000" // /* MW 11 */ + 5454 "10011000" // /* MW 10 */ + 5455 "00110011" // /* MW 9 */ + 5456 "00000010" // /* MW 8 */ + 5457 "01001011" // /* MW 7 */ + 5458 "00001110" // /* MW 6 */ + 5459 "00101011" // /* MW 5 */ + 5460 "00101000" // /* MW 4 */ + 5461 "01111000" // /* MW 3 */ + 5462 "10000001" // /* MW 2 */ + 5463 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first + 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5465 "01110000" // /* MW 11 */ + 5466 "10000000" // /* MW 10 */ + 5467 "11000110" // /* MW 9 */ + 5468 "00000011" // /* MW 8 */ + 5469 "01001011" // /* MW 7 */ + 5470 "01011010" // /* MW 6 */ + 5471 "00101111" // /* MW 5 */ + 5472 "00101000" // /* MW 4 */ + 5473 "01111000" // /* MW 3 */ + 5474 "00111001" // /* MW 2 */ + 5475 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1149 31 first + 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5477 "01110000" // /* MW 11 */ + 5478 "00000000" // /* MW 10 */ + 5479 "10000010" // /* MW 9 */ + 5480 "00000001" // /* MW 8 */ + 5481 "00001011" // /* MW 7 */ + 5482 "01010011" // /* MW 6 */ + 5483 "00101011" // /* MW 5 */ + 5484 "00000011" // /* MW 4 */ + 5485 "01110100" // /* MW 3 */ + 5486 "00001101" // /* MW 2 */ + 5487 "11011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 743 30 first + 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5489 "01011110" // /* MW 9 */ + 5490 "00000000" // /* MW 8 */ + 5491 "11000000" // /* MW 7 */ + 5492 "00000001" // /* MW 6 */ + 5493 "11010100" // /* MW 5 */ + 5494 "00010010" // /* MW 4 */ + 5495 "01110100" // /* MW 3 */ + 5496 "01000001" // /* MW 2 */ + 5497 "01110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1152 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first + 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5499 "00010000" // /* MW 11 */ + 5500 "01000000" // /* MW 10 */ + 5501 "10111011" // /* MW 9 */ + 5502 "00000101" // /* MW 8 */ + 5503 "00000000" // /* MW 7 */ + 5504 "00000000" // /* MW 6 */ + 5505 "00101000" // /* MW 5 */ + 5506 "00101000" // /* MW 4 */ + 5507 "01111000" // /* MW 3 */ + 5508 "10010101" // /* MW 2 */ + 5509 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 1154 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 + 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5511 "00010000" // /* MW 11 */ + 5512 "00101000" // /* MW 10 */ + 5513 "01111011" // /* MW 9 */ + 5514 "00000100" // /* MW 8 */ + 5515 "00000000" // /* MW 7 */ + 5516 "00000000" // /* MW 6 */ + 5517 "00101000" // /* MW 5 */ + 5518 "00101000" // /* MW 4 */ + 5519 "01111000" // /* MW 3 */ + 5520 "00011101" // /* MW 2 */ + 5521 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first + 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5523 "00101000" // /* MW 5 */ + 5524 "00000001" // /* MW 4 */ + 5525 "01110100" // /* MW 3 */ + 5526 "10110101" // /* MW 2 */ + 5527 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1157 31 first + 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5529 "00101000" // /* MW 5 */ + 5530 "00100010" // /* MW 4 */ + 5531 "01111000" // /* MW 3 */ + 5532 "10100101" // /* MW 2 */ + 5533 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1159 31 first + 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5535 "00101000" // /* MW 5 */ + 5536 "00101000" // /* MW 4 */ + 5537 "01111000" // /* MW 3 */ + 5538 "00101101" // /* MW 2 */ + 5539 "11011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5541 "00101000" // /* MW 5 */ + 5542 "00101000" // /* MW 4 */ + 5543 "01111000" // /* MW 3 */ + 5544 "10000001" // /* MW 2 */ + 5545 "00100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1192 29 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5547 "00101000" // /* MW 5 */ + 5548 "00000001" // /* MW 4 */ + 5549 "01110100" // /* MW 3 */ + 5550 "10111101" // /* MW 2 */ + 5551 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "11101110" // /* MW 9 */ + 5554 "11000011" // /* MW 8 */ + 5555 "10011010" // /* MW 7 */ + 5556 "00000010" // /* MW 6 */ + 5557 "00010100" // /* MW 5 */ + 5558 "00010001" // /* MW 4 */ + 5559 "01110100" // /* MW 3 */ + 5560 "11001101" // /* MW 2 */ + 5561 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1162 81 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5563 "11100000" // /* MW 11 */ + 5564 "11000001" // /* MW 10 */ + 5565 "10011010" // /* MW 9 */ + 5566 "00000001" // /* MW 8 */ + 5567 "10001011" // /* MW 7 */ + 5568 "10011000" // /* MW 6 */ + 5569 "00101100" // /* MW 5 */ + 5570 "00101000" // /* MW 4 */ + 5571 "01111000" // /* MW 3 */ + 5572 "11000101" // /* MW 2 */ + 5573 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5575 "11101001" // /* MW 9 */ + 5576 "00010100" // /* MW 8 */ + 5577 "01001000" // /* MW 7 */ + 5578 "00011101" // /* MW 6 */ + 5579 "01010100" // /* MW 5 */ + 5580 "00000000" // /* MW 4 */ + 5581 "01110011" // /* MW 3 */ + 5582 "10000001" // /* MW 2 */ + 5583 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5585 "11101001" // /* MW 13 */ + 5586 "00101100" // /* MW 12 */ + 5587 "01001001" // /* MW 11 */ + 5588 "00000111" // /* MW 10 */ + 5589 "01011000" // /* MW 9 */ + 5590 "01011100" // /* MW 8 */ + 5591 "00000000" // /* MW 7 */ + 5592 "00000000" // /* MW 6 */ + 5593 "10010110" // /* MW 5 */ + 5594 "10010100" // /* MW 4 */ + 5595 "01110110" // /* MW 3 */ + 5596 "00110101" // /* MW 2 */ + 5597 "11001111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1162 81 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5599 "00001001" // /* MW 13 */ + 5600 "01010101" // /* MW 12 */ + 5601 "01001010" // /* MW 11 */ + 5602 "00111110" // /* MW 10 */ + 5603 "10010000" // /* MW 9 */ + 5604 "01001100" // /* MW 8 */ + 5605 "00000000" // /* MW 7 */ + 5606 "00000000" // /* MW 6 */ + 5607 "10010110" // /* MW 5 */ + 5608 "00111000" // /* MW 4 */ + 5609 "01111010" // /* MW 3 */ + 5610 "10111101" // /* MW 2 */ + 5611 "10000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1199 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5613 "00111101" // /* MW 13 */ + 5614 "01100000" // /* MW 12 */ + 5615 "11111000" // /* MW 11 */ + 5616 "00011110" // /* MW 10 */ + 5617 "10010000" // /* MW 9 */ + 5618 "01010100" // /* MW 8 */ + 5619 "00000000" // /* MW 7 */ + 5620 "00000000" // /* MW 6 */ + 5621 "10010110" // /* MW 5 */ + 5622 "10011000" // /* MW 4 */ + 5623 "01110100" // /* MW 3 */ + 5624 "00000001" // /* MW 2 */ + 5625 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1200 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5627 "00111101" // /* MW 7 */ + 5628 "01100100" // /* MW 6 */ + 5629 "11111001" // /* MW 5 */ + 5630 "00000100" // /* MW 4 */ + 5631 "01110000" // /* MW 3 */ + 5632 "10000001" // /* MW 2 */ + 5633 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1201 26 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5635 "00111101" // /* MW 7 */ + 5636 "10001000" // /* MW 6 */ + 5637 "11111010" // /* MW 5 */ + 5638 "00000100" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00001001" // /* MW 2 */ + 5641 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5643 "00001001" // /* MW 7 */ + 5644 "01101101" // /* MW 6 */ + 5645 "01001011" // /* MW 5 */ + 5646 "00000100" // /* MW 4 */ + 5647 "01110000" // /* MW 3 */ + 5648 "00000001" // /* MW 2 */ + 5649 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5651 "00101000" // /* MW 5 */ + 5652 "00000001" // /* MW 4 */ + 5653 "01110100" // /* MW 3 */ + 5654 "10000001" // /* MW 2 */ + 5655 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "00010100" // /* MW 3 */ + 5658 "00010001" // /* MW 2 */ + 5659 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1202 26 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5661 "00111101" // /* MW 11 */ + 5662 "10001100" // /* MW 10 */ + 5663 "11111011" // /* MW 9 */ + 5664 "10000010" // /* MW 8 */ + 5665 "01111101" // /* MW 7 */ + 5666 "01110010" // /* MW 6 */ + 5667 "00101101" // /* MW 5 */ + 5668 "00101000" // /* MW 4 */ + 5669 "01111000" // /* MW 3 */ + 5670 "00001001" // /* MW 2 */ + 5671 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00101001" // /* MW 9 */ + 5674 "00000110" // /* MW 8 */ + 5675 "10100000" // /* MW 7 */ + 5676 "00011101" // /* MW 6 */ + 5677 "00010100" // /* MW 5 */ + 5678 "00010100" // /* MW 4 */ + 5679 "01110100" // /* MW 3 */ + 5680 "00000001" // /* MW 2 */ + 5681 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5683 "00001001" // /* MW 13 */ + 5684 "01000110" // /* MW 12 */ + 5685 "10100010" // /* MW 11 */ + 5686 "00001111" // /* MW 10 */ + 5687 "10101010" // /* MW 9 */ + 5688 "01011000" // /* MW 8 */ + 5689 "00000000" // /* MW 7 */ + 5690 "00000000" // /* MW 6 */ + 5691 "00101000" // /* MW 5 */ + 5692 "00000001" // /* MW 4 */ + 5693 "01110100" // /* MW 3 */ + 5694 "10000001" // /* MW 2 */ + 5695 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5697 "01010001" // /* MW 15 */ + 5698 "00001001" // /* MW 14 */ + 5699 "11101101" // /* MW 13 */ + 5700 "00000011" // /* MW 12 */ + 5701 "11001001" // /* MW 11 */ + 5702 "00000000" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "00000000" // /* MW 8 */ + 5705 "01011011" // /* MW 7 */ + 5706 "00000001" // /* MW 6 */ + 5707 "00101000" // /* MW 5 */ + 5708 "00100010" // /* MW 4 */ + 5709 "11111000" // /* MW 3 */ + 5710 "00101100" // /* MW 2 */ + 5711 "00000000" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "01010000" // /* MW 15 */ + 5714 "00011011" // /* MW 14 */ + 5715 "11101101" // /* MW 13 */ + 5716 "00000001" // /* MW 12 */ + 5717 "01001001" // /* MW 11 */ + 5718 "00000001" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "00101000" // /* MW 5 */ + 5724 "00101000" // /* MW 4 */ + 5725 "01111000" // /* MW 3 */ + 5726 "00001001" // /* MW 2 */ + 5727 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00110001" // /* MW 15 */ + 5730 "00000000" // /* MW 14 */ + 5731 "01111101" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "01011011" // /* MW 7 */ + 5738 "00000001" // /* MW 6 */ + 5739 "00101000" // /* MW 5 */ + 5740 "00101000" // /* MW 4 */ + 5741 "01111000" // /* MW 3 */ + 5742 "00000001" // /* MW 2 */ + 5743 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00110000" // /* MW 15 */ + 5746 "00010010" // /* MW 14 */ + 5747 "01111101" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "01011011" // /* MW 7 */ + 5754 "00000001" // /* MW 6 */ + 5755 "00101000" // /* MW 5 */ + 5756 "00000001" // /* MW 4 */ + 5757 "01110100" // /* MW 3 */ + 5758 "10000001" // /* MW 2 */ + 5759 "00100010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "01010001" // /* MW 15 */ + 5762 "00001001" // /* MW 14 */ + 5763 "11101101" // /* MW 13 */ + 5764 "00000011" // /* MW 12 */ + 5765 "11001001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "01011011" // /* MW 7 */ + 5770 "00000001" // /* MW 6 */ + 5771 "00101000" // /* MW 5 */ + 5772 "00100010" // /* MW 4 */ + 5773 "11111000" // /* MW 3 */ + 5774 "00101100" // /* MW 2 */ + 5775 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5777 "00001001" // /* MW 13 */ + 5778 "01101010" // /* MW 12 */ + 5779 "10100011" // /* MW 11 */ + 5780 "00011110" // /* MW 10 */ + 5781 "10010000" // /* MW 9 */ + 5782 "01010100" // /* MW 8 */ + 5783 "00000000" // /* MW 7 */ + 5784 "00000000" // /* MW 6 */ + 5785 "10010110" // /* MW 5 */ + 5786 "10111100" // /* MW 4 */ + 5787 "01111100" // /* MW 3 */ + 5788 "00001001" // /* MW 2 */ + 5789 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5791 "00101001" // /* MW 13 */ + 5792 "00000110" // /* MW 12 */ + 5793 "10100000" // /* MW 11 */ + 5794 "00000111" // /* MW 10 */ + 5795 "00111000" // /* MW 9 */ + 5796 "01111100" // /* MW 8 */ + 5797 "00000000" // /* MW 7 */ + 5798 "00000000" // /* MW 6 */ + 5799 "10010110" // /* MW 5 */ + 5800 "00011100" // /* MW 4 */ + 5801 "01111110" // /* MW 3 */ + 5802 "00000001" // /* MW 2 */ + 5803 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5805 "00001001" // /* MW 9 */ + 5806 "01000110" // /* MW 8 */ + 5807 "10100010" // /* MW 7 */ + 5808 "11100100" // /* MW 6 */ + 5809 "00000000" // /* MW 5 */ + 5810 "01010101" // /* MW 4 */ + 5811 "01100001" // /* MW 3 */ + 5812 "10010001" // /* MW 2 */ + 5813 "01100001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5815 "00101001" // /* MW 9 */ + 5816 "00101010" // /* MW 8 */ + 5817 "10100001" // /* MW 7 */ + 5818 "11000100" // /* MW 6 */ + 5819 "00000111" // /* MW 5 */ + 5820 "10010010" // /* MW 4 */ + 5821 "01100001" // /* MW 3 */ + 5822 "11000001" // /* MW 2 */ + 5823 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5825 "00001001" // /* MW 9 */ + 5826 "01101010" // /* MW 8 */ + 5827 "10100011" // /* MW 7 */ + 5828 "11000100" // /* MW 6 */ + 5829 "00000011" // /* MW 5 */ + 5830 "10010010" // /* MW 4 */ + 5831 "01100010" // /* MW 3 */ + 5832 "10000001" // /* MW 2 */ + 5833 "11101011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1285 32 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5835 "00101001" // /* MW 11 */ + 5836 "00000110" // /* MW 10 */ + 5837 "10100000" // /* MW 9 */ + 5838 "11100110" // /* MW 8 */ + 5839 "00000000" // /* MW 7 */ + 5840 "10001111" // /* MW 6 */ + 5841 "00100010" // /* MW 5 */ + 5842 "01010111" // /* MW 4 */ + 5843 "01101111" // /* MW 3 */ + 5844 "10010001" // /* MW 2 */ + 5845 "10110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5847 "00001001" // /* MW 9 */ + 5848 "01000110" // /* MW 8 */ + 5849 "10100010" // /* MW 7 */ + 5850 "11100100" // /* MW 6 */ + 5851 "00000000" // /* MW 5 */ + 5852 "00000110" // /* MW 4 */ + 5853 "01100010" // /* MW 3 */ + 5854 "10010001" // /* MW 2 */ + 5855 "10010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5857 "00101001" // /* MW 7 */ + 5858 "00101010" // /* MW 6 */ + 5859 "10100001" // /* MW 5 */ + 5860 "11000110" // /* MW 4 */ + 5861 "00000011" // /* MW 3 */ + 5862 "10010010" // /* MW 2 */ + 5863 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5865 "00001001" // /* MW 7 */ + 5866 "01101010" // /* MW 6 */ + 5867 "10100011" // /* MW 5 */ + 5868 "11000110" // /* MW 4 */ + 5869 "00000111" // /* MW 3 */ + 5870 "10010010" // /* MW 2 */ + 5871 "00000001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 + 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5873 "00000000" // /* MW 3 */ + 5874 "10001011" // /* MW 2 */ + 5875 "00011111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 750 26 first + 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5877 "00101001" // /* MW 7 */ + 5878 "00101010" // /* MW 6 */ + 5879 "10100001" // /* MW 5 */ + 5880 "11100110" // /* MW 4 */ + 5881 "10100000" // /* MW 3 */ + 5882 "00001011" // /* MW 2 */ + 5883 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 749 26 first + 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5885 "00101001" // /* MW 7 */ + 5886 "00000110" // /* MW 6 */ + 5887 "10100000" // /* MW 5 */ + 5888 "11100110" // /* MW 4 */ + 5889 "10100000" // /* MW 3 */ + 5890 "10001000" // /* MW 2 */ + 5891 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 + 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5893 "00001001" // /* MW 9 */ + 5894 "01101010" // /* MW 8 */ + 5895 "10100011" // /* MW 7 */ + 5896 "11100110" // /* MW 6 */ + 5897 "00000000" // /* MW 5 */ + 5898 "00000101" // /* MW 4 */ + 5899 "00100011" // /* MW 3 */ + 5900 "11110111" // /* MW 2 */ + 5901 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 first + 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5903 "00001001" // /* MW 11 */ + 5904 "01000110" // /* MW 10 */ + 5905 "10100010" // /* MW 9 */ + 5906 "11100110" // /* MW 8 */ + 5907 "10100000" // /* MW 7 */ + 5908 "10000010" // /* MW 6 */ + 5909 "00100101" // /* MW 5 */ + 5910 "11010111" // /* MW 4 */ + 5911 "01101110" // /* MW 3 */ + 5912 "10001001" // /* MW 2 */ + 5913 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 + 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5915 "01110000" // /* MW 7 */ + 5916 "10000000" // /* MW 6 */ + 5917 "11000101" // /* MW 5 */ + 5918 "00000011" // /* MW 4 */ + 5919 "01100000" // /* MW 3 */ + 5920 "10001001" // /* MW 2 */ + 5921 "01100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5923 "01110000" // /* MW 7 */ + 5924 "00000000" // /* MW 6 */ + 5925 "10000001" // /* MW 5 */ + 5926 "00000001" // /* MW 4 */ + 5927 "01100000" // /* MW 3 */ + 5928 "01000001" // /* MW 2 */ + 5929 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5931 "01110000" // /* MW 7 */ + 5932 "01010000" // /* MW 6 */ + 5933 "10000111" // /* MW 5 */ + 5934 "00000000" // /* MW 4 */ + 5935 "11000000" // /* MW 3 */ + 5936 "00010010" // /* MW 2 */ + 5937 "10110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5939 "01110000" // /* MW 7 */ + 5940 "10010000" // /* MW 6 */ + 5941 "11000111" // /* MW 5 */ + 5942 "00000010" // /* MW 4 */ + 5943 "11000000" // /* MW 3 */ + 5944 "00000010" // /* MW 2 */ + 5945 "10100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "01110110" // /* MW 9 */ + 5948 "01100000" // /* MW 8 */ + 5949 "11001000" // /* MW 7 */ + 5950 "00000001" // /* MW 6 */ + 5951 "10010000" // /* MW 5 */ + 5952 "00111011" // /* MW 4 */ + 5953 "01100001" // /* MW 3 */ + 5954 "10010001" // /* MW 2 */ + 5955 "00010011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5957 "01110000" // /* MW 7 */ + 5958 "00000000" // /* MW 6 */ + 5959 "10000011" // /* MW 5 */ + 5960 "00000000" // /* MW 4 */ + 5961 "11000000" // /* MW 3 */ + 5962 "00001010" // /* MW 2 */ + 5963 "01100010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1218 20 first +.src_ref 2 "conv2d_bf16.h" 1287 37 first + 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */ + 5965 "01100000" // /* MW 11 */ + 5966 "00000000" // /* MW 10 */ + 5967 "00000000" // /* MW 9 */ + 5968 "11111010" // /* MW 8 */ + 5969 "00000010" // /* MW 7 */ + 5970 "00100100" // /* MW 6 */ + 5971 "00100000" // /* MW 5 */ + 5972 "01010111" // /* MW 4 */ + 5973 "11000000" // /* MW 3 */ + 5974 "00100010" // /* MW 2 */ + 5975 "01010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 738 8 +.delay_slot + 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5977 "01110000" // /* MW 7 */ + 5978 "01100000" // /* MW 6 */ + 5979 "10101001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "11000000" // /* MW 3 */ + 5982 "00011010" // /* MW 2 */ + 5983 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5985 "01110000" // /* MW 7 */ + 5986 "11000000" // /* MW 6 */ + 5987 "10100111" // /* MW 5 */ + 5988 "00000011" // /* MW 4 */ + 5989 "11000000" // /* MW 3 */ + 5990 "00110010" // /* MW 2 */ + 5991 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5993 "01110110" // /* MW 9 */ + 5994 "01100000" // /* MW 8 */ + 5995 "10110101" // /* MW 7 */ + 5996 "00000000" // /* MW 6 */ + 5997 "10010000" // /* MW 5 */ + 5998 "00101011" // /* MW 4 */ + 5999 "11000101" // /* MW 3 */ + 6000 "00111010" // /* MW 2 */ + 6001 "00010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.delay_slot + 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6003 "01110000" // /* MW 7 */ + 6004 "10000000" // /* MW 6 */ + 6005 "11000010" // /* MW 5 */ + 6006 "00000010" // /* MW 4 */ + 6007 "11000000" // /* MW 3 */ + 6008 "00101010" // /* MW 2 */ + 6009 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.delay_slot + 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6011 "01110000" // /* MW 7 */ + 6012 "11000000" // /* MW 6 */ + 6013 "01001101" // /* MW 5 */ + 6014 "00000000" // /* MW 4 */ + 6015 "01100000" // /* MW 3 */ + 6016 "10001001" // /* MW 2 */ + 6017 "11100001" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "11101100" // /* MW 3 */ + 6020 "11011100" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "11101100" // /* MW 3 */ + 6024 "10111100" // /* MW 2 */ + 6025 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6027 "01110000" // /* MW 7 */ + 6028 "01110110" // /* MW 6 */ + 6029 "10101010" // /* MW 5 */ + 6030 "00000010" // /* MW 4 */ + 6031 "01100000" // /* MW 3 */ + 6032 "01011010" // /* MW 2 */ + 6033 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6035 "01110000" // /* MW 7 */ + 6036 "01110110" // /* MW 6 */ + 6037 "11011010" // /* MW 5 */ + 6038 "00000001" // /* MW 4 */ + 6039 "01100000" // /* MW 3 */ + 6040 "10111010" // /* MW 2 */ + 6041 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */ + 6043 "00100001" // /* MW 9 */ + 6044 "00000000" // /* MW 8 */ + 6045 "00000000" // /* MW 7 */ + 6046 "11111110" // /* MW 6 */ + 6047 "00000010" // /* MW 5 */ + 6048 "00000000" // /* MW 4 */ + 6049 "01100000" // /* MW 3 */ + 6050 "11010010" // /* MW 2 */ + 6051 "00100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6053 "01110000" // /* MW 7 */ + 6054 "01110110" // /* MW 6 */ + 6055 "10100010" // /* MW 5 */ + 6056 "00000010" // /* MW 4 */ + 6057 "01100000" // /* MW 3 */ + 6058 "10111010" // /* MW 2 */ + 6059 "00100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "11101100" // /* MW 3 */ + 6062 "10001100" // /* MW 2 */ + 6063 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6065 "01110000" // /* MW 7 */ + 6066 "01110110" // /* MW 6 */ + 6067 "10010110" // /* MW 5 */ + 6068 "00000010" // /* MW 4 */ + 6069 "01100000" // /* MW 3 */ + 6070 "11010010" // /* MW 2 */ + 6071 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "01110000" // /* MW 7 */ + 6074 "01110110" // /* MW 6 */ + 6075 "10001010" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "01100000" // /* MW 3 */ + 6078 "10111010" // /* MW 2 */ + 6079 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6081 "00000000" // /* MW 15 */ + 6082 "00000000" // /* MW 14 */ + 6083 "01111000" // /* MW 13 */ + 6084 "10100101" // /* MW 12 */ + 6085 "00000001" // /* MW 11 */ + 6086 "00000000" // /* MW 10 */ + 6087 "00000000" // /* MW 9 */ + 6088 "00000000" // /* MW 8 */ + 6089 "10010011" // /* MW 7 */ + 6090 "10100010" // /* MW 6 */ + 6091 "00100100" // /* MW 5 */ + 6092 "00000000" // /* MW 4 */ + 6093 "11110000" // /* MW 3 */ + 6094 "00101100" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 +.src_ref 4 "vector.hpp" 1152 43 + 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6097 "10100011" // /* MW 3 */ + 6098 "11100000" // /* MW 2 */ + 6099 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6101 "11100011" // /* MW 3 */ + 6102 "00010100" // /* MW 2 */ + 6103 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "00100011" // /* MW 3 */ + 6106 "00000100" // /* MW 2 */ + 6107 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6109 "01100011" // /* MW 3 */ + 6110 "00010100" // /* MW 2 */ + 6111 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6113 "00010011" // /* MW 3 */ + 6114 "00000110" // /* MW 2 */ + 6115 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6117 "11100011" // /* MW 3 */ + 6118 "00010101" // /* MW 2 */ + 6119 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6121 "01110000" // /* MW 7 */ + 6122 "10100101" // /* MW 6 */ + 6123 "00000001" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "01100000" // /* MW 3 */ + 6126 "00100100" // /* MW 2 */ + 6127 "10010100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1143 12 first + 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6129 "01011000" // /* MW 11 */ + 6130 "00000000" // /* MW 10 */ + 6131 "01000000" // /* MW 9 */ + 6132 "00000001" // /* MW 8 */ + 6133 "00110101" // /* MW 7 */ + 6134 "00000110" // /* MW 6 */ + 6135 "00100000" // /* MW 5 */ + 6136 "01010111" // /* MW 4 */ + 6137 "01101111" // /* MW 3 */ + 6138 "10010010" // /* MW 2 */ + 6139 "11100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.delay_slot + 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6141 "10000000" // /* MW 3 */ + 6142 "01000100" // /* MW 2 */ + 6143 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.delay_slot + 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6145 "10100000" // /* MW 3 */ + 6146 "01001001" // /* MW 2 */ + 6147 "00011010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6149 "00000001" // /* MW 5 */ + 6150 "00011110" // /* MW 4 */ + 6151 "00000101" // /* MW 3 */ + 6152 "01110010" // /* MW 2 */ + 6153 "11101011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.delay_slot + 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "10000000" // /* MW 3 */ + 6156 "01001110" // /* MW 2 */ + 6157 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */ + 6161 "00000000" // /* MW 5 */ + 6162 "00000000" // /* MW 4 */ + 6163 "01011000" // /* MW 3 */ + 6164 "00001101" // /* MW 2 */ + 6165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 1364 80 +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6177 "01111000" // /* MW 11 */ + 6178 "10010000" // /* MW 10 */ + 6179 "10110011" // /* MW 9 */ + 6180 "00001000" // /* MW 8 */ + 6181 "11100001" // /* MW 7 */ + 6182 "00000100" // /* MW 6 */ + 6183 "10001011" // /* MW 5 */ + 6184 "00001100" // /* MW 4 */ + 6185 "00100010" // /* MW 3 */ + 6186 "01111110" // /* MW 2 */ + 6187 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1369 80 + 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6189 "01111000" // /* MW 11 */ + 6190 "01000000" // /* MW 10 */ + 6191 "01100010" // /* MW 9 */ + 6192 "00000011" // /* MW 8 */ + 6193 "11010100" // /* MW 7 */ + 6194 "00011011" // /* MW 6 */ + 6195 "00001011" // /* MW 5 */ + 6196 "01010110" // /* MW 4 */ + 6197 "10000010" // /* MW 3 */ + 6198 "10010000" // /* MW 2 */ + 6199 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 807 26 +.src_ref 2 "conv2d_bf16.h" 808 26 +.src_ref 2 "conv2d_bf16.h" 809 26 +.src_ref 2 "conv2d_bf16.h" 810 26 +.src_ref 2 "conv2d_bf16.h" 1436 26 +.src_ref 2 "conv2d_bf16.h" 1437 26 +.src_ref 2 "conv2d_bf16.h" 1438 26 +.src_ref 2 "conv2d_bf16.h" 1439 26 + 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111000" // /* MW 9 */ + 6202 "11010000" // /* MW 8 */ + 6203 "00000101" // /* MW 7 */ + 6204 "10001001" // /* MW 6 */ + 6205 "00110001" // /* MW 5 */ + 6206 "00011001" // /* MW 4 */ + 6207 "00000000" // /* MW 3 */ + 6208 "10010100" // /* MW 2 */ + 6209 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 802 83 +.src_ref 2 "conv2d_bf16.h" 1428 39 + 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6211 "01111000" // /* MW 11 */ + 6212 "10010000" // /* MW 10 */ + 6213 "11000111" // /* MW 9 */ + 6214 "11001010" // /* MW 8 */ + 6215 "00100000" // /* MW 7 */ + 6216 "00000001" // /* MW 6 */ + 6217 "00001011" // /* MW 5 */ + 6218 "01011100" // /* MW 4 */ + 6219 "10000110" // /* MW 3 */ + 6220 "10011000" // /* MW 2 */ + 6221 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 794 8 + 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6223 "01111000" // /* MW 11 */ + 6224 "01010000" // /* MW 10 */ + 6225 "10000111" // /* MW 9 */ + 6226 "00001000" // /* MW 8 */ + 6227 "10010000" // /* MW 7 */ + 6228 "00000001" // /* MW 6 */ + 6229 "00001011" // /* MW 5 */ + 6230 "00000010" // /* MW 4 */ + 6231 "00100101" // /* MW 3 */ + 6232 "10000011" // /* MW 2 */ + 6233 "11111010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 794 8 +.src_ref 2 "conv2d_bf16.h" 1455 20 + 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6235 "01111000" // /* MW 9 */ + 6236 "01010000" // /* MW 8 */ + 6237 "01000101" // /* MW 7 */ + 6238 "00001011" // /* MW 6 */ + 6239 "10000000" // /* MW 5 */ + 6240 "00000001" // /* MW 4 */ + 6241 "00100000" // /* MW 3 */ + 6242 "11010110" // /* MW 2 */ + 6243 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 12 + 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6245 "00010000" // /* MW 9 */ + 6246 "01011000" // /* MW 8 */ + 6247 "00110100" // /* MW 7 */ + 6248 "00000101" // /* MW 6 */ + 6249 "00000000" // /* MW 5 */ + 6250 "00000000" // /* MW 4 */ + 6251 "00100000" // /* MW 3 */ + 6252 "00110110" // /* MW 2 */ + 6253 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 first +.src_ref 2 "conv2d_bf16.h" 1873 + 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6255 "01110010" // /* MW 5 */ + 6256 "11011111" // /* MW 4 */ + 6257 "00100110" // /* MW 3 */ + 6258 "10000111" // /* MW 2 */ + 6259 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "11000110" // /* MW 3 */ + 6262 "00011101" // /* MW 2 */ + 6263 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 80 first + 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00000110" // /* MW 3 */ + 6266 "10001010" // /* MW 2 */ + 6267 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 799 87 first + 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "10000110" // /* MW 3 */ + 6270 "10011110" // /* MW 2 */ + 6271 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 83 first + 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "11010110" // /* MW 3 */ + 6274 "00011110" // /* MW 2 */ + 6275 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 83 first + 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11110110" // /* MW 3 */ + 6278 "11001010" // /* MW 2 */ + 6279 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 66 first + 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6281 "10110110" // /* MW 3 */ + 6282 "00010111" // /* MW 2 */ + 6283 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1443 71 first + 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6285 "10010110" // /* MW 3 */ + 6286 "00000111" // /* MW 2 */ + 6287 "00000011" // /* MW 1 */ + 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6289 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1369 89 + 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6291 "00000000" // /* MW 3 */ + 6292 "10011000" // /* MW 2 */ + 6293 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 +.src_ref 2 "conv2d_bf16.h" 1518 37 + 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6295 "00000000" // /* MW 3 */ + 6296 "00000111" // /* MW 2 */ + 6297 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 + 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6299 "00000000" // /* MW 3 */ + 6300 "11011100" // /* MW 2 */ + 6301 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 first + 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6303 "11100000" // /* MW 3 */ + 6304 "00001111" // /* MW 2 */ + 6305 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 89 first + 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6307 "11000000" // /* MW 5 */ + 6308 "00011110" // /* MW 4 */ + 6309 "11101110" // /* MW 3 */ + 6310 "01111111" // /* MW 2 */ + 6311 "11101111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 + 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6313 "01110000" // /* MW 7 */ + 6314 "10010000" // /* MW 6 */ + 6315 "11000111" // /* MW 5 */ + 6316 "00000011" // /* MW 4 */ + 6317 "01100000" // /* MW 3 */ + 6318 "00101011" // /* MW 2 */ + 6319 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1362 31 first +.src_ref 2 "conv2d_bf16.h" 1429 50 +.src_ref 2 "conv2d_bf16.h" 1443 16 first +.loop_nesting 1 + 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6321 "01100000" // /* MW 13 */ + 6322 "10000001" // /* MW 12 */ + 6323 "01110001" // /* MW 11 */ + 6324 "00000010" // /* MW 10 */ + 6325 "10010110" // /* MW 9 */ + 6326 "10001111" // /* MW 8 */ + 6327 "00000000" // /* MW 7 */ + 6328 "00000000" // /* MW 6 */ + 6329 "00101000" // /* MW 5 */ + 6330 "00101000" // /* MW 4 */ + 6331 "01111010" // /* MW 3 */ + 6332 "10000101" // /* MW 2 */ + 6333 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1364 31 first +.src_ref 2 "conv2d_bf16.h" 1443 16 + 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6335 "00010000" // /* MW 11 */ + 6336 "11001000" // /* MW 10 */ + 6337 "10111100" // /* MW 9 */ + 6338 "00000101" // /* MW 8 */ + 6339 "00000000" // /* MW 7 */ + 6340 "00000000" // /* MW 6 */ + 6341 "00101000" // /* MW 5 */ + 6342 "00101000" // /* MW 4 */ + 6343 "01111010" // /* MW 3 */ + 6344 "00001101" // /* MW 2 */ + 6345 "11001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1428 39 first +.src_ref 2 "conv2d_bf16.h" 1443 16 first + 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6347 "01001000" // /* MW 11 */ + 6348 "00111111" // /* MW 10 */ + 6349 "10111111" // /* MW 9 */ + 6350 "01101110" // /* MW 8 */ + 6351 "11101001" // /* MW 7 */ + 6352 "00000101" // /* MW 6 */ + 6353 "00101000" // /* MW 5 */ + 6354 "00000101" // /* MW 4 */ + 6355 "01110110" // /* MW 3 */ + 6356 "10000001" // /* MW 2 */ + 6357 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6359 "01111110" // /* MW 9 */ + 6360 "10010000" // /* MW 8 */ + 6361 "01000111" // /* MW 7 */ + 6362 "00000001" // /* MW 6 */ + 6363 "00010100" // /* MW 5 */ + 6364 "00000001" // /* MW 4 */ + 6365 "01110011" // /* MW 3 */ + 6366 "01011001" // /* MW 2 */ + 6367 "01010101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1367 31 first + 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "00101000" // /* MW 5 */ + 6370 "00000001" // /* MW 4 */ + 6371 "01110110" // /* MW 3 */ + 6372 "10010101" // /* MW 2 */ + 6373 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1369 31 first + 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6375 "10101000" // /* MW 5 */ + 6376 "00100001" // /* MW 4 */ + 6377 "01111010" // /* MW 3 */ + 6378 "00011101" // /* MW 2 */ + 6379 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1372 31 first + 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6381 "00101000" // /* MW 5 */ + 6382 "00101000" // /* MW 4 */ + 6383 "01111010" // /* MW 3 */ + 6384 "10100101" // /* MW 2 */ + 6385 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1374 31 first + 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6387 "00101000" // /* MW 5 */ + 6388 "00101000" // /* MW 4 */ + 6389 "01111010" // /* MW 3 */ + 6390 "00101101" // /* MW 2 */ + 6391 "11001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1377 31 first + 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6393 "10101000" // /* MW 5 */ + 6394 "00000000" // /* MW 4 */ + 6395 "01110110" // /* MW 3 */ + 6396 "10110101" // /* MW 2 */ + 6397 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1379 31 first + 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6399 "00101000" // /* MW 5 */ + 6400 "00000011" // /* MW 4 */ + 6401 "01110110" // /* MW 3 */ + 6402 "00111101" // /* MW 2 */ + 6403 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 first + 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6405 "10101000" // /* MW 5 */ + 6406 "00000011" // /* MW 4 */ + 6407 "01110110" // /* MW 3 */ + 6408 "01000101" // /* MW 2 */ + 6409 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6411 "11101110" // /* MW 9 */ + 6412 "00101101" // /* MW 8 */ + 6413 "01101001" // /* MW 7 */ + 6414 "00000001" // /* MW 6 */ + 6415 "00010100" // /* MW 5 */ + 6416 "00010010" // /* MW 4 */ + 6417 "01110101" // /* MW 3 */ + 6418 "01001101" // /* MW 2 */ + 6419 "01101000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6421 "11101110" // /* MW 9 */ + 6422 "00101111" // /* MW 8 */ + 6423 "10101001" // /* MW 7 */ + 6424 "00000010" // /* MW 6 */ + 6425 "00010100" // /* MW 5 */ + 6426 "00010100" // /* MW 4 */ + 6427 "01110101" // /* MW 3 */ + 6428 "10000001" // /* MW 2 */ + 6429 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6431 "01101001" // /* MW 11 */ + 6432 "00001011" // /* MW 10 */ + 6433 "01001000" // /* MW 9 */ + 6434 "11000010" // /* MW 8 */ + 6435 "11011011" // /* MW 7 */ + 6436 "00010001" // /* MW 6 */ + 6437 "00101010" // /* MW 5 */ + 6438 "00101000" // /* MW 4 */ + 6439 "01111010" // /* MW 3 */ + 6440 "00000001" // /* MW 2 */ + 6441 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6443 "01101001" // /* MW 9 */ + 6444 "00110101" // /* MW 8 */ + 6445 "01001001" // /* MW 7 */ + 6446 "11000010" // /* MW 6 */ + 6447 "11011111" // /* MW 5 */ + 6448 "00010001" // /* MW 4 */ + 6449 "01110101" // /* MW 3 */ + 6450 "10000001" // /* MW 2 */ + 6451 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6453 "01101001" // /* MW 3 */ + 6454 "01001001" // /* MW 2 */ + 6455 "01001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6457 "01101001" // /* MW 3 */ + 6458 "01110101" // /* MW 2 */ + 6459 "01001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.src_ref 2 "conv2d_bf16.h" 1437 26 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6461 "00111101" // /* MW 9 */ + 6462 "10000100" // /* MW 8 */ + 6463 "10100001" // /* MW 7 */ + 6464 "11000110" // /* MW 6 */ + 6465 "01011111" // /* MW 5 */ + 6466 "10001011" // /* MW 4 */ + 6467 "10101010" // /* MW 3 */ + 6468 "00000000" // /* MW 2 */ + 6469 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1436 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6471 "00111101" // /* MW 7 */ + 6472 "10000000" // /* MW 6 */ + 6473 "10100000" // /* MW 5 */ + 6474 "00000000" // /* MW 4 */ + 6475 "10010100" // /* MW 3 */ + 6476 "00000001" // /* MW 2 */ + 6477 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1438 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6479 "00111101" // /* MW 7 */ + 6480 "10001000" // /* MW 6 */ + 6481 "10100010" // /* MW 5 */ + 6482 "00000000" // /* MW 4 */ + 6483 "11010100" // /* MW 3 */ + 6484 "00000001" // /* MW 2 */ + 6485 "00000011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1439 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6487 "00111101" // /* MW 9 */ + 6488 "10001100" // /* MW 8 */ + 6489 "10100011" // /* MW 7 */ + 6490 "00011101" // /* MW 6 */ + 6491 "00010100" // /* MW 5 */ + 6492 "00010010" // /* MW 4 */ + 6493 "01110101" // /* MW 3 */ + 6494 "00000001" // /* MW 2 */ + 6495 "01010101" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6497 "10110111" // /* MW 5 */ + 6498 "00010110" // /* MW 4 */ + 6499 "10000010" // /* MW 3 */ + 6500 "10000010" // /* MW 2 */ + 6501 "10100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6503 "00001001" // /* MW 9 */ + 6504 "00101010" // /* MW 8 */ + 6505 "10011001" // /* MW 7 */ + 6506 "11000110" // /* MW 6 */ + 6507 "01011111" // /* MW 5 */ + 6508 "00111100" // /* MW 4 */ + 6509 "00101010" // /* MW 3 */ + 6510 "00101000" // /* MW 2 */ + 6511 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6513 "00001001" // /* MW 9 */ + 6514 "00000100" // /* MW 8 */ + 6515 "10011000" // /* MW 7 */ + 6516 "11000110" // /* MW 6 */ + 6517 "01011011" // /* MW 5 */ + 6518 "10111100" // /* MW 4 */ + 6519 "10101001" // /* MW 3 */ + 6520 "00000000" // /* MW 2 */ + 6521 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6523 "00001001" // /* MW 7 */ + 6524 "01101000" // /* MW 6 */ + 6525 "10011011" // /* MW 5 */ + 6526 "00000000" // /* MW 4 */ + 6527 "10010100" // /* MW 3 */ + 6528 "00000001" // /* MW 2 */ + 6529 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6531 "00001001" // /* MW 13 */ + 6532 "01000110" // /* MW 12 */ + 6533 "10011010" // /* MW 11 */ + 6534 "01101100" // /* MW 10 */ + 6535 "00000101" // /* MW 9 */ + 6536 "00000000" // /* MW 8 */ + 6537 "00000000" // /* MW 7 */ + 6538 "00000000" // /* MW 6 */ + 6539 "10101000" // /* MW 5 */ + 6540 "00000011" // /* MW 4 */ + 6541 "01110110" // /* MW 3 */ + 6542 "10000001" // /* MW 2 */ + 6543 "00000010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6545 "00000000" // /* MW 15 */ + 6546 "00000000" // /* MW 14 */ + 6547 "11101000" // /* MW 13 */ + 6548 "10101111" // /* MW 12 */ + 6549 "01000101" // /* MW 11 */ + 6550 "00000001" // /* MW 10 */ + 6551 "00000000" // /* MW 9 */ + 6552 "00000000" // /* MW 8 */ + 6553 "01011011" // /* MW 7 */ + 6554 "00000001" // /* MW 6 */ + 6555 "00101000" // /* MW 5 */ + 6556 "00100100" // /* MW 4 */ + 6557 "01111010" // /* MW 3 */ + 6558 "00000001" // /* MW 2 */ + 6559 "01010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 801 30 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6561 "11100000" // /* MW 11 */ + 6562 "10101101" // /* MW 10 */ + 6563 "10000101" // /* MW 9 */ + 6564 "00000000" // /* MW 8 */ + 6565 "10001011" // /* MW 7 */ + 6566 "10011100" // /* MW 6 */ + 6567 "00100101" // /* MW 5 */ + 6568 "10010111" // /* MW 4 */ + 6569 "11111111" // /* MW 3 */ + 6570 "00001100" // /* MW 2 */ + 6571 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.src_ref 2 "conv2d_bf16.h" 1517 32 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6573 "00001001" // /* MW 11 */ + 6574 "00101010" // /* MW 10 */ + 6575 "10011001" // /* MW 9 */ + 6576 "11000110" // /* MW 8 */ + 6577 "01011111" // /* MW 7 */ + 6578 "00111100" // /* MW 6 */ + 6579 "00100010" // /* MW 5 */ + 6580 "00010111" // /* MW 4 */ + 6581 "01101111" // /* MW 3 */ + 6582 "10010001" // /* MW 2 */ + 6583 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.src_ref 2 "conv2d_bf16.h" 1518 37 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6585 "00001001" // /* MW 11 */ + 6586 "00000100" // /* MW 10 */ + 6587 "10011000" // /* MW 9 */ + 6588 "11000110" // /* MW 8 */ + 6589 "01011011" // /* MW 7 */ + 6590 "10111100" // /* MW 6 */ + 6591 "00100001" // /* MW 5 */ + 6592 "10010111" // /* MW 4 */ + 6593 "01101111" // /* MW 3 */ + 6594 "10010001" // /* MW 2 */ + 6595 "01110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6597 "00001001" // /* MW 7 */ + 6598 "01101000" // /* MW 6 */ + 6599 "10011011" // /* MW 5 */ + 6600 "11100110" // /* MW 4 */ + 6601 "10100000" // /* MW 3 */ + 6602 "10001000" // /* MW 2 */ + 6603 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.src_ref 2 "conv2d_bf16.h" 1428 39 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6605 "00001001" // /* MW 9 */ + 6606 "01000110" // /* MW 8 */ + 6607 "10011010" // /* MW 7 */ + 6608 "11100110" // /* MW 6 */ + 6609 "10000000" // /* MW 5 */ + 6610 "10011011" // /* MW 4 */ + 6611 "00100000" // /* MW 3 */ + 6612 "10110111" // /* MW 2 */ + 6613 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 30 first + 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6615 "01011011" // /* MW 3 */ + 6616 "00001011" // /* MW 2 */ + 6617 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6619 "01011111" // /* MW 3 */ + 6620 "10001011" // /* MW 2 */ + 6621 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6623 "00001001" // /* MW 7 */ + 6624 "00000100" // /* MW 6 */ + 6625 "10011000" // /* MW 5 */ + 6626 "11000110" // /* MW 4 */ + 6627 "01011011" // /* MW 3 */ + 6628 "10111100" // /* MW 2 */ + 6629 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6631 "00001001" // /* MW 7 */ + 6632 "00101010" // /* MW 6 */ + 6633 "10011001" // /* MW 5 */ + 6634 "11000110" // /* MW 4 */ + 6635 "01011111" // /* MW 3 */ + 6636 "00111100" // /* MW 2 */ + 6637 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6639 "00001001" // /* MW 3 */ + 6640 "01000110" // /* MW 2 */ + 6641 "10011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first + 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6643 "00001001" // /* MW 3 */ + 6644 "01101000" // /* MW 2 */ + 6645 "10011011" // /* MW 1 */ + 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6647 "00000000" // /* MW 1 */ + 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6649 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6651 "00010110" // /* MW 3 */ + 6652 "00010000" // /* MW 2 */ + 6653 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6655 "10010110" // /* MW 3 */ + 6656 "10010000" // /* MW 2 */ + 6657 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1455 20 first + 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */ + 6659 "01100001" // /* MW 9 */ + 6660 "00000000" // /* MW 8 */ + 6661 "00000000" // /* MW 7 */ + 6662 "01001110" // /* MW 6 */ + 6663 "00000011" // /* MW 5 */ + 6664 "00101010" // /* MW 4 */ + 6665 "11000000" // /* MW 3 */ + 6666 "00011010" // /* MW 2 */ + 6667 "00010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.delay_slot + 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6669 "01010110" // /* MW 3 */ + 6670 "00010000" // /* MW 2 */ + 6671 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6673 "10010110" // /* MW 3 */ + 6674 "00010001" // /* MW 2 */ + 6675 "00001001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6677 "11010110" // /* MW 3 */ + 6678 "10010001" // /* MW 2 */ + 6679 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6681 "00010110" // /* MW 3 */ + 6682 "10010001" // /* MW 2 */ + 6683 "00001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6685 "01010110" // /* MW 3 */ + 6686 "00010001" // /* MW 2 */ + 6687 "00001100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6689 "11101100" // /* MW 3 */ + 6690 "11011100" // /* MW 2 */ + 6691 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6693 "11101100" // /* MW 3 */ + 6694 "10001100" // /* MW 2 */ + 6695 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6697 "01110000" // /* MW 7 */ + 6698 "01110110" // /* MW 6 */ + 6699 "10101010" // /* MW 5 */ + 6700 "00000010" // /* MW 4 */ + 6701 "01100000" // /* MW 3 */ + 6702 "01011010" // /* MW 2 */ + 6703 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6705 "01110000" // /* MW 7 */ + 6706 "01110110" // /* MW 6 */ + 6707 "01011010" // /* MW 5 */ + 6708 "00000000" // /* MW 4 */ + 6709 "01100000" // /* MW 3 */ + 6710 "10001010" // /* MW 2 */ + 6711 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */ + 6713 "00100001" // /* MW 9 */ + 6714 "00000000" // /* MW 8 */ + 6715 "00000000" // /* MW 7 */ + 6716 "01010010" // /* MW 6 */ + 6717 "00000011" // /* MW 5 */ + 6718 "00000000" // /* MW 4 */ + 6719 "01100000" // /* MW 3 */ + 6720 "11010010" // /* MW 2 */ + 6721 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6723 "01110000" // /* MW 7 */ + 6724 "01110110" // /* MW 6 */ + 6725 "10001010" // /* MW 5 */ + 6726 "00000010" // /* MW 4 */ + 6727 "01100000" // /* MW 3 */ + 6728 "10001010" // /* MW 2 */ + 6729 "10100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6731 "11101100" // /* MW 3 */ + 6732 "10111100" // /* MW 2 */ + 6733 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6735 "01110000" // /* MW 7 */ + 6736 "01110110" // /* MW 6 */ + 6737 "10010110" // /* MW 5 */ + 6738 "00000010" // /* MW 4 */ + 6739 "01100000" // /* MW 3 */ + 6740 "01010010" // /* MW 2 */ + 6741 "01101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6743 "01110010" // /* MW 9 */ + 6744 "01110110" // /* MW 8 */ + 6745 "00100010" // /* MW 7 */ + 6746 "00000010" // /* MW 6 */ + 6747 "01010011" // /* MW 5 */ + 6748 "00010100" // /* MW 4 */ + 6749 "11110111" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "00000000" // /* MW 15 */ + 6754 "00000000" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "10010011" // /* MW 7 */ + 6762 "11100010" // /* MW 6 */ + 6763 "00100100" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 +.src_ref 4 "vector.hpp" 1152 43 + 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "10100011" // /* MW 3 */ + 6770 "01100000" // /* MW 2 */ + 6771 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6773 "11100011" // /* MW 3 */ + 6774 "00010100" // /* MW 2 */ + 6775 "00001100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6777 "00100011" // /* MW 3 */ + 6778 "00000100" // /* MW 2 */ + 6779 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6781 "01100011" // /* MW 3 */ + 6782 "00010100" // /* MW 2 */ + 6783 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6785 "10100011" // /* MW 3 */ + 6786 "01100001" // /* MW 2 */ + 6787 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6789 "11100011" // /* MW 3 */ + 6790 "00010101" // /* MW 2 */ + 6791 "00001111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6793 "01110000" // /* MW 7 */ + 6794 "10100101" // /* MW 6 */ + 6795 "00000001" // /* MW 5 */ + 6796 "00000000" // /* MW 4 */ + 6797 "01100000" // /* MW 3 */ + 6798 "00100100" // /* MW 2 */ + 6799 "10011100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1337 12 first + 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6801 "01000000" // /* MW 5 */ + 6802 "11110101" // /* MW 4 */ + 6803 "01101110" // /* MW 3 */ + 6804 "11000010" // /* MW 2 */ + 6805 "01100010" // /* MW 1 */ +.delay_slot + 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "10010000" // /* MW 3 */ + 6808 "10001011" // /* MW 2 */ + 6809 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6811 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6813 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6817 "00000000" // /* MW 15 */ + 6818 "00000000" // /* MW 14 */ + 6819 "01111000" // /* MW 13 */ + 6820 "10100101" // /* MW 12 */ + 6821 "00000001" // /* MW 11 */ + 6822 "00000000" // /* MW 10 */ + 6823 "00000000" // /* MW 9 */ + 6824 "00000000" // /* MW 8 */ + 6825 "01011011" // /* MW 7 */ + 6826 "00000001" // /* MW 6 */ + 6827 "00100000" // /* MW 5 */ + 6828 "00000000" // /* MW 4 */ + 6829 "11110000" // /* MW 3 */ + 6830 "00101100" // /* MW 2 */ + 6831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "11110001" // /* MW 3 */ + 6834 "11101101" // /* MW 2 */ + 6835 "00000111" // /* MW 1 */ + 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "10010001" // /* MW 3 */ + 6838 "11110001" // /* MW 2 */ + 6839 "00000111" // /* MW 1 */ + 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6841 "00110001" // /* MW 3 */ + 6842 "11110101" // /* MW 2 */ + 6843 "00000111" // /* MW 1 */ + 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6845 "00011001" // /* MW 3 */ + 6846 "11101011" // /* MW 2 */ + 6847 "00000111" // /* MW 1 */ + 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "10011001" // /* MW 3 */ + 6850 "11111011" // /* MW 2 */ + 6851 "00000111" // /* MW 1 */ + 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "11010001" // /* MW 3 */ + 6854 "11111101" // /* MW 2 */ + 6855 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 first + 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6857 "00000000" // /* MW 3 */ + 6858 "00101000" // /* MW 2 */ + 6859 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 +.delay_slot + 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6861 "00000001" // /* MW 5 */ + 6862 "00000000" // /* MW 4 */ + 6863 "00000000" // /* MW 3 */ + 6864 "11110000" // /* MW 2 */ + 6865 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6871 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + 6873 "00000000" // /* MW 1 */ +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 74 first +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 81 4 +.function_start + 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6881 "00010000" // /* MW 9 */ + 6882 "00100000" // /* MW 8 */ + 6883 "00110010" // /* MW 7 */ + 6884 "11110010" // /* MW 6 */ + 6885 "00000001" // /* MW 5 */ + 6886 "00000000" // /* MW 4 */ + 6887 "00000000" // /* MW 3 */ + 6888 "00100000" // /* MW 2 */ + 6889 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 first +.src_ref 7 "superkernels.cpp" 81 4 + 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6891 "01111000" // /* MW 9 */ + 6892 "11010000" // /* MW 8 */ + 6893 "01001011" // /* MW 7 */ + 6894 "00001000" // /* MW 6 */ + 6895 "00010000" // /* MW 5 */ + 6896 "00000000" // /* MW 4 */ + 6897 "11010000" // /* MW 3 */ + 6898 "11000010" // /* MW 2 */ + 6899 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 74 + 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6901 "00000001" // /* MW 5 */ + 6902 "00000000" // /* MW 4 */ + 6903 "00000000" // /* MW 3 */ + 6904 "00001000" // /* MW 2 */ + 6905 "00000000" // /* MW 1 */ + 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6907 "01010101" // /* MW 3 */ + 6908 "11110000" // /* MW 2 */ + 6909 "00001111" // /* MW 1 */ + 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6911 "00000000" // /* MW 1 */ + 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6913 "00000000" // /* MW 1 */ + 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6915 "00000000" // /* MW 1 */ + 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 79 16 + 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 6919 "00000001" // /* MW 5 */ + 6920 "01000000" // /* MW 4 */ + 6921 "11011000" // /* MW 3 */ + 6922 "00001101" // /* MW 2 */ + 6923 "10000000" // /* MW 1 */ +.delay_slot + 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10011101" // /* MW 3 */ + 6926 "11111011" // /* MW 2 */ + 6927 "00001111" // /* MW 1 */ +.delay_slot + 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "00011101" // /* MW 3 */ + 6930 "11111111" // /* MW 2 */ + 6931 "00001111" // /* MW 1 */ +.delay_slot + 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "10011101" // /* MW 3 */ + 6934 "11101101" // /* MW 2 */ + 6935 "00001111" // /* MW 1 */ +.delay_slot + 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6937 "00111101" // /* MW 3 */ + 6938 "11110100" // /* MW 2 */ + 6939 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6940 "01000100" // MOVXM r15, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6941 "00000000" // /* MW 5 */ + 6942 "10101100" // /* MW 4 */ + 6943 "11000111" // /* MW 3 */ + 6944 "00000111" // /* MW 2 */ + 6945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6947 "00010001" // /* MW 9 */ + 6948 "00110100" // /* MW 8 */ + 6949 "10110010" // /* MW 7 */ + 6950 "11110011" // /* MW 6 */ + 6951 "00000001" // /* MW 5 */ + 6952 "00000000" // /* MW 4 */ + 6953 "01100000" // /* MW 3 */ + 6954 "10010001" // /* MW 2 */ + 6955 "11010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6957 "00010000" // /* MW 11 */ + 6958 "00110010" // /* MW 10 */ + 6959 "10110010" // /* MW 9 */ + 6960 "11110011" // /* MW 8 */ + 6961 "00000001" // /* MW 7 */ + 6962 "00000000" // /* MW 6 */ + 6963 "00001011" // /* MW 5 */ + 6964 "10001111" // /* MW 4 */ + 6965 "11100001" // /* MW 3 */ + 6966 "11000000" // /* MW 2 */ + 6967 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6969 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 6973 "00000001" // /* MW 5 */ + 6974 "00000000" // /* MW 4 */ + 6975 "01100000" // /* MW 3 */ + 6976 "00000101" // /* MW 2 */ + 6977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6981 "00110001" // /* MW 3 */ + 6982 "00100000" // /* MW 2 */ + 6983 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6985 "00000101" // /* MW 3 */ + 6986 "00100000" // /* MW 2 */ + 6987 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6989 "01110000" // /* MW 7 */ + 6990 "01100000" // /* MW 6 */ + 6991 "10110000" // /* MW 5 */ + 6992 "00000011" // /* MW 4 */ + 6993 "00110000" // /* MW 3 */ + 6994 "11000010" // /* MW 2 */ + 6995 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6997 "01110000" // /* MW 11 */ + 6998 "01100000" // /* MW 10 */ + 6999 "00110010" // /* MW 9 */ + 7000 "00000000" // /* MW 8 */ + 7001 "01011011" // /* MW 7 */ + 7002 "00000001" // /* MW 6 */ + 7003 "00100000" // /* MW 5 */ + 7004 "00000000" // /* MW 4 */ + 7005 "11110000" // /* MW 3 */ + 7006 "00101100" // /* MW 2 */ + 7007 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.return_address + 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7009 "10000101" // /* MW 3 */ + 7010 "01100111" // /* MW 2 */ + 7011 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 +.src_ref 7 "superkernels.cpp" 87 35 first + 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7013 "00010000" // /* MW 9 */ + 7014 "00100010" // /* MW 8 */ + 7015 "10110010" // /* MW 7 */ + 7016 "11110000" // /* MW 6 */ + 7017 "00000001" // /* MW 5 */ + 7018 "00000000" // /* MW 4 */ + 7019 "01010000" // /* MW 3 */ + 7020 "11000001" // /* MW 2 */ + 7021 "01001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 37 first +.src_ref 7 "superkernels.cpp" 89 13 + 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7023 "00010000" // /* MW 9 */ + 7024 "00110000" // /* MW 8 */ + 7025 "00110010" // /* MW 7 */ + 7026 "11110000" // /* MW 6 */ + 7027 "00000001" // /* MW 5 */ + 7028 "00000000" // /* MW 4 */ + 7029 "01010000" // /* MW 3 */ + 7030 "11001111" // /* MW 2 */ + 7031 "01000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 73 + 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7033 "00111010" // /* MW 3 */ + 7034 "00000110" // /* MW 2 */ + 7035 "00000010" // /* MW 1 */ + 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7037 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 110 + 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "01011010" // /* MW 3 */ + 7040 "00010110" // /* MW 2 */ + 7041 "00000010" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 first +.src_ref 7 "superkernels.cpp" 113 2 + 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7047 "01110000" // /* MW 7 */ + 7048 "01100000" // /* MW 6 */ + 7049 "10110110" // /* MW 5 */ + 7050 "00000000" // /* MW 4 */ + 7051 "00110000" // /* MW 3 */ + 7052 "11000010" // /* MW 2 */ + 7053 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 57 first + 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7055 "00001111" // /* MW 3 */ + 7056 "11100001" // /* MW 2 */ + 7057 "00010100" // /* MW 1 */ + 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7059 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 94 + 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001111" // /* MW 3 */ + 7062 "01100001" // /* MW 2 */ + 7063 "00010100" // /* MW 1 */ + 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 28 first + 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7067 "00001111" // /* MW 3 */ + 7068 "10100001" // /* MW 2 */ + 7069 "00010100" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 13 +.src_ref 7 "superkernels.cpp" 113 2 + 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7073 "00000000" // /* MW 15 */ + 7074 "00000000" // /* MW 14 */ + 7075 "01111000" // /* MW 13 */ + 7076 "01100000" // /* MW 12 */ + 7077 "00110111" // /* MW 11 */ + 7078 "00000000" // /* MW 10 */ + 7079 "00000000" // /* MW 9 */ + 7080 "10000000" // /* MW 8 */ + 7081 "00010001" // /* MW 7 */ + 7082 "00000110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 106 12 +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 7 "superkernels.cpp" 117 6 +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00100100" // /* MW 8 */ + 7091 "00110010" // /* MW 7 */ + 7092 "11110011" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "00100000" // /* MW 3 */ + 7096 "10111110" // /* MW 2 */ + 7097 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.src_ref 7 "superkernels.cpp" 108 13 + 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "00010000" // /* MW 9 */ + 7100 "00100110" // /* MW 8 */ + 7101 "00110010" // /* MW 7 */ + 7102 "11110001" // /* MW 6 */ + 7103 "00000001" // /* MW 5 */ + 7104 "00000000" // /* MW 4 */ + 7105 "11010000" // /* MW 3 */ + 7106 "11000010" // /* MW 2 */ + 7107 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 +.src_ref 7 "superkernels.cpp" 108 13 first +.src_ref 7 "superkernels.cpp" 139 6 +.src_ref 7 "superkernels.cpp" 140 14 + 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "00010000" // /* MW 9 */ + 7110 "00100000" // /* MW 8 */ + 7111 "10110010" // /* MW 7 */ + 7112 "11110011" // /* MW 6 */ + 7113 "00000001" // /* MW 5 */ + 7114 "00000000" // /* MW 4 */ + 7115 "11010000" // /* MW 3 */ + 7116 "11000110" // /* MW 2 */ + 7117 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first + 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "01010110" // /* MW 3 */ + 7120 "00000110" // /* MW 2 */ + 7121 "00000111" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ + 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7125 "00000000" // /* MW 1 */ + 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7127 "00000000" // /* MW 1 */ + 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7129 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 110 6 first +.src_ref 7 "superkernels.cpp" 110 17 first + 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */ + 7131 "00000001" // /* MW 5 */ + 7132 "01000000" // /* MW 4 */ + 7133 "00011000" // /* MW 3 */ + 7134 "00001110" // /* MW 2 */ + 7135 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 108 13 first +.delay_slot + 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "00000111" // /* MW 3 */ + 7138 "01100010" // /* MW 2 */ + 7139 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.src_ref 7 "superkernels.cpp" 108 13 +.delay_slot + 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7141 "00001110" // /* MW 5 */ + 7142 "01000100" // /* MW 4 */ + 7143 "00111001" // /* MW 3 */ + 7144 "11000110" // /* MW 2 */ + 7145 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.delay_slot + 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00000111" // /* MW 3 */ + 7148 "00100110" // /* MW 2 */ + 7149 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 +.delay_slot + 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "01110001" // /* MW 3 */ + 7152 "00000110" // /* MW 2 */ + 7153 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.delay_slot + 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "00110001" // /* MW 3 */ + 7156 "00000110" // /* MW 2 */ + 7157 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7159 "10000110" // /* MW 3 */ + 7160 "01100111" // /* MW 2 */ + 7161 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7163 "01110110" // /* MW 3 */ + 7164 "11111111" // /* MW 2 */ + 7165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7167 "00010110" // /* MW 3 */ + 7168 "11111110" // /* MW 2 */ + 7169 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7171 "00110110" // /* MW 3 */ + 7172 "11111110" // /* MW 2 */ + 7173 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "00010110" // /* MW 3 */ + 7178 "01000110" // /* MW 2 */ + 7179 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7181 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00000010" // /* MW 3 */ + 7190 "01100001" // /* MW 2 */ + 7191 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010001" // /* MW 3 */ + 7194 "00000110" // /* MW 2 */ + 7195 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7197 "11111101" // /* MW 3 */ + 7198 "11100010" // /* MW 2 */ + 7199 "00010111" // /* MW 1 */ + 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7201 "00000000" // /* MW 1 */ + 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7203 "00000000" // /* MW 1 */ + 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7207 "00011000" // /* MW 9 */ + 7208 "00010011" // /* MW 8 */ + 7209 "00000100" // /* MW 7 */ + 7210 "00000000" // /* MW 6 */ + 7211 "01011011" // /* MW 5 */ + 7212 "00000001" // /* MW 4 */ + 7213 "11110000" // /* MW 3 */ + 7214 "00101100" // /* MW 2 */ + 7215 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.src_ref 7 "superkernels.cpp" 113 2 first +.no_stack_arguments + 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 7217 "00000001" // /* MW 5 */ + 7218 "00000000" // /* MW 4 */ + 7219 "10111000" // /* MW 3 */ + 7220 "00001000" // /* MW 2 */ + 7221 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7222 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7223 "00000000" // /* MW 5 */ + 7224 "11001100" // /* MW 4 */ + 7225 "11000110" // /* MW 3 */ + 7226 "00000111" // /* MW 2 */ + 7227 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7233 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7235 "00011100" // /* MW 13 */ + 7236 "00000000" // /* MW 12 */ + 7237 "00000000" // /* MW 11 */ + 7238 "00000111" // /* MW 10 */ + 7239 "00111101" // /* MW 9 */ + 7240 "01010011" // /* MW 8 */ + 7241 "00000000" // /* MW 7 */ + 7242 "00000000" // /* MW 6 */ + 7243 "10110110" // /* MW 5 */ + 7244 "00000010" // /* MW 4 */ + 7245 "11110000" // /* MW 3 */ + 7246 "00101100" // /* MW 2 */ + 7247 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 first +.src_ref 7 "superkernels.cpp" 117 20 +.return_address + 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7249 "00010000" // /* MW 9 */ + 7250 "00100010" // /* MW 8 */ + 7251 "10110010" // /* MW 7 */ + 7252 "11110000" // /* MW 6 */ + 7253 "00000001" // /* MW 5 */ + 7254 "00000000" // /* MW 4 */ + 7255 "11010000" // /* MW 3 */ + 7256 "11000010" // /* MW 2 */ + 7257 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 20 + 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7259 "00110110" // /* MW 3 */ + 7260 "00000110" // /* MW 2 */ + 7261 "00000001" // /* MW 1 */ + 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7263 "00010001" // /* MW 3 */ + 7264 "11110000" // /* MW 2 */ + 7265 "00000111" // /* MW 1 */ + 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7267 "00000000" // /* MW 1 */ + 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7269 "00000000" // /* MW 1 */ + 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7271 "00000000" // /* MW 1 */ + 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7273 "00000000" // /* MW 1 */ + 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7275 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 17 + 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7277 "00001000" // /* MW 3 */ + 7278 "01100001" // /* MW 2 */ + 7279 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 + 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */ + 7281 "00000001" // /* MW 5 */ + 7282 "01000000" // /* MW 4 */ + 7283 "01100000" // /* MW 3 */ + 7284 "00001110" // /* MW 2 */ + 7285 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 7 "superkernels.cpp" 140 14 +.delay_slot + 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7287 "00000001" // /* MW 3 */ + 7288 "00110000" // /* MW 2 */ + 7289 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7299 "00010100" // /* MW 5 */ + 7300 "11001111" // /* MW 4 */ + 7301 "10100010" // /* MW 3 */ + 7302 "00000000" // /* MW 2 */ + 7303 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00110110" // /* MW 3 */ + 7306 "00000110" // /* MW 2 */ + 7307 "00000001" // /* MW 1 */ + 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7309 "00000000" // /* MW 1 */ + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ + 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7313 "00000000" // /* MW 1 */ + 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7315 "00000000" // /* MW 1 */ + 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7317 "00000000" // /* MW 1 */ + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "00001000" // /* MW 3 */ + 7322 "01010001" // /* MW 2 */ + 7323 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 first +.src_ref 1 "io_buffer_main.h" 327 40 first + 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7325 "00100011" // /* MW 5 */ + 7326 "00001110" // /* MW 4 */ + 7327 "11011100" // /* MW 3 */ + 7328 "11000110" // /* MW 2 */ + 7329 "00111100" // /* MW 1 */ + 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7331 "00000000" // /* MW 1 */ + 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7333 "00000000" // /* MW 1 */ + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ + 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7337 "00000000" // /* MW 1 */ + 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7339 "00000000" // /* MW 1 */ + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7343 "00010001" // /* MW 3 */ + 7344 "00100001" // /* MW 2 */ + 7345 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7347 "00011100" // /* MW 13 */ + 7348 "00000000" // /* MW 12 */ + 7349 "00000000" // /* MW 11 */ + 7350 "01010111" // /* MW 10 */ + 7351 "00011010" // /* MW 9 */ + 7352 "01000000" // /* MW 8 */ + 7353 "00000000" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "00100011" // /* MW 5 */ + 7356 "11001100" // /* MW 4 */ + 7357 "11110011" // /* MW 3 */ + 7358 "00101100" // /* MW 2 */ + 7359 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 +.src_ref 7 "superkernels.cpp" 139 6 first +.src_ref 7 "superkernels.cpp" 139 19 + 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7361 "00010000" // /* MW 9 */ + 7362 "00110000" // /* MW 8 */ + 7363 "00110010" // /* MW 7 */ + 7364 "11110011" // /* MW 6 */ + 7365 "00000001" // /* MW 5 */ + 7366 "00000000" // /* MW 4 */ + 7367 "11010000" // /* MW 3 */ + 7368 "11000010" // /* MW 2 */ + 7369 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 19 + 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7371 "00110110" // /* MW 3 */ + 7372 "00000110" // /* MW 2 */ + 7373 "00000110" // /* MW 1 */ + 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7375 "10011001" // /* MW 3 */ + 7376 "11111000" // /* MW 2 */ + 7377 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 + 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7379 "00111001" // /* MW 3 */ + 7380 "11110100" // /* MW 2 */ + 7381 "00000111" // /* MW 1 */ + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 16 + 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7391 "00001000" // /* MW 3 */ + 7392 "01100001" // /* MW 2 */ + 7393 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 6 + 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7395 "00000001" // /* MW 5 */ + 7396 "01000000" // /* MW 4 */ + 7397 "10000000" // /* MW 3 */ + 7398 "00001110" // /* MW 2 */ + 7399 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7407 "00000000" // /* MW 1 */ +.delay_slot + 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00100000" // /* MW 3 */ + 7410 "11010000" // /* MW 2 */ + 7411 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 140 14 first + 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7413 "11000001" // /* MW 11 */ + 7414 "10001000" // /* MW 10 */ + 7415 "10000011" // /* MW 9 */ + 7416 "00000011" // /* MW 8 */ + 7417 "00000000" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00100000" // /* MW 5 */ + 7420 "00000000" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7425 "00011001" // /* MW 3 */ + 7426 "11111111" // /* MW 2 */ + 7427 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 first + 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7429 "00000000" // /* MW 3 */ + 7430 "00101000" // /* MW 2 */ + 7431 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 +.delay_slot + 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7433 "00000001" // /* MW 5 */ + 7434 "00000000" // /* MW 4 */ + 7435 "00000000" // /* MW 3 */ + 7436 "11111000" // /* MW 2 */ + 7437 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot + 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "10001011" // /* MW 3 */ + 7446 "10000100" // /* MW 2 */ +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 7447 "00001111" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7457 "00000001" // /* MW 5 */ + 7458 "00100001" // /* MW 4 */ + 7459 "00000000" // /* MW 3 */ + 7460 "00000000" // /* MW 2 */ + 7461 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000000" // /* MW 3 */ + 7464 "01010000" // /* MW 2 */ + 7465 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "10010000" // /* MW 3 */ + 7468 "01100000" // /* MW 2 */ + 7469 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7471 "00010001" // /* MW 3 */ + 7472 "00000100" // /* MW 2 */ + 7473 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7475 "00010001" // /* MW 3 */ + 7476 "00010100" // /* MW 2 */ + 7477 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7479 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "00101110" // /* MW 3 */ + 7490 "00011100" // /* MW 2 */ + 7491 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7493 "00000001" // /* MW 5 */ + 7494 "00000000" // /* MW 4 */ + 7495 "00000000" // /* MW 3 */ + 7496 "00001000" // /* MW 2 */ + 7497 "00000000" // /* MW 1 */ + 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7499 "00111101" // /* MW 3 */ + 7500 "11111000" // /* MW 2 */ + 7501 "00001111" // /* MW 1 */ + 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7503 "11110101" // /* MW 3 */ + 7504 "11111101" // /* MW 2 */ + 7505 "00001111" // /* MW 1 */ + 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7507 "00000000" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ + 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7511 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7513 "00101001" // /* MW 3 */ + 7514 "00011100" // /* MW 2 */ + 7515 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7517 "00101110" // /* MW 3 */ + 7518 "00011100" // /* MW 2 */ + 7519 "00000001" // /* MW 1 */ + 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7521 "00000000" // /* MW 1 */ + 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7523 "00000000" // /* MW 1 */ + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7527 "00000000" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7533 "00101001" // /* MW 3 */ + 7534 "00011100" // /* MW 2 */ + 7535 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7537 "00101110" // /* MW 3 */ + 7538 "00000100" // /* MW 2 */ + 7539 "00000001" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ + 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7551 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7553 "00101001" // /* MW 3 */ + 7554 "00011100" // /* MW 2 */ + 7555 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7557 "00101110" // /* MW 3 */ + 7558 "00010100" // /* MW 2 */ + 7559 "00000001" // /* MW 1 */ + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7565 "00000001" // /* MW 5 */ + 7566 "00000000" // /* MW 4 */ + 7567 "10010000" // /* MW 3 */ + 7568 "00001110" // /* MW 2 */ + 7569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7575 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00101001" // /* MW 3 */ + 7578 "11011100" // /* MW 2 */ + 7579 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.delay_slot + 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7581 "11000000" // /* MW 3 */ + 7582 "11010000" // /* MW 2 */ + 7583 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7585 "00001000" // /* MW 9 */ + 7586 "11000100" // /* MW 8 */ + 7587 "00110011" // /* MW 7 */ + 7588 "01101000" // /* MW 6 */ + 7589 "00000000" // /* MW 5 */ + 7590 "00000001" // /* MW 4 */ + 7591 "00100000" // /* MW 3 */ + 7592 "00000111" // /* MW 2 */ + 7593 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7595 "01011000" // /* MW 9 */ + 7596 "11111101" // /* MW 8 */ + 7597 "00000111" // /* MW 7 */ + 7598 "00001000" // /* MW 6 */ + 7599 "10000000" // /* MW 5 */ + 7600 "00000001" // /* MW 4 */ + 7601 "10000000" // /* MW 3 */ + 7602 "11100010" // /* MW 2 */ + 7603 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7605 "00000001" // /* MW 9 */ + 7606 "10100000" // /* MW 8 */ + 7607 "00000111" // /* MW 7 */ + 7608 "10000000" // /* MW 6 */ + 7609 "00010001" // /* MW 5 */ + 7610 "00001010" // /* MW 4 */ + 7611 "00100000" // /* MW 3 */ + 7612 "10111110" // /* MW 2 */ + 7613 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7615 "01001010" // /* MW 3 */ + 7616 "00000110" // /* MW 2 */ + 7617 "00000000" // /* MW 1 */ + 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7619 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7621 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "00010111" // /* MW 3 */ + 7624 "00000010" // /* MW 2 */ + 7625 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7627 "00000000" // /* MW 3 */ + 7628 "00101000" // /* MW 2 */ + 7629 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "00000101" // /* MW 3 */ + 7632 "00100010" // /* MW 2 */ + 7633 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7635 "00000001" // /* MW 5 */ + 7636 "00000000" // /* MW 4 */ + 7637 "00000000" // /* MW 3 */ + 7638 "11111000" // /* MW 2 */ + 7639 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7641 "00100111" // /* MW 3 */ + 7642 "01110111" // /* MW 2 */ + 7643 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7645 "10000010" // /* MW 3 */ + 7646 "00100001" // /* MW 2 */ + 7647 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7649 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 81 first +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 +.function_start + 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7665 "01111000" // /* MW 9 */ + 7666 "01100000" // /* MW 8 */ + 7667 "00001000" // /* MW 7 */ + 7668 "11001000" // /* MW 6 */ + 7669 "00010000" // /* MW 5 */ + 7670 "00000000" // /* MW 4 */ + 7671 "10000000" // /* MW 3 */ + 7672 "10000000" // /* MW 2 */ + 7673 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 + 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7675 "00001100" // /* MW 5 */ + 7676 "11000000" // /* MW 4 */ + 7677 "10100000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first + 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7681 "01001010" // /* MW 3 */ + 7682 "00001000" // /* MW 2 */ + 7683 "00000000" // /* MW 1 */ + 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7685 "00000000" // /* MW 1 */ + 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7687 "00000000" // /* MW 1 */ + 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7689 "00000000" // /* MW 1 */ + 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7691 "00000000" // /* MW 1 */ + 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7693 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first + 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7695 "00000000" // /* MW 3 */ + 7696 "00101000" // /* MW 2 */ + 7697 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.delay_slot + 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7699 "00001000" // /* MW 3 */ + 7700 "10000000" // /* MW 2 */ + 7701 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first +.delay_slot + 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7703 "00011101" // /* MW 3 */ + 7704 "00000000" // /* MW 2 */ + 7705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.src_ref 3 "elementwise_binary_broadcasting.h" 83 23 +.delay_slot + 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7707 "11100000" // /* MW 5 */ + 7708 "00001101" // /* MW 4 */ + 7709 "00110001" // /* MW 3 */ + 7710 "10000010" // /* MW 2 */ + 7711 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.delay_slot + 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00011101" // /* MW 3 */ + 7714 "11000100" // /* MW 2 */ + 7715 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 23 +.delay_slot + 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01010001" // /* MW 3 */ + 7718 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7719 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_broadcasting.h" 76 +.src_ref 3 "elementwise_binary_broadcasting.h" 76 first +.function_start + 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7729 "00000001" // /* MW 5 */ + 7730 "00000000" // /* MW 4 */ + 7731 "00000000" // /* MW 3 */ + 7732 "00001000" // /* MW 2 */ + 7733 "00000000" // /* MW 1 */ + 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7735 "00111101" // /* MW 3 */ + 7736 "11111100" // /* MW 2 */ + 7737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first +.no_stack_arguments + 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */ + 7739 "00000001" // /* MW 5 */ + 7740 "00000000" // /* MW 4 */ + 7741 "10100000" // /* MW 3 */ + 7742 "00001110" // /* MW 2 */ + 7743 "00000000" // /* MW 1 */ +.delay_slot + 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7745 "10011101" // /* MW 3 */ + 7746 "11111011" // /* MW 2 */ + 7747 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot + 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7749 "11000000" // /* MW 3 */ + 7750 "01100000" // /* MW 2 */ + 7751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7757 "01100111" // /* MW 3 */ + 7758 "00000001" // /* MW 2 */ + 7759 "00000000" // /* MW 1 */ +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7761 "10011001" // /* MW 3 */ + 7762 "11111011" // /* MW 2 */ + 7763 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7765 "00111001" // /* MW 3 */ + 7766 "11111100" // /* MW 2 */ + 7767 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */ + 7769 "00000000" // /* MW 5 */ + 7770 "00000000" // /* MW 4 */ + 7771 "11111000" // /* MW 3 */ + 7772 "00001110" // /* MW 2 */ + 7773 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7775 "11000000" // /* MW 3 */ + 7776 "01101110" // /* MW 2 */ + 7777 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first +.delay_slot + 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7779 "00000001" // /* MW 5 */ + 7780 "00000000" // /* MW 4 */ + 7781 "00000000" // /* MW 3 */ + 7782 "11111000" // /* MW 2 */ + 7783 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7789 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 89 first +.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 +.function_start + 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7793 "01010001" // /* MW 5 */ + 7794 "00000000" // /* MW 4 */ + 7795 "11010000" // /* MW 3 */ + 7796 "10000010" // /* MW 2 */ + 7797 "01100111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7799 "10000001" // /* MW 5 */ + 7800 "11001101" // /* MW 4 */ + 7801 "01011000" // /* MW 3 */ + 7802 "00000101" // /* MW 2 */ + 7803 "01100001" // /* MW 1 */ + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ + 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7807 "00000000" // /* MW 1 */ + 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7809 "00000000" // /* MW 1 */ + 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7811 "00000000" // /* MW 1 */ + 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7813 "00000000" // /* MW 1 */ + 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7815 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 12 +.src_ref 3 "elementwise_binary_broadcasting.h" 102 35 + 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */ + 7817 "00000001" // /* MW 5 */ + 7818 "01000000" // /* MW 4 */ + 7819 "01100000" // /* MW 3 */ + 7820 "00001111" // /* MW 2 */ + 7821 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 +.delay_slot + 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7823 "11101001" // /* MW 3 */ + 7824 "11000100" // /* MW 2 */ + 7825 "00010111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first +.delay_slot + 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7827 "00101101" // /* MW 3 */ + 7828 "00000000" // /* MW 2 */ + 7829 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first + 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "00110010" // /* MW 3 */ + 7838 "00000100" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ + 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7841 "00000000" // /* MW 1 */ + 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7843 "00000000" // /* MW 1 */ + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ + 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */ + 7847 "00000000" // /* MW 5 */ + 7848 "00000000" // /* MW 4 */ + 7849 "01110000" // /* MW 3 */ + 7850 "00001111" // /* MW 2 */ + 7851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7857 "01110010" // /* MW 3 */ + 7858 "00000101" // /* MW 2 */ + 7859 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7863 "00000000" // /* MW 9 */ + 7864 "00000000" // /* MW 8 */ + 7865 "00000000" // /* MW 7 */ + 7866 "00000000" // /* MW 6 */ + 7867 "00010011" // /* MW 5 */ + 7868 "00000100" // /* MW 4 */ + 7869 "11110000" // /* MW 3 */ + 7870 "00101100" // /* MW 2 */ + 7871 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 +.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first + 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00110010" // /* MW 3 */ + 7874 "00000100" // /* MW 2 */ + 7875 "00000001" // /* MW 1 */ + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ + 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7881 "00000000" // /* MW 1 */ + 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7883 "00000000" // /* MW 1 */ + 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7885 "00000000" // /* MW 1 */ + 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7887 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7889 "01110010" // /* MW 3 */ + 7890 "00000101" // /* MW 2 */ + 7891 "00011000" // /* MW 1 */ + 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7895 "00000000" // /* MW 9 */ + 7896 "00000000" // /* MW 8 */ + 7897 "00000000" // /* MW 7 */ + 7898 "00000000" // /* MW 6 */ + 7899 "00010011" // /* MW 5 */ + 7900 "00000100" // /* MW 4 */ + 7901 "11110001" // /* MW 3 */ + 7902 "00101100" // /* MW 2 */ + 7903 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first + 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7905 "01001000" // /* MW 9 */ + 7906 "00111111" // /* MW 8 */ + 7907 "10111000" // /* MW 7 */ + 7908 "10001010" // /* MW 6 */ + 7909 "00000111" // /* MW 5 */ + 7910 "00000000" // /* MW 4 */ + 7911 "11010000" // /* MW 3 */ + 7912 "10000000" // /* MW 2 */ + 7913 "10001010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7915 "00010000" // /* MW 9 */ + 7916 "10101000" // /* MW 8 */ + 7917 "01111111" // /* MW 7 */ + 7918 "00000100" // /* MW 6 */ + 7919 "00000000" // /* MW 5 */ + 7920 "00000000" // /* MW 4 */ + 7921 "11010000" // /* MW 3 */ + 7922 "10010000" // /* MW 2 */ + 7923 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7925 "11100000" // /* MW 5 */ + 7926 "11111110" // /* MW 4 */ + 7927 "00010110" // /* MW 3 */ + 7928 "00000000" // /* MW 2 */ + 7929 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7931 "11010000" // /* MW 5 */ + 7932 "11001000" // /* MW 4 */ + 7933 "11001000" // /* MW 3 */ + 7934 "00000111" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7937 "00100010" // /* MW 3 */ + 7938 "00000100" // /* MW 2 */ + 7939 "00000100" // /* MW 1 */ + 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7941 "00000000" // /* MW 1 */ + 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first + 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7945 "10101011" // /* MW 3 */ + 7946 "00001000" // /* MW 2 */ + 7947 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 189 20 first + 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7949 "00101011" // /* MW 3 */ + 7950 "00101001" // /* MW 2 */ + 7951 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first + 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7953 "00101011" // /* MW 3 */ + 7954 "00001000" // /* MW 2 */ + 7955 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7957 "00101011" // /* MW 3 */ + 7958 "00101010" // /* MW 2 */ + 7959 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7961 "00000000" // /* MW 5 */ + 7962 "11110101" // /* MW 4 */ + 7963 "01110000" // /* MW 3 */ + 7964 "00010101" // /* MW 2 */ + 7965 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7967 "00111101" // /* MW 7 */ + 7968 "00101000" // /* MW 6 */ + 7969 "00000011" // /* MW 5 */ + 7970 "00000100" // /* MW 4 */ + 7971 "01110000" // /* MW 3 */ + 7972 "00100101" // /* MW 2 */ + 7973 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "00101011" // /* MW 3 */ + 7976 "00001000" // /* MW 2 */ + 7977 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7979 "00111101" // /* MW 7 */ + 7980 "00010000" // /* MW 6 */ + 7981 "00000100" // /* MW 5 */ + 7982 "00000100" // /* MW 4 */ + 7983 "01110000" // /* MW 3 */ + 7984 "01000101" // /* MW 2 */ + 7985 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "10101011" // /* MW 3 */ + 7988 "00001000" // /* MW 2 */ + 7989 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7991 "00111101" // /* MW 7 */ + 7992 "00101000" // /* MW 6 */ + 7993 "00000011" // /* MW 5 */ + 7994 "00000100" // /* MW 4 */ + 7995 "01110000" // /* MW 3 */ + 7996 "00100101" // /* MW 2 */ + 7997 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7999 "00101011" // /* MW 3 */ + 8000 "00001000" // /* MW 2 */ + 8001 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8003 "00111101" // /* MW 13 */ + 8004 "00010000" // /* MW 12 */ + 8005 "00000100" // /* MW 11 */ + 8006 "01010111" // /* MW 10 */ + 8007 "00011010" // /* MW 9 */ + 8008 "01000000" // /* MW 8 */ + 8009 "00000000" // /* MW 7 */ + 8010 "00000000" // /* MW 6 */ + 8011 "01000110" // /* MW 5 */ + 8012 "00111011" // /* MW 4 */ + 8013 "01110100" // /* MW 3 */ + 8014 "01000101" // /* MW 2 */ + 8015 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "10101011" // /* MW 3 */ + 8018 "00001000" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8021 "00111101" // /* MW 11 */ + 8022 "00101000" // /* MW 10 */ + 8023 "00000011" // /* MW 9 */ + 8024 "10001110" // /* MW 8 */ + 8025 "00010001" // /* MW 7 */ + 8026 "00001111" // /* MW 6 */ + 8027 "00100001" // /* MW 5 */ + 8028 "00000000" // /* MW 4 */ + 8029 "01110000" // /* MW 3 */ + 8030 "00100101" // /* MW 2 */ + 8031 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8033 "00000000" // /* MW 15 */ + 8034 "00000000" // /* MW 14 */ + 8035 "01111000" // /* MW 13 */ + 8036 "10100101" // /* MW 12 */ + 8037 "00000001" // /* MW 11 */ + 8038 "00000000" // /* MW 10 */ + 8039 "00000000" // /* MW 9 */ + 8040 "00000000" // /* MW 8 */ + 8041 "01011011" // /* MW 7 */ + 8042 "00000001" // /* MW 6 */ + 8043 "00100000" // /* MW 5 */ + 8044 "00000000" // /* MW 4 */ + 8045 "01110000" // /* MW 3 */ + 8046 "00000101" // /* MW 2 */ + 8047 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8049 "10000001" // /* MW 15 */ + 8050 "00100000" // /* MW 14 */ + 8051 "01111000" // /* MW 13 */ + 8052 "10100101" // /* MW 12 */ + 8053 "00000001" // /* MW 11 */ + 8054 "00000000" // /* MW 10 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "10100011" // /* MW 7 */ + 8058 "00011101" // /* MW 6 */ + 8059 "00100010" // /* MW 5 */ + 8060 "00000000" // /* MW 4 */ + 8061 "01110000" // /* MW 3 */ + 8062 "01000101" // /* MW 2 */ + 8063 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8067 "00111101" // /* MW 7 */ + 8068 "00101000" // /* MW 6 */ + 8069 "00000011" // /* MW 5 */ + 8070 "00000010" // /* MW 4 */ + 8071 "01100000" // /* MW 3 */ + 8072 "11000100" // /* MW 2 */ + 8073 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8075 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8077 "00111101" // /* MW 7 */ + 8078 "00010000" // /* MW 6 */ + 8079 "00000100" // /* MW 5 */ + 8080 "00000010" // /* MW 4 */ + 8081 "01100000" // /* MW 3 */ + 8082 "10110100" // /* MW 2 */ + 8083 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8087 "00000000" // /* MW 5 */ + 8088 "01010000" // /* MW 4 */ + 8089 "01100000" // /* MW 3 */ + 8090 "11000100" // /* MW 2 */ + 8091 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "10100011" // /* MW 3 */ + 8096 "00011101" // /* MW 2 */ + 8097 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8099 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8101 "00100011" // /* MW 3 */ + 8102 "00011110" // /* MW 2 */ + 8103 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8105 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first +.function_start + 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8113 "00000001" // /* MW 5 */ + 8114 "00000000" // /* MW 4 */ + 8115 "00000000" // /* MW 3 */ + 8116 "00010000" // /* MW 2 */ + 8117 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 + 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8119 "01110000" // /* MW 7 */ + 8120 "01100000" // /* MW 6 */ + 8121 "00001010" // /* MW 5 */ + 8122 "00000010" // /* MW 4 */ + 8123 "10110000" // /* MW 3 */ + 8124 "10000111" // /* MW 2 */ + 8125 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 + 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8127 "00000000" // /* MW 7 */ + 8128 "00000011" // /* MW 6 */ + 8129 "10110100" // /* MW 5 */ + 8130 "00000001" // /* MW 4 */ + 8131 "01100000" // /* MW 3 */ + 8132 "10010001" // /* MW 2 */ + 8133 "01010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 + 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8135 "10000001" // /* MW 5 */ + 8136 "00100001" // /* MW 4 */ + 8137 "01011000" // /* MW 3 */ + 8138 "11101101" // /* MW 2 */ + 8139 "01100101" // /* MW 1 */ + 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8141 "11000001" // /* MW 5 */ + 8142 "10101011" // /* MW 4 */ + 8143 "01011000" // /* MW 3 */ + 8144 "11001010" // /* MW 2 */ + 8145 "01110011" // /* MW 1 */ + 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8147 "11000000" // /* MW 3 */ + 8148 "01101000" // /* MW 2 */ + 8149 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8151 "00101011" // /* MW 3 */ + 8152 "00000111" // /* MW 2 */ + 8153 "00001000" // /* MW 1 */ + 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8155 "01010111" // /* MW 3 */ + 8156 "00000110" // /* MW 2 */ + 8157 "00000000" // /* MW 1 */ + 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8159 "00000000" // /* MW 1 */ + 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8161 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first +.no_stack_arguments + 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */ + 8163 "00000001" // /* MW 5 */ + 8164 "00000000" // /* MW 4 */ + 8165 "00111000" // /* MW 3 */ + 8166 "00001111" // /* MW 2 */ + 8167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.delay_slot + 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8169 "11000000" // /* MW 3 */ + 8170 "01010000" // /* MW 2 */ + 8171 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first +.delay_slot + 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8175 "00010010" // /* MW 3 */ + 8176 "00100101" // /* MW 2 */ + 8177 "00010100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8179 "01000001" // /* MW 5 */ + 8180 "11010010" // /* MW 4 */ + 8181 "01000010" // /* MW 3 */ + 8182 "00100000" // /* MW 2 */ + 8183 "10001100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8185 "01110000" // /* MW 7 */ + 8186 "00010000" // /* MW 6 */ + 8187 "00110100" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "01100000" // /* MW 3 */ + 8190 "00101011" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.return_address + 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8193 "00111001" // /* MW 3 */ + 8194 "11111100" // /* MW 2 */ + 8195 "00000111" // /* MW 1 */ + 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8197 "00000000" // /* MW 1 */ + 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8199 "00000000" // /* MW 1 */ + 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8201 "00000000" // /* MW 1 */ + 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8203 "00000000" // /* MW 1 */ + 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8205 "00000000" // /* MW 1 */ + 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8207 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first + 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8209 "00000000" // /* MW 3 */ + 8210 "00101000" // /* MW 2 */ + 8211 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.delay_slot + 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8213 "00000001" // /* MW 5 */ + 8214 "00000000" // /* MW 4 */ + 8215 "00000000" // /* MW 3 */ + 8216 "11110000" // /* MW 2 */ + 8217 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8221 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8223 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8225 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 147 first +.src_ref 7 "superkernels.cpp" 152 6 +.function_start + 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8241 "10000000" // /* MW 5 */ + 8242 "11001000" // /* MW 4 */ + 8243 "11000110" // /* MW 3 */ + 8244 "00000111" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 first + 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8247 "11000001" // /* MW 5 */ + 8248 "10110101" // /* MW 4 */ + 8249 "11011000" // /* MW 3 */ + 8250 "11000010" // /* MW 2 */ + 8251 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 147 + 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8253 "00000001" // /* MW 5 */ + 8254 "00000000" // /* MW 4 */ + 8255 "00000000" // /* MW 3 */ + 8256 "00001000" // /* MW 2 */ + 8257 "00000000" // /* MW 1 */ + 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8259 "01110000" // /* MW 7 */ + 8260 "11010000" // /* MW 6 */ + 8261 "00001011" // /* MW 5 */ + 8262 "00000000" // /* MW 4 */ + 8263 "10110000" // /* MW 3 */ + 8264 "01100011" // /* MW 2 */ + 8265 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 + 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8267 "00010001" // /* MW 9 */ + 8268 "00101000" // /* MW 8 */ + 8269 "00110010" // /* MW 7 */ + 8270 "11110011" // /* MW 6 */ + 8271 "00000001" // /* MW 5 */ + 8272 "00000000" // /* MW 4 */ + 8273 "10110000" // /* MW 3 */ + 8274 "10000010" // /* MW 2 */ + 8275 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "11000000" // /* MW 3 */ + 8278 "11010100" // /* MW 2 */ + 8279 "00011011" // /* MW 1 */ + 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8281 "00000000" // /* MW 1 */ + 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8283 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 +.src_ref 7 "superkernels.cpp" 152 16 + 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */ + 8285 "00000001" // /* MW 5 */ + 8286 "01000000" // /* MW 4 */ + 8287 "10000000" // /* MW 3 */ + 8288 "00010000" // /* MW 2 */ + 8289 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 22 first +.delay_slot + 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8291 "10010000" // /* MW 3 */ + 8292 "01100010" // /* MW 2 */ + 8293 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 30 +.delay_slot + 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8295 "11111011" // /* MW 3 */ + 8296 "01100011" // /* MW 2 */ + 8297 "00010100" // /* MW 1 */ +.delay_slot + 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8299 "00111101" // /* MW 3 */ + 8300 "11110100" // /* MW 2 */ + 8301 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8303 "01110000" // /* MW 7 */ + 8304 "01100000" // /* MW 6 */ + 8305 "00110000" // /* MW 5 */ + 8306 "00000011" // /* MW 4 */ + 8307 "00110000" // /* MW 3 */ + 8308 "11000110" // /* MW 2 */ + 8309 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 +.src_ref 7 "superkernels.cpp" 166 2 +.delay_slot + 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8311 "10000000" // /* MW 5 */ + 8312 "11001001" // /* MW 4 */ + 8313 "11000000" // /* MW 3 */ + 8314 "00000111" // /* MW 2 */ + 8315 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8317 "11010000" // /* MW 5 */ + 8318 "11001000" // /* MW 4 */ + 8319 "11000100" // /* MW 3 */ + 8320 "00000111" // /* MW 2 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8323 "00010000" // /* MW 9 */ + 8324 "00110010" // /* MW 8 */ + 8325 "00110010" // /* MW 7 */ + 8326 "11110001" // /* MW 6 */ + 8327 "00000001" // /* MW 5 */ + 8328 "00000000" // /* MW 4 */ + 8329 "11100000" // /* MW 3 */ + 8330 "11000000" // /* MW 2 */ + 8331 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8333 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */ + 8335 "00000001" // /* MW 5 */ + 8336 "00000000" // /* MW 4 */ + 8337 "00011000" // /* MW 3 */ + 8338 "00001111" // /* MW 2 */ + 8339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "00110001" // /* MW 3 */ + 8346 "00100000" // /* MW 2 */ + 8347 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "00000101" // /* MW 3 */ + 8350 "00100000" // /* MW 2 */ + 8351 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8353 "00000000" // /* MW 15 */ + 8354 "00000000" // /* MW 14 */ + 8355 "01111000" // /* MW 13 */ + 8356 "10100101" // /* MW 12 */ + 8357 "00000001" // /* MW 11 */ + 8358 "00000000" // /* MW 10 */ + 8359 "00000000" // /* MW 9 */ + 8360 "10000000" // /* MW 8 */ + 8361 "00010001" // /* MW 7 */ + 8362 "00000110" // /* MW 6 */ + 8363 "00100010" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11110000" // /* MW 3 */ + 8366 "00101100" // /* MW 2 */ + 8367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 +.return_address + 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8369 "10100000" // /* MW 5 */ + 8370 "11001000" // /* MW 4 */ + 8371 "11000100" // /* MW 3 */ + 8372 "00000111" // /* MW 2 */ + 8373 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 first +.src_ref 7 "superkernels.cpp" 159 65 + 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8375 "00010000" // /* MW 9 */ + 8376 "01100000" // /* MW 8 */ + 8377 "00110010" // /* MW 7 */ + 8378 "11110001" // /* MW 6 */ + 8379 "00000001" // /* MW 5 */ + 8380 "00000000" // /* MW 4 */ + 8381 "11010000" // /* MW 3 */ + 8382 "11000010" // /* MW 2 */ + 8383 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 +.src_ref 7 "superkernels.cpp" 159 65 +.src_ref 7 "superkernels.cpp" 166 2 + 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8385 "00010000" // /* MW 9 */ + 8386 "01100000" // /* MW 8 */ + 8387 "00110010" // /* MW 7 */ + 8388 "11110001" // /* MW 6 */ + 8389 "00000001" // /* MW 5 */ + 8390 "00000000" // /* MW 4 */ + 8391 "11010000" // /* MW 3 */ + 8392 "11000110" // /* MW 2 */ + 8393 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 first +.src_ref 7 "superkernels.cpp" 159 16 +.src_ref 7 "superkernels.cpp" 164 47 + 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8395 "00010000" // /* MW 9 */ + 8396 "00101010" // /* MW 8 */ + 8397 "10110010" // /* MW 7 */ + 8398 "11110000" // /* MW 6 */ + 8399 "00000001" // /* MW 5 */ + 8400 "00000000" // /* MW 4 */ + 8401 "01010000" // /* MW 3 */ + 8402 "11001011" // /* MW 2 */ + 8403 "01001010" // /* MW 1 */ + 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8405 "00000000" // /* MW 1 */ + 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8407 "00000000" // /* MW 1 */ + 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */ + 8409 "00000000" // /* MW 5 */ + 8410 "00000000" // /* MW 4 */ + 8411 "10001000" // /* MW 3 */ + 8412 "00010000" // /* MW 2 */ + 8413 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 +.delay_slot + 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8415 "11000000" // /* MW 5 */ + 8416 "11001000" // /* MW 4 */ + 8417 "11000000" // /* MW 3 */ + 8418 "00000111" // /* MW 2 */ + 8419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8421 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 27 first +.delay_slot + 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001111" // /* MW 3 */ + 8424 "01100001" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 first +.delay_slot + 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8427 "10100011" // /* MW 5 */ + 8428 "00001100" // /* MW 4 */ + 8429 "11110000" // /* MW 3 */ + 8430 "00101100" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 16 first +.delay_slot + 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8433 "00000000" // /* MW 15 */ + 8434 "00000000" // /* MW 14 */ + 8435 "01111000" // /* MW 13 */ + 8436 "10100101" // /* MW 12 */ + 8437 "00000001" // /* MW 11 */ + 8438 "00000000" // /* MW 10 */ + 8439 "00000000" // /* MW 9 */ + 8440 "10000000" // /* MW 8 */ + 8441 "00010001" // /* MW 7 */ + 8442 "00000110" // /* MW 6 */ + 8443 "00100001" // /* MW 5 */ + 8444 "00000000" // /* MW 4 */ + 8445 "11110000" // /* MW 3 */ + 8446 "00101100" // /* MW 2 */ + 8447 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 164 47 +.src_ref 7 "superkernels.cpp" 166 2 + 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "00010000" // /* MW 13 */ + 8452 "00101010" // /* MW 12 */ + 8453 "10110010" // /* MW 11 */ + 8454 "11110000" // /* MW 10 */ + 8455 "00000001" // /* MW 9 */ + 8456 "00000000" // /* MW 8 */ + 8457 "10001011" // /* MW 7 */ + 8458 "10000000" // /* MW 6 */ + 8459 "00100010" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8465 "00000000" // /* MW 7 */ + 8466 "11000011" // /* MW 6 */ + 8467 "10110011" // /* MW 5 */ + 8468 "00000011" // /* MW 4 */ + 8469 "01100000" // /* MW 3 */ + 8470 "10010001" // /* MW 2 */ + 8471 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8473 "00010000" // /* MW 9 */ + 8474 "00100000" // /* MW 8 */ + 8475 "00110010" // /* MW 7 */ + 8476 "11110000" // /* MW 6 */ + 8477 "00000001" // /* MW 5 */ + 8478 "00000000" // /* MW 4 */ + 8479 "11010000" // /* MW 3 */ + 8480 "11101110" // /* MW 2 */ + 8481 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8483 "00010110" // /* MW 3 */ + 8484 "11111110" // /* MW 2 */ + 8485 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8487 "00110110" // /* MW 3 */ + 8488 "11111110" // /* MW 2 */ + 8489 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8491 "01010110" // /* MW 3 */ + 8492 "01000110" // /* MW 2 */ + 8493 "00000111" // /* MW 1 */ + 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8495 "00000000" // /* MW 1 */ + 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8497 "00000000" // /* MW 1 */ + 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8499 "00000000" // /* MW 1 */ + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ + 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8503 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "00000010" // /* MW 3 */ + 8506 "01100001" // /* MW 2 */ + 8507 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8509 "00010001" // /* MW 3 */ + 8510 "00000110" // /* MW 2 */ + 8511 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8513 "11111101" // /* MW 3 */ + 8514 "11100000" // /* MW 2 */ + 8515 "00010111" // /* MW 1 */ + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001000" // /* MW 3 */ + 8524 "10010011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 + 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8527 "10000001" // /* MW 5 */ + 8528 "10101101" // /* MW 4 */ + 8529 "10100111" // /* MW 3 */ + 8530 "00000000" // /* MW 2 */ + 8531 "00000100" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first + 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8537 "00110110" // /* MW 3 */ + 8538 "00000110" // /* MW 2 */ + 8539 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8541 "10000001" // /* MW 5 */ + 8542 "11011101" // /* MW 4 */ + 8543 "11011100" // /* MW 3 */ + 8544 "11001010" // /* MW 2 */ + 8545 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 47 first + 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8547 "01110110" // /* MW 3 */ + 8548 "00000110" // /* MW 2 */ + 8549 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8551 "10011110" // /* MW 3 */ + 8552 "01011100" // /* MW 2 */ + 8553 "00000111" // /* MW 1 */ + 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 166 2 first +.no_stack_arguments + 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */ + 8557 "00000001" // /* MW 5 */ + 8558 "00000000" // /* MW 4 */ + 8559 "11011000" // /* MW 3 */ + 8560 "00001111" // /* MW 2 */ + 8561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first +.delay_slot + 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8565 "00000111" // /* MW 3 */ + 8566 "01100010" // /* MW 2 */ + 8567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.delay_slot + 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8569 "00110001" // /* MW 3 */ + 8570 "00000110" // /* MW 2 */ + 8571 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 first +.delay_slot + 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8573 "00001101" // /* MW 3 */ + 8574 "11100001" // /* MW 2 */ + 8575 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 +.delay_slot + 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8577 "00000000" // /* MW 15 */ + 8578 "00000000" // /* MW 14 */ + 8579 "10101000" // /* MW 13 */ + 8580 "10100000" // /* MW 12 */ + 8581 "00110100" // /* MW 11 */ + 8582 "00000000" // /* MW 10 */ + 8583 "00000000" // /* MW 9 */ + 8584 "00000000" // /* MW 8 */ + 8585 "01011011" // /* MW 7 */ + 8586 "00000001" // /* MW 6 */ + 8587 "00100000" // /* MW 5 */ + 8588 "00000000" // /* MW 4 */ + 8589 "11110000" // /* MW 3 */ + 8590 "00101100" // /* MW 2 */ + 8591 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 +.src_ref 7 "superkernels.cpp" 169 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8593 "00010000" // /* MW 9 */ + 8594 "00100000" // /* MW 8 */ + 8595 "00110010" // /* MW 7 */ + 8596 "11110011" // /* MW 6 */ + 8597 "00000001" // /* MW 5 */ + 8598 "00000000" // /* MW 4 */ + 8599 "11010000" // /* MW 3 */ + 8600 "11000110" // /* MW 2 */ + 8601 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8603 "00000101" // /* MW 3 */ + 8604 "00100000" // /* MW 2 */ + 8605 "00010000" // /* MW 1 */ + 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8607 "00000000" // /* MW 1 */ + 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ + 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8617 "00001000" // /* MW 3 */ + 8618 "01010001" // /* MW 2 */ + 8619 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8621 "00010000" // /* MW 9 */ + 8622 "00110000" // /* MW 8 */ + 8623 "00110010" // /* MW 7 */ + 8624 "11110001" // /* MW 6 */ + 8625 "00000001" // /* MW 5 */ + 8626 "00000000" // /* MW 4 */ + 8627 "11010000" // /* MW 3 */ + 8628 "11001110" // /* MW 2 */ + 8629 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 first + 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "00110110" // /* MW 3 */ + 8632 "00000110" // /* MW 2 */ + 8633 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 + 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8635 "01010110" // /* MW 3 */ + 8636 "00000110" // /* MW 2 */ + 8637 "00000010" // /* MW 1 */ + 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8639 "00000000" // /* MW 1 */ + 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8641 "00000000" // /* MW 1 */ + 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8643 "00000000" // /* MW 1 */ + 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8645 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00110001" // /* MW 3 */ + 8648 "00100001" // /* MW 2 */ + 8649 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8651 "00010001" // /* MW 3 */ + 8652 "11100110" // /* MW 2 */ + 8653 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 16 first + 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8655 "00101000" // /* MW 3 */ + 8656 "01100001" // /* MW 2 */ + 8657 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 + 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */ + 8659 "00000001" // /* MW 5 */ + 8660 "01000000" // /* MW 4 */ + 8661 "11111000" // /* MW 3 */ + 8662 "00010000" // /* MW 2 */ + 8663 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8673 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 + 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8675 "00000001" // /* MW 3 */ + 8676 "00100000" // /* MW 2 */ + 8677 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 first + 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8679 "00000000" // /* MW 9 */ + 8680 "00000000" // /* MW 8 */ + 8681 "00000000" // /* MW 7 */ + 8682 "10000000" // /* MW 6 */ + 8683 "00010001" // /* MW 5 */ + 8684 "00000110" // /* MW 4 */ + 8685 "11110110" // /* MW 3 */ + 8686 "00101100" // /* MW 2 */ + 8687 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 171 + 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8689 "00111001" // /* MW 3 */ + 8690 "11110100" // /* MW 2 */ + 8691 "00000111" // /* MW 1 */ + 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8693 "00011001" // /* MW 3 */ + 8694 "11111011" // /* MW 2 */ + 8695 "00000111" // /* MW 1 */ + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ + 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8699 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "11110001" // /* MW 3 */ + 8704 "11111101" // /* MW 2 */ + 8705 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8709 "00000000" // /* MW 3 */ + 8710 "00101000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8713 "10100000" // /* MW 3 */ + 8714 "01100111" // /* MW 2 */ + 8715 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 +.delay_slot + 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8717 "00000001" // /* MW 5 */ + 8718 "00000000" // /* MW 4 */ + 8719 "00000000" // /* MW 3 */ + 8720 "11111000" // /* MW 2 */ + 8721 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 8727 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 3 "elementwise_unary.h" 124 first +.src_ref 3 "elementwise_unary.h" 126 24 first +.function_start + 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8737 "00101110" // /* MW 3 */ + 8738 "00011100" // /* MW 2 */ + 8739 "00000001" // /* MW 1 */ + 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8741 "00000000" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8745 "00000000" // /* MW 1 */ + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 126 22 first + 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8753 "00101001" // /* MW 3 */ + 8754 "00011100" // /* MW 2 */ + 8755 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 24 first + 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8757 "00101110" // /* MW 3 */ + 8758 "00011100" // /* MW 2 */ + 8759 "00000001" // /* MW 1 */ + 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8761 "00000000" // /* MW 1 */ + 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8763 "00000000" // /* MW 1 */ + 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8765 "00000000" // /* MW 1 */ + 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8767 "00000000" // /* MW 1 */ + 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8769 "00000000" // /* MW 1 */ + 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8771 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 22 + 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8773 "00101001" // /* MW 3 */ + 8774 "00011100" // /* MW 2 */ + 8775 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 24 first + 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8777 "00101110" // /* MW 3 */ + 8778 "01101100" // /* MW 2 */ + 8779 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8781 "00010010" // /* MW 3 */ + 8782 "00000100" // /* MW 2 */ + 8783 "00000001" // /* MW 1 */ + 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8785 "00000000" // /* MW 1 */ + 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8787 "00000000" // /* MW 1 */ + 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8789 "00000000" // /* MW 1 */ + 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8791 "00000000" // /* MW 1 */ + 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8793 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 22 first + 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8795 "00101001" // /* MW 3 */ + 8796 "01101100" // /* MW 2 */ + 8797 "00001000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010111" // /* MW 3 */ + 8800 "00000100" // /* MW 2 */ + 8801 "00000000" // /* MW 1 */ + 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8803 "00000000" // /* MW 1 */ + 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8805 "00000000" // /* MW 1 */ + 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8807 "00000000" // /* MW 1 */ + 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8809 "00000000" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 first + 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "00010010" // /* MW 3 */ + 8816 "00100100" // /* MW 2 */ + 8817 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 + 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8819 "00010111" // /* MW 3 */ + 8820 "00010100" // /* MW 2 */ + 8821 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 130 4 first + 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8823 "00000000" // /* MW 3 */ + 8824 "00101000" // /* MW 2 */ + 8825 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8829 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 8835 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 136 first +.src_ref 3 "elementwise_unary.h" 142 37 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 171 19 +.function_start + 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8849 "00010000" // /* MW 11 */ + 8850 "10001000" // /* MW 10 */ + 8851 "01111001" // /* MW 9 */ + 8852 "00001000" // /* MW 8 */ + 8853 "00000000" // /* MW 7 */ + 8854 "00000000" // /* MW 6 */ + 8855 "01101000" // /* MW 5 */ + 8856 "00111010" // /* MW 4 */ + 8857 "10000000" // /* MW 3 */ + 8858 "11000010" // /* MW 2 */ + 8859 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 142 78 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 190 19 first + 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8861 "00010000" // /* MW 11 */ + 8862 "10100000" // /* MW 10 */ + 8863 "10111001" // /* MW 9 */ + 8864 "00001001" // /* MW 8 */ + 8865 "00000000" // /* MW 7 */ + 8866 "00000000" // /* MW 6 */ + 8867 "01101000" // /* MW 5 */ + 8868 "00111001" // /* MW 4 */ + 8869 "00000000" // /* MW 3 */ + 8870 "01010001" // /* MW 2 */ + 8871 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 + 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8873 "11000000" // /* MW 3 */ + 8874 "00010100" // /* MW 2 */ + 8875 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 first + 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8877 "00010000" // /* MW 3 */ + 8878 "01100000" // /* MW 2 */ + 8879 "00011010" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 103 16 first + 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8881 "01010010" // /* MW 3 */ + 8882 "00011100" // /* MW 2 */ + 8883 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 142 37 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8885 "00010110" // /* MW 3 */ + 8886 "00000000" // /* MW 2 */ + 8887 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 8 "clip_impl.h" 104 16 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8889 "01101000" // /* MW 5 */ + 8890 "00111010" // /* MW 4 */ + 8891 "01010000" // /* MW 3 */ + 8892 "10000110" // /* MW 2 */ + 8893 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8895 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8901 "10110100" // /* MW 3 */ + 8902 "00011100" // /* MW 2 */ + 8903 "00111000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8905 "01110010" // /* MW 3 */ + 8906 "00001001" // /* MW 2 */ + 8907 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 142 78 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8909 "01111000" // /* MW 9 */ + 8910 "00110110" // /* MW 8 */ + 8911 "01010000" // /* MW 7 */ + 8912 "11101101" // /* MW 6 */ + 8913 "00011000" // /* MW 5 */ + 8914 "00000001" // /* MW 4 */ + 8915 "01101000" // /* MW 3 */ + 8916 "00111010" // /* MW 2 */ + 8917 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 154 8 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8919 "11111110" // /* MW 3 */ + 8920 "01111000" // /* MW 2 */ + 8921 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8923 "01110010" // /* MW 3 */ + 8924 "10000101" // /* MW 2 */ + 8925 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8927 "10101100" // /* MW 3 */ + 8928 "10101000" // /* MW 2 */ + 8929 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8931 "01100000" // /* MW 13 */ + 8932 "00101011" // /* MW 12 */ + 8933 "00000000" // /* MW 11 */ + 8934 "11001111" // /* MW 10 */ + 8935 "00000110" // /* MW 9 */ + 8936 "00110001" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "01101000" // /* MW 5 */ + 8940 "00111001" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8945 "00000000" // /* MW 15 */ + 8946 "00000000" // /* MW 14 */ + 8947 "01111000" // /* MW 13 */ + 8948 "01010110" // /* MW 12 */ + 8949 "11011000" // /* MW 11 */ + 8950 "00000001" // /* MW 10 */ + 8951 "00000000" // /* MW 9 */ + 8952 "00000000" // /* MW 8 */ + 8953 "11010011" // /* MW 7 */ + 8954 "00011100" // /* MW 6 */ + 8955 "00100001" // /* MW 5 */ + 8956 "00000000" // /* MW 4 */ + 8957 "11110000" // /* MW 3 */ + 8958 "00101100" // /* MW 2 */ + 8959 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8961 "00000000" // /* MW 15 */ + 8962 "00000000" // /* MW 14 */ + 8963 "01111000" // /* MW 13 */ + 8964 "00110110" // /* MW 12 */ + 8965 "01010000" // /* MW 11 */ + 8966 "00000001" // /* MW 10 */ + 8967 "00000000" // /* MW 9 */ + 8968 "00000000" // /* MW 8 */ + 8969 "01011011" // /* MW 7 */ + 8970 "00000001" // /* MW 6 */ + 8971 "00100000" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "11110000" // /* MW 3 */ + 8974 "00101100" // /* MW 2 */ + 8975 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8977 "00000000" // /* MW 15 */ + 8978 "00000000" // /* MW 14 */ + 8979 "01111000" // /* MW 13 */ + 8980 "01010110" // /* MW 12 */ + 8981 "11010100" // /* MW 11 */ + 8982 "00000000" // /* MW 10 */ + 8983 "00000000" // /* MW 9 */ + 8984 "00000000" // /* MW 8 */ + 8985 "11010011" // /* MW 7 */ + 8986 "00011101" // /* MW 6 */ + 8987 "01101001" // /* MW 5 */ + 8988 "00111010" // /* MW 4 */ + 8989 "11110000" // /* MW 3 */ + 8990 "00101100" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8993 "00000000" // /* MW 15 */ + 8994 "00000000" // /* MW 14 */ + 8995 "01111000" // /* MW 13 */ + 8996 "00110110" // /* MW 12 */ + 8997 "10001000" // /* MW 11 */ + 8998 "00000001" // /* MW 10 */ + 8999 "00000000" // /* MW 9 */ + 9000 "00000000" // /* MW 8 */ + 9001 "01011011" // /* MW 7 */ + 9002 "00000001" // /* MW 6 */ + 9003 "01101000" // /* MW 5 */ + 9004 "00111001" // /* MW 4 */ + 9005 "11110000" // /* MW 3 */ + 9006 "00101100" // /* MW 2 */ + 9007 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9009 "00000000" // /* MW 15 */ + 9010 "00000000" // /* MW 14 */ + 9011 "01111000" // /* MW 13 */ + 9012 "01010110" // /* MW 12 */ + 9013 "11011000" // /* MW 11 */ + 9014 "00000001" // /* MW 10 */ + 9015 "00000000" // /* MW 9 */ + 9016 "00000000" // /* MW 8 */ + 9017 "11010011" // /* MW 7 */ + 9018 "00011100" // /* MW 6 */ + 9019 "00100001" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "11110000" // /* MW 3 */ + 9022 "00101100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.src_ref 4 "max_min.hpp" 20 104 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9025 "00000000" // /* MW 15 */ + 9026 "00000000" // /* MW 14 */ + 9027 "01111000" // /* MW 13 */ + 9028 "00110110" // /* MW 12 */ + 9029 "01010000" // /* MW 11 */ + 9030 "00000001" // /* MW 10 */ + 9031 "00000000" // /* MW 9 */ + 9032 "00000000" // /* MW 8 */ + 9033 "01011011" // /* MW 7 */ + 9034 "00000001" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9041 "01110000" // /* MW 7 */ + 9042 "01010110" // /* MW 6 */ + 9043 "11010100" // /* MW 5 */ + 9044 "00000000" // /* MW 4 */ + 9045 "01100000" // /* MW 3 */ + 9046 "10111010" // /* MW 2 */ + 9047 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9049 "01101100" // /* MW 3 */ + 9050 "00010000" // /* MW 2 */ + 9051 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first + 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9053 "01110000" // /* MW 7 */ + 9054 "01010110" // /* MW 6 */ + 9055 "11011000" // /* MW 5 */ + 9056 "00000001" // /* MW 4 */ + 9057 "01100000" // /* MW 3 */ + 9058 "10011010" // /* MW 2 */ + 9059 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 158 4 first + 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9061 "11011001" // /* MW 5 */ + 9062 "01000000" // /* MW 4 */ + 9063 "00000101" // /* MW 3 */ + 9064 "00000000" // /* MW 2 */ + 9065 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9067 "01110000" // /* MW 7 */ + 9068 "01010110" // /* MW 6 */ + 9069 "11010100" // /* MW 5 */ + 9070 "00000000" // /* MW 4 */ + 9071 "01100000" // /* MW 3 */ + 9072 "10111010" // /* MW 2 */ + 9073 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9075 "01101100" // /* MW 3 */ + 9076 "00010000" // /* MW 2 */ + 9077 "00011011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.delay_slot + 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9079 "10101100" // /* MW 3 */ + 9080 "10110000" // /* MW 2 */ + 9081 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.delay_slot + 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9083 "11010011" // /* MW 3 */ + 9084 "00011100" // /* MW 2 */ + 9085 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9087 "11010011" // /* MW 3 */ + 9088 "00011101" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 9089 "00001001" // /* MW 1 */ +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 176 first +.src_ref 7 "superkernels.cpp" 181 6 +.function_start + 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9105 "10000000" // /* MW 5 */ + 9106 "11001000" // /* MW 4 */ + 9107 "11000110" // /* MW 3 */ + 9108 "00000111" // /* MW 2 */ + 9109 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 first + 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9111 "11000001" // /* MW 5 */ + 9112 "10110101" // /* MW 4 */ + 9113 "11011000" // /* MW 3 */ + 9114 "11000010" // /* MW 2 */ + 9115 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 176 + 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9117 "00000001" // /* MW 5 */ + 9118 "00000000" // /* MW 4 */ + 9119 "00000000" // /* MW 3 */ + 9120 "00001000" // /* MW 2 */ + 9121 "00000000" // /* MW 1 */ + 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9123 "01110000" // /* MW 7 */ + 9124 "11010000" // /* MW 6 */ + 9125 "00001011" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "10110000" // /* MW 3 */ + 9128 "01100011" // /* MW 2 */ + 9129 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 + 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9131 "00010001" // /* MW 9 */ + 9132 "00101000" // /* MW 8 */ + 9133 "00110010" // /* MW 7 */ + 9134 "11110011" // /* MW 6 */ + 9135 "00000001" // /* MW 5 */ + 9136 "00000000" // /* MW 4 */ + 9137 "10110000" // /* MW 3 */ + 9138 "10000010" // /* MW 2 */ + 9139 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9141 "11000000" // /* MW 3 */ + 9142 "11010100" // /* MW 2 */ + 9143 "00011011" // /* MW 1 */ + 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9145 "00000000" // /* MW 1 */ + 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9147 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 +.src_ref 7 "superkernels.cpp" 181 16 + 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */ + 9149 "00000001" // /* MW 5 */ + 9150 "01000000" // /* MW 4 */ + 9151 "00110000" // /* MW 3 */ + 9152 "00010010" // /* MW 2 */ + 9153 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 22 first +.delay_slot + 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "10010000" // /* MW 3 */ + 9156 "01100010" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 30 +.delay_slot + 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "11111011" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010100" // /* MW 1 */ +.delay_slot + 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00111101" // /* MW 3 */ + 9164 "11110100" // /* MW 2 */ + 9165 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9167 "01110000" // /* MW 7 */ + 9168 "01100000" // /* MW 6 */ + 9169 "00110000" // /* MW 5 */ + 9170 "00000011" // /* MW 4 */ + 9171 "00110000" // /* MW 3 */ + 9172 "11000110" // /* MW 2 */ + 9173 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 +.src_ref 7 "superkernels.cpp" 195 2 +.delay_slot + 9174 "01000100" // MOVXM p0, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9175 "10000000" // /* MW 5 */ + 9176 "11001011" // /* MW 4 */ + 9177 "11000000" // /* MW 3 */ + 9178 "00000111" // /* MW 2 */ + 9179 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9181 "11010000" // /* MW 5 */ + 9182 "11001000" // /* MW 4 */ + 9183 "11000100" // /* MW 3 */ + 9184 "00000111" // /* MW 2 */ + 9185 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9187 "00010000" // /* MW 9 */ + 9188 "00110010" // /* MW 8 */ + 9189 "00110010" // /* MW 7 */ + 9190 "11110001" // /* MW 6 */ + 9191 "00000001" // /* MW 5 */ + 9192 "00000000" // /* MW 4 */ + 9193 "11100000" // /* MW 3 */ + 9194 "11000000" // /* MW 2 */ + 9195 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */ + 9199 "00000001" // /* MW 5 */ + 9200 "00000000" // /* MW 4 */ + 9201 "00010000" // /* MW 3 */ + 9202 "00010001" // /* MW 2 */ + 9203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9205 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9207 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00110001" // /* MW 3 */ + 9210 "00100000" // /* MW 2 */ + 9211 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00000101" // /* MW 3 */ + 9214 "00100000" // /* MW 2 */ + 9215 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9217 "00000000" // /* MW 15 */ + 9218 "00000000" // /* MW 14 */ + 9219 "01111000" // /* MW 13 */ + 9220 "10100101" // /* MW 12 */ + 9221 "00000001" // /* MW 11 */ + 9222 "00000000" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "10000000" // /* MW 8 */ + 9225 "00010001" // /* MW 7 */ + 9226 "00000110" // /* MW 6 */ + 9227 "00100010" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 +.return_address + 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10100000" // /* MW 5 */ + 9234 "11001000" // /* MW 4 */ + 9235 "11000100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 first +.src_ref 7 "superkernels.cpp" 188 43 + 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "11100000" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110001" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11000010" // /* MW 2 */ + 9247 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 +.src_ref 7 "superkernels.cpp" 188 43 +.src_ref 7 "superkernels.cpp" 195 2 + 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "11100000" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110001" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000110" // /* MW 2 */ + 9257 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 first +.src_ref 7 "superkernels.cpp" 188 16 +.src_ref 7 "superkernels.cpp" 193 47 + 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9259 "00010000" // /* MW 9 */ + 9260 "00101010" // /* MW 8 */ + 9261 "10110010" // /* MW 7 */ + 9262 "11110000" // /* MW 6 */ + 9263 "00000001" // /* MW 5 */ + 9264 "00000000" // /* MW 4 */ + 9265 "01010000" // /* MW 3 */ + 9266 "11001011" // /* MW 2 */ + 9267 "01001000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ + 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */ + 9273 "00000000" // /* MW 5 */ + 9274 "00000000" // /* MW 4 */ + 9275 "00111000" // /* MW 3 */ + 9276 "00010010" // /* MW 2 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 +.delay_slot + 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "11000000" // /* MW 5 */ + 9280 "11001000" // /* MW 4 */ + 9281 "11000000" // /* MW 3 */ + 9282 "00000111" // /* MW 2 */ + 9283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 27 first +.delay_slot + 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9287 "00001111" // /* MW 3 */ + 9288 "01100001" // /* MW 2 */ + 9289 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 first +.delay_slot + 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9291 "10100011" // /* MW 5 */ + 9292 "00001100" // /* MW 4 */ + 9293 "11110000" // /* MW 3 */ + 9294 "00101100" // /* MW 2 */ + 9295 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 16 first +.delay_slot + 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9297 "00000000" // /* MW 15 */ + 9298 "00000000" // /* MW 14 */ + 9299 "01111000" // /* MW 13 */ + 9300 "10100101" // /* MW 12 */ + 9301 "00000001" // /* MW 11 */ + 9302 "00000000" // /* MW 10 */ + 9303 "00000000" // /* MW 9 */ + 9304 "10000000" // /* MW 8 */ + 9305 "00010001" // /* MW 7 */ + 9306 "00000110" // /* MW 6 */ + 9307 "00100001" // /* MW 5 */ + 9308 "00000000" // /* MW 4 */ + 9309 "11110000" // /* MW 3 */ + 9310 "00101100" // /* MW 2 */ + 9311 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 193 47 +.src_ref 7 "superkernels.cpp" 195 2 + 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9313 "00000000" // /* MW 15 */ + 9314 "00000000" // /* MW 14 */ + 9315 "00010000" // /* MW 13 */ + 9316 "00101010" // /* MW 12 */ + 9317 "10110010" // /* MW 11 */ + 9318 "11110000" // /* MW 10 */ + 9319 "00000001" // /* MW 9 */ + 9320 "00000000" // /* MW 8 */ + 9321 "10001011" // /* MW 7 */ + 9322 "10000000" // /* MW 6 */ + 9323 "00100010" // /* MW 5 */ + 9324 "00000000" // /* MW 4 */ + 9325 "11110000" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "00000000" // /* MW 7 */ + 9330 "11000011" // /* MW 6 */ + 9331 "10110011" // /* MW 5 */ + 9332 "00000011" // /* MW 4 */ + 9333 "01100000" // /* MW 3 */ + 9334 "10010001" // /* MW 2 */ + 9335 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9337 "00010000" // /* MW 9 */ + 9338 "00100000" // /* MW 8 */ + 9339 "00110010" // /* MW 7 */ + 9340 "11110000" // /* MW 6 */ + 9341 "00000001" // /* MW 5 */ + 9342 "00000000" // /* MW 4 */ + 9343 "11010000" // /* MW 3 */ + 9344 "11101110" // /* MW 2 */ + 9345 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9347 "00010110" // /* MW 3 */ + 9348 "11111110" // /* MW 2 */ + 9349 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9351 "00110110" // /* MW 3 */ + 9352 "11111110" // /* MW 2 */ + 9353 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9355 "01010110" // /* MW 3 */ + 9356 "01000110" // /* MW 2 */ + 9357 "00000111" // /* MW 1 */ + 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9359 "00000000" // /* MW 1 */ + 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9361 "00000000" // /* MW 1 */ + 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9363 "00000000" // /* MW 1 */ + 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9365 "00000000" // /* MW 1 */ + 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9367 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9369 "00000010" // /* MW 3 */ + 9370 "01100001" // /* MW 2 */ + 9371 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9373 "00010001" // /* MW 3 */ + 9374 "00000110" // /* MW 2 */ + 9375 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9377 "11111101" // /* MW 3 */ + 9378 "11100000" // /* MW 2 */ + 9379 "00010111" // /* MW 1 */ + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ + 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9383 "00000000" // /* MW 1 */ + 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9385 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9387 "00001000" // /* MW 3 */ + 9388 "10010011" // /* MW 2 */ + 9389 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 + 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9391 "10000001" // /* MW 5 */ + 9392 "10101101" // /* MW 4 */ + 9393 "10100111" // /* MW 3 */ + 9394 "00000000" // /* MW 2 */ + 9395 "00000100" // /* MW 1 */ + 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9397 "00000000" // /* MW 1 */ + 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first + 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "00110110" // /* MW 3 */ + 9402 "00000110" // /* MW 2 */ + 9403 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9405 "10000001" // /* MW 5 */ + 9406 "11011101" // /* MW 4 */ + 9407 "11011100" // /* MW 3 */ + 9408 "11001010" // /* MW 2 */ + 9409 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 47 first + 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9411 "01110110" // /* MW 3 */ + 9412 "00000110" // /* MW 2 */ + 9413 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9415 "10011110" // /* MW 3 */ + 9416 "01011100" // /* MW 2 */ + 9417 "00000111" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 195 2 first +.no_stack_arguments + 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */ + 9421 "00000001" // /* MW 5 */ + 9422 "00000000" // /* MW 4 */ + 9423 "01001000" // /* MW 3 */ + 9424 "00010001" // /* MW 2 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9427 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first +.delay_slot + 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9429 "00000111" // /* MW 3 */ + 9430 "01100010" // /* MW 2 */ + 9431 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.delay_slot + 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9433 "00110001" // /* MW 3 */ + 9434 "00000110" // /* MW 2 */ + 9435 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 first +.delay_slot + 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9437 "00001101" // /* MW 3 */ + 9438 "11100001" // /* MW 2 */ + 9439 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 +.delay_slot + 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9441 "00000000" // /* MW 15 */ + 9442 "00000000" // /* MW 14 */ + 9443 "10101000" // /* MW 13 */ + 9444 "10100000" // /* MW 12 */ + 9445 "00110100" // /* MW 11 */ + 9446 "00000000" // /* MW 10 */ + 9447 "00000000" // /* MW 9 */ + 9448 "00000000" // /* MW 8 */ + 9449 "01011011" // /* MW 7 */ + 9450 "00000001" // /* MW 6 */ + 9451 "00100000" // /* MW 5 */ + 9452 "00000000" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 +.src_ref 7 "superkernels.cpp" 198 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9457 "00010000" // /* MW 9 */ + 9458 "00100000" // /* MW 8 */ + 9459 "00110010" // /* MW 7 */ + 9460 "11110011" // /* MW 6 */ + 9461 "00000001" // /* MW 5 */ + 9462 "00000000" // /* MW 4 */ + 9463 "11010000" // /* MW 3 */ + 9464 "11000110" // /* MW 2 */ + 9465 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9467 "00000101" // /* MW 3 */ + 9468 "00100000" // /* MW 2 */ + 9469 "00010000" // /* MW 1 */ + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9481 "00001000" // /* MW 3 */ + 9482 "01010001" // /* MW 2 */ + 9483 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9485 "00010000" // /* MW 9 */ + 9486 "00110000" // /* MW 8 */ + 9487 "00110010" // /* MW 7 */ + 9488 "11110001" // /* MW 6 */ + 9489 "00000001" // /* MW 5 */ + 9490 "00000000" // /* MW 4 */ + 9491 "11010000" // /* MW 3 */ + 9492 "11001110" // /* MW 2 */ + 9493 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 first + 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9495 "00110110" // /* MW 3 */ + 9496 "00000110" // /* MW 2 */ + 9497 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 + 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9499 "01010110" // /* MW 3 */ + 9500 "00000110" // /* MW 2 */ + 9501 "00000010" // /* MW 1 */ + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ + 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9507 "00000000" // /* MW 1 */ + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00110001" // /* MW 3 */ + 9512 "00100001" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9515 "00010001" // /* MW 3 */ + 9516 "11100110" // /* MW 2 */ + 9517 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 16 first + 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9519 "00101000" // /* MW 3 */ + 9520 "01100001" // /* MW 2 */ + 9521 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 + 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */ + 9523 "00000001" // /* MW 5 */ + 9524 "01000000" // /* MW 4 */ + 9525 "10101000" // /* MW 3 */ + 9526 "00010010" // /* MW 2 */ + 9527 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9537 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 + 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9539 "00000001" // /* MW 3 */ + 9540 "00100000" // /* MW 2 */ + 9541 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 first + 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "00000000" // /* MW 7 */ + 9546 "10000000" // /* MW 6 */ + 9547 "00010001" // /* MW 5 */ + 9548 "00000110" // /* MW 4 */ + 9549 "11110110" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 200 + 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9553 "00111001" // /* MW 3 */ + 9554 "11110100" // /* MW 2 */ + 9555 "00000111" // /* MW 1 */ + 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00011001" // /* MW 3 */ + 9558 "11111011" // /* MW 2 */ + 9559 "00000111" // /* MW 1 */ + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9567 "11110001" // /* MW 3 */ + 9568 "11111101" // /* MW 2 */ + 9569 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9571 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9573 "00000000" // /* MW 3 */ + 9574 "00101000" // /* MW 2 */ + 9575 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9577 "10100000" // /* MW 3 */ + 9578 "01100111" // /* MW 2 */ + 9579 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 +.delay_slot + 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9581 "00000001" // /* MW 5 */ + 9582 "00000000" // /* MW 4 */ + 9583 "00000000" // /* MW 3 */ + 9584 "11111000" // /* MW 2 */ + 9585 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9587 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9591 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9601 "01011000" // /* MW 9 */ + 9602 "00000000" // /* MW 8 */ + 9603 "00001000" // /* MW 7 */ + 9604 "00001011" // /* MW 6 */ + 9605 "00100000" // /* MW 5 */ + 9606 "00001000" // /* MW 4 */ + 9607 "11010000" // /* MW 3 */ + 9608 "10000101" // /* MW 2 */ + 9609 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9611 "00000001" // /* MW 3 */ + 9612 "10000000" // /* MW 2 */ + 9613 "00010111" // /* MW 1 */ + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ + 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9617 "00000000" // /* MW 1 */ + 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9619 "00000000" // /* MW 1 */ + 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9621 "00000000" // /* MW 1 */ + 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9623 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9625 "00101001" // /* MW 3 */ + 9626 "00011100" // /* MW 2 */ + 9627 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9629 "00101110" // /* MW 3 */ + 9630 "00011100" // /* MW 2 */ + 9631 "00000001" // /* MW 1 */ + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ + 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9637 "00000000" // /* MW 1 */ + 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9639 "00000000" // /* MW 1 */ + 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9641 "00000000" // /* MW 1 */ + 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9643 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9645 "00101001" // /* MW 3 */ + 9646 "00011100" // /* MW 2 */ + 9647 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9649 "00101110" // /* MW 3 */ + 9650 "00000100" // /* MW 2 */ + 9651 "00000001" // /* MW 1 */ + 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9653 "00000000" // /* MW 1 */ + 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9655 "00000000" // /* MW 1 */ + 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9657 "00000000" // /* MW 1 */ + 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9659 "00000000" // /* MW 1 */ + 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9661 "00000000" // /* MW 1 */ + 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9663 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00101001" // /* MW 3 */ + 9666 "00011100" // /* MW 2 */ + 9667 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9669 "01110110" // /* MW 3 */ + 9670 "00010100" // /* MW 2 */ + 9671 "00000001" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ + 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9679 "00000000" // /* MW 1 */ + 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9681 "00000000" // /* MW 1 */ + 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9683 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9685 "01110001" // /* MW 3 */ + 9686 "01001100" // /* MW 2 */ + 9687 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9689 "00010111" // /* MW 3 */ + 9690 "00000100" // /* MW 2 */ + 9691 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9693 "00000000" // /* MW 3 */ + 9694 "00101000" // /* MW 2 */ + 9695 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9697 "00000000" // /* MW 5 */ + 9698 "10111110" // /* MW 4 */ + 9699 "11110000" // /* MW 3 */ + 9700 "00000000" // /* MW 2 */ + 9701 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9703 "00010100" // /* MW 3 */ + 9704 "11000010" // /* MW 2 */ + 9705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00100111" // /* MW 3 */ + 9708 "01110110" // /* MW 2 */ + 9709 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "10000010" // /* MW 3 */ + 9712 "00000001" // /* MW 2 */ + 9713 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9715 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9729 "00000001" // /* MW 5 */ + 9730 "00000000" // /* MW 4 */ + 9731 "00000000" // /* MW 3 */ + 9732 "00001000" // /* MW 2 */ + 9733 "00000000" // /* MW 1 */ + 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9735 "00111101" // /* MW 3 */ + 9736 "11111000" // /* MW 2 */ + 9737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */ + 9739 "00000001" // /* MW 5 */ + 9740 "00000000" // /* MW 4 */ + 9741 "11000000" // /* MW 3 */ + 9742 "00010010" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.delay_slot + 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "10011101" // /* MW 3 */ + 9746 "11111111" // /* MW 2 */ + 9747 "00001111" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot + 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9749 "11000000" // /* MW 3 */ + 9750 "01100000" // /* MW 2 */ + 9751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9757 "01100111" // /* MW 3 */ + 9758 "00000001" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.return_address + 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9761 "00111001" // /* MW 3 */ + 9762 "11111000" // /* MW 2 */ + 9763 "00000111" // /* MW 1 */ + 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9765 "00000000" // /* MW 1 */ + 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9767 "00000000" // /* MW 1 */ + 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9769 "00000000" // /* MW 1 */ + 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9771 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9773 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9775 "10011001" // /* MW 3 */ + 9776 "11111111" // /* MW 2 */ + 9777 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9779 "00000000" // /* MW 3 */ + 9780 "00101000" // /* MW 2 */ + 9781 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9789 "00001001" // /* MW 3 */ + 9790 "00100000" // /* MW 2 */ + 9791 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "mul_impl.h" 193 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9793 "01110001" // /* MW 9 */ + 9794 "00000000" // /* MW 8 */ + 9795 "00000000" // /* MW 7 */ + 9796 "00000000" // /* MW 6 */ + 9797 "11111110" // /* MW 5 */ + 9798 "00111111" // /* MW 4 */ + 9799 "00110000" // /* MW 3 */ + 9800 "11000010" // /* MW 2 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9801 "11101000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 3 "elementwise_binary_shared.h" 107 first +.src_ref 3 "elementwise_binary_shared.h" 119 37 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.function_start + 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9809 "11000000" // /* MW 3 */ + 9810 "00010110" // /* MW 2 */ + 9811 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first + 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "00000111" // /* MW 3 */ + 9814 "01100000" // /* MW 2 */ + 9815 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 122 22 first + 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "01010010" // /* MW 3 */ + 9818 "00011100" // /* MW 2 */ + 9819 "00000011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 15 first + 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9821 "10010110" // /* MW 3 */ + 9822 "00000100" // /* MW 2 */ + 9823 "00000011" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00001001" // /* MW 3 */ + 9834 "00000110" // /* MW 2 */ + 9835 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 107 + 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9837 "00000001" // /* MW 5 */ + 9838 "00000000" // /* MW 4 */ + 9839 "00000000" // /* MW 3 */ + 9840 "00010000" // /* MW 2 */ + 9841 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9843 "01001100" // /* MW 3 */ + 9844 "11000110" // /* MW 2 */ + 9845 "00010000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 +.src_ref 3 "elementwise_binary_shared.h" 124 8 + 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */ + 9847 "01100000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "00010000" // /* MW 7 */ + 9850 "11100010" // /* MW 6 */ + 9851 "00000100" // /* MW 5 */ + 9852 "00000110" // /* MW 4 */ + 9853 "00000000" // /* MW 3 */ + 9854 "00000001" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 first +.delay_slot + 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9857 "01110010" // /* MW 3 */ + 9858 "00000101" // /* MW 2 */ + 9859 "00011000" // /* MW 1 */ +.delay_slot + 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9861 "11000000" // /* MW 3 */ + 9862 "01011110" // /* MW 2 */ + 9863 "00011000" // /* MW 1 */ +.delay_slot + 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9865 "11100000" // /* MW 3 */ + 9866 "01100101" // /* MW 2 */ + 9867 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9869 "10000001" // /* MW 5 */ + 9870 "11011101" // /* MW 4 */ + 9871 "00001010" // /* MW 3 */ + 9872 "11110010" // /* MW 2 */ + 9873 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first +.delay_slot + 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9875 "00010011" // /* MW 3 */ + 9876 "00000100" // /* MW 2 */ + 9877 "00001111" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.src_ref 3 "elementwise_binary_shared.h" 131 19 + 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9879 "01110010" // /* MW 9 */ + 9880 "10111001" // /* MW 8 */ + 9881 "00000100" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "00001011" // /* MW 5 */ + 9884 "10000000" // /* MW 4 */ + 9885 "10000100" // /* MW 3 */ + 9886 "10000010" // /* MW 2 */ + 9887 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 126 34 first +.src_ref 3 "elementwise_binary_shared.h" 131 19 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000001" // /* MW 5 */ + 9890 "00000001" // /* MW 4 */ + 9891 "01010100" // /* MW 3 */ + 9892 "00000001" // /* MW 2 */ + 9893 "10000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 131 12 +.src_ref 3 "elementwise_binary_shared.h" 131 35 + 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */ + 9907 "00000001" // /* MW 5 */ + 9908 "01000000" // /* MW 4 */ + 9909 "01110000" // /* MW 3 */ + 9910 "00010011" // /* MW 2 */ + 9911 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9913 "00000000" // /* MW 3 */ + 9914 "00000000" // /* MW 2 */ + 9915 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11010000" // /* MW 5 */ + 9918 "11001000" // /* MW 4 */ + 9919 "11001000" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 9929 "00100000" // /* MW 9 */ + 9930 "00000000" // /* MW 8 */ + 9931 "00000000" // /* MW 7 */ + 9932 "11011110" // /* MW 6 */ + 9933 "00000100" // /* MW 5 */ + 9934 "00000000" // /* MW 4 */ + 9935 "10000000" // /* MW 3 */ + 9936 "00000100" // /* MW 2 */ + 9937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9947 "00100110" // /* MW 5 */ + 9948 "00001000" // /* MW 4 */ + 9949 "11110000" // /* MW 3 */ + 9950 "00101100" // /* MW 2 */ + 9951 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "10000000" // /* MW 3 */ + 9954 "00000000" // /* MW 2 */ + 9955 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01010000" // /* MW 11 */ + 9958 "00000000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "00000001" // /* MW 8 */ + 9961 "00010011" // /* MW 7 */ + 9962 "00000100" // /* MW 6 */ + 9963 "00100001" // /* MW 5 */ + 9964 "00000000" // /* MW 4 */ + 9965 "11110000" // /* MW 3 */ + 9966 "00101100" // /* MW 2 */ + 9967 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */ + 9969 "00000000" // /* MW 5 */ + 9970 "00000000" // /* MW 4 */ + 9971 "11001000" // /* MW 3 */ + 9972 "00010011" // /* MW 2 */ + 9973 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9975 "01110000" // /* MW 7 */ + 9976 "01100000" // /* MW 6 */ + 9977 "10110000" // /* MW 5 */ + 9978 "00000011" // /* MW 4 */ + 9979 "01100000" // /* MW 3 */ + 9980 "10010001" // /* MW 2 */ + 9981 "00010011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9989 "10000001" // /* MW 11 */ + 9990 "10101101" // /* MW 10 */ + 9991 "00000000" // /* MW 9 */ + 9992 "00000000" // /* MW 8 */ + 9993 "00000000" // /* MW 7 */ + 9994 "00000000" // /* MW 6 */ + 9995 "00100000" // /* MW 5 */ + 9996 "00000000" // /* MW 4 */ + 9997 "11110000" // /* MW 3 */ + 9998 "00101100" // /* MW 2 */ + 9999 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 +.src_ref 3 "elementwise_binary_shared.h" 150 97 + 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10001 "00001101" // /* MW 3 */ + 10002 "00000100" // /* MW 2 */ + 10003 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 97 first + 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10005 "01000111" // /* MW 3 */ + 10006 "10000100" // /* MW 2 */ + 10007 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */ + 10009 "00000001" // /* MW 5 */ + 10010 "01000000" // /* MW 4 */ + 10011 "10100000" // /* MW 3 */ + 10012 "00010011" // /* MW 2 */ + 10013 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "00000000" // /* MW 5 */ + 10016 "00100000" // /* MW 4 */ + 10017 "00000000" // /* MW 3 */ + 10018 "10000000" // /* MW 2 */ + 10019 "00111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10021 "11010000" // /* MW 5 */ + 10022 "11001000" // /* MW 4 */ + 10023 "11001000" // /* MW 3 */ + 10024 "00000111" // /* MW 2 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10033 "00000000" // /* MW 15 */ + 10034 "00000000" // /* MW 14 */ + 10035 "00010000" // /* MW 13 */ + 10036 "00000000" // /* MW 12 */ + 10037 "00001000" // /* MW 11 */ + 10038 "00000000" // /* MW 10 */ + 10039 "11100000" // /* MW 9 */ + 10040 "00101111" // /* MW 8 */ + 10041 "01011011" // /* MW 7 */ + 10042 "00000001" // /* MW 6 */ + 10043 "00100000" // /* MW 5 */ + 10044 "00000000" // /* MW 4 */ + 10045 "11110000" // /* MW 3 */ + 10046 "00101100" // /* MW 2 */ + 10047 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10049 "01011000" // /* MW 9 */ + 10050 "10111110" // /* MW 8 */ + 10051 "01000111" // /* MW 7 */ + 10052 "00000000" // /* MW 6 */ + 10053 "11010010" // /* MW 5 */ + 10054 "00000010" // /* MW 4 */ + 10055 "01010000" // /* MW 3 */ + 10056 "10000000" // /* MW 2 */ + 10057 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10059 "10000000" // /* MW 3 */ + 10060 "00000000" // /* MW 2 */ + 10061 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10063 "00000000" // /* MW 3 */ + 10064 "00000000" // /* MW 2 */ + 10065 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 171 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10067 "10000000" // /* MW 3 */ + 10068 "00000000" // /* MW 2 */ + 10069 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10073 "00010001" // /* MW 3 */ + 10074 "00000000" // /* MW 2 */ + 10075 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10077 "00100101" // /* MW 5 */ + 10078 "00000001" // /* MW 4 */ + 10079 "11100010" // /* MW 3 */ + 10080 "00000010" // /* MW 2 */ + 10081 "10100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10083 "10000000" // /* MW 3 */ + 10084 "00111010" // /* MW 2 */ + 10085 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10087 "10010110" // /* MW 3 */ + 10088 "01000000" // /* MW 2 */ + 10089 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10093 "00000001" // /* MW 3 */ + 10094 "00000001" // /* MW 2 */ + 10095 "00011000" // /* MW 1 */ + 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10097 "00000000" // /* MW 1 */ + 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10101 "00010010" // /* MW 3 */ + 10102 "00000000" // /* MW 2 */ + 10103 "00000101" // /* MW 1 */ + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10113 "00000000" // /* MW 1 */ + 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10115 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10117 "01110010" // /* MW 3 */ + 10118 "00000001" // /* MW 2 */ + 10119 "00011000" // /* MW 1 */ + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100110" // /* MW 5 */ + 10124 "11111000" // /* MW 4 */ + 10125 "11111111" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 166 4 first +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first + 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10129 "00010000" // /* MW 11 */ + 10130 "00000000" // /* MW 10 */ + 10131 "01111100" // /* MW 9 */ + 10132 "00001000" // /* MW 8 */ + 10133 "00000000" // /* MW 7 */ + 10134 "00000000" // /* MW 6 */ + 10135 "11101000" // /* MW 5 */ + 10136 "01010000" // /* MW 4 */ + 10137 "11011110" // /* MW 3 */ + 10138 "10001010" // /* MW 2 */ + 10139 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10141 "00010000" // /* MW 11 */ + 10142 "00011000" // /* MW 10 */ + 10143 "10111100" // /* MW 9 */ + 10144 "00001001" // /* MW 8 */ + 10145 "00000000" // /* MW 7 */ + 10146 "00000000" // /* MW 6 */ + 10147 "01101000" // /* MW 5 */ + 10148 "10010000" // /* MW 4 */ + 10149 "00000010" // /* MW 3 */ + 10150 "01100011" // /* MW 2 */ + 10151 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 177 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10153 "11110001" // /* MW 7 */ + 10154 "00000000" // /* MW 6 */ + 10155 "11101000" // /* MW 5 */ + 10156 "01010000" // /* MW 4 */ + 10157 "01111110" // /* MW 3 */ + 10158 "00000101" // /* MW 2 */ + 10159 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10161 "01101000" // /* MW 5 */ + 10162 "10010000" // /* MW 4 */ + 10163 "01010010" // /* MW 3 */ + 10164 "10010000" // /* MW 2 */ + 10165 "10000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10167 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10169 "00101011" // /* MW 3 */ + 10170 "00001000" // /* MW 2 */ + 10171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10175 "00111101" // /* MW 3 */ + 10176 "10000100" // /* MW 2 */ + 10177 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10179 "00000001" // /* MW 7 */ + 10180 "00000010" // /* MW 6 */ + 10181 "00000001" // /* MW 5 */ + 10182 "10000110" // /* MW 4 */ + 10183 "01111110" // /* MW 3 */ + 10184 "01110001" // /* MW 2 */ + 10185 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10187 "11101000" // /* MW 5 */ + 10188 "01010000" // /* MW 4 */ + 10189 "01111110" // /* MW 3 */ + 10190 "00000011" // /* MW 2 */ + 10191 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "11010100" // /* MW 9 */ + 10200 "00001001" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "00100000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "01110000" // /* MW 3 */ + 10206 "00000101" // /* MW 2 */ + 10207 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "00100000" // /* MW 5 */ + 10220 "00000000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00010000" // /* MW 15 */ + 10226 "00001000" // /* MW 14 */ + 10227 "01111000" // /* MW 13 */ + 10228 "10100101" // /* MW 12 */ + 10229 "00000001" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "01111000" // /* MW 13 */ + 10244 "10100101" // /* MW 12 */ + 10245 "00000001" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "01011011" // /* MW 7 */ + 10250 "00000001" // /* MW 6 */ + 10251 "11101000" // /* MW 5 */ + 10252 "01010000" // /* MW 4 */ + 10253 "01111110" // /* MW 3 */ + 10254 "00000011" // /* MW 2 */ + 10255 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "00000000" // /* MW 8 */ + 10265 "10100011" // /* MW 7 */ + 10266 "00011100" // /* MW 6 */ + 10267 "00100010" // /* MW 5 */ + 10268 "00000000" // /* MW 4 */ + 10269 "01110000" // /* MW 3 */ + 10270 "00000101" // /* MW 2 */ + 10271 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "01111000" // /* MW 13 */ + 10276 "10100101" // /* MW 12 */ + 10277 "00000001" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "00000000" // /* MW 8 */ + 10281 "01011011" // /* MW 7 */ + 10282 "00000001" // /* MW 6 */ + 10283 "00100000" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10289 "00010000" // /* MW 15 */ + 10290 "00001000" // /* MW 14 */ + 10291 "01111000" // /* MW 13 */ + 10292 "10100101" // /* MW 12 */ + 10293 "00000001" // /* MW 11 */ + 10294 "00000000" // /* MW 10 */ + 10295 "00000000" // /* MW 9 */ + 10296 "00000000" // /* MW 8 */ + 10297 "01011011" // /* MW 7 */ + 10298 "00000001" // /* MW 6 */ + 10299 "00100000" // /* MW 5 */ + 10300 "00000000" // /* MW 4 */ + 10301 "11110000" // /* MW 3 */ + 10302 "00101100" // /* MW 2 */ + 10303 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10305 "00000001" // /* MW 5 */ + 10306 "00000000" // /* MW 4 */ + 10307 "00000000" // /* MW 3 */ + 10308 "11110000" // /* MW 2 */ + 10309 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "10100011" // /* MW 3 */ + 10312 "00011100" // /* MW 2 */ + 10313 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10315 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10317 "00000001" // /* MW 3 */ + 10318 "00000010" // /* MW 2 */ + 10319 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10321 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10323 "00000000" // /* MW 3 */ + 10324 "00101000" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "10100011" // /* MW 3 */ + 10328 "00011100" // /* MW 2 */ + 10329 "00001010" // /* MW 1 */ +.delay_slot + 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "10100000" // /* MW 3 */ + 10332 "01100000" // /* MW 2 */ + 10333 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10335 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.delay_slot + 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10337 "10100011" // /* MW 3 */ + 10338 "00011100" // /* MW 2 */ + 10339 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 10341 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 4 "vector.hpp" 538 13 +.src_ref 3 "elementwise_binary_shared.h" 237 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.function_start + 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10353 "01110010" // /* MW 9 */ + 10354 "11110000" // /* MW 8 */ + 10355 "01100000" // /* MW 7 */ + 10356 "00000000" // /* MW 6 */ + 10357 "10001011" // /* MW 5 */ + 10358 "10001000" // /* MW 4 */ + 10359 "10000011" // /* MW 3 */ + 10360 "10000010" // /* MW 2 */ + 10361 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 first +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 + 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10363 "10000001" // /* MW 5 */ + 10364 "11000101" // /* MW 4 */ + 10365 "01010100" // /* MW 3 */ + 10366 "00000001" // /* MW 2 */ + 10367 "01000000" // /* MW 1 */ + 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10369 "00000000" // /* MW 1 */ + 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10371 "00000000" // /* MW 1 */ + 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10373 "00000000" // /* MW 1 */ + 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10375 "00000000" // /* MW 1 */ + 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10377 "00000000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 244 12 +.src_ref 3 "elementwise_binary_shared.h" 244 35 + 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */ + 10381 "00000001" // /* MW 5 */ + 10382 "00000000" // /* MW 4 */ + 10383 "01101000" // /* MW 3 */ + 10384 "00010100" // /* MW 2 */ + 10385 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 237 +.delay_slot + 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10387 "00000001" // /* MW 5 */ + 10388 "00000000" // /* MW 4 */ + 10389 "00000000" // /* MW 3 */ + 10390 "00001000" // /* MW 2 */ + 10391 "00000000" // /* MW 1 */ +.delay_slot + 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10393 "11100000" // /* MW 3 */ + 10394 "01010101" // /* MW 2 */ + 10395 "00011000" // /* MW 1 */ +.delay_slot + 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10397 "11100000" // /* MW 3 */ + 10398 "01100000" // /* MW 2 */ + 10399 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10401 "00101011" // /* MW 3 */ + 10402 "00000111" // /* MW 2 */ + 10403 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10405 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 247 12 first +.no_stack_arguments + 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10407 "00000001" // /* MW 5 */ + 10408 "00000000" // /* MW 4 */ + 10409 "00101000" // /* MW 3 */ + 10410 "00010011" // /* MW 2 */ + 10411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10421 "10000001" // /* MW 11 */ + 10422 "10101101" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "00000000" // /* MW 7 */ + 10426 "00000000" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.return_address + 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10433 "00000000" // /* MW 5 */ + 10434 "00000000" // /* MW 4 */ + 10435 "01111000" // /* MW 3 */ + 10436 "00010100" // /* MW 2 */ + 10437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10447 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 3 "elementwise_binary_shared.h" 245 12 first +.no_stack_arguments + 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10449 "00000001" // /* MW 5 */ + 10450 "00000000" // /* MW 4 */ + 10451 "00101000" // /* MW 3 */ + 10452 "00010011" // /* MW 2 */ + 10453 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.delay_slot + 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10455 "01110000" // /* MW 7 */ + 10456 "01100000" // /* MW 6 */ + 10457 "10110000" // /* MW 5 */ + 10458 "00000000" // /* MW 4 */ + 10459 "01100000" // /* MW 3 */ + 10460 "10010001" // /* MW 2 */ + 10461 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10469 "10000001" // /* MW 11 */ + 10470 "10101101" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00000000" // /* MW 7 */ + 10474 "00000000" // /* MW 6 */ + 10475 "00100000" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.return_address + 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10481 "10000000" // /* MW 3 */ + 10482 "01110001" // /* MW 2 */ + 10483 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 first + 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10485 "00000000" // /* MW 3 */ + 10486 "00101000" // /* MW 2 */ + 10487 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.delay_slot + 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10489 "00000001" // /* MW 5 */ + 10490 "00000000" // /* MW 4 */ + 10491 "00000000" // /* MW 3 */ + 10492 "11111000" // /* MW 2 */ + 10493 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 10501 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 205 first +.src_ref 7 "superkernels.cpp" 210 6 +.function_start + 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10513 "10000000" // /* MW 5 */ + 10514 "11001000" // /* MW 4 */ + 10515 "11000110" // /* MW 3 */ + 10516 "00000111" // /* MW 2 */ + 10517 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 first + 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10519 "11000001" // /* MW 5 */ + 10520 "10110101" // /* MW 4 */ + 10521 "11011000" // /* MW 3 */ + 10522 "11000010" // /* MW 2 */ + 10523 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 205 + 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10525 "00000001" // /* MW 5 */ + 10526 "00000000" // /* MW 4 */ + 10527 "00000000" // /* MW 3 */ + 10528 "00001000" // /* MW 2 */ + 10529 "00000000" // /* MW 1 */ + 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10531 "01110000" // /* MW 7 */ + 10532 "11010000" // /* MW 6 */ + 10533 "00001011" // /* MW 5 */ + 10534 "00000000" // /* MW 4 */ + 10535 "10110000" // /* MW 3 */ + 10536 "01100011" // /* MW 2 */ + 10537 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 + 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10539 "00010001" // /* MW 9 */ + 10540 "00101000" // /* MW 8 */ + 10541 "00110010" // /* MW 7 */ + 10542 "11110011" // /* MW 6 */ + 10543 "00000001" // /* MW 5 */ + 10544 "00000000" // /* MW 4 */ + 10545 "10110000" // /* MW 3 */ + 10546 "10000010" // /* MW 2 */ + 10547 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10549 "11000000" // /* MW 3 */ + 10550 "11010100" // /* MW 2 */ + 10551 "00011011" // /* MW 1 */ + 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10553 "00000000" // /* MW 1 */ + 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 +.src_ref 7 "superkernels.cpp" 210 16 + 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */ + 10557 "00000001" // /* MW 5 */ + 10558 "01000000" // /* MW 4 */ + 10559 "11110000" // /* MW 3 */ + 10560 "00010100" // /* MW 2 */ + 10561 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 22 first +.delay_slot + 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10563 "10010000" // /* MW 3 */ + 10564 "01100010" // /* MW 2 */ + 10565 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 30 +.delay_slot + 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10567 "11111011" // /* MW 3 */ + 10568 "01100011" // /* MW 2 */ + 10569 "00010100" // /* MW 1 */ +.delay_slot + 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10571 "00111101" // /* MW 3 */ + 10572 "11110100" // /* MW 2 */ + 10573 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10575 "01110000" // /* MW 7 */ + 10576 "01100000" // /* MW 6 */ + 10577 "00110000" // /* MW 5 */ + 10578 "00000011" // /* MW 4 */ + 10579 "00110000" // /* MW 3 */ + 10580 "11000110" // /* MW 2 */ + 10581 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 +.src_ref 7 "superkernels.cpp" 224 2 +.delay_slot + 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10583 "00000000" // /* MW 5 */ + 10584 "11001010" // /* MW 4 */ + 10585 "11000000" // /* MW 3 */ + 10586 "00000111" // /* MW 2 */ + 10587 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10589 "11010000" // /* MW 5 */ + 10590 "11001000" // /* MW 4 */ + 10591 "11000100" // /* MW 3 */ + 10592 "00000111" // /* MW 2 */ + 10593 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10595 "00010000" // /* MW 9 */ + 10596 "00110010" // /* MW 8 */ + 10597 "00110010" // /* MW 7 */ + 10598 "11110001" // /* MW 6 */ + 10599 "00000001" // /* MW 5 */ + 10600 "00000000" // /* MW 4 */ + 10601 "11100000" // /* MW 3 */ + 10602 "11000000" // /* MW 2 */ + 10603 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "00000000" // /* MW 3 */ + 10610 "00010011" // /* MW 2 */ + 10611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10615 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10617 "00110001" // /* MW 3 */ + 10618 "00100000" // /* MW 2 */ + 10619 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10621 "00000101" // /* MW 3 */ + 10622 "00100000" // /* MW 2 */ + 10623 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10625 "00000000" // /* MW 15 */ + 10626 "00000000" // /* MW 14 */ + 10627 "01111000" // /* MW 13 */ + 10628 "10100101" // /* MW 12 */ + 10629 "00000001" // /* MW 11 */ + 10630 "00000000" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "10000000" // /* MW 8 */ + 10633 "00010001" // /* MW 7 */ + 10634 "00000110" // /* MW 6 */ + 10635 "00100010" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 +.return_address + 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10641 "10100000" // /* MW 5 */ + 10642 "11001000" // /* MW 4 */ + 10643 "11000100" // /* MW 3 */ + 10644 "00000111" // /* MW 2 */ + 10645 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 first +.src_ref 7 "superkernels.cpp" 217 65 + 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10647 "00010000" // /* MW 9 */ + 10648 "10000000" // /* MW 8 */ + 10649 "00110010" // /* MW 7 */ + 10650 "11110001" // /* MW 6 */ + 10651 "00000001" // /* MW 5 */ + 10652 "00000000" // /* MW 4 */ + 10653 "11010000" // /* MW 3 */ + 10654 "11000010" // /* MW 2 */ + 10655 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 +.src_ref 7 "superkernels.cpp" 217 65 +.src_ref 7 "superkernels.cpp" 224 2 + 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10657 "00010000" // /* MW 9 */ + 10658 "10000000" // /* MW 8 */ + 10659 "00110010" // /* MW 7 */ + 10660 "11110001" // /* MW 6 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "11010000" // /* MW 3 */ + 10664 "11000110" // /* MW 2 */ + 10665 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 first +.src_ref 7 "superkernels.cpp" 217 16 +.src_ref 7 "superkernels.cpp" 222 47 + 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10667 "00010000" // /* MW 9 */ + 10668 "00101010" // /* MW 8 */ + 10669 "10110010" // /* MW 7 */ + 10670 "11110000" // /* MW 6 */ + 10671 "00000001" // /* MW 5 */ + 10672 "00000000" // /* MW 4 */ + 10673 "01010000" // /* MW 3 */ + 10674 "11001011" // /* MW 2 */ + 10675 "01001010" // /* MW 1 */ + 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10677 "00000000" // /* MW 1 */ + 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10679 "00000000" // /* MW 1 */ + 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */ + 10681 "00000000" // /* MW 5 */ + 10682 "00000000" // /* MW 4 */ + 10683 "11111000" // /* MW 3 */ + 10684 "00010100" // /* MW 2 */ + 10685 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 +.delay_slot + 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10687 "11000000" // /* MW 5 */ + 10688 "11001000" // /* MW 4 */ + 10689 "11000000" // /* MW 3 */ + 10690 "00000111" // /* MW 2 */ + 10691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 27 first +.delay_slot + 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10695 "00001111" // /* MW 3 */ + 10696 "01100001" // /* MW 2 */ + 10697 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 first +.delay_slot + 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "10100011" // /* MW 5 */ + 10700 "00001100" // /* MW 4 */ + 10701 "11110000" // /* MW 3 */ + 10702 "00101100" // /* MW 2 */ + 10703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 16 first +.delay_slot + 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10705 "00000000" // /* MW 15 */ + 10706 "00000000" // /* MW 14 */ + 10707 "01111000" // /* MW 13 */ + 10708 "10100101" // /* MW 12 */ + 10709 "00000001" // /* MW 11 */ + 10710 "00000000" // /* MW 10 */ + 10711 "00000000" // /* MW 9 */ + 10712 "10000000" // /* MW 8 */ + 10713 "00010001" // /* MW 7 */ + 10714 "00000110" // /* MW 6 */ + 10715 "00100001" // /* MW 5 */ + 10716 "00000000" // /* MW 4 */ + 10717 "11110000" // /* MW 3 */ + 10718 "00101100" // /* MW 2 */ + 10719 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 222 47 +.src_ref 7 "superkernels.cpp" 224 2 + 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10721 "00000000" // /* MW 15 */ + 10722 "00000000" // /* MW 14 */ + 10723 "00010000" // /* MW 13 */ + 10724 "00101010" // /* MW 12 */ + 10725 "10110010" // /* MW 11 */ + 10726 "11110000" // /* MW 10 */ + 10727 "00000001" // /* MW 9 */ + 10728 "00000000" // /* MW 8 */ + 10729 "10001011" // /* MW 7 */ + 10730 "10000000" // /* MW 6 */ + 10731 "00100010" // /* MW 5 */ + 10732 "00000000" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10737 "00000000" // /* MW 7 */ + 10738 "11000011" // /* MW 6 */ + 10739 "10110011" // /* MW 5 */ + 10740 "00000011" // /* MW 4 */ + 10741 "01100000" // /* MW 3 */ + 10742 "10010001" // /* MW 2 */ + 10743 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10745 "00010000" // /* MW 9 */ + 10746 "00100000" // /* MW 8 */ + 10747 "00110010" // /* MW 7 */ + 10748 "11110000" // /* MW 6 */ + 10749 "00000001" // /* MW 5 */ + 10750 "00000000" // /* MW 4 */ + 10751 "11010000" // /* MW 3 */ + 10752 "11101110" // /* MW 2 */ + 10753 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10755 "00010110" // /* MW 3 */ + 10756 "11111110" // /* MW 2 */ + 10757 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10759 "00110110" // /* MW 3 */ + 10760 "11111110" // /* MW 2 */ + 10761 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10763 "01010110" // /* MW 3 */ + 10764 "01000110" // /* MW 2 */ + 10765 "00000111" // /* MW 1 */ + 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10767 "00000000" // /* MW 1 */ + 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10769 "00000000" // /* MW 1 */ + 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10771 "00000000" // /* MW 1 */ + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00000010" // /* MW 3 */ + 10778 "01100001" // /* MW 2 */ + 10779 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00010001" // /* MW 3 */ + 10782 "00000110" // /* MW 2 */ + 10783 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10785 "11111101" // /* MW 3 */ + 10786 "11100000" // /* MW 2 */ + 10787 "00010111" // /* MW 1 */ + 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10789 "00000000" // /* MW 1 */ + 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10791 "00000000" // /* MW 1 */ + 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10793 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "00001000" // /* MW 3 */ + 10796 "10010011" // /* MW 2 */ + 10797 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 + 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10799 "10000001" // /* MW 5 */ + 10800 "10101101" // /* MW 4 */ + 10801 "10100111" // /* MW 3 */ + 10802 "00000000" // /* MW 2 */ + 10803 "00000100" // /* MW 1 */ + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first + 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00110110" // /* MW 3 */ + 10810 "00000110" // /* MW 2 */ + 10811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10813 "10000001" // /* MW 5 */ + 10814 "11011101" // /* MW 4 */ + 10815 "11011100" // /* MW 3 */ + 10816 "11001010" // /* MW 2 */ + 10817 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 47 first + 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10819 "01110110" // /* MW 3 */ + 10820 "00000110" // /* MW 2 */ + 10821 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10823 "10011110" // /* MW 3 */ + 10824 "01011100" // /* MW 2 */ + 10825 "00000111" // /* MW 1 */ + 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10827 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 224 2 first +.no_stack_arguments + 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */ + 10829 "00000001" // /* MW 5 */ + 10830 "00000000" // /* MW 4 */ + 10831 "00111000" // /* MW 3 */ + 10832 "00010100" // /* MW 2 */ + 10833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10835 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first +.delay_slot + 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00000111" // /* MW 3 */ + 10838 "01100010" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.delay_slot + 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00110001" // /* MW 3 */ + 10842 "00000110" // /* MW 2 */ + 10843 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 first +.delay_slot + 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10845 "00001101" // /* MW 3 */ + 10846 "11100001" // /* MW 2 */ + 10847 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 +.delay_slot + 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10849 "00000000" // /* MW 15 */ + 10850 "00000000" // /* MW 14 */ + 10851 "10101000" // /* MW 13 */ + 10852 "10100000" // /* MW 12 */ + 10853 "00110100" // /* MW 11 */ + 10854 "00000000" // /* MW 10 */ + 10855 "00000000" // /* MW 9 */ + 10856 "00000000" // /* MW 8 */ + 10857 "01011011" // /* MW 7 */ + 10858 "00000001" // /* MW 6 */ + 10859 "00100000" // /* MW 5 */ + 10860 "00000000" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 +.src_ref 7 "superkernels.cpp" 227 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10865 "00010000" // /* MW 9 */ + 10866 "00100000" // /* MW 8 */ + 10867 "00110010" // /* MW 7 */ + 10868 "11110011" // /* MW 6 */ + 10869 "00000001" // /* MW 5 */ + 10870 "00000000" // /* MW 4 */ + 10871 "11010000" // /* MW 3 */ + 10872 "11000110" // /* MW 2 */ + 10873 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10875 "00000101" // /* MW 3 */ + 10876 "00100000" // /* MW 2 */ + 10877 "00010000" // /* MW 1 */ + 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10879 "00000000" // /* MW 1 */ + 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10881 "00000000" // /* MW 1 */ + 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10883 "00000000" // /* MW 1 */ + 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10885 "00000000" // /* MW 1 */ + 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10887 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10889 "00001000" // /* MW 3 */ + 10890 "01010001" // /* MW 2 */ + 10891 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10893 "00010000" // /* MW 9 */ + 10894 "00110000" // /* MW 8 */ + 10895 "00110010" // /* MW 7 */ + 10896 "11110001" // /* MW 6 */ + 10897 "00000001" // /* MW 5 */ + 10898 "00000000" // /* MW 4 */ + 10899 "11010000" // /* MW 3 */ + 10900 "11001110" // /* MW 2 */ + 10901 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 first + 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10903 "00110110" // /* MW 3 */ + 10904 "00000110" // /* MW 2 */ + 10905 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 + 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10907 "01010110" // /* MW 3 */ + 10908 "00000110" // /* MW 2 */ + 10909 "00000010" // /* MW 1 */ + 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10911 "00000000" // /* MW 1 */ + 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10913 "00000000" // /* MW 1 */ + 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10915 "00000000" // /* MW 1 */ + 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10917 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00110001" // /* MW 3 */ + 10920 "00100001" // /* MW 2 */ + 10921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "00010001" // /* MW 3 */ + 10924 "11100110" // /* MW 2 */ + 10925 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 16 first + 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10927 "00101000" // /* MW 3 */ + 10928 "01100001" // /* MW 2 */ + 10929 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 + 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */ + 10931 "00000001" // /* MW 5 */ + 10932 "01000000" // /* MW 4 */ + 10933 "01101000" // /* MW 3 */ + 10934 "00010101" // /* MW 2 */ + 10935 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 + 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10947 "00000001" // /* MW 3 */ + 10948 "00100000" // /* MW 2 */ + 10949 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 first + 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10951 "00000000" // /* MW 9 */ + 10952 "00000000" // /* MW 8 */ + 10953 "00000000" // /* MW 7 */ + 10954 "10000000" // /* MW 6 */ + 10955 "00010001" // /* MW 5 */ + 10956 "00000110" // /* MW 4 */ + 10957 "11110110" // /* MW 3 */ + 10958 "00101100" // /* MW 2 */ + 10959 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 229 + 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10961 "00111001" // /* MW 3 */ + 10962 "11110100" // /* MW 2 */ + 10963 "00000111" // /* MW 1 */ + 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00011001" // /* MW 3 */ + 10966 "11111011" // /* MW 2 */ + 10967 "00000111" // /* MW 1 */ + 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10969 "00000000" // /* MW 1 */ + 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10971 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10973 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10975 "11110001" // /* MW 3 */ + 10976 "11111101" // /* MW 2 */ + 10977 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10981 "00000000" // /* MW 3 */ + 10982 "00101000" // /* MW 2 */ + 10983 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10985 "10100000" // /* MW 3 */ + 10986 "01100111" // /* MW 2 */ + 10987 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 +.delay_slot + 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10989 "00000001" // /* MW 5 */ + 10990 "00000000" // /* MW 4 */ + 10991 "00000000" // /* MW 3 */ + 10992 "11111000" // /* MW 2 */ + 10993 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10999 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 11008 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11009 "01011000" // /* MW 9 */ + 11010 "00000000" // /* MW 8 */ + 11011 "00001000" // /* MW 7 */ + 11012 "00001011" // /* MW 6 */ + 11013 "00100000" // /* MW 5 */ + 11014 "00001000" // /* MW 4 */ + 11015 "11010000" // /* MW 3 */ + 11016 "10000101" // /* MW 2 */ + 11017 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 11018 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11019 "00000001" // /* MW 3 */ + 11020 "10000000" // /* MW 2 */ + 11021 "00010111" // /* MW 1 */ + 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11023 "00000000" // /* MW 1 */ + 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11025 "00000000" // /* MW 1 */ + 11026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11027 "00000000" // /* MW 1 */ + 11028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11029 "00000000" // /* MW 1 */ + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 11032 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11033 "00101001" // /* MW 3 */ + 11034 "00011100" // /* MW 2 */ + 11035 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 11036 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11037 "00101110" // /* MW 3 */ + 11038 "00011100" // /* MW 2 */ + 11039 "00000001" // /* MW 1 */ + 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11041 "00000000" // /* MW 1 */ + 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11043 "00000000" // /* MW 1 */ + 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11045 "00000000" // /* MW 1 */ + 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11047 "00000000" // /* MW 1 */ + 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11049 "00000000" // /* MW 1 */ + 11050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11051 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 11052 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11053 "00101001" // /* MW 3 */ + 11054 "00011100" // /* MW 2 */ + 11055 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 11056 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11057 "00101110" // /* MW 3 */ + 11058 "00000100" // /* MW 2 */ + 11059 "00000001" // /* MW 1 */ + 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11061 "00000000" // /* MW 1 */ + 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11063 "00000000" // /* MW 1 */ + 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11065 "00000000" // /* MW 1 */ + 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11067 "00000000" // /* MW 1 */ + 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11069 "00000000" // /* MW 1 */ + 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 11072 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11073 "00101001" // /* MW 3 */ + 11074 "00011100" // /* MW 2 */ + 11075 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 11076 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11077 "01110110" // /* MW 3 */ + 11078 "00010100" // /* MW 2 */ + 11079 "00000001" // /* MW 1 */ + 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11081 "00000000" // /* MW 1 */ + 11082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11083 "00000000" // /* MW 1 */ + 11084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11085 "00000000" // /* MW 1 */ + 11086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11087 "00000000" // /* MW 1 */ + 11088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11089 "00000000" // /* MW 1 */ + 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11092 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11093 "01110001" // /* MW 3 */ + 11094 "01001100" // /* MW 2 */ + 11095 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11096 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11097 "00010111" // /* MW 3 */ + 11098 "00000100" // /* MW 2 */ + 11099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11100 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11101 "00000000" // /* MW 3 */ + 11102 "00101000" // /* MW 2 */ + 11103 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11104 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11105 "00000000" // /* MW 5 */ + 11106 "10111110" // /* MW 4 */ + 11107 "11110000" // /* MW 3 */ + 11108 "00000000" // /* MW 2 */ + 11109 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11110 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11111 "00010100" // /* MW 3 */ + 11112 "11000010" // /* MW 2 */ + 11113 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11114 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11115 "00100111" // /* MW 3 */ + 11116 "01110110" // /* MW 2 */ + 11117 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11118 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11119 "10000010" // /* MW 3 */ + 11120 "00000001" // /* MW 2 */ + 11121 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11123 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 11136 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11137 "00000001" // /* MW 5 */ + 11138 "00000000" // /* MW 4 */ + 11139 "00000000" // /* MW 3 */ + 11140 "00001000" // /* MW 2 */ + 11141 "00000000" // /* MW 1 */ + 11142 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11143 "00111101" // /* MW 3 */ + 11144 "11111000" // /* MW 2 */ + 11145 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 11146 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */ + 11147 "00000001" // /* MW 5 */ + 11148 "00000000" // /* MW 4 */ + 11149 "10000000" // /* MW 3 */ + 11150 "00010101" // /* MW 2 */ + 11151 "00000000" // /* MW 1 */ +.delay_slot + 11152 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11153 "10100000" // /* MW 3 */ + 11154 "00010111" // /* MW 2 */ + 11155 "00011000" // /* MW 1 */ +.delay_slot + 11156 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11157 "00010101" // /* MW 3 */ + 11158 "11111100" // /* MW 2 */ + 11159 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.delay_slot + 11160 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11161 "11000000" // /* MW 3 */ + 11162 "11010000" // /* MW 2 */ + 11163 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 11168 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11169 "00001000" // /* MW 9 */ + 11170 "11000100" // /* MW 8 */ + 11171 "00110011" // /* MW 7 */ + 11172 "01101000" // /* MW 6 */ + 11173 "00000000" // /* MW 5 */ + 11174 "00000001" // /* MW 4 */ + 11175 "00100000" // /* MW 3 */ + 11176 "00000111" // /* MW 2 */ + 11177 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 11178 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11179 "01011000" // /* MW 9 */ + 11180 "11111101" // /* MW 8 */ + 11181 "00000111" // /* MW 7 */ + 11182 "00001000" // /* MW 6 */ + 11183 "10000000" // /* MW 5 */ + 11184 "00000001" // /* MW 4 */ + 11185 "10000000" // /* MW 3 */ + 11186 "11100010" // /* MW 2 */ + 11187 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 11188 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11189 "00000001" // /* MW 9 */ + 11190 "10100000" // /* MW 8 */ + 11191 "00000111" // /* MW 7 */ + 11192 "10000000" // /* MW 6 */ + 11193 "00010001" // /* MW 5 */ + 11194 "00001010" // /* MW 4 */ + 11195 "00100000" // /* MW 3 */ + 11196 "10111110" // /* MW 2 */ + 11197 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 11198 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11199 "01001010" // /* MW 3 */ + 11200 "00000110" // /* MW 2 */ + 11201 "00000000" // /* MW 1 */ + 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11203 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11205 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11206 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11207 "00010111" // /* MW 3 */ + 11208 "00000010" // /* MW 2 */ + 11209 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11210 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11211 "00000000" // /* MW 3 */ + 11212 "00101000" // /* MW 2 */ + 11213 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11214 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11215 "00000101" // /* MW 3 */ + 11216 "00100010" // /* MW 2 */ + 11217 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11218 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11219 "00000001" // /* MW 5 */ + 11220 "00000000" // /* MW 4 */ + 11221 "00000000" // /* MW 3 */ + 11222 "11111000" // /* MW 2 */ + 11223 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11224 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11225 "00100111" // /* MW 3 */ + 11226 "01110111" // /* MW 2 */ + 11227 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11228 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11229 "10000010" // /* MW 3 */ + 11230 "00100001" // /* MW 2 */ + 11231 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11233 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_shared.h" 227 first +.src_ref 3 "elementwise_binary_shared.h" 232 8 first +.tail_call +.function_start + 11248 "10000100" // J #9808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 11249 "00000000" // /* MW 5 */ + 11250 "00000000" // /* MW 4 */ + 11251 "00101000" // /* MW 3 */ + 11252 "00010011" // /* MW 2 */ + 11253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11259 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 11263 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 11264 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11265 "00000001" // /* MW 5 */ + 11266 "00100001" // /* MW 4 */ + 11267 "00000000" // /* MW 3 */ + 11268 "00000000" // /* MW 2 */ + 11269 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11270 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11271 "11000000" // /* MW 3 */ + 11272 "01010000" // /* MW 2 */ + 11273 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11274 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11275 "10010000" // /* MW 3 */ + 11276 "01100000" // /* MW 2 */ + 11277 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 11278 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "00010001" // /* MW 3 */ + 11280 "00000100" // /* MW 2 */ + 11281 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 11282 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11283 "00010001" // /* MW 3 */ + 11284 "00010100" // /* MW 2 */ + 11285 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 11287 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 11296 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00101110" // /* MW 3 */ + 11298 "00011100" // /* MW 2 */ + 11299 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 11300 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11301 "00000001" // /* MW 5 */ + 11302 "00000000" // /* MW 4 */ + 11303 "00000000" // /* MW 3 */ + 11304 "00001000" // /* MW 2 */ + 11305 "00000000" // /* MW 1 */ + 11306 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11307 "00111101" // /* MW 3 */ + 11308 "11111100" // /* MW 2 */ + 11309 "00001111" // /* MW 1 */ + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11311 "00000000" // /* MW 1 */ + 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11313 "00000000" // /* MW 1 */ + 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11315 "00000000" // /* MW 1 */ + 11316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11317 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 11318 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11319 "00101001" // /* MW 3 */ + 11320 "00011100" // /* MW 2 */ + 11321 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 11322 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11323 "00101110" // /* MW 3 */ + 11324 "00011100" // /* MW 2 */ + 11325 "00000001" // /* MW 1 */ + 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11327 "00000000" // /* MW 1 */ + 11328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11329 "00000000" // /* MW 1 */ + 11330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11331 "00000000" // /* MW 1 */ + 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11333 "00000000" // /* MW 1 */ + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 11338 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11339 "00101001" // /* MW 3 */ + 11340 "00011100" // /* MW 2 */ + 11341 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 11342 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11343 "00101110" // /* MW 3 */ + 11344 "00000100" // /* MW 2 */ + 11345 "00000001" // /* MW 1 */ + 11346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11347 "00000000" // /* MW 1 */ + 11348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11349 "00000000" // /* MW 1 */ + 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11351 "00000000" // /* MW 1 */ + 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11353 "00000000" // /* MW 1 */ + 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11355 "00000000" // /* MW 1 */ + 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11357 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 11358 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11359 "00101001" // /* MW 3 */ + 11360 "00011100" // /* MW 2 */ + 11361 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 11362 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11363 "00101110" // /* MW 3 */ + 11364 "00010100" // /* MW 2 */ + 11365 "00000001" // /* MW 1 */ + 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11367 "00000000" // /* MW 1 */ + 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11369 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 11370 "00000100" // JL #11264 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11264 delay_slots=5 */ + 11371 "00000001" // /* MW 5 */ + 11372 "00000000" // /* MW 4 */ + 11373 "00000000" // /* MW 3 */ + 11374 "00010110" // /* MW 2 */ + 11375 "00000000" // /* MW 1 */ +.delay_slot + 11376 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "10011101" // /* MW 3 */ + 11378 "11111011" // /* MW 2 */ + 11379 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 11384 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "00101001" // /* MW 3 */ + 11386 "11011100" // /* MW 2 */ + 11387 "00001000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot + 11388 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "11000000" // /* MW 3 */ + 11390 "01100000" // /* MW 2 */ + 11391 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.return_address + 11392 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11393 "00111001" // /* MW 3 */ + 11394 "11111100" // /* MW 2 */ + 11395 "00000111" // /* MW 1 */ + 11396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11397 "00000000" // /* MW 1 */ + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11406 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11407 "10011001" // /* MW 3 */ + 11408 "11111011" // /* MW 2 */ + 11409 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11410 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11411 "00000000" // /* MW 3 */ + 11412 "00101000" // /* MW 2 */ + 11413 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11419 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11420 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11421 "00000001" // /* MW 3 */ + 11422 "00100000" // /* MW 2 */ + 11423 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "mul_impl.h" 134 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11424 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11425 "01110001" // /* MW 9 */ + 11426 "00000000" // /* MW 8 */ + 11427 "00000000" // /* MW 7 */ + 11428 "00000000" // /* MW 6 */ + 11429 "11111110" // /* MW 5 */ + 11430 "00111111" // /* MW 4 */ + 11431 "00110000" // /* MW 3 */ + 11432 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 11433 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 149 first +.src_ref 3 "elementwise_binary.h" 156 37 +.src_ref 3 "elementwise_binary.h" 168 8 first +.function_start + 11440 "10111010" // MOVA m0, #32; MOVXM ls, #11616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11441 "00010000" // /* MW 9 */ + 11442 "10110000" // /* MW 8 */ + 11443 "01111110" // /* MW 7 */ + 11444 "00001000" // /* MW 6 */ + 11445 "00000000" // /* MW 5 */ + 11446 "00000000" // /* MW 4 */ + 11447 "10000000" // /* MW 3 */ + 11448 "00000000" // /* MW 2 */ + 11449 "00000100" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 37 first +.src_ref 3 "elementwise_binary.h" 168 8 first + 11450 "10111010" // LDA r3, [p3], m0; MOVXM le, #11632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11451 "00010000" // /* MW 9 */ + 11452 "10111000" // /* MW 8 */ + 11453 "10111110" // /* MW 7 */ + 11454 "00001001" // /* MW 6 */ + 11455 "00000000" // /* MW 5 */ + 11456 "00000000" // /* MW 4 */ + 11457 "11010000" // /* MW 3 */ + 11458 "00001110" // /* MW 2 */ + 11459 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11460 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11461 "01011000" // /* MW 9 */ + 11462 "00111100" // /* MW 8 */ + 11463 "00001011" // /* MW 7 */ + 11464 "01001000" // /* MW 6 */ + 11465 "00010111" // /* MW 5 */ + 11466 "00111110" // /* MW 4 */ + 11467 "11010000" // /* MW 3 */ + 11468 "10010000" // /* MW 2 */ + 11469 "01100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11470 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11471 "00010000" // /* MW 9 */ + 11472 "00110100" // /* MW 8 */ + 11473 "00110010" // /* MW 7 */ + 11474 "11110010" // /* MW 6 */ + 11475 "00000001" // /* MW 5 */ + 11476 "00000000" // /* MW 4 */ + 11477 "11010000" // /* MW 3 */ + 11478 "10000000" // /* MW 2 */ + 11479 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11480 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11481 "01000010" // /* MW 3 */ + 11482 "00000100" // /* MW 2 */ + 11483 "00000100" // /* MW 1 */ + 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11485 "00000000" // /* MW 1 */ + 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11487 "00000000" // /* MW 1 */ + 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11489 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11490 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11491 "00011101" // /* MW 3 */ + 11492 "11000010" // /* MW 2 */ + 11493 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 168 8 +.src_ref 3 "elementwise_binary.h" 187 20 first + 11494 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11495 "11111001" // /* MW 5 */ + 11496 "11100001" // /* MW 4 */ + 11497 "10001010" // /* MW 3 */ + 11498 "00001110" // /* MW 2 */ + 11499 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11500 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11501 "01101000" // /* MW 5 */ + 11502 "01010000" // /* MW 4 */ + 11503 "01110000" // /* MW 3 */ + 11504 "00010011" // /* MW 2 */ + 11505 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11506 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11507 "10000000" // /* MW 7 */ + 11508 "10111010" // /* MW 6 */ + 11509 "11101000" // /* MW 5 */ + 11510 "01010000" // /* MW 4 */ + 11511 "01110000" // /* MW 3 */ + 11512 "00011011" // /* MW 2 */ + 11513 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11514 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11515 "01101000" // /* MW 5 */ + 11516 "01010000" // /* MW 4 */ + 11517 "01110000" // /* MW 3 */ + 11518 "00010011" // /* MW 2 */ + 11519 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11520 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11521 "11101000" // /* MW 5 */ + 11522 "01010000" // /* MW 4 */ + 11523 "01110000" // /* MW 3 */ + 11524 "00011011" // /* MW 2 */ + 11525 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11526 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11527 "10011011" // /* MW 3 */ + 11528 "00001000" // /* MW 2 */ + 11529 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11530 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11531 "01101000" // /* MW 5 */ + 11532 "01010000" // /* MW 4 */ + 11533 "01110000" // /* MW 3 */ + 11534 "00011011" // /* MW 2 */ + 11535 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11536 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11537 "11101000" // /* MW 5 */ + 11538 "01010000" // /* MW 4 */ + 11539 "01110000" // /* MW 3 */ + 11540 "00010011" // /* MW 2 */ + 11541 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11542 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11543 "01000001" // /* MW 9 */ + 11544 "11100010" // /* MW 8 */ + 11545 "00000000" // /* MW 7 */ + 11546 "00011101" // /* MW 6 */ + 11547 "00110100" // /* MW 5 */ + 11548 "00101000" // /* MW 4 */ + 11549 "01110000" // /* MW 3 */ + 11550 "00011011" // /* MW 2 */ + 11551 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11552 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11553 "01100001" // /* MW 9 */ + 11554 "11100000" // /* MW 8 */ + 11555 "00000001" // /* MW 7 */ + 11556 "00011101" // /* MW 6 */ + 11557 "01110100" // /* MW 5 */ + 11558 "00101000" // /* MW 4 */ + 11559 "01110000" // /* MW 3 */ + 11560 "00010011" // /* MW 2 */ + 11561 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11562 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11563 "01000001" // /* MW 9 */ + 11564 "11100010" // /* MW 8 */ + 11565 "00000000" // /* MW 7 */ + 11566 "00011101" // /* MW 6 */ + 11567 "00110100" // /* MW 5 */ + 11568 "00101000" // /* MW 4 */ + 11569 "01110000" // /* MW 3 */ + 11570 "00011011" // /* MW 2 */ + 11571 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11572 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "01100001" // /* MW 9 */ + 11574 "11100000" // /* MW 8 */ + 11575 "00000001" // /* MW 7 */ + 11576 "00011101" // /* MW 6 */ + 11577 "01110100" // /* MW 5 */ + 11578 "00101000" // /* MW 4 */ + 11579 "01110000" // /* MW 3 */ + 11580 "00010011" // /* MW 2 */ + 11581 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11582 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11583 "01000001" // /* MW 9 */ + 11584 "11100010" // /* MW 8 */ + 11585 "00000000" // /* MW 7 */ + 11586 "00011101" // /* MW 6 */ + 11587 "00110100" // /* MW 5 */ + 11588 "00101000" // /* MW 4 */ + 11589 "01110000" // /* MW 3 */ + 11590 "00011011" // /* MW 2 */ + 11591 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11592 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11593 "01100001" // /* MW 9 */ + 11594 "11100000" // /* MW 8 */ + 11595 "00000001" // /* MW 7 */ + 11596 "00011101" // /* MW 6 */ + 11597 "01110100" // /* MW 5 */ + 11598 "00101000" // /* MW 4 */ + 11599 "01110000" // /* MW 3 */ + 11600 "00010011" // /* MW 2 */ + 11601 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11602 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11603 "01000001" // /* MW 13 */ + 11604 "11100010" // /* MW 12 */ + 11605 "00000000" // /* MW 11 */ + 11606 "10001100" // /* MW 10 */ + 11607 "01110000" // /* MW 9 */ + 11608 "00001000" // /* MW 8 */ + 11609 "00000000" // /* MW 7 */ + 11610 "00000000" // /* MW 6 */ + 11611 "01101000" // /* MW 5 */ + 11612 "01010000" // /* MW 4 */ + 11613 "01110000" // /* MW 3 */ + 11614 "00011011" // /* MW 2 */ + 11615 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11616 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11617 "00000011" // /* MW 15 */ + 11618 "00001111" // /* MW 14 */ + 11619 "01111000" // /* MW 13 */ + 11620 "10100101" // /* MW 12 */ + 11621 "00000001" // /* MW 11 */ + 11622 "00000000" // /* MW 10 */ + 11623 "00000000" // /* MW 9 */ + 11624 "00000000" // /* MW 8 */ + 11625 "10100011" // /* MW 7 */ + 11626 "00011100" // /* MW 6 */ + 11627 "11101010" // /* MW 5 */ + 11628 "01010000" // /* MW 4 */ + 11629 "01110000" // /* MW 3 */ + 11630 "00010011" // /* MW 2 */ + 11631 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11632 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11633 "00010010" // /* MW 15 */ + 11634 "00000111" // /* MW 14 */ + 11635 "01111000" // /* MW 13 */ + 11636 "10100101" // /* MW 12 */ + 11637 "00000001" // /* MW 11 */ + 11638 "00000000" // /* MW 10 */ + 11639 "00000000" // /* MW 9 */ + 11640 "00000000" // /* MW 8 */ + 11641 "00100011" // /* MW 7 */ + 11642 "00011100" // /* MW 6 */ + 11643 "01101010" // /* MW 5 */ + 11644 "01010000" // /* MW 4 */ + 11645 "01110000" // /* MW 3 */ + 11646 "00011011" // /* MW 2 */ + 11647 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 11648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11649 "01100001" // /* MW 7 */ + 11650 "11100000" // /* MW 6 */ + 11651 "00000001" // /* MW 5 */ + 11652 "00000010" // /* MW 4 */ + 11653 "01100000" // /* MW 3 */ + 11654 "10010100" // /* MW 2 */ + 11655 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11657 "01000001" // /* MW 7 */ + 11658 "11100010" // /* MW 6 */ + 11659 "00000000" // /* MW 5 */ + 11660 "00000010" // /* MW 4 */ + 11661 "01100000" // /* MW 3 */ + 11662 "10000100" // /* MW 2 */ + 11663 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11665 "01100001" // /* MW 7 */ + 11666 "11100000" // /* MW 6 */ + 11667 "00000001" // /* MW 5 */ + 11668 "00000010" // /* MW 4 */ + 11669 "01100000" // /* MW 3 */ + 11670 "10010100" // /* MW 2 */ + 11671 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11672 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11673 "01000001" // /* MW 7 */ + 11674 "11100010" // /* MW 6 */ + 11675 "00000000" // /* MW 5 */ + 11676 "00000010" // /* MW 4 */ + 11677 "01100000" // /* MW 3 */ + 11678 "10000100" // /* MW 2 */ + 11679 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11680 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11681 "01100001" // /* MW 7 */ + 11682 "11100000" // /* MW 6 */ + 11683 "00000001" // /* MW 5 */ + 11684 "00000010" // /* MW 4 */ + 11685 "01100000" // /* MW 3 */ + 11686 "10010100" // /* MW 2 */ + 11687 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11688 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11689 "01000001" // /* MW 7 */ + 11690 "11100010" // /* MW 6 */ + 11691 "00000000" // /* MW 5 */ + 11692 "00000010" // /* MW 4 */ + 11693 "01100000" // /* MW 3 */ + 11694 "10000100" // /* MW 2 */ + 11695 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11696 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11697 "01100001" // /* MW 7 */ + 11698 "11100000" // /* MW 6 */ + 11699 "00000001" // /* MW 5 */ + 11700 "00000010" // /* MW 4 */ + 11701 "01100000" // /* MW 3 */ + 11702 "10010100" // /* MW 2 */ + 11703 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11704 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11705 "00100011" // /* MW 3 */ + 11706 "00011100" // /* MW 2 */ + 11707 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 172 4 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11708 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11709 "00000000" // /* MW 5 */ + 11710 "01010000" // /* MW 4 */ + 11711 "01100000" // /* MW 3 */ + 11712 "10010100" // /* MW 2 */ + 11713 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11714 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11715 "00100011" // /* MW 3 */ + 11716 "00011100" // /* MW 2 */ + 11717 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11718 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11719 "10100011" // /* MW 3 */ + 11720 "00011100" // /* MW 2 */ + 11721 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 11722 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11723 "00100011" // /* MW 3 */ + 11724 "00011100" // /* MW 2 */ + 11725 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 11726 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11727 "10100011" // /* MW 3 */ + 11728 "00011100" // /* MW 2 */ + 11729 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 11731 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 369 first +.src_ref 7 "superkernels.cpp" 374 6 +.function_start + 11744 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11745 "10000000" // /* MW 5 */ + 11746 "11001000" // /* MW 4 */ + 11747 "11001000" // /* MW 3 */ + 11748 "00000111" // /* MW 2 */ + 11749 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first + 11750 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11751 "11000001" // /* MW 5 */ + 11752 "10110101" // /* MW 4 */ + 11753 "11011000" // /* MW 3 */ + 11754 "11000010" // /* MW 2 */ + 11755 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 369 + 11756 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11757 "00000001" // /* MW 5 */ + 11758 "00000000" // /* MW 4 */ + 11759 "00000000" // /* MW 3 */ + 11760 "00001000" // /* MW 2 */ + 11761 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 11762 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11763 "01111001" // /* MW 9 */ + 11764 "01100000" // /* MW 8 */ + 11765 "11001010" // /* MW 7 */ + 11766 "10000001" // /* MW 6 */ + 11767 "00010100" // /* MW 5 */ + 11768 "00100011" // /* MW 4 */ + 11769 "10110000" // /* MW 3 */ + 11770 "00111010" // /* MW 2 */ + 11771 "11111111" // /* MW 1 */ + 11772 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11773 "01110000" // /* MW 7 */ + 11774 "11010000" // /* MW 6 */ + 11775 "00001011" // /* MW 5 */ + 11776 "00000000" // /* MW 4 */ + 11777 "10110000" // /* MW 3 */ + 11778 "10000011" // /* MW 2 */ + 11779 "11111101" // /* MW 1 */ + 11780 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11781 "00010101" // /* MW 3 */ + 11782 "11111100" // /* MW 2 */ + 11783 "00001111" // /* MW 1 */ + 11784 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11785 "00111101" // /* MW 3 */ + 11786 "11110000" // /* MW 2 */ + 11787 "00001111" // /* MW 1 */ + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first +.src_ref 7 "superkernels.cpp" 374 16 first + 11790 "10000100" // JNZ r16, #11936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11936 delay_slots=5 */ + 11791 "00000001" // /* MW 5 */ + 11792 "01000000" // /* MW 4 */ + 11793 "01010000" // /* MW 3 */ + 11794 "00010111" // /* MW 2 */ + 11795 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 30 first +.delay_slot + 11796 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11797 "11111011" // /* MW 3 */ + 11798 "01100011" // /* MW 2 */ + 11799 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11801 "10100000" // /* MW 5 */ + 11802 "11001000" // /* MW 4 */ + 11803 "11000100" // /* MW 3 */ + 11804 "00000111" // /* MW 2 */ + 11805 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11806 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11807 "01110000" // /* MW 7 */ + 11808 "01100000" // /* MW 6 */ + 11809 "00110111" // /* MW 5 */ + 11810 "00000001" // /* MW 4 */ + 11811 "00110000" // /* MW 3 */ + 11812 "11000110" // /* MW 2 */ + 11813 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 11814 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11815 "11000000" // /* MW 3 */ + 11816 "11010110" // /* MW 2 */ + 11817 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 7 "superkernels.cpp" 379 28 +.src_ref 7 "superkernels.cpp" 381 42 +.src_ref 7 "superkernels.cpp" 393 2 +.delay_slot + 11818 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11819 "00010001" // /* MW 9 */ + 11820 "11000000" // /* MW 8 */ + 11821 "10110010" // /* MW 7 */ + 11822 "11110011" // /* MW 6 */ + 11823 "00000001" // /* MW 5 */ + 11824 "00000000" // /* MW 4 */ + 11825 "10110000" // /* MW 3 */ + 11826 "10100011" // /* MW 2 */ + 11827 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11828 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11829 "00010001" // /* MW 9 */ + 11830 "00110100" // /* MW 8 */ + 11831 "00110010" // /* MW 7 */ + 11832 "11110001" // /* MW 6 */ + 11833 "00000001" // /* MW 5 */ + 11834 "00000000" // /* MW 4 */ + 11835 "01100000" // /* MW 3 */ + 11836 "10010001" // /* MW 2 */ + 11837 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11838 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11839 "00010000" // /* MW 9 */ + 11840 "00110010" // /* MW 8 */ + 11841 "00110010" // /* MW 7 */ + 11842 "11110001" // /* MW 6 */ + 11843 "00000001" // /* MW 5 */ + 11844 "00000000" // /* MW 4 */ + 11845 "11100000" // /* MW 3 */ + 11846 "11000000" // /* MW 2 */ + 11847 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11849 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11850 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 11851 "00000001" // /* MW 5 */ + 11852 "00000000" // /* MW 4 */ + 11853 "00010000" // /* MW 3 */ + 11854 "00010110" // /* MW 2 */ + 11855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11859 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11860 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11861 "00110001" // /* MW 3 */ + 11862 "00100000" // /* MW 2 */ + 11863 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11864 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11865 "00000101" // /* MW 3 */ + 11866 "00100000" // /* MW 2 */ + 11867 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11868 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11869 "00010001" // /* MW 3 */ + 11870 "00000110" // /* MW 2 */ + 11871 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 381 42 first +.return_address + 11872 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11873 "00010000" // /* MW 9 */ + 11874 "00101000" // /* MW 8 */ + 11875 "10110010" // /* MW 7 */ + 11876 "11110000" // /* MW 6 */ + 11877 "00000001" // /* MW 5 */ + 11878 "00000000" // /* MW 4 */ + 11879 "11010000" // /* MW 3 */ + 11880 "11000010" // /* MW 2 */ + 11881 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 390 48 + 11882 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11883 "00010000" // /* MW 9 */ + 11884 "00101010" // /* MW 8 */ + 11885 "10110010" // /* MW 7 */ + 11886 "11110001" // /* MW 6 */ + 11887 "00000001" // /* MW 5 */ + 11888 "00000000" // /* MW 4 */ + 11889 "11010000" // /* MW 3 */ + 11890 "11000110" // /* MW 2 */ + 11891 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 28 first +.src_ref 7 "superkernels.cpp" 382 16 +.src_ref 7 "superkernels.cpp" 391 48 + 11892 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11893 "00010000" // /* MW 9 */ + 11894 "00101110" // /* MW 8 */ + 11895 "10110010" // /* MW 7 */ + 11896 "11110000" // /* MW 6 */ + 11897 "00000001" // /* MW 5 */ + 11898 "00000000" // /* MW 4 */ + 11899 "01010000" // /* MW 3 */ + 11900 "11001011" // /* MW 2 */ + 11901 "11101010" // /* MW 1 */ + 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11903 "00000000" // /* MW 1 */ + 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11905 "00000000" // /* MW 1 */ + 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11907 "00000000" // /* MW 1 */ + 11908 "10000100" // J #11952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11952 delay_slots=5 */ + 11909 "00000000" // /* MW 5 */ + 11910 "00000000" // /* MW 4 */ + 11911 "01011000" // /* MW 3 */ + 11912 "00010111" // /* MW 2 */ + 11913 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 +.delay_slot + 11914 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11915 "11000000" // /* MW 5 */ + 11916 "11001000" // /* MW 4 */ + 11917 "11000100" // /* MW 3 */ + 11918 "00000111" // /* MW 2 */ + 11919 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 27 first +.delay_slot + 11920 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11921 "00001111" // /* MW 3 */ + 11922 "01100001" // /* MW 2 */ + 11923 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 first +.delay_slot + 11924 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "01010001" // /* MW 3 */ + 11926 "00000110" // /* MW 2 */ + 11927 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 first +.delay_slot + 11928 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "00010001" // /* MW 3 */ + 11930 "00000110" // /* MW 2 */ + 11931 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 382 16 first +.delay_slot + 11932 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11933 "00010001" // /* MW 3 */ + 11934 "00000110" // /* MW 2 */ + 11935 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 390 48 + 11936 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11937 "10101000" // /* MW 5 */ + 11938 "11001000" // /* MW 4 */ + 11939 "11000110" // /* MW 3 */ + 11940 "00000111" // /* MW 2 */ + 11941 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 + 11942 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11943 "00010000" // /* MW 9 */ + 11944 "00101110" // /* MW 8 */ + 11945 "10110010" // /* MW 7 */ + 11946 "11110000" // /* MW 6 */ + 11947 "00000001" // /* MW 5 */ + 11948 "00000000" // /* MW 4 */ + 11949 "11110000" // /* MW 3 */ + 11950 "00101100" // /* MW 2 */ + 11951 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 11952 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11953 "10000110" // /* MW 3 */ + 11954 "01100111" // /* MW 2 */ + 11955 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 11956 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11957 "00010000" // /* MW 9 */ + 11958 "00100000" // /* MW 8 */ + 11959 "00110010" // /* MW 7 */ + 11960 "11110001" // /* MW 6 */ + 11961 "00000001" // /* MW 5 */ + 11962 "00000000" // /* MW 4 */ + 11963 "11010000" // /* MW 3 */ + 11964 "11101110" // /* MW 2 */ + 11965 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11966 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11967 "00010110" // /* MW 3 */ + 11968 "11111110" // /* MW 2 */ + 11969 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11970 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11971 "00110110" // /* MW 3 */ + 11972 "11111110" // /* MW 2 */ + 11973 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first + 11974 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11975 "01010110" // /* MW 3 */ + 11976 "00000110" // /* MW 2 */ + 11977 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 11978 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11979 "01110110" // /* MW 3 */ + 11980 "01000110" // /* MW 2 */ + 11981 "00000000" // /* MW 1 */ + 11982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11983 "00000000" // /* MW 1 */ + 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11985 "00000000" // /* MW 1 */ + 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11987 "00000000" // /* MW 1 */ + 11988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11990 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11991 "00000010" // /* MW 3 */ + 11992 "01100001" // /* MW 2 */ + 11993 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 11994 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11995 "00001110" // /* MW 5 */ + 11996 "01000000" // /* MW 4 */ + 11997 "00111001" // /* MW 3 */ + 11998 "11000010" // /* MW 2 */ + 11999 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 + 12000 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12001 "00010001" // /* MW 3 */ + 12002 "00000110" // /* MW 2 */ + 12003 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 12004 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12005 "11111101" // /* MW 3 */ + 12006 "11100000" // /* MW 2 */ + 12007 "00010111" // /* MW 1 */ + 12008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12009 "00000000" // /* MW 1 */ + 12010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12011 "00000000" // /* MW 1 */ + 12012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12013 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12014 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12015 "00001000" // /* MW 3 */ + 12016 "11010011" // /* MW 2 */ + 12017 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 12018 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12019 "00000110" // /* MW 3 */ + 12020 "01100111" // /* MW 2 */ + 12021 "00011010" // /* MW 1 */ + 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12023 "00000000" // /* MW 1 */ + 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12025 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 12026 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12027 "01110110" // /* MW 3 */ + 12028 "11111111" // /* MW 2 */ + 12029 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 12030 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12031 "00110110" // /* MW 3 */ + 12032 "11111110" // /* MW 2 */ + 12033 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 12034 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12035 "01010110" // /* MW 3 */ + 12036 "11111110" // /* MW 2 */ + 12037 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 12038 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "01110110" // /* MW 3 */ + 12040 "01010110" // /* MW 2 */ + 12041 "00000010" // /* MW 1 */ + 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12043 "00000000" // /* MW 1 */ + 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12045 "00000000" // /* MW 1 */ + 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12047 "00000000" // /* MW 1 */ + 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12049 "00000000" // /* MW 1 */ + 12050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12051 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 12052 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12053 "00010010" // /* MW 3 */ + 12054 "10100011" // /* MW 2 */ + 12055 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 12056 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12057 "00110001" // /* MW 3 */ + 12058 "00000110" // /* MW 2 */ + 12059 "00001010" // /* MW 1 */ + 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12061 "00000000" // /* MW 1 */ + 12062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12063 "00000000" // /* MW 1 */ + 12064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12065 "00000000" // /* MW 1 */ + 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12067 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12068 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12069 "00001000" // /* MW 3 */ + 12070 "11010011" // /* MW 2 */ + 12071 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 +.src_ref 7 "superkernels.cpp" 391 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 12072 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12073 "01111001" // /* MW 9 */ + 12074 "01100000" // /* MW 8 */ + 12075 "11001110" // /* MW 7 */ + 12076 "00101001" // /* MW 6 */ + 12077 "00000000" // /* MW 5 */ + 12078 "00000001" // /* MW 4 */ + 12079 "01100000" // /* MW 3 */ + 12080 "00010001" // /* MW 2 */ + 12081 "11010001" // /* MW 1 */ + 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12083 "00000000" // /* MW 1 */ + 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12085 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 12086 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12087 "00011001" // /* MW 3 */ + 12088 "11101110" // /* MW 2 */ + 12089 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 48 first + 12090 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12091 "00111011" // /* MW 5 */ + 12092 "11011000" // /* MW 4 */ + 12093 "11011111" // /* MW 3 */ + 12094 "11000110" // /* MW 2 */ + 12095 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 first +.src_ref 7 "superkernels.cpp" 393 2 + 12096 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "10000001" // /* MW 5 */ + 12098 "11011101" // /* MW 4 */ + 12099 "11010110" // /* MW 3 */ + 12100 "11010010" // /* MW 2 */ + 12101 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12102 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12103 "01010110" // /* MW 3 */ + 12104 "01001110" // /* MW 2 */ + 12105 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12106 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00011110" // /* MW 3 */ + 12108 "01011101" // /* MW 2 */ + 12109 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12110 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "11000000" // /* MW 3 */ + 12112 "01100000" // /* MW 2 */ + 12113 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12115 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12116 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12117 "01110110" // /* MW 3 */ + 12118 "00000110" // /* MW 2 */ + 12119 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 393 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12122 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */ + 12123 "00000001" // /* MW 5 */ + 12124 "00000000" // /* MW 4 */ + 12125 "01011000" // /* MW 3 */ + 12126 "00010110" // /* MW 2 */ + 12127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12128 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12129 "11000000" // /* MW 3 */ + 12130 "11010100" // /* MW 2 */ + 12131 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12132 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12133 "00001101" // /* MW 3 */ + 12134 "01100011" // /* MW 2 */ + 12135 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 first +.delay_slot + 12136 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "00001101" // /* MW 3 */ + 12138 "00100001" // /* MW 2 */ + 12139 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 +.delay_slot + 12140 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12141 "01000001" // /* MW 3 */ + 12142 "01101001" // /* MW 2 */ + 12143 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12145 "00000000" // /* MW 15 */ + 12146 "00000000" // /* MW 14 */ + 12147 "10101000" // /* MW 13 */ + 12148 "11100010" // /* MW 12 */ + 12149 "00110100" // /* MW 11 */ + 12150 "00000000" // /* MW 10 */ + 12151 "00000000" // /* MW 9 */ + 12152 "00000000" // /* MW 8 */ + 12153 "01011011" // /* MW 7 */ + 12154 "00000001" // /* MW 6 */ + 12155 "00100000" // /* MW 5 */ + 12156 "00000000" // /* MW 4 */ + 12157 "11110000" // /* MW 3 */ + 12158 "00101100" // /* MW 2 */ + 12159 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 12160 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12161 "01111000" // /* MW 9 */ + 12162 "11010000" // /* MW 8 */ + 12163 "10110011" // /* MW 7 */ + 12164 "00101000" // /* MW 6 */ + 12165 "00000000" // /* MW 5 */ + 12166 "00000001" // /* MW 4 */ + 12167 "11010000" // /* MW 3 */ + 12168 "11000110" // /* MW 2 */ + 12169 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 + 12170 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12171 "11000000" // /* MW 5 */ + 12172 "11001000" // /* MW 4 */ + 12173 "11001100" // /* MW 3 */ + 12174 "00000111" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ + 12176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12177 "00000000" // /* MW 1 */ + 12178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12179 "00000000" // /* MW 1 */ + 12180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12181 "00000000" // /* MW 1 */ + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12186 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12187 "00001000" // /* MW 3 */ + 12188 "01010001" // /* MW 2 */ + 12189 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 12190 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12191 "00110110" // /* MW 3 */ + 12192 "11110110" // /* MW 2 */ + 12193 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 12194 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12195 "00011001" // /* MW 3 */ + 12196 "11101101" // /* MW 2 */ + 12197 "00000111" // /* MW 1 */ + 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12199 "00000000" // /* MW 1 */ + 12200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12201 "00000000" // /* MW 1 */ + 12202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12203 "00000000" // /* MW 1 */ + 12204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12205 "00000000" // /* MW 1 */ + 12206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12207 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 12208 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12209 "00010001" // /* MW 3 */ + 12210 "00100011" // /* MW 2 */ + 12211 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 12212 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12213 "01100011" // /* MW 5 */ + 12214 "11101100" // /* MW 4 */ + 12215 "11010011" // /* MW 3 */ + 12216 "11000110" // /* MW 2 */ + 12217 "01001010" // /* MW 1 */ + 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12219 "00000000" // /* MW 1 */ + 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12221 "00000000" // /* MW 1 */ + 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12223 "00000000" // /* MW 1 */ + 12224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12225 "00000000" // /* MW 1 */ + 12226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12227 "00000000" // /* MW 1 */ + 12228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12229 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12230 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12231 "00001000" // /* MW 3 */ + 12232 "01010001" // /* MW 2 */ + 12233 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 +.src_ref 7 "superkernels.cpp" 398 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 12234 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12235 "00010000" // /* MW 9 */ + 12236 "00100000" // /* MW 8 */ + 12237 "10110010" // /* MW 7 */ + 12238 "11110000" // /* MW 6 */ + 12239 "00000001" // /* MW 5 */ + 12240 "00000000" // /* MW 4 */ + 12241 "11010000" // /* MW 3 */ + 12242 "11001110" // /* MW 2 */ + 12243 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 first + 12244 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12245 "01010110" // /* MW 3 */ + 12246 "00000110" // /* MW 2 */ + 12247 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12248 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12249 "00110110" // /* MW 3 */ + 12250 "00000110" // /* MW 2 */ + 12251 "00000001" // /* MW 1 */ + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ + 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12257 "00000000" // /* MW 1 */ + 12258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12259 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 12260 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12261 "00110001" // /* MW 3 */ + 12262 "00100001" // /* MW 2 */ + 12263 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 12264 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12265 "00010001" // /* MW 3 */ + 12266 "11100110" // /* MW 2 */ + 12267 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 16 first + 12268 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12269 "00101000" // /* MW 3 */ + 12270 "01100001" // /* MW 2 */ + 12271 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12272 "10000100" // JNZ r16, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */ + 12273 "00000001" // /* MW 5 */ + 12274 "01000000" // /* MW 4 */ + 12275 "00001000" // /* MW 3 */ + 12276 "00011000" // /* MW 2 */ + 12277 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12285 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 + 12288 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12289 "00000001" // /* MW 3 */ + 12290 "00100000" // /* MW 2 */ + 12291 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 first + 12292 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12293 "11000001" // /* MW 11 */ + 12294 "00001000" // /* MW 10 */ + 12295 "10000011" // /* MW 9 */ + 12296 "00000000" // /* MW 8 */ + 12297 "00000000" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00100000" // /* MW 5 */ + 12300 "00000000" // /* MW 4 */ + 12301 "11110000" // /* MW 3 */ + 12302 "00101100" // /* MW 2 */ + 12303 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 400 + 12304 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12305 "00111001" // /* MW 3 */ + 12306 "11110000" // /* MW 2 */ + 12307 "00000111" // /* MW 1 */ + 12308 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12309 "11110001" // /* MW 3 */ + 12310 "11111101" // /* MW 2 */ + 12311 "00000111" // /* MW 1 */ + 12312 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12313 "10011001" // /* MW 3 */ + 12314 "11110111" // /* MW 2 */ + 12315 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12317 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12318 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12319 "11010001" // /* MW 3 */ + 12320 "11111001" // /* MW 2 */ + 12321 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12325 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12326 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12327 "00000000" // /* MW 3 */ + 12328 "00101000" // /* MW 2 */ + 12329 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12330 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12331 "00001011" // /* MW 3 */ + 12332 "10001110" // /* MW 2 */ + 12333 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 +.delay_slot + 12334 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12335 "00000001" // /* MW 5 */ + 12336 "00000000" // /* MW 4 */ + 12337 "00000000" // /* MW 3 */ + 12338 "11111000" // /* MW 2 */ + 12339 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12343 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 12345 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 2 "conv2d_dw_bf16_params.h" 211 first +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.function_start + 12352 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12353 "00010000" // /* MW 9 */ + 12354 "11100000" // /* MW 8 */ + 12355 "10110011" // /* MW 7 */ + 12356 "11110000" // /* MW 6 */ + 12357 "00000001" // /* MW 5 */ + 12358 "00000000" // /* MW 4 */ + 12359 "11010000" // /* MW 3 */ + 12360 "10000101" // /* MW 2 */ + 12361 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12362 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12363 "01011000" // /* MW 9 */ + 12364 "00000000" // /* MW 8 */ + 12365 "00001000" // /* MW 7 */ + 12366 "01001011" // /* MW 6 */ + 12367 "00000000" // /* MW 5 */ + 12368 "00000001" // /* MW 4 */ + 12369 "11010000" // /* MW 3 */ + 12370 "10000001" // /* MW 2 */ + 12371 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 211 + 12372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12373 "00000001" // /* MW 5 */ + 12374 "00000000" // /* MW 4 */ + 12375 "00000000" // /* MW 3 */ + 12376 "00001000" // /* MW 2 */ + 12377 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 + 12378 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12379 "00010001" // /* MW 9 */ + 12380 "11100000" // /* MW 8 */ + 12381 "10110011" // /* MW 7 */ + 12382 "11110011" // /* MW 6 */ + 12383 "00000001" // /* MW 5 */ + 12384 "00000000" // /* MW 4 */ + 12385 "10110000" // /* MW 3 */ + 12386 "11110011" // /* MW 2 */ + 12387 "11111110" // /* MW 1 */ + 12388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12389 "00111101" // /* MW 3 */ + 12390 "11111100" // /* MW 2 */ + 12391 "00001111" // /* MW 1 */ + 12392 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12393 "11110101" // /* MW 3 */ + 12394 "11111001" // /* MW 2 */ + 12395 "00001111" // /* MW 1 */ + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12398 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12399 "00101001" // /* MW 3 */ + 12400 "00011100" // /* MW 2 */ + 12401 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12402 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12403 "00001001" // /* MW 3 */ + 12404 "00011100" // /* MW 2 */ + 12405 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12406 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12407 "00101110" // /* MW 3 */ + 12408 "00000100" // /* MW 2 */ + 12409 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12410 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12411 "00001110" // /* MW 3 */ + 12412 "00010100" // /* MW 2 */ + 12413 "00000000" // /* MW 1 */ + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12415 "00000000" // /* MW 1 */ + 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12417 "00000000" // /* MW 1 */ + 12418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12419 "00000000" // /* MW 1 */ + 12420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12421 "00000000" // /* MW 1 */ + 12422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12423 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12424 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12425 "00101001" // /* MW 3 */ + 12426 "00000100" // /* MW 2 */ + 12427 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12428 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12429 "00001001" // /* MW 3 */ + 12430 "00010100" // /* MW 2 */ + 12431 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first + 12432 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12433 "00101010" // /* MW 3 */ + 12434 "01011110" // /* MW 2 */ + 12435 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 52 + 12436 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12437 "01001010" // /* MW 3 */ + 12438 "11101110" // /* MW 2 */ + 12439 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12440 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12441 "00101010" // /* MW 3 */ + 12442 "11101100" // /* MW 2 */ + 12443 "00000111" // /* MW 1 */ + 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12445 "00000000" // /* MW 1 */ + 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12447 "00000000" // /* MW 1 */ + 12448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12449 "00000000" // /* MW 1 */ + 12450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12451 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.no_stack_arguments + 12452 "00000100" // JL #15664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */ + 12453 "00000001" // /* MW 5 */ + 12454 "00000000" // /* MW 4 */ + 12455 "10011000" // /* MW 3 */ + 12456 "00011110" // /* MW 2 */ + 12457 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 38 +.delay_slot + 12458 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12459 "01000011" // /* MW 5 */ + 12460 "10111110" // /* MW 4 */ + 12461 "10111000" // /* MW 3 */ + 12462 "11001010" // /* MW 2 */ + 12463 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 +.delay_slot + 12464 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12465 "00010001" // /* MW 5 */ + 12466 "11000010" // /* MW 4 */ + 12467 "10110000" // /* MW 3 */ + 12468 "10000110" // /* MW 2 */ + 12469 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12470 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12471 "00010101" // /* MW 5 */ + 12472 "11101111" // /* MW 4 */ + 12473 "10110111" // /* MW 3 */ + 12474 "01000010" // /* MW 2 */ + 12475 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12476 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12477 "11110001" // /* MW 3 */ + 12478 "00100010" // /* MW 2 */ + 12479 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12480 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12481 "00000000" // /* MW 15 */ + 12482 "00000000" // /* MW 14 */ + 12483 "01111000" // /* MW 13 */ + 12484 "10100101" // /* MW 12 */ + 12485 "00000001" // /* MW 11 */ + 12486 "10010000" // /* MW 10 */ + 12487 "00001000" // /* MW 9 */ + 12488 "00011110" // /* MW 8 */ + 12489 "01011011" // /* MW 7 */ + 12490 "00000001" // /* MW 6 */ + 12491 "00100000" // /* MW 5 */ + 12492 "00000000" // /* MW 4 */ + 12493 "11110000" // /* MW 3 */ + 12494 "00101100" // /* MW 2 */ + 12495 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.return_address + 12496 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12497 "00000010" // /* MW 5 */ + 12498 "01000000" // /* MW 4 */ + 12499 "00100000" // /* MW 3 */ + 12500 "11010010" // /* MW 2 */ + 12501 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first + 12502 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12503 "01000011" // /* MW 5 */ + 12504 "01001000" // /* MW 4 */ + 12505 "01011000" // /* MW 3 */ + 12506 "11000101" // /* MW 2 */ + 12507 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 52 + 12508 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12509 "01101010" // /* MW 3 */ + 12510 "11101110" // /* MW 2 */ + 12511 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12512 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12513 "00110001" // /* MW 3 */ + 12514 "11101100" // /* MW 2 */ + 12515 "00000111" // /* MW 1 */ + 12516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12517 "00000000" // /* MW 1 */ + 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12519 "00000000" // /* MW 1 */ + 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12521 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first + 12522 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "01000110" // /* MW 3 */ + 12524 "11101001" // /* MW 2 */ + 12525 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 + 12526 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12527 "00001010" // /* MW 3 */ + 12528 "00110111" // /* MW 2 */ + 12529 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first + 12530 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12531 "01100011" // /* MW 5 */ + 12532 "11000110" // /* MW 4 */ + 12533 "10111000" // /* MW 3 */ + 12534 "01001110" // /* MW 2 */ + 12535 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.no_stack_arguments + 12536 "00111010" // ST r17, [sp, #-32]; JL #15664 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */ + 12537 "01000001" // /* MW 9 */ + 12538 "00000000" // /* MW 8 */ + 12539 "00000000" // /* MW 7 */ + 12540 "10100110" // /* MW 6 */ + 12541 "00000111" // /* MW 5 */ + 12542 "00000000" // /* MW 4 */ + 12543 "10110000" // /* MW 3 */ + 12544 "01000110" // /* MW 2 */ + 12545 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12546 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00100010" // /* MW 3 */ + 12548 "10101001" // /* MW 2 */ + 12549 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12550 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12551 "00001010" // /* MW 3 */ + 12552 "01110111" // /* MW 2 */ + 12553 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.delay_slot + 12554 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12555 "00010001" // /* MW 3 */ + 12556 "00100101" // /* MW 2 */ + 12557 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12558 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "01110000" // /* MW 3 */ + 12560 "00100110" // /* MW 2 */ + 12561 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 87 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12562 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12563 "01100000" // /* MW 13 */ + 12564 "00101011" // /* MW 12 */ + 12565 "00000000" // /* MW 11 */ + 12566 "00001001" // /* MW 10 */ + 12567 "10011000" // /* MW 9 */ + 12568 "00111101" // /* MW 8 */ + 12569 "00100010" // /* MW 7 */ + 12570 "01000001" // /* MW 6 */ + 12571 "00100100" // /* MW 5 */ + 12572 "00000000" // /* MW 4 */ + 12573 "11110000" // /* MW 3 */ + 12574 "00101100" // /* MW 2 */ + 12575 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 +.return_address + 12576 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12577 "01011000" // /* MW 9 */ + 12578 "01000010" // /* MW 8 */ + 12579 "00000000" // /* MW 7 */ + 12580 "11001000" // /* MW 6 */ + 12581 "00110111" // /* MW 5 */ + 12582 "00111111" // /* MW 4 */ + 12583 "00100000" // /* MW 3 */ + 12584 "00001110" // /* MW 2 */ + 12585 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12586 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12587 "01011000" // /* MW 9 */ + 12588 "11111100" // /* MW 8 */ + 12589 "00101001" // /* MW 7 */ + 12590 "00001000" // /* MW 6 */ + 12591 "10000000" // /* MW 5 */ + 12592 "00000001" // /* MW 4 */ + 12593 "00100000" // /* MW 3 */ + 12594 "11000010" // /* MW 2 */ + 12595 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 + 12596 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12597 "01011000" // /* MW 9 */ + 12598 "00000010" // /* MW 8 */ + 12599 "10001000" // /* MW 7 */ + 12600 "10001000" // /* MW 6 */ + 12601 "01100000" // /* MW 5 */ + 12602 "00000000" // /* MW 4 */ + 12603 "00100000" // /* MW 3 */ + 12604 "11011010" // /* MW 2 */ + 12605 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 + 12606 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12607 "01011000" // /* MW 9 */ + 12608 "00010111" // /* MW 8 */ + 12609 "10001000" // /* MW 7 */ + 12610 "00001011" // /* MW 6 */ + 12611 "01010001" // /* MW 5 */ + 12612 "00000000" // /* MW 4 */ + 12613 "01010000" // /* MW 3 */ + 12614 "01000101" // /* MW 2 */ + 12615 "11100001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 + 12616 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12617 "01011000" // /* MW 9 */ + 12618 "00100000" // /* MW 8 */ + 12619 "10000000" // /* MW 7 */ + 12620 "01001000" // /* MW 6 */ + 12621 "00100111" // /* MW 5 */ + 12622 "00111111" // /* MW 4 */ + 12623 "00100000" // /* MW 3 */ + 12624 "01010110" // /* MW 2 */ + 12625 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12626 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12627 "01011000" // /* MW 9 */ + 12628 "00000001" // /* MW 8 */ + 12629 "01001000" // /* MW 7 */ + 12630 "11001011" // /* MW 6 */ + 12631 "01110000" // /* MW 5 */ + 12632 "00000001" // /* MW 4 */ + 12633 "00100000" // /* MW 3 */ + 12634 "01111010" // /* MW 2 */ + 12635 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 + 12636 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12637 "01011000" // /* MW 9 */ + 12638 "11000000" // /* MW 8 */ + 12639 "11101111" // /* MW 7 */ + 12640 "00001011" // /* MW 6 */ + 12641 "11010000" // /* MW 5 */ + 12642 "00000101" // /* MW 4 */ + 12643 "10000000" // /* MW 3 */ + 12644 "11000000" // /* MW 2 */ + 12645 "11101001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12646 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12647 "00100001" // /* MW 3 */ + 12648 "00101000" // /* MW 2 */ + 12649 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12650 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12651 "00000110" // /* MW 3 */ + 12652 "11000111" // /* MW 2 */ + 12653 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 + 12654 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12655 "00000010" // /* MW 5 */ + 12656 "00110110" // /* MW 4 */ + 12657 "01010000" // /* MW 3 */ + 12658 "11110001" // /* MW 2 */ + 12659 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 + 12660 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12661 "11110101" // /* MW 5 */ + 12662 "00111111" // /* MW 4 */ + 12663 "01001011" // /* MW 3 */ + 12664 "00101000" // /* MW 2 */ + 12665 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12666 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12667 "00011101" // /* MW 5 */ + 12668 "00100000" // /* MW 4 */ + 12669 "11110001" // /* MW 3 */ + 12670 "11100001" // /* MW 2 */ + 12671 "01111000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12672 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12673 "01110000" // /* MW 3 */ + 12674 "00101000" // /* MW 2 */ + 12675 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 + 12676 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12677 "00000001" // /* MW 5 */ + 12678 "10100000" // /* MW 4 */ + 12679 "10010000" // /* MW 3 */ + 12680 "00000000" // /* MW 2 */ + 12681 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first + 12682 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12683 "00000001" // /* MW 5 */ + 12684 "10110100" // /* MW 4 */ + 12685 "10111101" // /* MW 3 */ + 12686 "11100111" // /* MW 2 */ + 12687 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first + 12688 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12689 "00000010" // /* MW 5 */ + 12690 "10100011" // /* MW 4 */ + 12691 "10110000" // /* MW 3 */ + 12692 "00001101" // /* MW 2 */ + 12693 "01111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 240 70 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first + 12694 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12695 "11111111" // /* MW 5 */ + 12696 "00110101" // /* MW 4 */ + 12697 "10110000" // /* MW 3 */ + 12698 "11001101" // /* MW 2 */ + 12699 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first + 12700 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12701 "00001111" // /* MW 3 */ + 12702 "11001101" // /* MW 2 */ + 12703 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first + 12704 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12705 "00011111" // /* MW 3 */ + 12706 "11011111" // /* MW 2 */ + 12707 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first +.src_ref 2 "conv2d_dw_bf16_params.h" 238 79 + 12708 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12709 "11111111" // /* MW 5 */ + 12710 "10110011" // /* MW 4 */ + 12711 "11111001" // /* MW 3 */ + 12712 "01101011" // /* MW 2 */ + 12713 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first + 12714 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12715 "00000111" // /* MW 3 */ + 12716 "00110111" // /* MW 2 */ + 12717 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first + 12718 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12719 "11011111" // /* MW 5 */ + 12720 "10010000" // /* MW 4 */ + 12721 "00110111" // /* MW 3 */ + 12722 "11010110" // /* MW 2 */ + 12723 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first + 12724 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12725 "01010010" // /* MW 3 */ + 12726 "00111000" // /* MW 2 */ + 12727 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first + 12728 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12729 "00101101" // /* MW 3 */ + 12730 "00100101" // /* MW 2 */ + 12731 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 + 12732 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12733 "00111111" // /* MW 5 */ + 12734 "11001000" // /* MW 4 */ + 12735 "00111000" // /* MW 3 */ + 12736 "01001010" // /* MW 2 */ + 12737 "11100101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first + 12738 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12739 "11111011" // /* MW 5 */ + 12740 "01110010" // /* MW 4 */ + 12741 "00111111" // /* MW 3 */ + 12742 "11110010" // /* MW 2 */ + 12743 "11111001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 47 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first + 12744 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12745 "00011111" // /* MW 5 */ + 12746 "01110000" // /* MW 4 */ + 12747 "00111001" // /* MW 3 */ + 12748 "11110010" // /* MW 2 */ + 12749 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first + 12750 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12751 "11111011" // /* MW 5 */ + 12752 "11001110" // /* MW 4 */ + 12753 "00111001" // /* MW 3 */ + 12754 "11001110" // /* MW 2 */ + 12755 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first + 12756 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12757 "11101010" // /* MW 5 */ + 12758 "10110011" // /* MW 4 */ + 12759 "10111001" // /* MW 3 */ + 12760 "00110101" // /* MW 2 */ + 12761 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first + 12762 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12763 "01011011" // /* MW 5 */ + 12764 "01111011" // /* MW 4 */ + 12765 "00111001" // /* MW 3 */ + 12766 "11111110" // /* MW 2 */ + 12767 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12768 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12769 "11100010" // /* MW 5 */ + 12770 "00110011" // /* MW 4 */ + 12771 "11111001" // /* MW 3 */ + 12772 "00100001" // /* MW 2 */ + 12773 "10010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first +.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first + 12774 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12775 "00000100" // /* MW 5 */ + 12776 "11110011" // /* MW 4 */ + 12777 "00111111" // /* MW 3 */ + 12778 "10000010" // /* MW 2 */ + 12779 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first + 12780 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12781 "01101101" // /* MW 3 */ + 12782 "11111111" // /* MW 2 */ + 12783 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 247 73 + 12784 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12785 "11111111" // /* MW 5 */ + 12786 "10111111" // /* MW 4 */ + 12787 "00111001" // /* MW 3 */ + 12788 "01100110" // /* MW 2 */ + 12789 "11110000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first + 12790 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12791 "11011011" // /* MW 5 */ + 12792 "11000110" // /* MW 4 */ + 12793 "00111000" // /* MW 3 */ + 12794 "10000110" // /* MW 2 */ + 12795 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first + 12796 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12797 "11111111" // /* MW 5 */ + 12798 "00110001" // /* MW 4 */ + 12799 "00111001" // /* MW 3 */ + 12800 "10100100" // /* MW 2 */ + 12801 "11000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12802 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12803 "11000011" // /* MW 5 */ + 12804 "11011011" // /* MW 4 */ + 12805 "00110011" // /* MW 3 */ + 12806 "11011010" // /* MW 2 */ + 12807 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12808 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12809 "01011011" // /* MW 5 */ + 12810 "01000011" // /* MW 4 */ + 12811 "00111000" // /* MW 3 */ + 12812 "11001010" // /* MW 2 */ + 12813 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12814 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12815 "01011011" // /* MW 5 */ + 12816 "11111100" // /* MW 4 */ + 12817 "00111001" // /* MW 3 */ + 12818 "10011110" // /* MW 2 */ + 12819 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12820 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12821 "11000001" // /* MW 5 */ + 12822 "11011010" // /* MW 4 */ + 12823 "00111110" // /* MW 3 */ + 12824 "11001110" // /* MW 2 */ + 12825 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first + 12826 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12827 "11110010" // /* MW 5 */ + 12828 "10111111" // /* MW 4 */ + 12829 "00011110" // /* MW 3 */ + 12830 "00100000" // /* MW 2 */ + 12831 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12832 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12833 "10100011" // /* MW 5 */ + 12834 "01000011" // /* MW 4 */ + 12835 "00111000" // /* MW 3 */ + 12836 "11011010" // /* MW 2 */ + 12837 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 + 12838 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12839 "01011001" // /* MW 9 */ + 12840 "11111111" // /* MW 8 */ + 12841 "00001111" // /* MW 7 */ + 12842 "01101110" // /* MW 6 */ + 12843 "01101101" // /* MW 5 */ + 12844 "00011111" // /* MW 4 */ + 12845 "00110000" // /* MW 3 */ + 12846 "11000010" // /* MW 2 */ + 12847 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first + 12848 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12849 "10000001" // /* MW 5 */ + 12850 "01101010" // /* MW 4 */ + 12851 "00111110" // /* MW 3 */ + 12852 "11001010" // /* MW 2 */ + 12853 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 253 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first + 12854 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12855 "11000011" // /* MW 5 */ + 12856 "01010010" // /* MW 4 */ + 12857 "00111010" // /* MW 3 */ + 12858 "11101010" // /* MW 2 */ + 12859 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 + 12860 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12861 "00001000" // /* MW 11 */ + 12862 "00010000" // /* MW 10 */ + 12863 "01101101" // /* MW 9 */ + 12864 "10110010" // /* MW 8 */ + 12865 "00001000" // /* MW 7 */ + 12866 "10101011" // /* MW 6 */ + 12867 "01110001" // /* MW 5 */ + 12868 "00011110" // /* MW 4 */ + 12869 "00000111" // /* MW 3 */ + 12870 "00010001" // /* MW 2 */ + 12871 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first + 12872 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12873 "01110001" // /* MW 3 */ + 12874 "00011110" // /* MW 2 */ + 12875 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first + 12876 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12877 "11111011" // /* MW 5 */ + 12878 "01010010" // /* MW 4 */ + 12879 "00111000" // /* MW 3 */ + 12880 "11000110" // /* MW 2 */ + 12881 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 + 12882 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12883 "10000011" // /* MW 5 */ + 12884 "01000010" // /* MW 4 */ + 12885 "00111100" // /* MW 3 */ + 12886 "11000010" // /* MW 2 */ + 12887 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first + 12888 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12889 "11111011" // /* MW 5 */ + 12890 "01010010" // /* MW 4 */ + 12891 "00111001" // /* MW 3 */ + 12892 "11000110" // /* MW 2 */ + 12893 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12894 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12895 "10000011" // /* MW 5 */ + 12896 "01000010" // /* MW 4 */ + 12897 "00111100" // /* MW 3 */ + 12898 "11000010" // /* MW 2 */ + 12899 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first + 12900 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12901 "01010001" // /* MW 3 */ + 12902 "00011110" // /* MW 2 */ + 12903 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first + 12904 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12905 "00110001" // /* MW 3 */ + 12906 "00011110" // /* MW 2 */ + 12907 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first + 12908 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12909 "00010001" // /* MW 3 */ + 12910 "00001010" // /* MW 2 */ + 12911 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first + 12912 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12913 "00001010" // /* MW 3 */ + 12914 "00000110" // /* MW 2 */ + 12915 "00000111" // /* MW 1 */ + 12916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12917 "00000000" // /* MW 1 */ + 12918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12919 "00000000" // /* MW 1 */ + 12920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12921 "00000000" // /* MW 1 */ + 12922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12923 "00000000" // /* MW 1 */ + 12924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12925 "00000000" // /* MW 1 */ + 12926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12927 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 58 + 12928 "10000100" // JZ r16, #12960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12960 delay_slots=5 */ + 12929 "00000001" // /* MW 5 */ + 12930 "00000000" // /* MW 4 */ + 12931 "01010000" // /* MW 3 */ + 12932 "00011001" // /* MW 2 */ + 12933 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12934 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12935 "01100000" // /* MW 3 */ + 12936 "00111011" // /* MW 2 */ + 12937 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12938 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12939 "00000000" // /* MW 5 */ + 12940 "10100000" // /* MW 4 */ + 12941 "00001001" // /* MW 3 */ + 12942 "01111111" // /* MW 2 */ + 12943 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12949 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12950 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12951 "00000001" // /* MW 9 */ + 12952 "00100110" // /* MW 8 */ + 12953 "00000000" // /* MW 7 */ + 12954 "00000000" // /* MW 6 */ + 12955 "01011011" // /* MW 5 */ + 12956 "00000001" // /* MW 4 */ + 12957 "11110000" // /* MW 3 */ + 12958 "00101100" // /* MW 2 */ + 12959 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 + 12960 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12961 "00010000" // /* MW 9 */ + 12962 "00110100" // /* MW 8 */ + 12963 "00110010" // /* MW 7 */ + 12964 "11110000" // /* MW 6 */ + 12965 "00000001" // /* MW 5 */ + 12966 "00000000" // /* MW 4 */ + 12967 "00100000" // /* MW 3 */ + 12968 "10000111" // /* MW 2 */ + 12969 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12970 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12971 "11100010" // /* MW 5 */ + 12972 "00000100" // /* MW 4 */ + 12973 "01010000" // /* MW 3 */ + 12974 "11000000" // /* MW 2 */ + 12975 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 + 12976 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12977 "11101001" // /* MW 5 */ + 12978 "00000010" // /* MW 4 */ + 12979 "00100001" // /* MW 3 */ + 12980 "10000011" // /* MW 2 */ + 12981 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12982 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12983 "00100101" // /* MW 5 */ + 12984 "00000001" // /* MW 4 */ + 12985 "00100000" // /* MW 3 */ + 12986 "00111110" // /* MW 2 */ + 12987 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first + 12988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12989 "00000001" // /* MW 5 */ + 12990 "00000000" // /* MW 4 */ + 12991 "00000000" // /* MW 3 */ + 12992 "11111000" // /* MW 2 */ + 12993 "11111111" // /* MW 1 */ + 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12995 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12997 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12998 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12999 "00010111" // /* MW 3 */ + 13000 "00000010" // /* MW 2 */ + 13001 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13002 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13003 "01000001" // /* MW 5 */ + 13004 "01110000" // /* MW 4 */ + 13005 "00001111" // /* MW 3 */ + 13006 "00000000" // /* MW 2 */ + 13007 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13008 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13009 "00010110" // /* MW 3 */ + 13010 "01000000" // /* MW 2 */ + 13011 "00001000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13012 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13013 "11000000" // /* MW 3 */ + 13014 "01100000" // /* MW 2 */ + 13015 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13016 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13017 "00000001" // /* MW 3 */ + 13018 "00000001" // /* MW 2 */ + 13019 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 13023 "00000000" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 2 "conv2d_dw_bf16.h" 199 first +.function_start + 13024 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13025 "11000000" // /* MW 3 */ + 13026 "01010110" // /* MW 2 */ + 13027 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 2 "conv2d_dw_bf16.h" 204 82 + 13028 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13029 "10101001" // /* MW 5 */ + 13030 "00000001" // /* MW 4 */ + 13031 "11011110" // /* MW 3 */ + 13032 "10010011" // /* MW 2 */ + 13033 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 1 "io_buffer_main.h" 125 25 + 13034 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13035 "00000010" // /* MW 5 */ + 13036 "11010001" // /* MW 4 */ + 13037 "11010110" // /* MW 3 */ + 13038 "10000011" // /* MW 2 */ + 13039 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 204 82 first + 13040 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13041 "10001010" // /* MW 3 */ + 13042 "11101000" // /* MW 2 */ + 13043 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 first + 13044 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13045 "01000110" // /* MW 3 */ + 13046 "11111101" // /* MW 2 */ + 13047 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13048 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13049 "00100110" // /* MW 3 */ + 13050 "00111101" // /* MW 2 */ + 13051 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13052 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13053 "01000110" // /* MW 3 */ + 13054 "11111111" // /* MW 2 */ + 13055 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13056 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13057 "00100110" // /* MW 3 */ + 13058 "00101111" // /* MW 2 */ + 13059 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13060 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13061 "00000110" // /* MW 3 */ + 13062 "00101101" // /* MW 2 */ + 13063 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 first + 13064 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13065 "01000110" // /* MW 3 */ + 13066 "11111100" // /* MW 2 */ + 13067 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13068 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13069 "00100110" // /* MW 3 */ + 13070 "00111100" // /* MW 2 */ + 13071 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13072 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13073 "01000110" // /* MW 3 */ + 13074 "11111110" // /* MW 2 */ + 13075 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13076 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13077 "00100110" // /* MW 3 */ + 13078 "00101110" // /* MW 2 */ + 13079 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13080 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13081 "00000110" // /* MW 3 */ + 13082 "00101100" // /* MW 2 */ + 13083 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 first + 13084 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13085 "11000110" // /* MW 3 */ + 13086 "11111100" // /* MW 2 */ + 13087 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13088 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13089 "10100110" // /* MW 3 */ + 13090 "00111100" // /* MW 2 */ + 13091 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13092 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13093 "11000110" // /* MW 3 */ + 13094 "11111110" // /* MW 2 */ + 13095 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13096 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13097 "10100110" // /* MW 3 */ + 13098 "00101110" // /* MW 2 */ + 13099 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13100 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13101 "10000110" // /* MW 3 */ + 13102 "00101100" // /* MW 2 */ + 13103 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 first + 13104 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13105 "11000110" // /* MW 3 */ + 13106 "11111111" // /* MW 2 */ + 13107 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 + 13108 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13109 "10100110" // /* MW 3 */ + 13110 "00101111" // /* MW 2 */ + 13111 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13112 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13113 "00010000" // /* MW 9 */ + 13114 "00110100" // /* MW 8 */ + 13115 "00110010" // /* MW 7 */ + 13116 "11110010" // /* MW 6 */ + 13117 "00000001" // /* MW 5 */ + 13118 "00000000" // /* MW 4 */ + 13119 "11010000" // /* MW 3 */ + 13120 "11110000" // /* MW 2 */ + 13121 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 13122 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13123 "10000001" // /* MW 5 */ + 13124 "11000101" // /* MW 4 */ + 13125 "01011000" // /* MW 3 */ + 13126 "10011000" // /* MW 2 */ + 13127 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13128 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13129 "00010000" // /* MW 3 */ + 13130 "00001111" // /* MW 2 */ + 13131 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 204 43 + 13132 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13133 "01011000" // /* MW 11 */ + 13134 "00000000" // /* MW 10 */ + 13135 "01100000" // /* MW 9 */ + 13136 "01101010" // /* MW 8 */ + 13137 "00100000" // /* MW 7 */ + 13138 "00000000" // /* MW 6 */ + 13139 "01101000" // /* MW 5 */ + 13140 "00111011" // /* MW 4 */ + 13141 "01110000" // /* MW 3 */ + 13142 "10000101" // /* MW 2 */ + 13143 "10000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 first +.src_ref 2 "conv2d_dw_bf16.h" 225 4 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13144 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 13145 "01100000" // /* MW 13 */ + 13146 "00001001" // /* MW 12 */ + 13147 "01100010" // /* MW 11 */ + 13148 "00001011" // /* MW 10 */ + 13149 "00010000" // /* MW 9 */ + 13150 "11100000" // /* MW 8 */ + 13151 "00101101" // /* MW 7 */ + 13152 "00000100" // /* MW 6 */ + 13153 "11101001" // /* MW 5 */ + 13154 "00111000" // /* MW 4 */ + 13155 "11010000" // /* MW 3 */ + 13156 "10111000" // /* MW 2 */ + 13157 "01111111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13158 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13159 "01110010" // /* MW 9 */ + 13160 "10010000" // /* MW 8 */ + 13161 "10000000" // /* MW 7 */ + 13162 "00000010" // /* MW 6 */ + 13163 "01001011" // /* MW 5 */ + 13164 "00001100" // /* MW 4 */ + 13165 "11010001" // /* MW 3 */ + 13166 "10110100" // /* MW 2 */ + 13167 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13168 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13169 "01111110" // /* MW 9 */ + 13170 "11000000" // /* MW 8 */ + 13171 "11100001" // /* MW 7 */ + 13172 "00000011" // /* MW 6 */ + 13173 "10010000" // /* MW 5 */ + 13174 "10101011" // /* MW 4 */ + 13175 "11010001" // /* MW 3 */ + 13176 "00110000" // /* MW 2 */ + 13177 "01101101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 13178 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13179 "01011110" // /* MW 9 */ + 13180 "10010000" // /* MW 8 */ + 13181 "00000111" // /* MW 7 */ + 13182 "00000010" // /* MW 6 */ + 13183 "11110100" // /* MW 5 */ + 13184 "11110000" // /* MW 4 */ + 13185 "11010001" // /* MW 3 */ + 13186 "00001010" // /* MW 2 */ + 13187 "01111001" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13188 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13189 "10000010" // /* MW 5 */ + 13190 "00000000" // /* MW 4 */ + 13191 "01010000" // /* MW 3 */ + 13192 "00011110" // /* MW 2 */ + 13193 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first + 13194 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13195 "00010000" // /* MW 11 */ + 13196 "11111000" // /* MW 10 */ + 13197 "01111001" // /* MW 9 */ + 13198 "00001100" // /* MW 8 */ + 13199 "00000000" // /* MW 7 */ + 13200 "00000000" // /* MW 6 */ + 13201 "01001011" // /* MW 5 */ + 13202 "00010000" // /* MW 4 */ + 13203 "11010110" // /* MW 3 */ + 13204 "11000000" // /* MW 2 */ + 13205 "01101001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 + 13206 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13207 "00010000" // /* MW 11 */ + 13208 "00101000" // /* MW 10 */ + 13209 "10111010" // /* MW 9 */ + 13210 "00001101" // /* MW 8 */ + 13211 "00000000" // /* MW 7 */ + 13212 "00000000" // /* MW 6 */ + 13213 "01001011" // /* MW 5 */ + 13214 "00010000" // /* MW 4 */ + 13215 "11010010" // /* MW 3 */ + 13216 "10010010" // /* MW 2 */ + 13217 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first + 13218 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13219 "00000101" // /* MW 5 */ + 13220 "01100001" // /* MW 4 */ + 13221 "10000100" // /* MW 3 */ + 13222 "00010110" // /* MW 2 */ + 13223 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 + 13224 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13225 "10001010" // /* MW 3 */ + 13226 "00000000" // /* MW 2 */ + 13227 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 244 4 + 13228 "10111010" // LDA r5, [p3]; MOVXM p3, #13456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13229 "00010000" // /* MW 9 */ + 13230 "01001000" // /* MW 8 */ + 13231 "10110010" // /* MW 7 */ + 13232 "00001101" // /* MW 6 */ + 13233 "00000000" // /* MW 5 */ + 13234 "00000000" // /* MW 4 */ + 13235 "11010000" // /* MW 3 */ + 13236 "10010110" // /* MW 2 */ + 13237 "01100000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first + 13238 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13239 "10101000" // /* MW 9 */ + 13240 "00000001" // /* MW 8 */ + 13241 "10001110" // /* MW 7 */ + 13242 "00001010" // /* MW 6 */ + 13243 "00010100" // /* MW 5 */ + 13244 "00000000" // /* MW 4 */ + 13245 "11110000" // /* MW 3 */ + 13246 "00101100" // /* MW 2 */ + 13247 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.src_ref 2 "conv2d_dw_bf16.h" 271 12 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 +.src_ref 2 "conv2d_dw_bf16.h" 273 12 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 +.src_ref 2 "conv2d_dw_bf16.h" 275 12 +.src_ref 2 "conv2d_dw_bf16.h" 276 12 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 + 13248 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13249 "00000000" // /* MW 15 */ + 13250 "00000000" // /* MW 14 */ + 13251 "01111000" // /* MW 13 */ + 13252 "10111001" // /* MW 12 */ + 13253 "00001110" // /* MW 11 */ + 13254 "00001000" // /* MW 10 */ + 13255 "00110110" // /* MW 9 */ + 13256 "00000000" // /* MW 8 */ + 13257 "01011011" // /* MW 7 */ + 13258 "00000001" // /* MW 6 */ + 13259 "00100000" // /* MW 5 */ + 13260 "00000000" // /* MW 4 */ + 13261 "00000000" // /* MW 3 */ + 13262 "10010001" // /* MW 2 */ + 13263 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13264 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13265 "01101010" // /* MW 15 */ + 13266 "01100011" // /* MW 14 */ + 13267 "10101100" // /* MW 13 */ + 13268 "00000011" // /* MW 12 */ + 13269 "00001110" // /* MW 11 */ + 13270 "00000010" // /* MW 10 */ + 13271 "11010100" // /* MW 9 */ + 13272 "00001101" // /* MW 8 */ + 13273 "01001011" // /* MW 7 */ + 13274 "00010000" // /* MW 6 */ + 13275 "00100000" // /* MW 5 */ + 13276 "00000000" // /* MW 4 */ + 13277 "11110000" // /* MW 3 */ + 13278 "00101100" // /* MW 2 */ + 13279 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13280 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13281 "00011010" // /* MW 15 */ + 13282 "01001000" // /* MW 14 */ + 13283 "11001100" // /* MW 13 */ + 13284 "00111111" // /* MW 12 */ + 13285 "10111001" // /* MW 11 */ + 13286 "11011010" // /* MW 10 */ + 13287 "00101111" // /* MW 9 */ + 13288 "00000100" // /* MW 8 */ + 13289 "01001011" // /* MW 7 */ + 13290 "00010000" // /* MW 6 */ + 13291 "00100101" // /* MW 5 */ + 13292 "00000000" // /* MW 4 */ + 13293 "11010000" // /* MW 3 */ + 13294 "10100011" // /* MW 2 */ + 13295 "01000000" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13297 "01101110" // /* MW 9 */ + 13298 "10000001" // /* MW 8 */ + 13299 "10000100" // /* MW 7 */ + 13300 "00000010" // /* MW 6 */ + 13301 "11110100" // /* MW 5 */ + 13302 "11110000" // /* MW 4 */ + 13303 "01110001" // /* MW 3 */ + 13304 "10110011" // /* MW 2 */ + 13305 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13307 "00000001" // /* MW 9 */ + 13308 "10001001" // /* MW 8 */ + 13309 "10001010" // /* MW 7 */ + 13310 "01000110" // /* MW 6 */ + 13311 "00001011" // /* MW 5 */ + 13312 "10011100" // /* MW 4 */ + 13313 "11101010" // /* MW 3 */ + 13314 "00111000" // /* MW 2 */ + 13315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13317 "00000001" // /* MW 9 */ + 13318 "00110101" // /* MW 8 */ + 13319 "10001001" // /* MW 7 */ + 13320 "11000110" // /* MW 6 */ + 13321 "10000110" // /* MW 5 */ + 13322 "00110000" // /* MW 4 */ + 13323 "01101010" // /* MW 3 */ + 13324 "10110001" // /* MW 2 */ + 13325 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13327 "00000110" // /* MW 3 */ + 13328 "10001001" // /* MW 2 */ + 13329 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13331 "10100001" // /* MW 7 */ + 13332 "01001000" // /* MW 6 */ + 13333 "10001100" // /* MW 5 */ + 13334 "11000110" // /* MW 4 */ + 13335 "10001110" // /* MW 3 */ + 13336 "10110000" // /* MW 2 */ + 13337 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13339 "10100001" // /* MW 7 */ + 13340 "00110110" // /* MW 6 */ + 13341 "10001010" // /* MW 5 */ + 13342 "01000110" // /* MW 4 */ + 13343 "00001111" // /* MW 3 */ + 13344 "10011100" // /* MW 2 */ + 13345 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13347 "00001110" // /* MW 3 */ + 13348 "10001001" // /* MW 2 */ + 13349 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13351 "11100001" // /* MW 7 */ + 13352 "10010010" // /* MW 6 */ + 13353 "10001011" // /* MW 5 */ + 13354 "01000110" // /* MW 4 */ + 13355 "00000011" // /* MW 3 */ + 13356 "00011100" // /* MW 2 */ + 13357 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13359 "11100001" // /* MW 7 */ + 13360 "01010110" // /* MW 6 */ + 13361 "10001000" // /* MW 5 */ + 13362 "01000110" // /* MW 4 */ + 13363 "00000111" // /* MW 3 */ + 13364 "00011100" // /* MW 2 */ + 13365 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13366 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13367 "01101110" // /* MW 9 */ + 13368 "01000001" // /* MW 8 */ + 13369 "00011000" // /* MW 7 */ + 13370 "00000001" // /* MW 6 */ + 13371 "00010000" // /* MW 5 */ + 13372 "00000000" // /* MW 4 */ + 13373 "11110000" // /* MW 3 */ + 13374 "00101100" // /* MW 2 */ + 13375 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13377 "01101010" // /* MW 15 */ + 13378 "01100011" // /* MW 14 */ + 13379 "01111100" // /* MW 13 */ + 13380 "10100101" // /* MW 12 */ + 13381 "00000001" // /* MW 11 */ + 13382 "00000000" // /* MW 10 */ + 13383 "00000000" // /* MW 9 */ + 13384 "00000000" // /* MW 8 */ + 13385 "01011011" // /* MW 7 */ + 13386 "00000001" // /* MW 6 */ + 13387 "00100000" // /* MW 5 */ + 13388 "00000000" // /* MW 4 */ + 13389 "11110000" // /* MW 3 */ + 13390 "00101100" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 13392 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13393 "00011010" // /* MW 15 */ + 13394 "01001000" // /* MW 14 */ + 13395 "01111100" // /* MW 13 */ + 13396 "10100101" // /* MW 12 */ + 13397 "00000001" // /* MW 11 */ + 13398 "00000000" // /* MW 10 */ + 13399 "00000000" // /* MW 9 */ + 13400 "00000000" // /* MW 8 */ + 13401 "01011011" // /* MW 7 */ + 13402 "00000001" // /* MW 6 */ + 13403 "00100000" // /* MW 5 */ + 13404 "00000000" // /* MW 4 */ + 13405 "11110000" // /* MW 3 */ + 13406 "00101100" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13408 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13409 "01101110" // /* MW 9 */ + 13410 "10000001" // /* MW 8 */ + 13411 "10000100" // /* MW 7 */ + 13412 "00000010" // /* MW 6 */ + 13413 "10010000" // /* MW 5 */ + 13414 "01110011" // /* MW 4 */ + 13415 "11110100" // /* MW 3 */ + 13416 "00001100" // /* MW 2 */ + 13417 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13418 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13419 "00000001" // /* MW 7 */ + 13420 "10001001" // /* MW 6 */ + 13421 "10001010" // /* MW 5 */ + 13422 "01000110" // /* MW 4 */ + 13423 "00001011" // /* MW 3 */ + 13424 "10011100" // /* MW 2 */ + 13425 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13426 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13427 "00000001" // /* MW 7 */ + 13428 "00110101" // /* MW 6 */ + 13429 "10001001" // /* MW 5 */ + 13430 "11000110" // /* MW 4 */ + 13431 "10000110" // /* MW 3 */ + 13432 "00110000" // /* MW 2 */ + 13433 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13434 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13435 "00000110" // /* MW 3 */ + 13436 "10001001" // /* MW 2 */ + 13437 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13438 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13439 "10100001" // /* MW 7 */ + 13440 "01001000" // /* MW 6 */ + 13441 "10001100" // /* MW 5 */ + 13442 "01000110" // /* MW 4 */ + 13443 "00001111" // /* MW 3 */ + 13444 "10011100" // /* MW 2 */ + 13445 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13446 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13447 "10100001" // /* MW 9 */ + 13448 "00110110" // /* MW 8 */ + 13449 "10001010" // /* MW 7 */ + 13450 "11000010" // /* MW 6 */ + 13451 "10001110" // /* MW 5 */ + 13452 "10110000" // /* MW 4 */ + 13453 "11110100" // /* MW 3 */ + 13454 "00101100" // /* MW 2 */ + 13455 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13456 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13457 "00011101" // /* MW 5 */ + 13458 "00010010" // /* MW 4 */ + 13459 "10001011" // /* MW 3 */ + 13460 "00011110" // /* MW 2 */ + 13461 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13462 "01011010" // MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13463 "11100001" // /* MW 9 */ + 13464 "10010010" // /* MW 8 */ + 13465 "10001011" // /* MW 7 */ + 13466 "00000010" // /* MW 6 */ + 13467 "01010100" // /* MW 5 */ + 13468 "10110111" // /* MW 4 */ + 13469 "00000001" // /* MW 3 */ + 13470 "00000000" // /* MW 2 */ + 13471 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13472 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13473 "11100001" // /* MW 11 */ + 13474 "01010110" // /* MW 10 */ + 13475 "10001000" // /* MW 9 */ + 13476 "00000010" // /* MW 8 */ + 13477 "01001111" // /* MW 7 */ + 13478 "10001111" // /* MW 6 */ + 13479 "00000001" // /* MW 5 */ + 13480 "00000000" // /* MW 4 */ + 13481 "01110000" // /* MW 3 */ + 13482 "10000101" // /* MW 2 */ + 13483 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13484 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13485 "01111111" // /* MW 3 */ + 13486 "01110010" // /* MW 2 */ + 13487 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13488 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13489 "10011011" // /* MW 3 */ + 13490 "00011101" // /* MW 2 */ + 13491 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13492 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13493 "01110100" // /* MW 3 */ + 13494 "00011100" // /* MW 2 */ + 13495 "00111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13496 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13497 "10110100" // /* MW 3 */ + 13498 "01011000" // /* MW 2 */ + 13499 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13500 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13501 "10010110" // /* MW 3 */ + 13502 "00010001" // /* MW 2 */ + 13503 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13504 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13505 "00010110" // /* MW 3 */ + 13506 "00010000" // /* MW 2 */ + 13507 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13508 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13509 "01101100" // /* MW 3 */ + 13510 "01010000" // /* MW 2 */ + 13511 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13512 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13513 "00010100" // /* MW 3 */ + 13514 "01010011" // /* MW 2 */ + 13515 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13516 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13517 "01110000" // /* MW 7 */ + 13518 "00110110" // /* MW 6 */ + 13519 "10101000" // /* MW 5 */ + 13520 "00000010" // /* MW 4 */ + 13521 "01100000" // /* MW 3 */ + 13522 "01000010" // /* MW 2 */ + 13523 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13524 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13525 "00000011" // /* MW 3 */ + 13526 "00011100" // /* MW 2 */ + 13527 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13528 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13529 "01110000" // /* MW 7 */ + 13530 "01000101" // /* MW 6 */ + 13531 "10000000" // /* MW 5 */ + 13532 "00000001" // /* MW 4 */ + 13533 "01100000" // /* MW 3 */ + 13534 "01010010" // /* MW 2 */ + 13535 "01000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13536 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13537 "01000001" // /* MW 7 */ + 13538 "01101101" // /* MW 6 */ + 13539 "10001100" // /* MW 5 */ + 13540 "01000110" // /* MW 4 */ + 13541 "00000111" // /* MW 3 */ + 13542 "00011100" // /* MW 2 */ + 13543 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13544 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13545 "01000001" // /* MW 7 */ + 13546 "00000011" // /* MW 6 */ + 13547 "10001001" // /* MW 5 */ + 13548 "11000110" // /* MW 4 */ + 13549 "10000010" // /* MW 3 */ + 13550 "00110000" // /* MW 2 */ + 13551 "00000010" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 13552 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13553 "01101110" // /* MW 9 */ + 13554 "10000001" // /* MW 8 */ + 13555 "10000100" // /* MW 7 */ + 13556 "00000010" // /* MW 6 */ + 13557 "11110100" // /* MW 5 */ + 13558 "11110000" // /* MW 4 */ + 13559 "01110001" // /* MW 3 */ + 13560 "10110011" // /* MW 2 */ + 13561 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13562 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13563 "00000001" // /* MW 9 */ + 13564 "10001001" // /* MW 8 */ + 13565 "10001010" // /* MW 7 */ + 13566 "01000110" // /* MW 6 */ + 13567 "00001011" // /* MW 5 */ + 13568 "10011100" // /* MW 4 */ + 13569 "11101010" // /* MW 3 */ + 13570 "00111000" // /* MW 2 */ + 13571 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13572 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13573 "00000001" // /* MW 9 */ + 13574 "00110101" // /* MW 8 */ + 13575 "10001001" // /* MW 7 */ + 13576 "11000110" // /* MW 6 */ + 13577 "10000110" // /* MW 5 */ + 13578 "00110000" // /* MW 4 */ + 13579 "01101010" // /* MW 3 */ + 13580 "10110001" // /* MW 2 */ + 13581 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13582 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13583 "00000110" // /* MW 3 */ + 13584 "10001001" // /* MW 2 */ + 13585 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13586 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13587 "10100001" // /* MW 7 */ + 13588 "01001000" // /* MW 6 */ + 13589 "10001100" // /* MW 5 */ + 13590 "11000110" // /* MW 4 */ + 13591 "10001110" // /* MW 3 */ + 13592 "10110000" // /* MW 2 */ + 13593 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13594 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13595 "10100001" // /* MW 7 */ + 13596 "00110110" // /* MW 6 */ + 13597 "10001010" // /* MW 5 */ + 13598 "01000110" // /* MW 4 */ + 13599 "00001111" // /* MW 3 */ + 13600 "10011100" // /* MW 2 */ + 13601 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13602 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13603 "00001110" // /* MW 3 */ + 13604 "10001001" // /* MW 2 */ + 13605 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13606 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13607 "11100001" // /* MW 7 */ + 13608 "10010010" // /* MW 6 */ + 13609 "10001011" // /* MW 5 */ + 13610 "01000110" // /* MW 4 */ + 13611 "00000011" // /* MW 3 */ + 13612 "00011100" // /* MW 2 */ + 13613 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13614 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13615 "11100001" // /* MW 7 */ + 13616 "01010110" // /* MW 6 */ + 13617 "10001000" // /* MW 5 */ + 13618 "01000110" // /* MW 4 */ + 13619 "00000111" // /* MW 3 */ + 13620 "00011100" // /* MW 2 */ + 13621 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13622 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13623 "00000101" // /* MW 5 */ + 13624 "01100001" // /* MW 4 */ + 13625 "11110100" // /* MW 3 */ + 13626 "00101100" // /* MW 2 */ + 13627 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13628 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13629 "01000001" // /* MW 3 */ + 13630 "01101101" // /* MW 2 */ + 13631 "10001100" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13632 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13633 "00011010" // /* MW 15 */ + 13634 "01001000" // /* MW 14 */ + 13635 "01111100" // /* MW 13 */ + 13636 "10100101" // /* MW 12 */ + 13637 "00000001" // /* MW 11 */ + 13638 "00000000" // /* MW 10 */ + 13639 "00000000" // /* MW 9 */ + 13640 "00000000" // /* MW 8 */ + 13641 "01011011" // /* MW 7 */ + 13642 "00000001" // /* MW 6 */ + 13643 "00100000" // /* MW 5 */ + 13644 "00000000" // /* MW 4 */ + 13645 "11110000" // /* MW 3 */ + 13646 "00101100" // /* MW 2 */ + 13647 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 244 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13648 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 13649 "01101000" // /* MW 11 */ + 13650 "10000001" // /* MW 10 */ + 13651 "10000100" // /* MW 9 */ + 13652 "00000010" // /* MW 8 */ + 13653 "00100111" // /* MW 7 */ + 13654 "00000100" // /* MW 6 */ + 13655 "00100000" // /* MW 5 */ + 13656 "11100111" // /* MW 4 */ + 13657 "11111000" // /* MW 3 */ + 13658 "00001100" // /* MW 2 */ + 13659 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13660 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13661 "00000001" // /* MW 7 */ + 13662 "10001001" // /* MW 6 */ + 13663 "10001010" // /* MW 5 */ + 13664 "01000110" // /* MW 4 */ + 13665 "00001011" // /* MW 3 */ + 13666 "10011100" // /* MW 2 */ + 13667 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13668 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13669 "00000001" // /* MW 7 */ + 13670 "00110101" // /* MW 6 */ + 13671 "10001001" // /* MW 5 */ + 13672 "11000110" // /* MW 4 */ + 13673 "10000110" // /* MW 3 */ + 13674 "00110000" // /* MW 2 */ + 13675 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13676 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13677 "00000110" // /* MW 3 */ + 13678 "10001001" // /* MW 2 */ + 13679 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13680 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13681 "10100001" // /* MW 7 */ + 13682 "01001000" // /* MW 6 */ + 13683 "10001100" // /* MW 5 */ + 13684 "01000110" // /* MW 4 */ + 13685 "00001111" // /* MW 3 */ + 13686 "10011100" // /* MW 2 */ + 13687 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13688 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13689 "10100001" // /* MW 7 */ + 13690 "00110110" // /* MW 6 */ + 13691 "10001010" // /* MW 5 */ + 13692 "11000110" // /* MW 4 */ + 13693 "10001110" // /* MW 3 */ + 13694 "10110000" // /* MW 2 */ + 13695 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13696 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13697 "00001110" // /* MW 3 */ + 13698 "10001001" // /* MW 2 */ + 13699 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13700 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13701 "11100001" // /* MW 3 */ + 13702 "10010010" // /* MW 2 */ + 13703 "10001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13704 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13705 "11100001" // /* MW 3 */ + 13706 "01010110" // /* MW 2 */ + 13707 "10001000" // /* MW 1 */ + 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13709 "00000000" // /* MW 1 */ + 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13711 "00000000" // /* MW 1 */ + 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13713 "00000000" // /* MW 1 */ + 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13715 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first + 13716 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13717 "10010110" // /* MW 3 */ + 13718 "00010001" // /* MW 2 */ + 13719 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 290 first + 13720 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13721 "00000000" // /* MW 5 */ + 13722 "01010000" // /* MW 4 */ + 13723 "11000000" // /* MW 3 */ + 13724 "00000010" // /* MW 2 */ + 13725 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13726 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13727 "01101100" // /* MW 3 */ + 13728 "01010000" // /* MW 2 */ + 13729 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.delay_slot + 13730 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13731 "00010100" // /* MW 3 */ + 13732 "01010011" // /* MW 2 */ + 13733 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13734 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13735 "01101100" // /* MW 3 */ + 13736 "01010000" // /* MW 2 */ + 13737 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.delay_slot + 13738 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13739 "00010011" // /* MW 3 */ + 13740 "10001010" // /* MW 2 */ + 13741 "00001010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 13742 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13743 "10010011" // /* MW 3 */ + 13744 "00111010" // /* MW 2 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 13745 "00001010" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 444 first +.src_ref 7 "superkernels.cpp" 449 6 +.function_start + 13760 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13761 "10000000" // /* MW 5 */ + 13762 "11001000" // /* MW 4 */ + 13763 "11001000" // /* MW 3 */ + 13764 "00000111" // /* MW 2 */ + 13765 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 first + 13766 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13767 "01000001" // /* MW 5 */ + 13768 "00101111" // /* MW 4 */ + 13769 "11010000" // /* MW 3 */ + 13770 "11000010" // /* MW 2 */ + 13771 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 444 + 13772 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13773 "00000001" // /* MW 5 */ + 13774 "00000000" // /* MW 4 */ + 13775 "00000000" // /* MW 3 */ + 13776 "00010000" // /* MW 2 */ + 13777 "00000000" // /* MW 1 */ + 13778 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13779 "01110000" // /* MW 7 */ + 13780 "01110000" // /* MW 6 */ + 13781 "00101101" // /* MW 5 */ + 13782 "00000010" // /* MW 4 */ + 13783 "10110000" // /* MW 3 */ + 13784 "00111010" // /* MW 2 */ + 13785 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 + 13786 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13787 "01110000" // /* MW 7 */ + 13788 "11110000" // /* MW 6 */ + 13789 "10101000" // /* MW 5 */ + 13790 "00000001" // /* MW 4 */ + 13791 "10110000" // /* MW 3 */ + 13792 "10110110" // /* MW 2 */ + 13793 "11111111" // /* MW 1 */ + 13794 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13795 "00011101" // /* MW 3 */ + 13796 "11101100" // /* MW 2 */ + 13797 "00001111" // /* MW 1 */ + 13798 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13799 "10011101" // /* MW 3 */ + 13800 "11110111" // /* MW 2 */ + 13801 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 + 13802 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13803 "01110000" // /* MW 7 */ + 13804 "01100000" // /* MW 6 */ + 13805 "11001010" // /* MW 5 */ + 13806 "00000001" // /* MW 4 */ + 13807 "10110000" // /* MW 3 */ + 13808 "00000010" // /* MW 2 */ + 13809 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 +.src_ref 7 "superkernels.cpp" 449 16 + 13810 "10000100" // JNZ r16, #13936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13936 delay_slots=5 */ + 13811 "00000001" // /* MW 5 */ + 13812 "01000000" // /* MW 4 */ + 13813 "00111000" // /* MW 3 */ + 13814 "00011011" // /* MW 2 */ + 13815 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 13816 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13817 "11000000" // /* MW 3 */ + 13818 "11010110" // /* MW 2 */ + 13819 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 22 first +.delay_slot + 13820 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13821 "10010000" // /* MW 3 */ + 13822 "01100010" // /* MW 2 */ + 13823 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 30 +.delay_slot + 13824 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13825 "11111011" // /* MW 3 */ + 13826 "01100011" // /* MW 2 */ + 13827 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13828 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13829 "10100000" // /* MW 5 */ + 13830 "11001000" // /* MW 4 */ + 13831 "11000110" // /* MW 3 */ + 13832 "00000111" // /* MW 2 */ + 13833 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13834 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13835 "00110001" // /* MW 3 */ + 13836 "00000110" // /* MW 2 */ + 13837 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13838 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13839 "00010001" // /* MW 9 */ + 13840 "00110100" // /* MW 8 */ + 13841 "10110010" // /* MW 7 */ + 13842 "11110000" // /* MW 6 */ + 13843 "00000001" // /* MW 5 */ + 13844 "00000000" // /* MW 4 */ + 13845 "01100000" // /* MW 3 */ + 13846 "10010001" // /* MW 2 */ + 13847 "11110000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13848 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13849 "00010000" // /* MW 11 */ + 13850 "00110010" // /* MW 10 */ + 13851 "10110010" // /* MW 9 */ + 13852 "11110000" // /* MW 8 */ + 13853 "00000001" // /* MW 7 */ + 13854 "00000000" // /* MW 6 */ + 13855 "10001011" // /* MW 5 */ + 13856 "10001000" // /* MW 4 */ + 13857 "11100000" // /* MW 3 */ + 13858 "11000000" // /* MW 2 */ + 13859 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13861 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 13862 "00000100" // JL #12352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12352 delay_slots=5 */ + 13863 "00000001" // /* MW 5 */ + 13864 "00000000" // /* MW 4 */ + 13865 "00100000" // /* MW 3 */ + 13866 "00011000" // /* MW 2 */ + 13867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13871 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13872 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13873 "00110001" // /* MW 3 */ + 13874 "00100000" // /* MW 2 */ + 13875 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 13876 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13877 "00000101" // /* MW 3 */ + 13878 "00100000" // /* MW 2 */ + 13879 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 13880 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13881 "01110000" // /* MW 7 */ + 13882 "10100101" // /* MW 6 */ + 13883 "00000001" // /* MW 5 */ + 13884 "00000000" // /* MW 4 */ + 13885 "00110000" // /* MW 3 */ + 13886 "11000010" // /* MW 2 */ + 13887 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 +.src_ref 7 "superkernels.cpp" 461 2 +.return_address + 13888 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13889 "00000000" // /* MW 7 */ + 13890 "10000010" // /* MW 6 */ + 13891 "00110011" // /* MW 5 */ + 13892 "00000001" // /* MW 4 */ + 13893 "01100000" // /* MW 3 */ + 13894 "10010001" // /* MW 2 */ + 13895 "00110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 17 first + 13896 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13897 "00111010" // /* MW 3 */ + 13898 "00000110" // /* MW 2 */ + 13899 "00000010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 +.src_ref 7 "superkernels.cpp" 453 15 first + 13900 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13901 "00010000" // /* MW 9 */ + 13902 "00110000" // /* MW 8 */ + 13903 "00110010" // /* MW 7 */ + 13904 "11110001" // /* MW 6 */ + 13905 "00000001" // /* MW 5 */ + 13906 "00000000" // /* MW 4 */ + 13907 "01010000" // /* MW 3 */ + 13908 "11000011" // /* MW 2 */ + 13909 "01000100" // /* MW 1 */ + 13910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13911 "00000000" // /* MW 1 */ + 13912 "10000100" // J #13952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13952 delay_slots=5 */ + 13913 "00000000" // /* MW 5 */ + 13914 "00000000" // /* MW 4 */ + 13915 "01000000" // /* MW 3 */ + 13916 "00011011" // /* MW 2 */ + 13917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 +.src_ref 7 "superkernels.cpp" 457 26 +.delay_slot + 13918 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13919 "10110000" // /* MW 5 */ + 13920 "11001000" // /* MW 4 */ + 13921 "11000110" // /* MW 3 */ + 13922 "00000111" // /* MW 2 */ + 13923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13927 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 first +.delay_slot + 13928 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13929 "00110001" // /* MW 3 */ + 13930 "00000110" // /* MW 2 */ + 13931 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 first +.delay_slot + 13932 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13933 "00010001" // /* MW 3 */ + 13934 "00000110" // /* MW 2 */ + 13935 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 7 "superkernels.cpp" 457 26 + 13936 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13937 "00000000" // /* MW 15 */ + 13938 "00000000" // /* MW 14 */ + 13939 "00010000" // /* MW 13 */ + 13940 "00101100" // /* MW 12 */ + 13941 "10110010" // /* MW 11 */ + 13942 "11110001" // /* MW 10 */ + 13943 "00000001" // /* MW 9 */ + 13944 "00000000" // /* MW 8 */ + 13945 "01011011" // /* MW 7 */ + 13946 "00000001" // /* MW 6 */ + 13947 "00100000" // /* MW 5 */ + 13948 "00000000" // /* MW 4 */ + 13949 "11110000" // /* MW 3 */ + 13950 "00101100" // /* MW 2 */ + 13951 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 218 49 first + 13952 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13953 "10000110" // /* MW 3 */ + 13954 "01100111" // /* MW 2 */ + 13955 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 +.src_ref 1 "io_buffer_main.h" 218 49 + 13956 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13957 "00010000" // /* MW 9 */ + 13958 "00101000" // /* MW 8 */ + 13959 "00110010" // /* MW 7 */ + 13960 "11110010" // /* MW 6 */ + 13961 "00000001" // /* MW 5 */ + 13962 "00000000" // /* MW 4 */ + 13963 "11010000" // /* MW 3 */ + 13964 "11101110" // /* MW 2 */ + 13965 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 13966 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13967 "00010110" // /* MW 3 */ + 13968 "11111110" // /* MW 2 */ + 13969 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 13970 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13971 "00110110" // /* MW 3 */ + 13972 "11111110" // /* MW 2 */ + 13973 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 13974 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13975 "01010110" // /* MW 3 */ + 13976 "01000110" // /* MW 2 */ + 13977 "00000010" // /* MW 1 */ + 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13979 "00000000" // /* MW 1 */ + 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13981 "00000000" // /* MW 1 */ + 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13983 "00000000" // /* MW 1 */ + 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13985 "00000000" // /* MW 1 */ + 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13987 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 13988 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13989 "00000010" // /* MW 3 */ + 13990 "01100001" // /* MW 2 */ + 13991 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 13992 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13993 "00010001" // /* MW 3 */ + 13994 "00000110" // /* MW 2 */ + 13995 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 13996 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13997 "11111101" // /* MW 3 */ + 13998 "11100000" // /* MW 2 */ + 13999 "00010111" // /* MW 1 */ + 14000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14001 "00000000" // /* MW 1 */ + 14002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14003 "00000000" // /* MW 1 */ + 14004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14005 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14006 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14007 "00001000" // /* MW 3 */ + 14008 "10010011" // /* MW 2 */ + 14009 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 +.src_ref 7 "superkernels.cpp" 459 47 +.src_ref 7 "superkernels.cpp" 464 6 +.src_ref 7 "superkernels.cpp" 465 16 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 14010 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14011 "00010000" // /* MW 9 */ + 14012 "00100000" // /* MW 8 */ + 14013 "10110010" // /* MW 7 */ + 14014 "11110011" // /* MW 6 */ + 14015 "00000001" // /* MW 5 */ + 14016 "00000000" // /* MW 4 */ + 14017 "00000000" // /* MW 3 */ + 14018 "00101111" // /* MW 2 */ + 14019 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 + 14020 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14021 "11000001" // /* MW 5 */ + 14022 "00101011" // /* MW 4 */ + 14023 "00101000" // /* MW 3 */ + 14024 "00000000" // /* MW 2 */ + 14025 "00000110" // /* MW 1 */ + 14026 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14027 "01011010" // /* MW 3 */ + 14028 "01101000" // /* MW 2 */ + 14029 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 14030 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14031 "10000001" // /* MW 5 */ + 14032 "00101001" // /* MW 4 */ + 14033 "00100111" // /* MW 3 */ + 14034 "11010011" // /* MW 2 */ + 14035 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 first + 14036 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14037 "00110110" // /* MW 3 */ + 14038 "00000110" // /* MW 2 */ + 14039 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 26 +.src_ref 7 "superkernels.cpp" 461 2 + 14040 "10111010" // LDA r16, [p3]; MOVXM p3, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14041 "00010000" // /* MW 9 */ + 14042 "11100000" // /* MW 8 */ + 14043 "10110011" // /* MW 7 */ + 14044 "11110001" // /* MW 6 */ + 14045 "00000001" // /* MW 5 */ + 14046 "00000000" // /* MW 4 */ + 14047 "11010000" // /* MW 3 */ + 14048 "11000010" // /* MW 2 */ + 14049 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 14050 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14051 "01010110" // /* MW 3 */ + 14052 "00000110" // /* MW 2 */ + 14053 "00000111" // /* MW 1 */ + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ + 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14057 "00000000" // /* MW 1 */ + 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14059 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 14060 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14061 "01110110" // /* MW 3 */ + 14062 "00000110" // /* MW 2 */ + 14063 "00000101" // /* MW 1 */ + 14064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 24 first + 14066 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14067 "00001111" // /* MW 3 */ + 14068 "01100001" // /* MW 2 */ + 14069 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 14070 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14071 "00000111" // /* MW 3 */ + 14072 "10100010" // /* MW 2 */ + 14073 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first + 14074 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14075 "11111101" // /* MW 3 */ + 14076 "00100000" // /* MW 2 */ + 14077 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 first +.no_stack_arguments + 14078 "00000100" // JL #13024 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */ + 14079 "00000001" // /* MW 5 */ + 14080 "00000000" // /* MW 4 */ + 14081 "01110000" // /* MW 3 */ + 14082 "00011001" // /* MW 2 */ + 14083 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first +.delay_slot + 14084 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14085 "00110001" // /* MW 3 */ + 14086 "00000110" // /* MW 2 */ + 14087 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first +.delay_slot + 14088 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14089 "11000001" // /* MW 3 */ + 14090 "01001001" // /* MW 2 */ + 14091 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 201 10 first +.delay_slot + 14092 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14093 "00100101" // /* MW 3 */ + 14094 "10110100" // /* MW 2 */ + 14095 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 first +.delay_slot + 14096 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14097 "00010101" // /* MW 3 */ + 14098 "10111011" // /* MW 2 */ + 14099 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 +.delay_slot + 14100 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14101 "11000001" // /* MW 11 */ + 14102 "10001010" // /* MW 10 */ + 14103 "11011111" // /* MW 9 */ + 14104 "00000011" // /* MW 8 */ + 14105 "00000000" // /* MW 7 */ + 14106 "00000000" // /* MW 6 */ + 14107 "00100000" // /* MW 5 */ + 14108 "00000000" // /* MW 4 */ + 14109 "11110000" // /* MW 3 */ + 14110 "00101100" // /* MW 2 */ + 14111 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 14112 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14113 "00001010" // /* MW 3 */ + 14114 "01100111" // /* MW 2 */ + 14115 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 14116 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14117 "00010110" // /* MW 3 */ + 14118 "00000110" // /* MW 2 */ + 14119 "00000010" // /* MW 1 */ + 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14121 "00000000" // /* MW 1 */ + 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14123 "00000000" // /* MW 1 */ + 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14125 "00000000" // /* MW 1 */ + 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14127 "00000000" // /* MW 1 */ + 14128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14129 "00000000" // /* MW 1 */ + 14130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14131 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 14132 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14133 "11111000" // /* MW 3 */ + 14134 "00010000" // /* MW 2 */ + 14135 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 14136 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14137 "00010000" // /* MW 9 */ + 14138 "00110000" // /* MW 8 */ + 14139 "10110010" // /* MW 7 */ + 14140 "11110000" // /* MW 6 */ + 14141 "00000001" // /* MW 5 */ + 14142 "00000000" // /* MW 4 */ + 14143 "11010000" // /* MW 3 */ + 14144 "11000010" // /* MW 2 */ + 14145 "01011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 first + 14146 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14147 "01010110" // /* MW 3 */ + 14148 "00000110" // /* MW 2 */ + 14149 "00000001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 14150 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14151 "00110110" // /* MW 3 */ + 14152 "00000110" // /* MW 2 */ + 14153 "00000111" // /* MW 1 */ + 14154 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14155 "10011001" // /* MW 3 */ + 14156 "11110100" // /* MW 2 */ + 14157 "00000111" // /* MW 1 */ + 14158 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14159 "11010001" // /* MW 3 */ + 14160 "11111001" // /* MW 2 */ + 14161 "00000111" // /* MW 1 */ + 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14163 "00000000" // /* MW 1 */ + 14164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14165 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 14166 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14167 "00000001" // /* MW 3 */ + 14168 "11100001" // /* MW 2 */ + 14169 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 14170 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14171 "00010001" // /* MW 3 */ + 14172 "11100110" // /* MW 2 */ + 14173 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 16 first + 14174 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14175 "00101000" // /* MW 3 */ + 14176 "01100001" // /* MW 2 */ + 14177 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 14178 "10000100" // JNZ r16, #14208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14208 delay_slots=5 */ + 14179 "00000001" // /* MW 5 */ + 14180 "01000000" // /* MW 4 */ + 14181 "11000000" // /* MW 3 */ + 14182 "00011011" // /* MW 2 */ + 14183 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 +.delay_slot + 14184 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14185 "00000001" // /* MW 3 */ + 14186 "00110000" // /* MW 2 */ + 14187 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14195 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 first + 14196 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14197 "11000001" // /* MW 11 */ + 14198 "10001000" // /* MW 10 */ + 14199 "10000011" // /* MW 9 */ + 14200 "00000011" // /* MW 8 */ + 14201 "00000000" // /* MW 7 */ + 14202 "00000000" // /* MW 6 */ + 14203 "00100000" // /* MW 5 */ + 14204 "00000000" // /* MW 4 */ + 14205 "11110000" // /* MW 3 */ + 14206 "00101100" // /* MW 2 */ + 14207 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 467 + 14208 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14209 "01000001" // /* MW 5 */ + 14210 "11101101" // /* MW 4 */ + 14211 "00101110" // /* MW 3 */ + 14212 "10110110" // /* MW 2 */ + 14213 "11111111" // /* MW 1 */ + 14214 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14215 "11110001" // /* MW 3 */ + 14216 "11110001" // /* MW 2 */ + 14217 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 first + 14218 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 14219 "00000000" // /* MW 3 */ + 14220 "00101000" // /* MW 2 */ + 14221 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 +.delay_slot + 14222 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14223 "00000001" // /* MW 5 */ + 14224 "00000000" // /* MW 4 */ + 14225 "00000000" // /* MW 3 */ + 14226 "11110000" // /* MW 2 */ + 14227 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14233 "00000000" // /* MW 1 */ +.delay_slot + 14234 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14235 "11000000" // /* MW 3 */ + 14236 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 14237 "00011111" // /* MW 1 */ +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0 +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.function superkernel_conv_eltbinary _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.src_ref 7 "superkernels.cpp" 578 +.src_ref 7 "superkernels.cpp" 578 first +.function_start + 14240 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14241 "00000001" // /* MW 5 */ + 14242 "00000000" // /* MW 4 */ + 14243 "00000000" // /* MW 3 */ + 14244 "00001000" // /* MW 2 */ + 14245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 + 14246 "00111010" // ST p7, [sp, #-8]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14247 "00010001" // /* MW 9 */ + 14248 "00100000" // /* MW 8 */ + 14249 "10110010" // /* MW 7 */ + 14250 "11110011" // /* MW 6 */ + 14251 "00000001" // /* MW 5 */ + 14252 "00000000" // /* MW 4 */ + 14253 "10110000" // /* MW 3 */ + 14254 "01110011" // /* MW 2 */ + 14255 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 first + 14256 "10111010" // LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14257 "01110010" // /* MW 9 */ + 14258 "01110000" // /* MW 8 */ + 14259 "00101101" // /* MW 7 */ + 14260 "10000010" // /* MW 6 */ + 14261 "00011101" // /* MW 5 */ + 14262 "11111111" // /* MW 4 */ + 14263 "11010111" // /* MW 3 */ + 14264 "11000010" // /* MW 2 */ + 14265 "11100000" // /* MW 1 */ + 14266 "10011000" // ST p4, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14267 "00011101" // /* MW 3 */ + 14268 "11110110" // /* MW 2 */ + 14269 "00001111" // /* MW 1 */ + 14270 "10011000" // ST p2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14271 "00011101" // /* MW 3 */ + 14272 "11110001" // /* MW 2 */ + 14273 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 7 "superkernels.cpp" 590 35 +.src_ref 7 "superkernels.cpp" 599 105 +.src_ref 7 "superkernels.cpp" 629 34 + 14274 "00000010" // ST lr, [sp, #-20]; MOV p7, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 14275 "01110000" // /* MW 7 */ + 14276 "01100000" // /* MW 6 */ + 14277 "10110011" // /* MW 5 */ + 14278 "00000011" // /* MW 4 */ + 14279 "10110000" // /* MW 3 */ + 14280 "10000111" // /* MW 2 */ + 14281 "11111101" // /* MW 1 */ + 14282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14283 "00000000" // /* MW 1 */ + 14284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14285 "00000000" // /* MW 1 */ + 14286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14287 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 +.src_ref 7 "superkernels.cpp" 583 16 + 14288 "10000100" // JNZ r16, #14688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14688 delay_slots=5 */ + 14289 "00000001" // /* MW 5 */ + 14290 "01000000" // /* MW 4 */ + 14291 "10110000" // /* MW 3 */ + 14292 "00011100" // /* MW 2 */ + 14293 "10000000" // /* MW 1 */ +.delay_slot + 14294 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14295 "00011101" // /* MW 3 */ + 14296 "11101000" // /* MW 2 */ + 14297 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 22 first +.delay_slot + 14298 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14299 "10010000" // /* MW 3 */ + 14300 "01100010" // /* MW 2 */ + 14301 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 30 +.delay_slot + 14302 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14303 "11111011" // /* MW 3 */ + 14304 "01100011" // /* MW 2 */ + 14305 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 11 +.delay_slot + 14306 "01000100" // MOVXM p6, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14307 "10100000" // /* MW 5 */ + 14308 "11001000" // /* MW 4 */ + 14309 "11001100" // /* MW 3 */ + 14310 "00000111" // /* MW 2 */ + 14311 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 11 +.delay_slot + 14312 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14313 "00110001" // /* MW 3 */ + 14314 "00000110" // /* MW 2 */ + 14315 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14316 "10111010" // MOVA r0, #1; MOVXM p6, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14317 "00010000" // /* MW 9 */ + 14318 "00110100" // /* MW 8 */ + 14319 "00110010" // /* MW 7 */ + 14320 "11110011" // /* MW 6 */ + 14321 "00000001" // /* MW 5 */ + 14322 "00000000" // /* MW 4 */ + 14323 "00000000" // /* MW 3 */ + 14324 "00100000" // /* MW 2 */ + 14325 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14326 "01110110" // ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14327 "00010000" // /* MW 11 */ + 14328 "00110010" // /* MW 10 */ + 14329 "00110010" // /* MW 9 */ + 14330 "11110000" // /* MW 8 */ + 14331 "00000001" // /* MW 7 */ + 14332 "00000000" // /* MW 6 */ + 14333 "10001011" // /* MW 5 */ + 14334 "10000100" // /* MW 4 */ + 14335 "11100110" // /* MW 3 */ + 14336 "11000000" // /* MW 2 */ + 14337 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 7 "superkernels.cpp" 587 4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14338 "10111010" // MOVA r1, #0; MOVXM p1, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14339 "00010000" // /* MW 9 */ + 14340 "00000000" // /* MW 8 */ + 14341 "10110011" // /* MW 7 */ + 14342 "11110000" // /* MW 6 */ + 14343 "00000001" // /* MW 5 */ + 14344 "00000000" // /* MW 4 */ + 14345 "00000000" // /* MW 3 */ + 14346 "00000001" // /* MW 2 */ + 14347 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 14348 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 14349 "00000001" // /* MW 5 */ + 14350 "00000000" // /* MW 4 */ + 14351 "01100000" // /* MW 3 */ + 14352 "00000101" // /* MW 2 */ + 14353 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14357 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14358 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14359 "00110001" // /* MW 3 */ + 14360 "00100000" // /* MW 2 */ + 14361 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 14362 "00101100" // NOPA; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14363 "00001010" // /* MW 5 */ + 14364 "01000000" // /* MW 4 */ + 14365 "11110000" // /* MW 3 */ + 14366 "00101100" // /* MW 2 */ + 14367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 14368 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14369 "00000000" // /* MW 15 */ + 14370 "00000000" // /* MW 14 */ + 14371 "01111000" // /* MW 13 */ + 14372 "01100000" // /* MW 12 */ + 14373 "00110111" // /* MW 11 */ + 14374 "00000000" // /* MW 10 */ + 14375 "00000000" // /* MW 9 */ + 14376 "10000000" // /* MW 8 */ + 14377 "00010001" // /* MW 7 */ + 14378 "00000110" // /* MW 6 */ + 14379 "00100000" // /* MW 5 */ + 14380 "00000000" // /* MW 4 */ + 14381 "11110000" // /* MW 3 */ + 14382 "00101100" // /* MW 2 */ + 14383 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 590 35 +.src_ref 7 "superkernels.cpp" 591 4 +.return_address + 14384 "01100100" // MOVX r16, #1; MOV dj0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14385 "00000001" // /* MW 5 */ + 14386 "00000001" // /* MW 4 */ + 14387 "10100001" // /* MW 3 */ + 14388 "00000000" // /* MW 2 */ + 14389 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 590 35 first + 14390 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14391 "01010110" // /* MW 3 */ + 14392 "00000010" // /* MW 2 */ + 14393 "00000111" // /* MW 1 */ + 14394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14395 "00000000" // /* MW 1 */ + 14396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14397 "00000000" // /* MW 1 */ + 14398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14399 "00000000" // /* MW 1 */ + 14400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14401 "00000000" // /* MW 1 */ + 14402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14403 "00000000" // /* MW 1 */ + 14404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14405 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 first + 14406 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14407 "00000111" // /* MW 3 */ + 14408 "10100001" // /* MW 2 */ + 14409 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 + 14410 "10000100" // JNZ r16, #14544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14544 delay_slots=5 */ + 14411 "00000001" // /* MW 5 */ + 14412 "01000000" // /* MW 4 */ + 14413 "01101000" // /* MW 3 */ + 14414 "00011100" // /* MW 2 */ + 14415 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 105 +.delay_slot + 14416 "11111000" // MOV r17, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14417 "11000000" // /* MW 3 */ + 14418 "01011110" // /* MW 2 */ + 14419 "00011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 105 first +.delay_slot + 14420 "00011000" // ADD.NC dc0, r17, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14421 "10010000" // /* MW 3 */ + 14422 "11001000" // /* MW 2 */ + 14423 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14427 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14429 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 first + 14430 "10000100" // JNZ r18, #14512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14512 delay_slots=5 */ + 14431 "00000001" // /* MW 5 */ + 14432 "01000000" // /* MW 4 */ + 14433 "01011000" // /* MW 3 */ + 14434 "00011100" // /* MW 2 */ + 14435 "10010000" // /* MW 1 */ +.delay_slot + 14436 "01000100" // MOVXM r16, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14437 "00000000" // /* MW 5 */ + 14438 "00101100" // /* MW 4 */ + 14439 "11001000" // /* MW 3 */ + 14440 "00000111" // /* MW 2 */ + 14441 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 27 +.delay_slot + 14442 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14443 "00000001" // /* MW 3 */ + 14444 "00100010" // /* MW 2 */ + 14445 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14451 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 first +.no_stack_arguments + 14452 "00111010" // ST p6, [sp, #-28]; JL #11136 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11136 delay_slots=5 */ + 14453 "01000001" // /* MW 9 */ + 14454 "00000000" // /* MW 8 */ + 14455 "00000000" // /* MW 7 */ + 14456 "01110000" // /* MW 6 */ + 14457 "00000101" // /* MW 5 */ + 14458 "00000000" // /* MW 4 */ + 14459 "10110000" // /* MW 3 */ + 14460 "11100011" // /* MW 2 */ + 14461 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 595 38 +.delay_slot + 14462 "01000100" // MOVXM p6, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14463 "10000000" // /* MW 5 */ + 14464 "11001010" // /* MW 4 */ + 14465 "11001100" // /* MW 3 */ + 14466 "00000111" // /* MW 2 */ + 14467 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 +.delay_slot + 14468 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14469 "10000000" // /* MW 5 */ + 14470 "11001010" // /* MW 4 */ + 14471 "11000000" // /* MW 3 */ + 14472 "00000111" // /* MW 2 */ + 14473 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 +.delay_slot + 14474 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14475 "10000000" // /* MW 3 */ + 14476 "01100001" // /* MW 2 */ + 14477 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14481 "00000000" // /* MW 15 */ + 14482 "00000000" // /* MW 14 */ + 14483 "01111000" // /* MW 13 */ + 14484 "10100101" // /* MW 12 */ + 14485 "00000001" // /* MW 11 */ + 14486 "00000000" // /* MW 10 */ + 14487 "00000000" // /* MW 9 */ + 14488 "00000000" // /* MW 8 */ + 14489 "01011011" // /* MW 7 */ + 14490 "00000001" // /* MW 6 */ + 14491 "00100000" // /* MW 5 */ + 14492 "00000000" // /* MW 4 */ + 14493 "11110000" // /* MW 3 */ + 14494 "00101100" // /* MW 2 */ + 14495 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 595 38 first +.return_address + 14496 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14497 "00010000" // /* MW 9 */ + 14498 "00000000" // /* MW 8 */ + 14499 "00001011" // /* MW 7 */ + 14500 "11110010" // /* MW 6 */ + 14501 "00000001" // /* MW 5 */ + 14502 "00000000" // /* MW 4 */ + 14503 "11010000" // /* MW 3 */ + 14504 "11000110" // /* MW 2 */ + 14505 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14506 "00111100" // LDA p6, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14507 "00100000" // /* MW 5 */ + 14508 "00000000" // /* MW 4 */ + 14509 "00100000" // /* MW 3 */ + 14510 "11100011" // /* MW 2 */ + 14511 "11111100" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272 + 14512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14513 "00000000" // /* MW 1 */ + 14514 "10000100" // J #14592 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=14592 delay_slots=5 */ + 14515 "00000000" // /* MW 5 */ + 14516 "00000000" // /* MW 4 */ + 14517 "10000000" // /* MW 3 */ + 14518 "00011100" // /* MW 2 */ + 14519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14527 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.delay_slot + 14528 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14529 "00000000" // /* MW 15 */ + 14530 "00000000" // /* MW 14 */ + 14531 "01111000" // /* MW 13 */ + 14532 "01100000" // /* MW 12 */ + 14533 "10110110" // /* MW 11 */ + 14534 "00000000" // /* MW 10 */ + 14535 "00000000" // /* MW 9 */ + 14536 "00000000" // /* MW 8 */ + 14537 "01011011" // /* MW 7 */ + 14538 "00000001" // /* MW 6 */ + 14539 "00100000" // /* MW 5 */ + 14540 "00000000" // /* MW 4 */ + 14541 "11110000" // /* MW 3 */ + 14542 "00101100" // /* MW 2 */ + 14543 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304 +.src_ref 7 "superkernels.cpp" 599 8 first +.no_stack_arguments + 14544 "00111010" // ST p6, [sp, #-28]; JL #11296 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 14545 "01000001" // /* MW 9 */ + 14546 "00000000" // /* MW 8 */ + 14547 "00000000" // /* MW 7 */ + 14548 "10000100" // /* MW 6 */ + 14549 "00000101" // /* MW 5 */ + 14550 "00000000" // /* MW 4 */ + 14551 "10110000" // /* MW 3 */ + 14552 "11100011" // /* MW 2 */ + 14553 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 600 38 +.delay_slot + 14554 "01000100" // MOVXM p6, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14555 "00000000" // /* MW 5 */ + 14556 "11001011" // /* MW 4 */ + 14557 "11001100" // /* MW 3 */ + 14558 "00000111" // /* MW 2 */ + 14559 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 8 +.delay_slot + 14560 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14561 "00000000" // /* MW 5 */ + 14562 "11001011" // /* MW 4 */ + 14563 "11000000" // /* MW 3 */ + 14564 "00000111" // /* MW 2 */ + 14565 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 8 +.delay_slot + 14566 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14567 "10000000" // /* MW 3 */ + 14568 "01100001" // /* MW 2 */ + 14569 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14572 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14573 "01100111" // /* MW 3 */ + 14574 "00000001" // /* MW 2 */ + 14575 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 600 38 first +.return_address + 14576 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14577 "00010000" // /* MW 9 */ + 14578 "00000000" // /* MW 8 */ + 14579 "00001011" // /* MW 7 */ + 14580 "11110010" // /* MW 6 */ + 14581 "00000001" // /* MW 5 */ + 14582 "00000000" // /* MW 4 */ + 14583 "11010000" // /* MW 3 */ + 14584 "11000110" // /* MW 2 */ + 14585 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14586 "00111100" // LDA p1, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14587 "00100000" // /* MW 5 */ + 14588 "00000000" // /* MW 4 */ + 14589 "00100000" // /* MW 3 */ + 14590 "10010011" // /* MW 2 */ + 14591 "11111100" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352 + 14592 "10011000" // ADD.NC p3, r16, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14593 "00000101" // /* MW 3 */ + 14594 "01101000" // /* MW 2 */ + 14595 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 35 first +.src_ref 7 "superkernels.cpp" 611 18 + 14596 "10111010" // LDA.u8 r19, [p3], #7; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14597 "00010000" // /* MW 9 */ + 14598 "00101000" // /* MW 8 */ + 14599 "00110010" // /* MW 7 */ + 14600 "11110011" // /* MW 6 */ + 14601 "00000001" // /* MW 5 */ + 14602 "00000000" // /* MW 4 */ + 14603 "01010000" // /* MW 3 */ + 14604 "11001101" // /* MW 2 */ + 14605 "01101111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 18 first + 14606 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14607 "01010110" // /* MW 3 */ + 14608 "00000110" // /* MW 2 */ + 14609 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 37 first + 14610 "10011000" // LDA.u16 r21, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14611 "10111010" // /* MW 3 */ + 14612 "00011110" // /* MW 2 */ + 14613 "00000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 73 + 14614 "10011000" // LDA.u16 r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14615 "00011010" // /* MW 3 */ + 14616 "00000110" // /* MW 2 */ + 14617 "00000011" // /* MW 1 */ + 14618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14619 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 110 + 14620 "10011000" // LDA.u16 r20, [p3, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14621 "10011010" // /* MW 3 */ + 14622 "00010110" // /* MW 2 */ + 14623 "00000011" // /* MW 1 */ + 14624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14625 "00000000" // /* MW 1 */ + 14626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14627 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 19 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 14628 "01000100" // MOVXM p0, #508996 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14629 "10001000" // /* MW 5 */ + 14630 "11001000" // /* MW 4 */ + 14631 "11000000" // /* MW 3 */ + 14632 "00000111" // /* MW 2 */ + 14633 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 57 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 14634 "10011000" // MUL r19, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14635 "01011111" // /* MW 3 */ + 14636 "11100111" // /* MW 2 */ + 14637 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 19 first +.src_ref 7 "superkernels.cpp" 611 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14638 "00111010" // ST r19, [p0]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14639 "00010001" // /* MW 9 */ + 14640 "00101110" // /* MW 8 */ + 14641 "00110010" // /* MW 7 */ + 14642 "11110001" // /* MW 6 */ + 14643 "00000001" // /* MW 5 */ + 14644 "00000000" // /* MW 4 */ + 14645 "00110000" // /* MW 3 */ + 14646 "11001110" // /* MW 2 */ + 14647 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 94 first + 14648 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14649 "00001111" // /* MW 3 */ + 14650 "11100001" // /* MW 2 */ + 14651 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 27 first + 14652 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14653 "00101111" // /* MW 3 */ + 14654 "01100011" // /* MW 2 */ + 14655 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 28 first + 14656 "10011000" // MUL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14657 "00001111" // /* MW 3 */ + 14658 "00100001" // /* MW 2 */ + 14659 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 13 +.src_ref 7 "superkernels.cpp" 611 16 first + 14660 "01110110" // NOPA; ST r17, [p2]; MOVXM p6, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14661 "00010000" // /* MW 11 */ + 14662 "00110000" // /* MW 10 */ + 14663 "00110010" // /* MW 9 */ + 14664 "11110011" // /* MW 8 */ + 14665 "00000001" // /* MW 7 */ + 14666 "10000000" // /* MW 6 */ + 14667 "00110001" // /* MW 5 */ + 14668 "00000110" // /* MW 4 */ + 14669 "11110010" // /* MW 3 */ + 14670 "00101100" // /* MW 2 */ + 14671 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 13 first + 14672 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14673 "00000000" // /* MW 15 */ + 14674 "00000000" // /* MW 14 */ + 14675 "01111000" // /* MW 13 */ + 14676 "10100101" // /* MW 12 */ + 14677 "00000001" // /* MW 11 */ + 14678 "00000000" // /* MW 10 */ + 14679 "00000000" // /* MW 9 */ + 14680 "10000000" // /* MW 8 */ + 14681 "00010001" // /* MW 7 */ + 14682 "00000110" // /* MW 6 */ + 14683 "00100110" // /* MW 5 */ + 14684 "00000000" // /* MW 4 */ + 14685 "11110000" // /* MW 3 */ + 14686 "00101100" // /* MW 2 */ + 14687 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448 +.src_ref 7 "superkernels.cpp" 614 12 + 14688 "01000100" // MOVXM p0, #509000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14689 "10010000" // /* MW 5 */ + 14690 "11001000" // /* MW 4 */ + 14691 "11000000" // /* MW 3 */ + 14692 "00000111" // /* MW 2 */ + 14693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.src_ref 7 "superkernels.cpp" 616 11 + 14694 "10111010" // LDA r16, [p0]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14695 "00010000" // /* MW 9 */ + 14696 "00100000" // /* MW 8 */ + 14697 "00110010" // /* MW 7 */ + 14698 "11110001" // /* MW 6 */ + 14699 "00000001" // /* MW 5 */ + 14700 "00000000" // /* MW 4 */ + 14701 "11010000" // /* MW 3 */ + 14702 "11000010" // /* MW 2 */ + 14703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 +.src_ref 7 "superkernels.cpp" 616 11 first + 14704 "10111010" // LDA r17, [p2]; MOVXM p6, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14705 "00010000" // /* MW 9 */ + 14706 "00100110" // /* MW 8 */ + 14707 "00110010" // /* MW 7 */ + 14708 "11110011" // /* MW 6 */ + 14709 "00000001" // /* MW 5 */ + 14710 "00000000" // /* MW 4 */ + 14711 "11010000" // /* MW 3 */ + 14712 "11000110" // /* MW 2 */ + 14713 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first + 14714 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14715 "01010110" // /* MW 3 */ + 14716 "00000110" // /* MW 2 */ + 14717 "00000110" // /* MW 1 */ + 14718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14719 "00000000" // /* MW 1 */ + 14720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14721 "00000000" // /* MW 1 */ + 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14723 "00000000" // /* MW 1 */ + 14724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14725 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 618 6 first +.src_ref 7 "superkernels.cpp" 618 17 first + 14726 "10000100" // JNZ r16, #14832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14832 delay_slots=5 */ + 14727 "00000001" // /* MW 5 */ + 14728 "01000000" // /* MW 4 */ + 14729 "11111000" // /* MW 3 */ + 14730 "00011100" // /* MW 2 */ + 14731 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.src_ref 7 "superkernels.cpp" 616 11 first +.delay_slot + 14732 "00100100" // ADD r17, r17, #1; ADD.NC r19, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14733 "00000001" // /* MW 5 */ + 14734 "10110000" // /* MW 4 */ + 14735 "11101001" // /* MW 3 */ + 14736 "01000000" // /* MW 2 */ + 14737 "10001100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first +.delay_slot + 14738 "00011000" // ADD r18, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14739 "00000111" // /* MW 3 */ + 14740 "10100100" // /* MW 2 */ + 14741 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 616 11 first +.delay_slot + 14742 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14743 "00110001" // /* MW 3 */ + 14744 "00000110" // /* MW 2 */ + 14745 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first +.delay_slot + 14746 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14747 "01010001" // /* MW 3 */ + 14748 "00000110" // /* MW 2 */ + 14749 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.delay_slot + 14750 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14751 "01110001" // /* MW 3 */ + 14752 "00000110" // /* MW 2 */ + 14753 "00001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 14754 "00011000" // LDA r17, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14755 "00110001" // /* MW 3 */ + 14756 "11110110" // /* MW 2 */ + 14757 "00000111" // /* MW 1 */ + 14758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14759 "00000000" // /* MW 1 */ + 14760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14761 "00000000" // /* MW 1 */ + 14762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14763 "00000000" // /* MW 1 */ + 14764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14765 "00000000" // /* MW 1 */ + 14766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14767 "00000000" // /* MW 1 */ + 14768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14769 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 14770 "00011000" // ADD.NC p6, r17, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14771 "10000110" // /* MW 3 */ + 14772 "01101000" // /* MW 2 */ + 14773 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 14774 "10011000" // LDA r27, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14775 "01110110" // /* MW 3 */ + 14776 "11111111" // /* MW 2 */ + 14777 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 14778 "10011000" // LDA r17, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14779 "00110110" // /* MW 3 */ + 14780 "11111110" // /* MW 2 */ + 14781 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 14782 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14783 "01010110" // /* MW 3 */ + 14784 "11111110" // /* MW 2 */ + 14785 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 14786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 14788 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14789 "00110110" // /* MW 3 */ + 14790 "01000110" // /* MW 2 */ + 14791 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14793 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14795 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14797 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14799 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14800 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14801 "00010010" // /* MW 3 */ + 14802 "10100011" // /* MW 2 */ + 14803 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.src_ref 1 "io_buffer_main.h" 395 8 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14804 "01011100" // ST r17, [p6]; MOVX r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14805 "11111010" // /* MW 5 */ + 14806 "11000001" // /* MW 4 */ + 14807 "00111111" // /* MW 3 */ + 14808 "11000110" // /* MW 2 */ + 14809 "11000000" // /* MW 1 */ + 14810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14811 "00000000" // /* MW 1 */ + 14812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14813 "00000000" // /* MW 1 */ + 14814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14815 "00000000" // /* MW 1 */ + 14816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14817 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14818 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 14819 "01100000" // /* MW 13 */ + 14820 "00101011" // /* MW 12 */ + 14821 "00000000" // /* MW 11 */ + 14822 "10101111" // /* MW 10 */ + 14823 "00110100" // /* MW 9 */ + 14824 "00000000" // /* MW 8 */ + 14825 "00001000" // /* MW 7 */ + 14826 "01010011" // /* MW 6 */ + 14827 "00100100" // /* MW 5 */ + 14828 "00000000" // /* MW 4 */ + 14829 "11110000" // /* MW 3 */ + 14830 "00101100" // /* MW 2 */ + 14831 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592 + 14832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14833 "00000000" // /* MW 1 */ + 14834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14835 "00000000" // /* MW 1 */ + 14836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14837 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.src_ref 1 "io_buffer_main.h" 125 25 + 14838 "00011000" // LDA p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14839 "00011001" // /* MW 3 */ + 14840 "11110101" // /* MW 2 */ + 14841 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14842 "00011000" // LDA p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14843 "00011001" // /* MW 3 */ + 14844 "11101000" // /* MW 2 */ + 14845 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 first +.no_stack_arguments + 14846 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 14847 "00000001" // /* MW 5 */ + 14848 "00000000" // /* MW 4 */ + 14849 "10111000" // /* MW 3 */ + 14850 "00001000" // /* MW 2 */ + 14851 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.delay_slot + 14852 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14853 "00000000" // /* MW 5 */ + 14854 "11001100" // /* MW 4 */ + 14855 "11000110" // /* MW 3 */ + 14856 "00000111" // /* MW 2 */ + 14857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 14864 "11100001" // NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14865 "00000000" // /* MW 15 */ + 14866 "00000000" // /* MW 14 */ + 14867 "01111000" // /* MW 13 */ + 14868 "10100101" // /* MW 12 */ + 14869 "00000001" // /* MW 11 */ + 14870 "00000000" // /* MW 10 */ + 14871 "00000000" // /* MW 9 */ + 14872 "00000000" // /* MW 8 */ + 14873 "10001011" // /* MW 7 */ + 14874 "10001000" // /* MW 6 */ + 14875 "00100110" // /* MW 5 */ + 14876 "00000000" // /* MW 4 */ + 14877 "11110000" // /* MW 3 */ + 14878 "00101100" // /* MW 2 */ + 14879 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 +.src_ref 1 "io_buffer_main.h" 218 49 +.return_address + 14880 "10111010" // LDA r16, [sp, #-16]; MOVXM p1, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14881 "00010000" // /* MW 9 */ + 14882 "00100100" // /* MW 8 */ + 14883 "10110010" // /* MW 7 */ + 14884 "11110000" // /* MW 6 */ + 14885 "00000001" // /* MW 5 */ + 14886 "00000000" // /* MW 4 */ + 14887 "00100000" // /* MW 3 */ + 14888 "01000010" // /* MW 2 */ + 14889 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 first +.src_ref 7 "superkernels.cpp" 623 20 + 14890 "10111010" // LDA r17, [p1]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14891 "00010000" // /* MW 9 */ + 14892 "00100010" // /* MW 8 */ + 14893 "10110010" // /* MW 7 */ + 14894 "11110000" // /* MW 6 */ + 14895 "00000001" // /* MW 5 */ + 14896 "00000000" // /* MW 4 */ + 14897 "11010000" // /* MW 3 */ + 14898 "11000110" // /* MW 2 */ + 14899 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 20 + 14900 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14901 "01010110" // /* MW 3 */ + 14902 "00000110" // /* MW 2 */ + 14903 "00000001" // /* MW 1 */ + 14904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14905 "00000000" // /* MW 1 */ + 14906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14907 "00000000" // /* MW 1 */ + 14908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14909 "00000000" // /* MW 1 */ + 14910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14911 "00000000" // /* MW 1 */ + 14912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14913 "00000000" // /* MW 1 */ + 14914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14915 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 17 + 14916 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14917 "00101000" // /* MW 3 */ + 14918 "01100011" // /* MW 2 */ + 14919 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 + 14920 "10000100" // JNZ r17, #15264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15264 delay_slots=5 */ + 14921 "00000001" // /* MW 5 */ + 14922 "01000000" // /* MW 4 */ + 14923 "11010000" // /* MW 3 */ + 14924 "00011101" // /* MW 2 */ + 14925 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14935 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 629 34 +.src_ref 1 "io_buffer_main.h" 218 49 first +.src_ref 1 "io_buffer_main.h" 395 8 + 14936 "10111010" // MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14937 "00001000" // /* MW 9 */ + 14938 "00000011" // /* MW 8 */ + 14939 "10110100" // /* MW 7 */ + 14940 "11101000" // /* MW 6 */ + 14941 "00010111" // /* MW 5 */ + 14942 "00111111" // /* MW 4 */ + 14943 "10000000" // /* MW 3 */ + 14944 "00000010" // /* MW 2 */ + 14945 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 52 +.src_ref 1 "io_buffer_main.h" 218 49 + 14946 "10111010" // LDA r27, [p1], #-4; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14947 "00010000" // /* MW 9 */ + 14948 "00101110" // /* MW 8 */ + 14949 "00110010" // /* MW 7 */ + 14950 "11110000" // /* MW 6 */ + 14951 "00000001" // /* MW 5 */ + 14952 "00000000" // /* MW 4 */ + 14953 "11010000" // /* MW 3 */ + 14954 "11101110" // /* MW 2 */ + 14955 "00111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 14956 "10011000" // LDA r18, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14957 "01010110" // /* MW 3 */ + 14958 "11111110" // /* MW 2 */ + 14959 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 14960 "10011000" // LDA r19, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14961 "01110110" // /* MW 3 */ + 14962 "11111110" // /* MW 2 */ + 14963 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 14964 "10011000" // LDA r20, [p1, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14965 "10010110" // /* MW 3 */ + 14966 "01010110" // /* MW 2 */ + 14967 "00000001" // /* MW 1 */ + 14968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14969 "00000000" // /* MW 1 */ + 14970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14971 "00000000" // /* MW 1 */ + 14972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14973 "00000000" // /* MW 1 */ + 14974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14975 "00000000" // /* MW 1 */ + 14976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14977 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 14978 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14979 "00100010" // /* MW 3 */ + 14980 "11100101" // /* MW 2 */ + 14981 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 +.src_ref 7 "superkernels.cpp" 630 3 +.src_ref 1 "io_buffer_main.h" 218 20 + 14982 "01011100" // ST r18, [p1]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14983 "00001010" // /* MW 5 */ + 14984 "01000000" // /* MW 4 */ + 14985 "00110000" // /* MW 3 */ + 14986 "11001010" // /* MW 2 */ + 14987 "00100000" // /* MW 1 */ + 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14989 "00000000" // /* MW 1 */ + 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14991 "00000000" // /* MW 1 */ + 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14993 "00000000" // /* MW 1 */ + 14994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14995 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14996 "00011000" // ACQ r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14997 "00011000" // /* MW 3 */ + 14998 "00010011" // /* MW 2 */ + 14999 "00010101" // /* MW 1 */ + 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15001 "00000000" // /* MW 1 */ + 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15003 "00000000" // /* MW 1 */ + 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15005 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 52 first + 15006 "10011000" // LDA r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15007 "01110110" // /* MW 3 */ + 15008 "00000110" // /* MW 2 */ + 15009 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 629 34 first + 15010 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15011 "01010110" // /* MW 3 */ + 15012 "00000010" // /* MW 2 */ + 15013 "00000111" // /* MW 1 */ + 15014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15015 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 15016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 15018 "10011000" // LDA p0, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15019 "00011110" // /* MW 3 */ + 15020 "01011100" // /* MW 2 */ + 15021 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15023 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15025 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 first +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 32 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15026 "10111010" // LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15027 "01111000" // /* MW 9 */ + 15028 "01100000" // /* MW 8 */ + 15029 "00110001" // /* MW 7 */ + 15030 "01101100" // /* MW 6 */ + 15031 "00111000" // /* MW 5 */ + 15032 "00100111" // /* MW 4 */ + 15033 "11010000" // /* MW 3 */ + 15034 "11000110" // /* MW 2 */ + 15035 "00101001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15036 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15037 "00000111" // /* MW 3 */ + 15038 "10100001" // /* MW 2 */ + 15039 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15040 "10000100" // JNZ r16, #15120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15120 delay_slots=5 */ + 15041 "00000001" // /* MW 5 */ + 15042 "01000000" // /* MW 4 */ + 15043 "10001000" // /* MW 3 */ + 15044 "00011101" // /* MW 2 */ + 15045 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15046 "00011000" // MOVS p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15047 "10001011" // /* MW 3 */ + 15048 "10000000" // /* MW 2 */ + 15049 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15055 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 first +.delay_slot + 15056 "00000010" // ST p1, [sp, #-16]; ADD.NC p1, r19, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15057 "10100000" // /* MW 7 */ + 15058 "11100010" // /* MW 6 */ + 15059 "10110100" // /* MW 5 */ + 15060 "00000000" // /* MW 4 */ + 15061 "10110000" // /* MW 3 */ + 15062 "00010011" // /* MW 2 */ + 15063 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 first + 15064 "10000100" // JNZ r18, #15152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15152 delay_slots=5 */ + 15065 "00000001" // /* MW 5 */ + 15066 "01000000" // /* MW 4 */ + 15067 "10011000" // /* MW 3 */ + 15068 "00011101" // /* MW 2 */ + 15069 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15079 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 first +.no_stack_arguments + 15080 "00000100" // JL #11248 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11248 delay_slots=5 */ + 15081 "00000001" // /* MW 5 */ + 15082 "00000000" // /* MW 4 */ + 15083 "11111000" // /* MW 3 */ + 15084 "00010101" // /* MW 2 */ + 15085 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 +.delay_slot + 15086 "01000100" // MOVXM p3, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15087 "10000000" // /* MW 5 */ + 15088 "11001010" // /* MW 4 */ + 15089 "11000110" // /* MW 3 */ + 15090 "00000111" // /* MW 2 */ + 15091 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15093 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15095 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15097 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 +.delay_slot + 15098 "11010100" // NOPA; MOV p2, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15099 "10000001" // /* MW 5 */ + 15100 "11000001" // /* MW 4 */ + 15101 "11110100" // /* MW 3 */ + 15102 "00101100" // /* MW 2 */ + 15103 "00000000" // /* MW 1 */ +.return_address + 15104 "10000100" // J #15152 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15152 delay_slots=5 */ + 15105 "00000000" // /* MW 5 */ + 15106 "00000000" // /* MW 4 */ + 15107 "10011000" // /* MW 3 */ + 15108 "00011101" // /* MW 2 */ + 15109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15111 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15119 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880 +.src_ref 7 "superkernels.cpp" 637 8 first +.no_stack_arguments + 15120 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */ + 15121 "00000001" // /* MW 5 */ + 15122 "00000000" // /* MW 4 */ + 15123 "01011000" // /* MW 3 */ + 15124 "00010110" // /* MW 2 */ + 15125 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 637 8 +.delay_slot + 15126 "01000100" // MOVXM p3, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15127 "00000000" // /* MW 5 */ + 15128 "11001011" // /* MW 4 */ + 15129 "11000110" // /* MW 3 */ + 15130 "00000111" // /* MW 2 */ + 15131 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 637 8 +.delay_slot + 15132 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15133 "11000000" // /* MW 3 */ + 15134 "01100000" // /* MW 2 */ + 15135 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15140 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 15141 "10000001" // /* MW 11 */ + 15142 "10101101" // /* MW 10 */ + 15143 "00000000" // /* MW 9 */ + 15144 "00000000" // /* MW 8 */ + 15145 "00000000" // /* MW 7 */ + 15146 "00000000" // /* MW 6 */ + 15147 "00100000" // /* MW 5 */ + 15148 "00000000" // /* MW 4 */ + 15149 "11110000" // /* MW 3 */ + 15150 "00101100" // /* MW 2 */ + 15151 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912 +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.return_address + 15152 "00011000" // LDA p1, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15153 "10011001" // /* MW 3 */ + 15154 "11110000" // /* MW 2 */ + 15155 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 + 15156 "00101100" // LDA p0, [sp, #-12]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15157 "00001010" // /* MW 5 */ + 15158 "01000100" // /* MW 4 */ + 15159 "00100000" // /* MW 3 */ + 15160 "10000011" // /* MW 2 */ + 15161 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 +.src_ref 1 "io_buffer_main.h" 324 32 first + 15162 "10111010" // LDA r16, [p7, #16]; MOVXM p7, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15163 "00010000" // /* MW 9 */ + 15164 "00100100" // /* MW 8 */ + 15165 "10110010" // /* MW 7 */ + 15166 "11110011" // /* MW 6 */ + 15167 "00000001" // /* MW 5 */ + 15168 "00000000" // /* MW 4 */ + 15169 "11010000" // /* MW 3 */ + 15170 "11000010" // /* MW 2 */ + 15171 "11101000" // /* MW 1 */ + 15172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15173 "00000000" // /* MW 1 */ + 15174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15175 "00000000" // /* MW 1 */ + 15176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15177 "00000000" // /* MW 1 */ + 15178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15179 "00000000" // /* MW 1 */ + 15180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15181 "00000000" // /* MW 1 */ + 15182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15183 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 15184 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15185 "00011000" // /* MW 3 */ + 15186 "00010001" // /* MW 2 */ + 15187 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 15188 "10011000" // LDA r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15189 "01010110" // /* MW 3 */ + 15190 "11110110" // /* MW 2 */ + 15191 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 15192 "10011000" // LDA r16, [p0, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15193 "00010110" // /* MW 3 */ + 15194 "01010110" // /* MW 2 */ + 15195 "00000000" // /* MW 1 */ + 15196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15197 "00000000" // /* MW 1 */ + 15198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15199 "00000000" // /* MW 1 */ + 15200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15201 "00000000" // /* MW 1 */ + 15202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15203 "00000000" // /* MW 1 */ + 15204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 15206 "10011000" // SUB r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15207 "00100001" // /* MW 3 */ + 15208 "01100101" // /* MW 2 */ + 15209 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 15210 "10011000" // ST r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15211 "01010001" // /* MW 3 */ + 15212 "11110110" // /* MW 2 */ + 15213 "00001001" // /* MW 1 */ + 15214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15215 "00000000" // /* MW 1 */ + 15216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15217 "00000000" // /* MW 1 */ + 15218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15219 "00000000" // /* MW 1 */ + 15220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15221 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 15222 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15223 "00011000" // /* MW 3 */ + 15224 "00010001" // /* MW 2 */ + 15225 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 15226 "10011000" // LDA r18, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15227 "01010110" // /* MW 3 */ + 15228 "11100110" // /* MW 2 */ + 15229 "00000110" // /* MW 1 */ + 15230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15231 "00000000" // /* MW 1 */ + 15232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15233 "00000000" // /* MW 1 */ + 15234 "10000100" // J #15280 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15280 delay_slots=5 */ + 15235 "00000000" // /* MW 5 */ + 15236 "00000000" // /* MW 4 */ + 15237 "11011000" // /* MW 3 */ + 15238 "00011101" // /* MW 2 */ + 15239 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15241 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15243 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 +.src_ref 7 "superkernels.cpp" 649 14 +.delay_slot + 15244 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15245 "00000001" // /* MW 3 */ + 15246 "00100000" // /* MW 2 */ + 15247 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 first +.src_ref 1 "io_buffer_main.h" 327 32 +.delay_slot + 15248 "01011100" // ST r16, [p7]; SUB r17, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15249 "01000011" // /* MW 5 */ + 15250 "11000110" // /* MW 4 */ + 15251 "00111000" // /* MW 3 */ + 15252 "11000010" // /* MW 2 */ + 15253 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 first +.delay_slot + 15254 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15255 "00000000" // /* MW 9 */ + 15256 "00000000" // /* MW 8 */ + 15257 "00000000" // /* MW 7 */ + 15258 "10000000" // /* MW 6 */ + 15259 "00110001" // /* MW 5 */ + 15260 "11100110" // /* MW 4 */ + 15261 "11110110" // /* MW 3 */ + 15262 "00101100" // /* MW 2 */ + 15263 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024 +.src_ref 7 "superkernels.cpp" 649 14 + 15264 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 15265 "00000000" // /* MW 15 */ + 15266 "00000000" // /* MW 14 */ + 15267 "01111000" // /* MW 13 */ + 15268 "10100101" // /* MW 12 */ + 15269 "00000001" // /* MW 11 */ + 15270 "00001000" // /* MW 10 */ + 15271 "00000000" // /* MW 9 */ + 15272 "00000001" // /* MW 8 */ + 15273 "01011011" // /* MW 7 */ + 15274 "00000001" // /* MW 6 */ + 15275 "00100000" // /* MW 5 */ + 15276 "00000000" // /* MW 4 */ + 15277 "11110000" // /* MW 3 */ + 15278 "00101100" // /* MW 2 */ + 15279 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040 +.src_ref 7 "superkernels.cpp" 648 19 +.src_ref 7 "superkernels.cpp" 651 + 15280 "10111010" // LDA lr, [sp, #-20]; MOVXM p7, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15281 "00010000" // /* MW 9 */ + 15282 "00110000" // /* MW 8 */ + 15283 "10110010" // /* MW 7 */ + 15284 "11110011" // /* MW 6 */ + 15285 "00000001" // /* MW 5 */ + 15286 "00000000" // /* MW 4 */ + 15287 "00100000" // /* MW 3 */ + 15288 "10000111" // /* MW 2 */ + 15289 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 +.src_ref 7 "superkernels.cpp" 648 19 first +.src_ref 7 "superkernels.cpp" 649 14 + 15290 "10111010" // LDA r18, [p7]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15291 "00010000" // /* MW 9 */ + 15292 "00100000" // /* MW 8 */ + 15293 "00110010" // /* MW 7 */ + 15294 "11110011" // /* MW 6 */ + 15295 "00000001" // /* MW 5 */ + 15296 "00000000" // /* MW 4 */ + 15297 "11010000" // /* MW 3 */ + 15298 "11001010" // /* MW 2 */ + 15299 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 + 15300 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15301 "00110110" // /* MW 3 */ + 15302 "00000110" // /* MW 2 */ + 15303 "00000110" // /* MW 1 */ + 15304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15305 "00000000" // /* MW 1 */ + 15306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15307 "00000000" // /* MW 1 */ + 15308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15309 "00000000" // /* MW 1 */ + 15310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15311 "00000000" // /* MW 1 */ + 15312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15313 "00000000" // /* MW 1 */ + 15314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15315 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 16 + 15316 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15317 "00101000" // /* MW 3 */ + 15318 "01100011" // /* MW 2 */ + 15319 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 + 15320 "10000100" // JNZ r17, #15344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15344 delay_slots=5 */ + 15321 "00000001" // /* MW 5 */ + 15322 "01000000" // /* MW 4 */ + 15323 "11111000" // /* MW 3 */ + 15324 "00011101" // /* MW 2 */ + 15325 "10001000" // /* MW 1 */ +.delay_slot + 15326 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15327 "10011001" // /* MW 3 */ + 15328 "11111011" // /* MW 2 */ + 15329 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15337 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 649 14 first + 15338 "00001100" // NOPA; ST r16, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15339 "00100011" // /* MW 5 */ + 15340 "00001100" // /* MW 4 */ + 15341 "11111100" // /* MW 3 */ + 15342 "00101100" // /* MW 2 */ + 15343 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104 + 15344 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15345 "00011001" // /* MW 3 */ + 15346 "11111111" // /* MW 2 */ + 15347 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 651 first + 15348 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15349 "00000000" // /* MW 3 */ + 15350 "00101000" // /* MW 2 */ + 15351 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 651 +.delay_slot + 15352 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15353 "00000001" // /* MW 5 */ + 15354 "00000000" // /* MW 4 */ + 15355 "00000000" // /* MW 3 */ + 15356 "11111000" // /* MW 2 */ + 15357 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0 + 15365 "00000000" // /* MW 1 */ +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function _b896_wrapper _Z13_b896_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 21 first +.src_ref 0 "0_0_reloadable5.cc" 23 79 +.function_start + 15376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15377 "11000000" // /* MW 3 */ + 15378 "01100000" // /* MW 2 */ + 15379 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 23 79 first + 15380 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15381 "00011110" // /* MW 3 */ + 15382 "00011100" // /* MW 2 */ + 15383 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 24 79 first + 15384 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15385 "10011110" // /* MW 3 */ + 15386 "00101100" // /* MW 2 */ + 15387 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 26 81 first + 15388 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15389 "10011110" // /* MW 3 */ + 15390 "11110101" // /* MW 2 */ + 15391 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 25 47 first + 15392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15393 "00011110" // /* MW 3 */ + 15394 "00000101" // /* MW 2 */ + 15395 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 22 4 first +.tail_call + 15396 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */ + 15397 "00000000" // /* MW 5 */ + 15398 "00000000" // /* MW 4 */ + 15399 "01110000" // /* MW 3 */ + 15400 "00001101" // /* MW 2 */ + 15401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + 15411 "00000000" // /* MW 1 */ +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function _b901_wrapper _Z13_b901_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 30 first +.src_ref 0 "0_0_reloadable5.cc" 32 79 +.function_start + 15424 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15425 "11000000" // /* MW 3 */ + 15426 "01100000" // /* MW 2 */ + 15427 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 32 79 first + 15428 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15429 "00011110" // /* MW 3 */ + 15430 "00101100" // /* MW 2 */ + 15431 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 34 81 first + 15432 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15433 "00011110" // /* MW 3 */ + 15434 "11110101" // /* MW 2 */ + 15435 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 33 47 first + 15436 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15437 "10011110" // /* MW 3 */ + 15438 "00000100" // /* MW 2 */ + 15439 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 31 4 first +.tail_call + 15440 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */ + 15441 "00000000" // /* MW 5 */ + 15442 "00000000" // /* MW 4 */ + 15443 "00011000" // /* MW 3 */ + 15444 "00010000" // /* MW 2 */ + 15445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 + 15455 "00000000" // /* MW 1 */ +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function _b906_wrapper _Z13_b906_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 38 first +.src_ref 0 "0_0_reloadable5.cc" 40 79 +.function_start + 15456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15457 "11000000" // /* MW 3 */ + 15458 "01100000" // /* MW 2 */ + 15459 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 40 79 first + 15460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15461 "00011110" // /* MW 3 */ + 15462 "00101100" // /* MW 2 */ + 15463 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 42 81 first + 15464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15465 "00011110" // /* MW 3 */ + 15466 "11110101" // /* MW 2 */ + 15467 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 41 47 first + 15468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15469 "10011110" // /* MW 3 */ + 15470 "00000100" // /* MW 2 */ + 15471 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 39 4 first +.tail_call + 15472 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */ + 15473 "00000000" // /* MW 5 */ + 15474 "00000000" // /* MW 4 */ + 15475 "11001000" // /* MW 3 */ + 15476 "00010001" // /* MW 2 */ + 15477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 + 15487 "00000000" // /* MW 1 */ +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function _b881_wrapper _Z13_b881_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 46 first +.src_ref 0 "0_0_reloadable5.cc" 48 79 +.function_start + 15488 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15489 "11000000" // /* MW 3 */ + 15490 "01100000" // /* MW 2 */ + 15491 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 48 79 first + 15492 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15493 "00011110" // /* MW 3 */ + 15494 "00101100" // /* MW 2 */ + 15495 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 50 81 first + 15496 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15497 "00011110" // /* MW 3 */ + 15498 "11110101" // /* MW 2 */ + 15499 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 49 47 first + 15500 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15501 "10011110" // /* MW 3 */ + 15502 "00000100" // /* MW 2 */ + 15503 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 47 4 first +.tail_call + 15504 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */ + 15505 "00000000" // /* MW 5 */ + 15506 "00000000" // /* MW 4 */ + 15507 "10001000" // /* MW 3 */ + 15508 "00010100" // /* MW 2 */ + 15509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 + 15519 "00000000" // /* MW 1 */ +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function _b891_wrapper _Z13_b891_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 54 first +.src_ref 0 "0_0_reloadable5.cc" 56 79 +.function_start + 15520 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15521 "11000000" // /* MW 3 */ + 15522 "01100000" // /* MW 2 */ + 15523 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 56 79 first + 15524 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15525 "00011110" // /* MW 3 */ + 15526 "00111100" // /* MW 2 */ + 15527 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 57 47 first + 15528 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15529 "10011110" // /* MW 3 */ + 15530 "11101100" // /* MW 2 */ + 15531 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 59 81 first + 15532 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15533 "10011110" // /* MW 3 */ + 15534 "00010101" // /* MW 2 */ + 15535 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 58 80 first + 15536 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15537 "00011110" // /* MW 3 */ + 15538 "00000101" // /* MW 2 */ + 15539 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 55 4 first +.tail_call + 15540 "10000100" // J #11744 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 15541 "00000000" // /* MW 5 */ + 15542 "00000000" // /* MW 4 */ + 15543 "11110000" // /* MW 3 */ + 15544 "00010110" // /* MW 2 */ + 15545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15551 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15553 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + 15555 "00000000" // /* MW 1 */ +.label __Z13_b924_wrapperPPv___func_begin0 +.label _Z13_b924_wrapperPPv +.function _b924_wrapper _Z13_b924_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 63 first +.src_ref 0 "0_0_reloadable5.cc" 65 79 +.function_start + 15568 "11111000" // MOV p3, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15569 "11000000" // /* MW 3 */ + 15570 "01100000" // /* MW 2 */ + 15571 "00011011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 65 79 first + 15572 "10011000" // LDA p0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15573 "00011110" // /* MW 3 */ + 15574 "00011100" // /* MW 2 */ + 15575 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 66 79 first + 15576 "10011000" // LDA p1, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15577 "10011110" // /* MW 3 */ + 15578 "00011100" // /* MW 2 */ + 15579 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 67 80 first + 15580 "10011000" // LDA p2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15581 "00011110" // /* MW 3 */ + 15582 "00101101" // /* MW 2 */ + 15583 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 69 81 first + 15584 "10011000" // LDA p4, [p3, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15585 "00011110" // /* MW 3 */ + 15586 "11110110" // /* MW 2 */ + 15587 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 68 47 first + 15588 "10011000" // LDA p3, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15589 "10011110" // /* MW 3 */ + 15590 "00000101" // /* MW 2 */ + 15591 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 64 4 first +.tail_call + 15592 "10000100" // J #14240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=14240 delay_slots=5 */ + 15593 "00000000" // /* MW 5 */ + 15594 "00000000" // /* MW 4 */ + 15595 "11010000" // /* MW 3 */ + 15596 "00011011" // /* MW 2 */ + 15597 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b924_wrapperPPv__end +.label __Z13_b924_wrapperPPv___func_end0 + 15607 "00000000" // /* MW 1 */ +.label __Z13_b919_wrapperPPv___func_begin0 +.label _Z13_b919_wrapperPPv +.function _b919_wrapper _Z13_b919_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 73 first +.src_ref 0 "0_0_reloadable5.cc" 75 79 +.function_start + 15616 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15617 "11000000" // /* MW 3 */ + 15618 "01100000" // /* MW 2 */ + 15619 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 75 79 first + 15620 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15621 "00011110" // /* MW 3 */ + 15622 "00011100" // /* MW 2 */ + 15623 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 76 79 first + 15624 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15625 "10011110" // /* MW 3 */ + 15626 "00101100" // /* MW 2 */ + 15627 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 78 81 first + 15628 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15629 "10011110" // /* MW 3 */ + 15630 "11110101" // /* MW 2 */ + 15631 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 77 47 first + 15632 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15633 "00011110" // /* MW 3 */ + 15634 "00000101" // /* MW 2 */ + 15635 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 74 4 first +.tail_call + 15636 "10000100" // J #13760 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13760 delay_slots=5 */ + 15637 "00000000" // /* MW 5 */ + 15638 "00000000" // /* MW 4 */ + 15639 "11100000" // /* MW 3 */ + 15640 "00011010" // /* MW 2 */ + 15641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b919_wrapperPPv__end +.label __Z13_b919_wrapperPPv___func_end0 + 15651 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 115 4 first +.function_start + 15664 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15665 "01000001" // /* MW 5 */ + 15666 "10100000" // /* MW 4 */ + 15667 "00101111" // /* MW 3 */ + 15668 "11000000" // /* MW 2 */ + 15669 "00000000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15670 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15671 "00011100" // /* MW 3 */ + 15672 "11000110" // /* MW 2 */ + 15673 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15674 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15675 "00011100" // /* MW 3 */ + 15676 "11000110" // /* MW 2 */ + 15677 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15678 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15679 "00011100" // /* MW 3 */ + 15680 "11000110" // /* MW 2 */ + 15681 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15682 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15683 "00011100" // /* MW 3 */ + 15684 "11000110" // /* MW 2 */ + 15685 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15686 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15687 "00011100" // /* MW 3 */ + 15688 "11000110" // /* MW 2 */ + 15689 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15690 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15691 "00011100" // /* MW 3 */ + 15692 "11000110" // /* MW 2 */ + 15693 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15694 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15695 "00011100" // /* MW 3 */ + 15696 "11000110" // /* MW 2 */ + 15697 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15698 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15699 "00011100" // /* MW 3 */ + 15700 "11000110" // /* MW 2 */ + 15701 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15702 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15703 "00011100" // /* MW 3 */ + 15704 "11000110" // /* MW 2 */ + 15705 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15706 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15707 "00011100" // /* MW 3 */ + 15708 "11000110" // /* MW 2 */ + 15709 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15710 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15711 "00011100" // /* MW 3 */ + 15712 "11000110" // /* MW 2 */ + 15713 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15714 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15715 "00011100" // /* MW 3 */ + 15716 "11000110" // /* MW 2 */ + 15717 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15718 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15719 "00011100" // /* MW 3 */ + 15720 "11000110" // /* MW 2 */ + 15721 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15722 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15723 "00011100" // /* MW 3 */ + 15724 "11000110" // /* MW 2 */ + 15725 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15726 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15727 "00011100" // /* MW 3 */ + 15728 "11000110" // /* MW 2 */ + 15729 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15730 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15731 "00011100" // /* MW 3 */ + 15732 "11000110" // /* MW 2 */ + 15733 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15735 "00011100" // /* MW 3 */ + 15736 "11000110" // /* MW 2 */ + 15737 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15739 "00011100" // /* MW 3 */ + 15740 "11000110" // /* MW 2 */ + 15741 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15743 "00011100" // /* MW 3 */ + 15744 "11000110" // /* MW 2 */ + 15745 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15747 "00011100" // /* MW 3 */ + 15748 "11000110" // /* MW 2 */ + 15749 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15751 "00011100" // /* MW 3 */ + 15752 "11000110" // /* MW 2 */ + 15753 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15755 "00011100" // /* MW 3 */ + 15756 "11000110" // /* MW 2 */ + 15757 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15759 "00011100" // /* MW 3 */ + 15760 "11000110" // /* MW 2 */ + 15761 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15763 "00011100" // /* MW 3 */ + 15764 "11000110" // /* MW 2 */ + 15765 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15767 "00011100" // /* MW 3 */ + 15768 "11000110" // /* MW 2 */ + 15769 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15771 "00011100" // /* MW 3 */ + 15772 "11000110" // /* MW 2 */ + 15773 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15775 "00011100" // /* MW 3 */ + 15776 "11000110" // /* MW 2 */ + 15777 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15779 "00011100" // /* MW 3 */ + 15780 "11000110" // /* MW 2 */ + 15781 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 119 first + 15782 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15783 "00000000" // /* MW 3 */ + 15784 "00101000" // /* MW 2 */ + 15785 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 first +.delay_slot + 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15787 "00011100" // /* MW 3 */ + 15788 "11000110" // /* MW 2 */ + 15789 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15791 "00011100" // /* MW 3 */ + 15792 "11000110" // /* MW 2 */ + 15793 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15795 "00011100" // /* MW 3 */ + 15796 "11000110" // /* MW 2 */ + 15797 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15799 "00011100" // /* MW 3 */ + 15800 "11000110" // /* MW 2 */ + 15801 "00010000" // /* MW 1 */ +.delay_slot + 15802 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15803 "10100000" // /* MW 3 */ + 15804 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 15805 "00011000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.lst new file mode 100644 index 0000000000000000000000000000000000000000..4a0bb9c3b02d8c2df3b5faeb6f4b950508fce7fd --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.lst @@ -0,0 +1,5518 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2352 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2352 0x00 0xc6 0xd1 0x21 0x41 0xd4 LDA r17, [p0]; MOV r2, r1 + 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2364 0xfe 0xf3 0xb0 0x00 0x2b 0xd0 0x70 0x02 ST p7, [sp, #-12]; MOV r1, r15 + 2372 0xff 0x87 0xb0 0x01 0xe8 0x90 0x70 0x02 ST lr, [sp, #-4]; MOV r15, r2 + 2380 0xff 0x06 0xb7 0xc1 0xe0 0x5c ST r1, [sp, #-8]; NEZ r16, r15 + 2386 0x1e 0x98 0x20 0xf8 MOV r26, r16 + 2390 0x00 0x00 NOPX + 2392 0x1f 0x68 0x82 0x18 ADD.NC p7, r17, #4 + 2396 0x07 0x1e 0x36 0x98 LDA r17, [p7], #4 + 2400 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2404 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2408 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2412 0x00 0x00 NOPX + 2414 0x00 0x00 NOPX + 2416 0x00 0x00 NOPX + 2418 0x00 0x00 NOPX + 2420 0x00 0x00 NOPX + 2422 0x00 0x00 NOPX + 2424 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27 + 2428 0x0f 0xd6 0x31 0x98 ST r17, [p7, #-12] + 2432 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 2436 0x00 0x00 NOPX + 2438 0x00 0x00 NOPX + 2440 0x00 0x00 NOPX + 2442 0x14 0x97 0x18 0x18 ACQ.COND r18, r17, r26 + 2446 0x10 0x24 0x09 0x18 MOVX r18, #2 + 2450 0x14 0x29 0x2d 0x98 LSHL r20, r16, r18 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2454 0x18 0x8a 0x20 0xf8 MOV dj0, r20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2458 0x00 0x4e 0xdf 0xd8 0x8b 0x0c LDA r19, [p0, dj0]; ST dj0, [sp, #-20] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2464 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2466 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2468 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2470 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2472 0x10 0x26 0x05 0x18 MOVX r19, #1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2476 0x14 0xf4 0xfc 0x98 LTU r26, r19, r15 + 2480 0xfe 0x6a 0xb0 0x03 0xb4 0xc1 0x00 0x02 ST r26, [sp, #-16]; ADD.NC p7, r19, #4 + 2488 0x07 0x1e 0x76 0x98 LDA r19, [p7], #4 + 2492 0x07 0x3e 0xb6 0x98 LDA r21, [p7], #12 + 2496 0x07 0xee 0x96 0x98 LDA r20, [p7], #-8 + 2500 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2504 0x00 0x00 NOPX + 2506 0x00 0x00 NOPX + 2508 0x00 0x00 NOPX + 2510 0x00 0x00 NOPX + 2512 0x00 0x00 NOPX + 2514 0x00 0x00 NOPX + 2516 0x14 0xe7 0x52 0x18 SEL.EQZ r19, r19, r21, r27 + 2520 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2524 0x00 0x00 NOPX + 2526 0x00 0x00 NOPX + 2528 0x00 0x00 NOPX + 2530 0x00 0x00 NOPX + 2532 0x15 0x17 0x18 0x18 ACQ.COND r20, r17, r26 + 2536 0x10 0x23 0x2d 0x98 LSHL r17, r0, r18 + 2540 0x18 0x88 0xa0 0xf8 MOV dj0, r17 + 2544 0x00 0x07 0xce 0xc9 0x00 0x44 MOVXM p7, #509056 + 2550 0xe0 0x13 0xdf 0xd4 0x2b 0x0c LDA p1, [p7, dj0]; ST r16, [sp, #-24] + 2556 0x00 0x00 NOPX + 2558 0x00 0x00 NOPX + 2560 0x00 0x00 NOPX + 2562 0x00 0x00 NOPX + 2564 0x00 0x00 NOPX + 2566 0x00 0x00 NOPX +.no_stack_arguments + 2568 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2572 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2580 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2582 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 2592 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 2598 0x07 0xeb 0x51 0x18 LDA r26, [sp, #-24] + 2602 0x07 0xec 0x41 0x18 LDA dj0, [sp, #-20] + 2606 0x07 0xf0 0x29 0x18 LDA el0, [sp, #-16] + 2610 0x00 0x00 NOPX + 2612 0x00 0x00 NOPX + 2614 0x00 0x00 NOPX + 2616 0x19 0x68 0x88 0x18 ADD.NC p1, r17, #16 + 2620 0x01 0x06 0x36 0x98 LDA r17, [p1] + 2624 0x00 0x00 NOPX + 2626 0x00 0x00 NOPX + 2628 0x00 0x00 NOPX + 2630 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2632 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2634 0x1e 0xa0 0x1c 0xf8 MOV r26, el0 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2638 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2642 0x3e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p1, #-4]; MOV r27, r15 + 2648 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0] + 2652 0x00 0x00 NOPX + 2654 0x00 0x00 NOPX + 2656 0x00 0x00 NOPX + 2658 0x00 0x00 NOPX + 2660 0x00 0x00 NOPX + 2662 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2666 0x8c 0x66 0x40 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 + 2672 0x00 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p1, #-4] + 2678 0x00 0x00 NOPX + 2680 0x00 0x00 NOPX + 2682 0x00 0x00 NOPX + 2684 0x00 0x00 NOPX + 2686 0x00 0x00 NOPX + 2688 0x00 0x00 NOPX + 2690 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2694 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 2698 0x00 0xf6 0x36 0x98 LDA r17, [p0, #-4] + 2702 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] + 2706 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] + 2710 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 2716 0x00 0x00 NOPX + 2718 0x00 0x00 NOPX + 2720 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 2724 0x1e 0xe0 0x1c 0xf8 MOV r27, el0 +.delay_slot + 2728 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot + 2732 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 2736 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4] +.delay_slot +.swstall delay_slot + 2740 0x00 0x00 NOPX +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 2752 +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function_start + 2752 0x03 0x85 0xd0 0x00 0x40 0x88 0x49 0x60 0x78 0xba LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 + 2762 0x03 0x81 0xd0 0x3e 0x57 0xe9 0x30 0x82 0x48 0xba LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 + 2772 0xff 0x81 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r1, #-4; PADDXM [sp], #64 + 2782 0x01 0x86 0x07 0xfd 0xb5 0x81 0x00 0x28 0x00 0x10 0x58 0x76 MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 + 2794 0x00 0x63 0x07 0xf9 0xd5 0xbf 0x57 0xaa 0x88 0x0f 0x58 0x76 MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 + 2806 0xfe 0xbe 0xb0 0x60 0x02 0x5c ST r15, [sp, #-12]; MOVX r24, #0 + 2812 0x00 0x00 NOPX + 2814 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2818 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2822 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 2826 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 2830 0x00 0x00 NOPX + 2832 0x00 0x00 NOPX + 2834 0x00 0x00 NOPX + 2836 0x00 0x00 NOPX + 2838 0x00 0x00 NOPX + 2840 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2844 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2848 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 2852 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 2856 0x00 0x00 NOPX + 2858 0x00 0x00 NOPX + 2860 0x00 0x00 NOPX + 2862 0x00 0x00 NOPX + 2864 0x00 0x00 NOPX + 2866 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2870 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2874 0x00 0x04 0x0e 0x98 LDA eh0, [p0] + 2878 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4] + 2882 0x00 0x00 NOPX + 2884 0x00 0x00 NOPX + 2886 0x00 0x00 NOPX + 2888 0x00 0x00 NOPX + 2890 0x00 0x00 NOPX + 2892 0x09 0x04 0x09 0x98 ST eh0, [p1] + 2896 0x09 0x14 0x29 0x98 ST el0, [p1, #4] + 2900 0x02 0xdd 0xaa 0x98 LDA.u8 r13, [p2], #-3 + 2904 0x02 0x1e 0x2a 0x98 LDA.u8 r17, [p2], #1 + 2908 0x02 0xbd 0xca 0x98 LDA.u8 r14, [p2], #-5 + 2912 0x02 0xfd 0xfa 0x98 LDA.u16 r15, [p2], #-2 + 2916 0x02 0x0a 0x6a 0x98 LDA.u8 r19, [p2], m0 + 2920 0x02 0xac 0xea 0x98 LDA.u8 r7, [p2], #-6 + 2924 0x00 0x00 NOPX + 2926 0x13 0x42 0x1d 0x98 LSHL r1, r13, r1 + 2930 0x0c 0x20 0xf9 0x31 0x01 0x24 EQ r16, r1, r16; ADD.NC r18, r17, #1 + 2936 0x14 0xa4 0x5d 0x98 LSHL r18, r18, r5 + 2940 0x13 0xf6 0x47 0x98 EQ r27, r15, r4 + 2944 0xc1 0x4a 0x40 0xb7 0x39 0xe4 SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 + 2950 0x14 0x7b 0x22 0x18 SEL.EQZ r29, r17, r18, r27 + 2954 0x11 0xcc 0x67 0x98 EQ r6, r7, r6 + 2958 0x11 0xb7 0x04 0x98 AND r27, r6, r16 + 2962 0x7b 0xeb 0xbc 0xbb 0x41 0xe4 LSHL r15, r15, r21; MOV r25, r27 + 2968 0xfd 0xbe 0xb3 0x9b 0x04 0x5c ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 + 2974 0xc8 0x05 0xf8 0x40 0x01 0x84 JNZ r25, #3056 +.delay_slot + 2980 0x11 0xb6 0x47 0x98 EQ r27, r6, r4 +.delay_slot + 2984 0x13 0x71 0x44 0x98 AND r24, r13, r20 +.delay_slot + 2988 0x14 0xfc 0x5d 0x98 LSHL r30, r19, r5 +.delay_slot + 2992 0x16 0xe8 0x4d 0x98 LSHL r20, r27, r4 +.delay_slot + 2996 0x11 0x8c 0x32 0x18 SEL.EQZ r6, r6, r3, r27 + 3000 0xd8 0x05 0xf8 0x40 0x01 0x84 JNZ r27, #3056 +.delay_slot +.swstall delay_slot + 3006 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3008 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3010 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3012 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3014 0x00 0x00 NOPX + 3016 0x00 0x2f 0x00 0x00 0x01 0x84 0x00 0x00 0x20 0xba MOVA r15, #1; J #3104 +.delay_slot + 3026 0x00 0x1a 0x00 0x3e 0x57 0xab 0x88 0x0c 0x58 0xba MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 +.delay_slot + 3036 0x05 0x42 0x21 0x20 0x41 0x64 MOVX r21, #4; MOV r2, #16 +.delay_slot + 3042 0x10 0x1a 0x0d 0x18 MOVX r13, #3 +.delay_slot + 3046 0x10 0x0e 0x3d 0x18 MOVX r7, #15 +.delay_slot + 3050 0x00 0x2c 0xff 0x91 0xe2 0x2c NOPA; MOVX r4, #-4 +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 3056 0x02 0x02 0x81 0x82 0x0b 0x01 0x50 0x88 0x8f 0xfc 0x58 0x76 MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 + 3068 0x20 0x18 0xe0 0x01 0xa0 0x0b 0x88 0x0c 0x58 0xba ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 + 3078 0x02 0x02 0x00 0x3e 0x57 0xa9 0xe8 0x01 0x58 0xba MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 + 3088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x71 0xe9 0xa8 0x03 0x58 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 + 3104 0x5d 0xc5 0x50 0x1b 0xb3 0x3c 0x00 0x3c 0x58 0xba LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 + 3114 0x41 0x05 0x50 0x03 0x2d 0x12 0x87 0xcd 0x58 0xba LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 + 3124 0x00 0x57 0x00 0x3b 0xda 0x91 0x80 0x37 0x58 0xba MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 + 3134 0x01 0x03 0x00 0x2b 0xb0 0x3d 0x07 0xbc 0x58 0xba MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 + 3144 0x40 0x10 0x00 0x1f 0x6c 0x6c 0x80 0x70 0x58 0xba MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 + 3154 0xb5 0x92 0x08 0x1e 0x5d 0x64 EXTEND.u8 r22, r22; MOV m4, #-105 + 3160 0xfe 0x5a 0xb0 0x2d 0x61 0x6f 0x80 0x31 0x59 0x3a ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 + 3170 0xf7 0xba 0x3c 0x1f 0x05 0x64 SUB r30, r30, r29; MOV m6, #-63 + 3176 0x13 0xc2 0x11 0x98 SUB r1, r15, r1 + 3180 0x8f 0xc3 0xf0 0xa0 0x1d 0x64 MUL r31, r17, r1; MOV r1, #7 + 3186 0x16 0xa3 0x21 0x98 SUB r17, r26, r18 + 3190 0x17 0xfe 0x1d 0x98 LSHL r31, r31, r1 + 3194 0x55 0x7e 0x30 0x3b 0xf1 0xee 0x80 0x57 0x59 0x3a ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 + 3204 0x4d 0x55 0x50 0x2f 0x30 0x3d 0x87 0xb2 0x58 0xba LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3214 0xfd 0x4e 0xb9 0xcc 0x7b 0x5c ST r19, [sp, #-24]; LSHL r19, r19, r3 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3220 0x49 0x54 0xe0 0x3f 0x6b 0x2d 0x00 0xf6 0x58 0xba ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3230 0x45 0x43 0x50 0x27 0x38 0x10 0x87 0x50 0x58 0xba LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3240 0x15 0xfe 0x67 0x98 EQ r31, r23, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3244 0x10 0xe0 0x67 0x98 EQ r16, r3, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3248 0x17 0xf7 0x05 0x98 OR r27, r31, r16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3252 0x11 0xeb 0x54 0x98 AND r21, r7, r21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3256 0xf7 0xa3 0xd8 0xa0 0x61 0x64 ASHL r30, r30, r17; MOV r17, #24 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3262 0xfc 0x42 0xb0 0x1f 0x29 0x6f 0xcf 0x80 0x49 0x3a ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 + 3272 0x43 0xea 0x3f 0x46 0x3b 0x5c ST r26, [p2], #4; LSHL r17, r30, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3278 0x51 0x6a 0x30 0x02 0x00 0xa8 0x50 0x02 ST r26, [p2], m4; MOV m4, #168 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3286 0x5d 0x49 0x57 0xe7 0xf5 0xa7 0xb0 0x2c 0x0d 0xce 0x78 0x76 LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3298 0x16 0xe3 0x15 0x98 OR r17, r27, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3302 0x16 0xb7 0x81 0x98 SUB r27, r26, r24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3306 0x14 0xb0 0x90 0x18 EXTEND.u8 r24, r18 + 3310 0x00 0x00 NOPX + 3312 0x00 0x00 NOPX + 3314 0x00 0x00 NOPX + 3316 0x13 0xe5 0x21 0x98 SUB r18, r15, r18 + 3320 0x0a 0xca 0x51 0x98 ST r18, [p2], m6 + 3324 0x02 0xaa 0x4a 0x98 LDA.u8 r18, [p2], m5 + 3328 0x00 0x00 NOPX + 3330 0x00 0x00 NOPX + 3332 0x00 0x00 NOPX + 3334 0x00 0x00 NOPX + 3336 0x00 0x00 NOPX + 3338 0x00 0x00 NOPX + 3340 0x14 0xa4 0xe1 0x98 SUB r18, r18, r14 + 3344 0x14 0xa5 0xbe 0x98 ASHL r18, r18, r27 + 3348 0x14 0xa4 0x2d 0x98 LSHL r18, r18, r2 + 3352 0x00 0x01 0x0d 0xa0 0x00 0x44 MOVXM r27, #65536 + 3358 0x16 0xe5 0x20 0x98 ADD r18, r27, r18 + 3362 0x00 0xff 0x0d 0xa0 0x00 0x44 MOVXM r27, #16711680 + 3368 0xde 0xe4 0x99 0x3f 0xc1 0x64 AND r27, r27, r18; MOV r18, #-16 + 3374 0xde 0xe2 0xb8 0xbf 0xe1 0x64 OR r27, r27, r17; MOV r17, #-8 + 3380 0x43 0xee 0x39 0xce 0x3b 0x5c ST r27, [p2], #4; LSHL r19, r19, r17 + 3386 0x16 0xb5 0x31 0x98 SUB r26, r26, r19 + 3390 0x15 0x29 0xad 0x98 LSHL r20, r20, r26 + 3394 0x13 0xb5 0x65 0x98 OR r26, r14, r22 + 3398 0x4d 0x6a 0x3f 0x69 0x20 0x5c ST r26, [p2], m3; EXTEND.u8 r26, r30 + 3404 0x49 0x65 0x50 0x37 0x49 0x6f 0xce 0xa8 0xa8 0xba LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3414 0xb5 0xa3 0xb8 0xa3 0xf9 0x64 LSHL r22, r22, r17; MOV r17, #254 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3420 0x45 0x64 0xed 0x6b 0x1f 0x2c ST.s8 r25, [p2], m1; MUL r26, r26, r24 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3426 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3428 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3430 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3432 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3434 0x86 0x5f 0xbd 0xb5 0xca 0xa4 LSHL r25, r16, r15; ADD.NC r27, r21, r25 + 3440 0xf8 0x06 0xf8 0x40 0x01 0x84 JNZ r31, #3568 +.delay_slot + 3446 0x9d 0x41 0xed 0xbb 0xf2 0xa4 ADD r21, r19, #3; ADD.NC r27, r27, r30 +.delay_slot + 3452 0x16 0xeb 0x5d 0x98 LSHL r21, r27, r21 +.delay_slot + 3456 0x16 0x63 0x14 0x98 AND r17, r25, r17 +.delay_slot + 3460 0x51 0x46 0x30 0x0d 0xbe 0x3e 0x28 0x01 0x59 0x3a ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 +.delay_slot + 3470 0x18 0x9b 0x9c 0xf8 MOV el1, r27 + 3474 0x07 0xe3 0x91 0x18 LDA r28, [sp, #-32] + 3478 0x00 0x00 NOPX + 3480 0x00 0x00 NOPX + 3482 0x00 0x00 NOPX + 3484 0x00 0x00 NOPX + 3486 0x00 0x00 NOPX + 3488 0x00 0x00 NOPX + 3490 0xe0 0x06 0xf8 0x40 0x01 0x84 JNZ r28, #3568 +.delay_slot +.swstall delay_slot + 3496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3502 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3504 0x00 0x00 NOPX + 3506 0x00 0xbc 0x00 0x01 0x10 0x8b 0x28 0x40 0x58 0xba MOVA r28, #5; MOVX r17, #4; MOV r25, #64 + 3516 0x14 0x7e 0xd2 0x18 SEL.EQZ r31, r17, r13, r27 + 3520 0x16 0x76 0x67 0x98 EQ r27, r25, r6 + 3524 0xff 0x38 0x4f 0xa0 0x01 0x64 SEL.EQZ r28, r31, r28, r27; MOV r31, #0 + 3530 0x10 0x32 0x50 0x18 EXTEND.s8 r25, r0 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3534 0x16 0x7d 0xef 0x98 MUL r30, r25, r30 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3538 0xce 0xe3 0x5d 0xc4 0x39 0xe4 LT r27, r25, r17; MOV r27, el1 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3544 0x13 0xe3 0x82 0x18 SEL.EQZ r17, r15, r24, r27 + 3548 0x14 0x63 0xef 0x98 MUL r17, r17, r30 + 3552 0x17 0xf9 0xc1 0x98 SUB r28, r31, r28 + 3556 0x14 0x63 0xce 0x98 ASHL r17, r17, r28 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3560 0x00 0x23 0x14 0x81 0x00 0x00 0x1c 0x22 EXTEND.u8 r17, r17; NOPV +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3568 0x00 0x19 0x00 0x3f 0xc7 0xeb 0x70 0x0e 0x78 0xba MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3578 0x16 0x7f 0xc2 0x18 SEL.EQZ r31, r25, r28, r27 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3582 0xfd 0x6e 0x20 0x21 0x04 0x83 0x4f 0x74 0xa8 0xba LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3592 0x09 0x1e 0x00 0x29 0x44 0x83 0xa8 0x09 0x58 0xba MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 0x17 0x73 0xe2 0x18 SEL.EQZ r25, r29, r30, r27 + 3606 0x15 0xf9 0x88 0x98 NE r28, r23, r24 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3610 0x17 0x7b 0x3d 0x98 LSHL r29, r29, r19 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3614 0xfd 0xde 0x20 0x00 0x00 0x03 0x0a 0x04 0x10 0xba LDA r23, [sp, #-20]; MOVXM r24, #1032 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3624 0xcc 0xe7 0xbf 0x3a 0xff 0x24 LSHL r19, r25, r19; ADD.NC r30, r26, #-1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3630 0x14 0xcf 0xe6 0x18 MAC r7, r7, r19, r30 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3634 0xb4 0xd2 0x0b 0xa8 0x29 0x64 EXTEND.u8 r19, r22; MOV r23, #522 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3640 0xbd 0xb0 0x4d 0x21 0x01 0x64 SEL.EQZ r22, r23, r24, r27; MOV r26, #64 + 3646 0x31 0xb5 0x1d 0xc2 0x39 0xe4 NE r6, r6, r26; MOV r27, eh0 + 3652 0x11 0xcf 0x24 0x98 AND r7, r7, r18 + 3656 0xbd 0xde 0x4d 0xa6 0x41 0xe4 SEL.EQZ r23, r23, r15, r27; MOV r27, r6 + 3662 0x29 0x08 0x49 0x20 0x7d 0x64 SEL.EQZ r4, r5, r4, r27; MOV r18, #31 + 3668 0x15 0xef 0x24 0x98 AND r23, r23, r18 + 3672 0xdc 0x1e 0x00 0x20 0x42 0x6e 0x4f 0x70 0x58 0xba MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 + 3682 0x17 0xbd 0x22 0x18 SEL.EQZ r30, r30, r18, r27 + 3686 0x12 0x05 0x00 0x2f 0xa9 0xfe 0x09 0x20 0x58 0xba MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 + 3696 0x14 0x20 0x52 0x18 SEL.EQZ r16, r16, r5, r27 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3700 0x8e 0x09 0xfd 0xbd 0xf2 0xa4 MUL r24, r17, r4; ADD.NC r27, r29, r30 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3706 0x84 0x3f 0xbd 0xc4 0x39 0xe4 LSHL r16, r16, r31; MOV r27, el1 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3712 0xfb 0xee 0xb7 0x6b 0x5f 0x5c ST r27, [sp, #-36]; MUL r26, r14, r26 + 3718 0x10 0x84 0x32 0x18 SEL.EQZ r2, r2, r3, r27 + 3722 0x13 0x7e 0x0c 0x98 LTU r31, r13, r0 + 3726 0x15 0x31 0x8f 0x98 MUL r24, r20, r24 + 3730 0x17 0xf7 0xc5 0x98 OR r27, r31, r28 + 3734 0x10 0x03 0x07 0xee 0x95 0xb7 0xc0 0xee 0x89 0x00 0x58 0x76 MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 + 3746 0x1d 0x28 0x40 0xb7 0x39 0xe4 SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 + 3752 0x00 0x00 0x0f 0xac 0x0c 0x44 MOVXM r31, #1542 + 3758 0xfd 0x12 0xb0 0x1f 0xb0 0x3c 0x89 0x3f 0xc9 0x3a ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 + 3768 0xed 0x8c 0x82 0x1c 0x91 0xad 0xff 0x92 0xcc 0x7f 0xc8 0x76 MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 + 3780 0x4d 0x5a 0x30 0x2b 0x57 0xef 0x70 0x8e 0x79 0x3a ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 + 3790 0x02 0xd9 0x02 0x1f 0x51 0xa9 0x4e 0x0e 0x00 0x58 0x58 0x76 MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3802 0x51 0x62 0x3f 0xe3 0x24 0x5c ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3808 0xfc 0x6e 0x22 0xef 0x91 0xab 0xce 0x0f 0x69 0x90 0x78 0x76 LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3820 0x22 0xf3 0x00 0x29 0xce 0x12 0x8c 0xff 0xc8 0xba MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3830 0xf7 0x1d 0x00 0x3b 0xea 0x73 0x70 0x8e 0x78 0xba MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3840 0xfc 0xee 0x2e 0xca 0x44 0x2c LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3846 0x04 0x1f 0x00 0x3f 0x39 0x93 0x69 0x90 0x78 0xba MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3856 0x17 0xc4 0x22 0x18 SEL.EQZ r2, r31, r2, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3860 0x10 0xeb 0x51 0x98 SUB r21, r3, r21 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3864 0x47 0x8e 0x30 0x04 0x27 0x90 0x6f 0xc0 0x59 0x3a ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3874 0x17 0x38 0x32 0x18 SEL.EQZ r28, r28, r3, r27 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 3878 0x15 0xfe 0x7f 0x98 MUL r31, r23, r7 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 3882 0xfb 0xc6 0x2e 0x0c 0x64 0x2c LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3888 0x5d 0x8e 0x30 0x35 0x29 0x7c 0x80 0x28 0x59 0x3a ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3898 0x45 0x56 0x31 0x0d 0xe4 0x5c ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3904 0xfe 0x02 0x20 0x06 0x38 0xfe 0xa9 0xfc 0xa8 0xba LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3914 0xfd 0xc6 0x22 0x1f 0x11 0x80 0x05 0x06 0x06 0x7a LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3924 0x4f 0xce 0x30 0x00 0x00 0x3e 0x6f 0xf8 0x11 0x3a ST r19, [p2], #28; MOVXM r19, #65520 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3934 0x43 0xd2 0x30 0x3f 0x49 0xa4 0x4b 0xbf 0xc9 0x3a ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3944 0x43 0xc6 0x31 0x56 0x9c 0x5c ST r17, [p2], #4; MSC r21, r21, r2, r20 + 3950 0x43 0x8a 0x3f 0x7a 0x81 0x5c ST r2, [p2], #4; ADD r30, r30, r20 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 3956 0x43 0xfa 0x38 0x73 0xe3 0x5c ST r30, [p2], #4; SUB r28, r16, r31 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 3962 0x43 0xd6 0x30 0x2d 0xf8 0x30 0x60 0x00 0x59 0x3a ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3972 0x43 0x8c 0x30 0x3e 0x20 0x7e 0x2c 0x7f 0xc9 0x3a ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 + 3982 0x43 0x8c 0x38 0xb8 0x0c 0x5c ST dc0, [p2], #4; MAC r14, r14, r17, r0 + 3988 0x43 0xda 0x30 0x27 0x01 0x24 0x48 0x00 0x59 0x3a ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 + 3998 0x43 0xf2 0x30 0x05 0x1f 0x8f 0x70 0x0e 0x79 0x3a ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 + 4008 0x43 0x92 0x3b 0xb9 0xdf 0x5c ST r4, [p2], #4; MUL r14, r23, r14 + 4014 0x43 0xc6 0x30 0x21 0x0f 0x8c 0x08 0x06 0x59 0x3a ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4024 0x09 0x00 0x02 0x1e 0x11 0x9b 0x00 0x13 0x69 0x90 0x78 0x76 MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4036 0xfd 0x16 0x20 0x14 0xa4 0x2c LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4042 0x11 0x63 0xaf 0x98 MUL r17, r5, r26 + 4046 0x43 0x8c 0x30 0x07 0x08 0x6d 0x07 0xc8 0x59 0x3a ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 + 4056 0xfc 0x63 0x02 0x48 0x61 0xa0 0xf7 0xed 0xa8 0xc1 0xc8 0x76 MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 + 4068 0x41 0x0a 0x36 0xc0 0x7b 0x5c ST r2, [p2], m0; LSHL r16, r13, r3 + 4074 0x43 0xda 0x38 0x8e 0x41 0x5c ST r22, [p2], #4; ADD r3, r17, r18 + 4080 0x43 0xca 0x38 0xc8 0x9c 0x5c ST r18, [p2], #4; MSC r18, r18, r17, r4 + 4086 0x43 0x92 0x32 0x94 0xdb 0x5c ST r4, [p2], #4; LSHL r5, r5, r6 + 4092 0x43 0x8e 0x30 0x1a 0x38 0x04 0x0f 0xfd 0x59 0x3a ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 + 4102 0x10 0xc0 0x0e 0x98 ASHL r0, r3, r0 + 4106 0x43 0xca 0x37 0x10 0x1f 0x5c ST r18, [p2], #4; MUL r4, r14, r0 + 4112 0x43 0x8c 0x30 0x0c 0x3b 0x5c ST dc0, [p2], #4; LSHL r3, r0, r1 + 4118 0xff 0xb6 0x22 0x1c 0x61 0x80 0x03 0xc6 0x31 0xfa LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 + 4128 0xff 0x3a 0x22 0x1c 0x91 0xba 0x70 0x30 0x28 0x3f 0xc8 0x76 LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 + 4140 0xfe 0xbe 0x22 0x1c 0x31 0x80 0x01 0x41 0xaf 0xfa LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 + 4150 0x43 0x8e 0x30 0x50 0x00 0x5c ST r3, [p2], #4; RET lr +.delay_slot + 4156 0x0a 0x5c 0xf1 0x98 ST r7, [p2], #20 +.delay_slot + 4160 0x0a 0x1c 0x11 0x98 ST r0, [p2], #4 +.delay_slot + 4164 0x0a 0x1c 0x51 0x98 ST r2, [p2], #4 +.delay_slot + 4168 0x0a 0x04 0x51 0x98 ST r2, [p2] +.delay_slot + 4172 0x42 0x8a 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r2, [p2, #4]; PADDXM [sp], #-64 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + +.text_segment PM 4192 +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function_start + 4192 0x00 0x03 0x82 0x84 0x8b 0x01 0x80 0x08 0x0a 0x60 0x78 0x76 MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 + 4204 0x00 0x06 0x88 0x28 0x28 0x34 0x01 0x36 0x00 0x21 0x20 0x09 0x60 0x7e MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 + 4218 0x63 0x94 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA dn1, [p3], #4; MOVXM p4, #509032 + 4228 0x63 0x90 0xd0 0x00 0x00 0x04 0x78 0x78 0x10 0xba LDA m1, [p3], #4; MOVXM ls, #4336 + 4238 0x60 0x80 0xd0 0x00 0x00 0x05 0xb8 0x90 0x10 0xba LDA m0, [p3]; MOVXM le, #4384 + 4248 0x7a 0x82 0xd1 0x00 0x01 0x54 LDA r0, [p3, #-12]; MOV dj0, #0 + 4254 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 4258 0x00 0x00 NOPX + 4260 0x00 0x00 NOPX + 4262 0x00 0x0a 0x80 0x85 0x01 0xf4 VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 + 4268 0x3e 0x30 0x14 0x18 VLDB.POP.512.2D x0, [p0, lf0, r24, d1] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4272 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4276 0x00 0x0a 0x8a 0xe0 0xfd 0x34 VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4282 0xc6 0x02 0x80 0xf5 0x00 0x1c VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4288 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4292 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4294 0x00 0x2c 0xf0 0x00 0x54 0x00 0x01 0xa5 0x7e 0xba NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4304 0x00 0x2c 0xfc 0x60 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4336 0x00 0x2c 0xf8 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4352 0x00 0x2c 0xf0 0x00 0xad 0x80 0x03 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4368 0x00 0x2c 0xfc 0x60 0x29 0x00 0x03 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4384 0x00 0x2c 0xf0 0x00 0x23 0x00 0x03 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4400 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 0x0d 0x80 0x03 0x18 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 0x20 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4414 0x18 0x81 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x0 + 4418 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 4422 0xb0 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 + 4430 0x20 0x00 0x60 0x00 0x40 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 + 4438 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 4442 0xb0 0x00 0x60 0x50 0x00 0x5c VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr +.delay_slot + 4448 0x09 0x00 0x03 0x18 VST.FLUSH.512.CONV [p2, sf, r26] +.delay_slot + 4452 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] +.delay_slot +.swstall delay_slot + 4456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4460 0x00 0x00 NOPX +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + +.text_segment PM 4464 +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function_start + 4464 0xf5 0xe0 0x86 0x3f 0x20 0x00 0x80 0x00 0x00 0x0e 0x91 0x11 0x60 0x7e MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 + 4478 0x00 0x73 0x07 0xf1 0x95 0xbf 0xc5 0x0a 0x2b 0x60 0x78 0x76 MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 + 4490 0x00 0x19 0x07 0xda 0x35 0x81 0x10 0x29 0x34 0x47 0x08 0x76 MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 + 4502 0x40 0xca 0xd7 0xf5 0x35 0x80 0x40 0x03 0xa8 0x00 0x10 0x76 LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 + 4514 0x0b 0x18 0x87 0xfd 0xd5 0x80 0x7f 0xff 0xef 0xff 0x90 0x76 MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 + 4526 0x00 0xb4 0x07 0xe1 0xb5 0x81 0x61 0x0a 0x07 0xec 0x58 0x76 MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 + 4538 0x01 0x95 0x07 0xed 0xf5 0x87 0x77 0xca 0x87 0xc4 0x58 0x76 MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 + 4550 0xff 0x73 0xb0 0x03 0x80 0x40 0x50 0x02 ST p7, [sp, #-8]; MOV m7, #64 + 4558 0x0f 0xe4 0x3d 0x98 ST lr, [sp, #-28] + 4562 0x00 0x00 NOPX + 4564 0x17 0x59 0x20 0x98 ADD r12, r29, r18 + 4568 0x41 0x32 0x36 0x77 0x9b 0x5c ST r12, [p2], m0; LSHL r29, r12, r28 + 4574 0x5b 0xf9 0x5e 0xf2 0x2f 0x2c LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 + 4580 0x02 0xc9 0x2a 0x98 LDA.u8 r9, [p2], m6 + 4584 0x00 0x00 NOPX + 4586 0x00 0x00 NOPX + 4588 0x00 0x00 NOPX + 4590 0x00 0x00 NOPX + 4592 0x00 0x00 NOPX + 4594 0x17 0x77 0xec 0x98 LTU r27, r29, r30 + 4598 0x16 0x5d 0x32 0x18 SEL.EQZ r14, r25, r19, r27 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4602 0x17 0xf6 0xcc 0x98 LTU r27, r31, r12 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4606 0x51 0x70 0xee 0xb7 0xcf 0x2c ST.s8 r28, [p2], m4; EQ r13, r29, r30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4612 0x13 0x7f 0x1d 0x98 LSHL r31, r13, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4616 0x16 0x58 0xe2 0x18 SEL.EQZ r12, r25, r14, r27 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4620 0x17 0xf9 0xc5 0x98 OR r28, r31, r28 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4624 0x8e 0xfd 0x9e 0x3c 0x62 0xa4 LTU r27, r17, r30; ADD.NC r28, r28, r12 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4630 0x16 0x79 0xc2 0x18 SEL.EQZ r28, r25, r28, r27 + 4634 0x14 0x7f 0xcc 0x98 LTU r31, r17, r28 + 4638 0x55 0x7e 0x3e 0xf7 0xd1 0x5c ST r31, [p2], m5; NE r29, r29, r30 + 4644 0x5d 0x79 0x54 0xb2 0x31 0x2c LDA.u8 r30, [p2], m7; NE r12, r9, r17 + 4650 0x00 0x00 NOPX + 4652 0x00 0x00 NOPX + 4654 0x00 0x00 NOPX + 4656 0x00 0x00 NOPX + 4658 0x00 0x00 NOPX + 4660 0x00 0x00 NOPX + 4662 0xf5 0xad 0x1f 0xbe 0xfc 0x24 NE r22, r30, r22; ADD.NC r31, r30, #-4 + 4668 0x60 0x09 0x40 0x40 0x01 0x84 JNZ r12, #4736 +.delay_slot + 4674 0x17 0x93 0x48 0x98 NE r9, r30, r20 +.delay_slot + 4678 0x17 0xfe 0x90 0x18 EXTEND.u8 r31, r31 +.delay_slot + 4682 0x12 0x6d 0x64 0x98 AND r22, r9, r22 +.delay_slot + 4686 0x17 0xef 0x7c 0x98 LTU r23, r31, r23 +.delay_slot + 4690 0x15 0xe1 0x64 0x98 AND r16, r23, r22 + 4694 0xe8 0x09 0x40 0x40 0x01 0x84 JNZ r29, #4736 +.delay_slot + 4700 0x0f 0xeb 0x1d 0x98 ST p6, [sp, #-24] +.delay_slot +.swstall delay_slot + 4704 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4706 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4708 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4710 0x00 0x00 NOPX + 4712 0x00 0x3b 0x00 0x00 0x02 0x56 0x00 0x00 0x20 0xba MOVA r27, #1; J #4784 +.delay_slot + 4722 0x18 0x19 0x9c 0xf8 MOV el0, r25 +.delay_slot + 4726 0x10 0x26 0x05 0x18 MOVX r19, #1 +.delay_slot +.swstall delay_slot + 4730 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4732 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4734 0x00 0x00 NOPX +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 4736 0x00 0x95 0x07 0xeb 0x1d 0xab 0xbf 0x3c 0x0c 0xce 0x78 0x76 MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 + 4748 0x17 0xab 0x5d 0x98 LSHL r21, r30, r21 + 4752 0x15 0x6b 0x92 0x18 SEL.EQZ r21, r21, r25, r27 + 4756 0x14 0xf7 0xe7 0x98 EQ r27, r19, r30 + 4760 0xac 0xf2 0x4d 0xb0 0x41 0xe4 SEL.EQZ r19, r21, r25, r27; MOV r27, r16 + 4766 0x16 0x67 0x32 0x18 SEL.EQZ r19, r25, r19, r27 + 4770 0x17 0x29 0x44 0x98 AND r20, r28, r20 + 4774 0x15 0x36 0xf0 0x18 NEZ r27, r20 + 4778 0x00 0x2c 0xf9 0xcf 0x8b 0x2c NOPA; OR r19, r19, r28 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 + 4784 0x01 0x90 0x82 0x6f 0x71 0xba 0x02 0x5c 0x10 0x00 0x60 0x76 MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 +.delay_slot + 4796 0x02 0x8a 0x67 0x18 ST.s8 r19, [p2], m4 +.delay_slot +.swstall delay_slot + 4800 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4802 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4804 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4806 0x00 0x00 NOPX + 4808 0x00 0xff 0xfa 0x3f 0xfe 0x44 MOVXM r20, #16777215 + 4814 0x14 0xa5 0x44 0x98 AND r18, r18, r20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4818 0x00 0x2c 0xf6 0xec 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r18, [p3, #28]; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4832 0x51 0xd2 0xd0 0x27 0x44 0x82 0xcf 0xfd 0x58 0xba LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4842 0x00 0x52 0x00 0x29 0x5f 0xfa 0x00 0x24 0x58 0xba MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4852 0x51 0x5a 0xd7 0xd0 0x2d 0xab 0x6b 0x26 0x07 0xcc 0x58 0x76 LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4864 0x51 0x5e 0xd7 0xde 0xd5 0xbf 0x37 0xea 0x00 0xc4 0x58 0x76 LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 + 4876 0x02 0xff 0xb6 0x98 LDA r29, [p2], #-4 + 4880 0x02 0x8b 0xf6 0x98 LDA r31, [p2], m4 + 4884 0x01 0x06 0xb6 0x98 LDA r21, [p1] + 4888 0x00 0xd2 0xda 0x26 0x5b 0x2c LDA r20, [p0]; LSHL r9, r20, r18 + 4894 0x04 0x07 0xd6 0x98 LDA r30, [p4] + 4898 0x15 0xad 0x2d 0x98 LSHL r22, r22, r18 + 4902 0x00 0x00 NOPX + 4904 0x17 0x67 0x3e 0x98 ASHL r19, r29, r19 + 4908 0x17 0xe3 0x18 0x98 NE r17, r31, r17 + 4912 0x88 0x09 0xd0 0x40 0x01 0x84 JNZ r17, #5024 +.delay_slot + 4918 0xbd 0xa5 0xba 0xb5 0xb2 0xa4 LSHL r22, r23, r18; ADD.NC r21, r21, r22 +.delay_slot + 4924 0x9d 0x65 0xb0 0x95 0xb2 0xa4 LSHL r21, r19, r18; ADD.NC dn0, r21, r22 +.delay_slot + 4930 0xfa 0x84 0xb0 0x01 0xca 0x68 0xa0 0x02 ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 +.delay_slot + 4938 0x1b 0xd0 0x80 0xf8 MOV r15, dn0 +.delay_slot + 4942 0x1e 0x6a 0xf9 0x58 ADD.NC p6, r21, r30 + 4946 0x00 0x07 0xce 0xc8 0xd0 0x44 MOVXM p7, #509032 + 4952 0xe0 0xc4 0x50 0xb4 0x80 0x2c LDA.s8 r17, [p7]; MOVX vaddSign0, #1 + 4958 0x00 0x00 NOPX + 4960 0xff 0x7f 0x0a 0x20 0x00 0x44 MOVXM r20, #-8454144 + 4966 0x18 0x02 0x91 0x78 VINSERT.32 x0, x0, #0, r20 + 4970 0x1d 0x15 0xe0 0xf8 MOV r20, sp +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4974 0x1f 0x6a 0x5f 0x18 ADD.NC p7, r20, #-66 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4978 0xe0 0xc6 0xe0 0x01 0x25 0xd4 ST.s16 r17, [p7]; VMOV bmll0, x0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4984 0x14 0x7a 0x80 0x18 MOVX crRnd, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4988 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4992 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4994 0x1c 0x41 0x01 0xb8 VEXTRACT.16 r17, x0, #0, vaddSign0 + 4998 0x00 0x00 NOPX + 5000 0x00 0x00 NOPX + 5002 0x07 0x06 0x32 0x98 LDA.s16 r17, [p7] + 5006 0x00 0x00 NOPX + 5008 0x00 0x00 NOPX + 5010 0x00 0x00 NOPX + 5012 0x00 0x00 NOPX + 5014 0x00 0x00 NOPX + 5016 0x00 0x00 NOPX + 5018 0x00 0x2c 0xff 0xa4 0x6b 0x0c NOPA; ST r17, [sp, #-48] +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5024 0x0b 0x90 0x81 0x8e 0x0b 0x00 0x01 0xf1 0xb2 0x34 0x10 0x76 MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5036 0x51 0x45 0x50 0x84 0x8b 0x33 0x19 0x92 0x68 0x0b 0x58 0x76 LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5048 0x61 0x96 0x00 0x39 0xb9 0x65 0xaa 0x60 0x78 0xba MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5058 0x03 0x06 0x67 0x18 ST.s8 r19, [p3] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5062 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5064 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5070 0x14 0x6b 0x2d 0x98 LSHL r21, r17, r18 +.delay_slot + 5074 0x1f 0x6a 0xf9 0x58 ADD.NC p7, r21, r30 +.delay_slot + 5078 0x16 0x63 0x11 0x98 SUB r17, r25, r17 +.delay_slot + 5082 0x8c 0x65 0xba 0x2c 0x35 0x64 LSHL r17, r17, r18; MOV r20, #781 +.delay_slot + 5088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2c 0x9a 0x11 0x8b 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV +.return_address + 5104 0x07 0xd4 0x99 0x18 LDA p1, [sp, #-44] +.no_stack_arguments + 5108 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192 +.delay_slot +.swstall delay_slot + 5114 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5116 0x00 0x00 NOPX +.delay_slot + 5118 0x1b 0x56 0x90 0x18 ADD.NC r13, r13, #32 +.delay_slot + 5122 0x1a 0x66 0xa0 0xf8 MOV p2, r13 +.delay_slot + 5126 0x00 0x2c 0xf0 0x8f 0x0b 0x00 0x00 0x00 0x00 0x7a NOPA; MOVS p0, r15; NOPX +.return_address + 5136 0xd6 0x9a 0x80 0x01 0x37 0xea 0x33 0x63 0x08 0xba MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 + 5146 0x83 0x84 0xd0 0x34 0x62 0x2c LDA dn0, [p4], #4; MOVX r13, #12 + 5152 0x04 0x1c 0x46 0x98 LDA dj0, [p4], #4 + 5156 0x04 0x1e 0x26 0x98 LDA dn4, [p4], #4 + 5160 0x04 0x1e 0x46 0x98 LDA dj4, [p4], #4 + 5164 0x04 0x1c 0x06 0x98 LDA m0, [p4], #4 + 5168 0x04 0x1c 0x66 0x98 LDA dc0, [p4], #4 + 5172 0x04 0x1e 0x66 0x98 LDA dc4, [p4], #4 + 5176 0x04 0x1e 0xd6 0x98 LDA r22, [p4], #4 + 5180 0x04 0x1e 0x36 0x98 LDA r17, [p4], #4 + 5184 0x04 0x1f 0x96 0x98 LDA r28, [p4], #4 + 5188 0x04 0x1e 0xb6 0x98 LDA r21, [p4], #4 + 5192 0x04 0x1e 0xf6 0x98 LDA r23, [p4], #4 + 5196 0x04 0x1d 0x9e 0x98 LDA p3, [p4], #4 + 5200 0x04 0x1d 0x26 0x98 LDA dn2, [p4], #4 + 5204 0x04 0x1c 0xa6 0x98 LDA dn1, [p4], #4 + 5208 0x04 0x1c 0xc6 0x98 LDA dj1, [p4], #4 + 5212 0x04 0x1e 0xa6 0x98 LDA dn5, [p4], #4 + 5216 0x04 0x1f 0xd6 0x98 LDA r30, [p4], #4 + 5220 0x04 0x1f 0xb6 0x98 LDA r29, [p4], #4 + 5224 0x04 0x1c 0xe6 0x98 LDA dc1, [p4], #4 + 5228 0x04 0xc2 0x4a 0x98 LDA.u8 r18, [p4, dj6] + 5232 0x07 0xd2 0x91 0x18 LDA r20, [sp, #-48] + 5236 0x04 0x04 0x56 0x98 LDA r2, [p4] + 5240 0x00 0x00 NOPX + 5242 0x00 0x00 NOPX + 5244 0x00 0x00 NOPX + 5246 0x00 0x00 NOPX + 5248 0x14 0xe7 0x2c 0x98 LTU r19, r19, r18 + 5252 0x98 0x0c 0x10 0x40 0x01 0x84 JNZ r19, #6176 +.delay_slot + 5258 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.delay_slot + 5264 0x02 0x05 0xa7 0x18 ST.s8 r13, [p2] +.delay_slot + 5268 0x1c 0xd1 0x72 0xf8 VBCST.16 x9, r20 +.delay_slot +.swstall delay_slot + 5272 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5274 0x00 0x00 NOPX + 5276 0xfb 0x43 0x20 0x1b 0xb9 0x3f 0x80 0x84 0x58 0xba LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 + 5286 0x00 0x13 0x00 0x3d 0x20 0x0a 0x00 0x3c 0x58 0xba MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 + 5296 0xf8 0x14 0x80 0x01 0xa0 0x0b 0xe4 0xd0 0x78 0xba MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 + 5306 0xef 0x98 0x82 0x1c 0x4b 0x1b 0xd4 0x01 0xa7 0xc0 0x78 0x76 MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5318 0xfa 0x96 0x26 0x1c 0x4b 0x01 0xf7 0x89 0xe8 0x07 0x58 0x76 LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5330 0xfb 0xca 0x20 0x00 0x00 0x05 0x32 0xa0 0x10 0xba LDA r18, [sp, #-36]; MOVXM p2, #5440 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5340 0xfc 0x36 0x20 0x34 0x69 0x12 0x8b 0x0c 0x58 0xba LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 + 5350 0xfc 0x87 0x29 0xd7 0x20 0x01 0x90 0x0b 0x08 0x00 0x58 0xb6 LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 + 5362 0x04 0x88 0x16 0x98 LDA r0, [p4], m4 + 5366 0x04 0xab 0x26 0x98 LDA dn6, [p4], m5 + 5370 0x04 0x2f 0x76 0x98 LDA r27, [p4], #8 + 5374 0x04 0x1e 0x86 0x98 LDA m5, [p4], #4 + 5378 0x04 0x8a 0xc6 0x98 LDA dj5, [p4], m4 + 5382 0x04 0x9e 0x06 0x98 LDA m4, [p4], #-28 + 5386 0x04 0x1c 0x36 0x98 LDA r1, [p4], #4 + 5390 0x99 0x02 0xdd 0x06 0x02 0x94 LDA r0, [p4], m6; ADD.NC dj6, r6, r0 + 5396 0x04 0x14 0x76 0x98 LDA r3, [p4, #4] + 5400 0x04 0x04 0x96 0x98 LDA r4, [p4] + 5404 0x19 0xda 0x00 0xf8 MOV r7, m5 + 5408 0x1a 0x83 0x99 0x58 ADD.NC dj2, r7, r6 + 5412 0x1c 0x1b 0x00 0xf8 MOV r16, dj5 + 5416 0x1a 0x0d 0x99 0x58 ADD.NC m2, r27, r6 + 5420 0x1e 0x03 0xe0 0x18 ADD.NC m6, r7, #-64 + 5424 0x18 0xff 0xee 0x10 0xc0 0x24 ADD r3, r3, #-1; ADD.NC m7, r16, #-64 + 5430 0x00 0x2c 0xf0 0x00 0x10 0x00 0x82 0x80 0x7e 0xba NOPA; NOPB; MOV m1, dj2 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.loop_nesting 1 + 5440 0xc3 0x85 0x71 0x85 0x0b 0x04 0xe7 0xec 0x33 0x90 0x78 0x76 VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 + 5452 0x22 0x81 0x78 0x28 0x2b 0x0e 0x4b 0x02 0x33 0x98 0xa0 0xf6 VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 + 5464 0xa0 0x39 0x78 0x28 0x2f 0x5a 0x4b 0x03 0xc6 0x80 0x70 0xf6 VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 + 5476 0xd9 0x0d 0x74 0x03 0x2b 0x53 0x0b 0x01 0x82 0x00 0x70 0xf6 VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 + 5488 0x71 0x41 0x74 0x12 0xd4 0x01 0xc0 0x00 0x5e 0xba VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 + 5498 0xc3 0x95 0x78 0x28 0x28 0x00 0x00 0x05 0xbb 0x40 0x10 0xb6 VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 + 5510 0xdd 0x1d 0x78 0x28 0x28 0x00 0x00 0x04 0x7b 0x28 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 + 5522 0x80 0xb5 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] + 5528 0xc3 0xa5 0x78 0x22 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] + 5534 0xd9 0x2d 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5540 0x22 0x81 0x78 0x28 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5546 0x83 0xbd 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5552 0x80 0xcd 0x74 0x11 0x14 0x02 0x9a 0xc3 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5562 0x80 0xc5 0x78 0x28 0x2c 0x98 0x8b 0x01 0x9a 0xc1 0xe0 0xf6 VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5574 0x02 0x81 0x73 0x00 0x54 0x1d 0x48 0x14 0xe9 0x4a VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5584 0xcf 0x35 0x76 0x94 0x96 0x00 0x00 0x5c 0x58 0x07 0x49 0x2c 0xe9 0x6e VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5598 0x82 0xbd 0x7a 0x38 0x96 0x00 0x00 0x4c 0x90 0x3e 0x4a 0x55 0x09 0x6e VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5612 0x71 0x01 0x74 0x98 0x96 0x00 0x00 0x54 0x90 0x1e 0xf8 0x60 0x3d 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5626 0x22 0x81 0x70 0x04 0xf9 0x64 0x3d 0x62 VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5634 0xa0 0x09 0x70 0x04 0xfa 0x88 0x3d 0x62 VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5642 0x71 0x01 0x70 0x04 0x4b 0x6d 0x09 0x62 VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5650 0x22 0x81 0x74 0x01 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5656 0x3c 0x11 0x14 0x18 VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5660 0xa0 0x09 0x78 0x28 0x2d 0x72 0x7d 0x82 0xfb 0x8c 0x3d 0x66 VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5672 0x71 0x01 0x74 0x14 0x14 0x1d 0xa0 0x06 0x29 0x4a VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5682 0x22 0x81 0x74 0x01 0x28 0x00 0x00 0x58 0xaa 0x0f 0xa2 0x46 0x09 0x4e VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5696 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5712 0xa0 0x09 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x01 0x49 0x01 0xed 0x1b 0x50 0x4b VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5728 0x71 0x01 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x00 0x31 0x4b VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 0x22 0x81 0x74 0x01 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x12 0x30 0x4b VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5760 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.loop_nesting 1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5776 0xa0 0x09 0x7c 0xbc 0x96 0x00 0x00 0x54 0x90 0x1e 0xa3 0x6a 0x09 0x6e VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5790 0x71 0x01 0x7e 0x1c 0x96 0x00 0x00 0x7c 0x38 0x07 0xa0 0x06 0x29 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5804 0x61 0x91 0x61 0x55 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5814 0x6a 0xc1 0x61 0x92 0x07 0xc4 0xa1 0x2a 0x29 0x4a MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5824 0xeb 0x81 0x62 0x92 0x03 0xc4 0xa3 0x6a 0x09 0x4a MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5834 0xb3 0x91 0x6f 0x57 0x22 0x8f 0x00 0xe6 0xa0 0x06 0x29 0x66 PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5846 0x93 0x91 0x62 0x06 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 0x02 0x92 0x03 0xc6 0xa1 0x2a 0x29 0x62 VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5864 0x01 0x92 0x07 0xc6 0xa3 0x6a 0x09 0x62 VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 + 5872 0x1f 0x8b 0x00 0xf8 MOV dj7, dj5 + 5876 0x03 0x0b 0xa0 0xe6 0xa1 0x2a 0x29 0x62 MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 + 5884 0x03 0x88 0xa0 0xe6 0xa0 0x06 0x29 0x62 MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 + 5892 0x00 0xf7 0x23 0x05 0x00 0xe6 0xa3 0x6a 0x09 0x4a PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 + 5902 0x71 0x89 0x6e 0xd7 0x25 0x82 0xa0 0xe6 0xa2 0x46 0x09 0x66 PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 + 5914 0x62 0x89 0x60 0x03 0xc5 0x80 0x70 0x02 MOVS dc3, dc5; MOV dj7, dj5 + 5922 0xa0 0x41 0x60 0x01 0x81 0x00 0x70 0x02 MOVS dc5, r2; MOV m3, m1 + 5930 0xb2 0x12 0xc0 0x00 0x87 0x50 0x70 0x02 VCONV.bf16.fp32 x11, cml1; MOV m1, r29 + 5938 0xa2 0x02 0xc0 0x02 0xc7 0x90 0x70 0x02 VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 + 5946 0x13 0x91 0x61 0x3b 0x90 0x01 0xc8 0x60 0x76 0xba PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 + 5956 0x62 0x0a 0xc0 0x00 0x83 0x00 0x70 0x02 VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 + 5964 0x52 0x22 0xc0 0x57 0x20 0x24 0x02 0xfa 0x00 0x00 0x60 0x36 PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 +.delay_slot + 5976 0x72 0x1a 0xc0 0x00 0xa9 0x60 0x70 0x02 VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 +.delay_slot + 5984 0x82 0x32 0xc0 0x03 0xa7 0xc0 0x70 0x02 VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 +.delay_slot + 5992 0x12 0x3a 0xc5 0x2b 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 +.delay_slot + 6002 0x22 0x2a 0xc0 0x02 0xc2 0x80 0x70 0x02 VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 +.delay_slot + 6010 0xe1 0x89 0x60 0x00 0x4d 0xc0 0x70 0x02 MOVS dc7, dc3; MOV r2, dc5 + 6018 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 6022 0x1b 0xbc 0xec 0xf8 VMAX_LT.bf16 x7, r16, x7, x9 + 6026 0x3c 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 + 6034 0xa2 0xba 0x60 0x01 0xda 0x76 0x70 0x02 VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 + 6042 0x20 0xd2 0x60 0x00 0x02 0xfe 0x00 0x00 0x21 0x3a VST x10, [p1]; J #6128 +.delay_slot + 6052 0x22 0xba 0x60 0x02 0xa2 0x76 0x70 0x02 VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 +.delay_slot + 6060 0x1b 0x8c 0xec 0xf8 VMAX_LT.bf16 x7, r16, x1, x9 +.delay_slot + 6064 0x00 0xd2 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 +.delay_slot + 6072 0x02 0xba 0x60 0x00 0x8a 0x76 0x70 0x02 VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 +.delay_slot + 6080 0x00 0x2c 0xf0 0x00 0x24 0xa2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 + 6096 0x09 0xe0 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p1, dj7] + 6100 0x0d 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p5, #64] + 6104 0x09 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p1] + 6108 0x09 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p1, #64] + 6112 0x08 0x06 0x13 0x18 VST x8, [p0] + 6116 0x08 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p0, #64] + 6120 0x94 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 + 6128 0xe2 0x92 0x6f 0x57 0x20 0x06 0x35 0x01 0x40 0x00 0x58 0x36 PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 +.delay_slot + 6140 0x1b 0x44 0x80 0xf8 MOV dn3, dn2 +.delay_slot + 6144 0x1a 0x49 0xa0 0xf8 MOV dn2, r19 +.delay_slot + 6148 0xeb 0x72 0x05 0x1e 0x01 0xf4 PADDB.3D [p7], d2; MOV dj2, dj7 +.delay_slot + 6154 0x1a 0x4e 0x80 0xf8 MOV dn2, dn7 +.delay_slot +.swstall delay_slot + 6158 0x00 0x00 NOPX +.loop_nesting 0 + 6160 0x00 0x0d 0x58 0x00 0x00 0x84 J #6832 +.delay_slot +.swstall delay_slot + 6166 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6168 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6170 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6172 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6174 0x00 0x00 NOPX +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 + 6176 0xfb 0x7e 0x22 0x0c 0x8b 0x04 0xe1 0x08 0xb3 0x90 0x78 0x76 LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 + 6188 0x07 0x90 0x82 0x56 0x0b 0x1b 0xd4 0x03 0x62 0x40 0x78 0x76 MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 + 6200 0x07 0x94 0x00 0x19 0x31 0x89 0x05 0xd0 0x78 0xba MOVA r20, #60; MOVX r19, #780; MOV m2, r23 + 6210 0xef 0x98 0x86 0x5c 0x0b 0x01 0x20 0xca 0xc7 0x90 0x78 0x76 MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 + 6222 0xfa 0x83 0x25 0x02 0x0b 0x01 0x90 0x08 0x87 0x50 0x78 0x76 LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 + 6234 0xfb 0xd6 0x20 0x01 0x80 0x0b 0x45 0x50 0x78 0xba LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 + 6244 0xfc 0x36 0x20 0x00 0x00 0x05 0x34 0x58 0x10 0xba LDA r13, [sp, #-32]; MOVXM p2, #6320 + 6254 0xfc 0x87 0x26 0xdf 0x72 0x94 LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 + 6260 0x03 0x1d 0xc6 0x98 LDA dj3, [p3], #4 + 6264 0x03 0x8a 0x06 0x98 LDA m4, [p3], m4 + 6268 0x03 0x9e 0x86 0x98 LDA m5, [p3], #-28 + 6272 0x03 0x1e 0xd6 0x98 LDA r22, [p3], #4 + 6276 0x03 0xca 0xf6 0x98 LDA r23, [p3], m6 + 6280 0x03 0x17 0xb6 0x98 LDA r29, [p3, #4] + 6284 0x03 0x07 0x96 0x98 LDA r28, [p3] + 6288 0x00 0x00 NOPX + 6290 0x1f 0x98 0x00 0xf8 MOV r30, m4 + 6294 0x1e 0x07 0x00 0xf8 MOV m6, dj3 + 6298 0x1f 0xdc 0x00 0xf8 MOV r31, m6 + 6302 0x1b 0x0f 0xe0 0x18 ADD.NC m3, r31, #-64 + 6306 0xef 0x7f 0xee 0x1e 0xc0 0x24 ADD r29, r29, #-1; ADD.NC m7, r30, #-64 + 6312 0x00 0x2b 0x60 0x03 0xc7 0x90 0x70 0x02 NOPS; MOV dj7, r30 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.loop_nesting 1 + 6320 0xc3 0x85 0x7a 0x28 0x28 0x00 0x00 0x8f 0x96 0x02 0x71 0x81 0x60 0x7e VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 + 6334 0xcd 0x0d 0x7a 0x28 0x28 0x00 0x00 0x05 0xbc 0xc8 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 + 6346 0x02 0x81 0x76 0x05 0x28 0x05 0xe9 0x6e 0xbf 0x3f 0x48 0xb6 VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 + 6358 0x55 0x59 0x73 0x01 0x14 0x01 0x47 0x90 0x7e 0xba VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 + 6368 0xc3 0x95 0x76 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] + 6374 0xdd 0x1d 0x7a 0x21 0xa8 0x3c VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] + 6380 0xc3 0xa5 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] + 6386 0xcd 0x2d 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] + 6392 0xc3 0xb5 0x76 0x00 0xa8 0x3c VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] + 6398 0xdd 0x3d 0x76 0x03 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] + 6404 0x68 0x45 0x76 0x03 0xa8 0x3c VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] + 6410 0x68 0x4d 0x75 0x12 0x14 0x01 0x69 0x2d 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 + 6420 0x02 0x81 0x75 0x14 0x14 0x02 0xa9 0x2f 0xee 0xba VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 + 6430 0x55 0x01 0x7a 0x28 0x2a 0x11 0xdb 0xc2 0x48 0x0b 0x69 0x66 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 + 6442 0x02 0x81 0x75 0x11 0xdf 0xc2 0x49 0x35 0x69 0x4a VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 + 6452 0x4a 0x49 0x69 0x48 VMAC.f dm2, dm2, ex4, ex11, r9 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6456 0x4b 0x75 0x69 0x48 VMAC.f dm3, dm3, ex10, ex11, r9 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6460 0x06 0x00 0xaa 0x8b 0x5f 0xc6 0xa1 0x84 0x3d 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6470 0x03 0x01 0x94 0x00 0xa0 0x80 0x3d 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6478 0x03 0x01 0xd4 0x00 0xa2 0x88 0x3d 0x62 VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6486 0x55 0x01 0x75 0x12 0x14 0x1d 0xa3 0x8c 0x3d 0x4a VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6496 0xa2 0x82 0x82 0x16 0xb7 0xb4 VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6502 0x0a 0x28 0x2a 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x4a VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6512 0x06 0x00 0xa9 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6522 0x03 0x01 0x94 0x00 0x9b 0x68 0x09 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6530 0x02 0x81 0x76 0x03 0xa8 0x00 0x00 0x00 0x05 0x6c 0x9a 0x46 0x09 0x6e VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6544 0x55 0x01 0x7a 0x24 0x28 0x01 0x5b 0x00 0x00 0x01 0x45 0xaf 0xe8 0x00 0x00 0xe1 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV +.loop_nesting 1 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6560 0x07 0x0c 0xff 0x97 0x25 0x9c 0x8b 0x00 0x85 0xad 0xe0 0xf6 PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6572 0x93 0x91 0x6f 0x17 0x22 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x66 PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6584 0x73 0x91 0x6f 0x97 0x21 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x66 PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6596 0x02 0x88 0xa0 0xe6 0x9b 0x68 0x09 0x62 MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6604 0x02 0xb7 0x20 0x9b 0x80 0xe6 0x9a 0x46 0x09 0x4a PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 + 6614 0x19 0x0b 0x5b 0xd8 VSHUFFLE ex2, ex1, ex6, r22 + 6618 0x1a 0x8b 0x5f 0xd8 VSHUFFLE ex5, ex1, ex6, r23 + 6622 0x01 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x62 VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 + 6630 0x02 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x62 VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 + 6638 0x9a 0x46 0x09 0x48 VMAC.f dm2, dm2, ex3, ex0, r19 + 6642 0x9b 0x68 0x09 0x48 VMAC.f dm3, dm3, ex4, ex0, r19 + 6646 0x00 0x00 NOPX + 6648 0x00 0x00 NOPX + 6650 0x0d 0x10 0x16 0x18 VCONV.bf16.fp32 x10, cml0 + 6654 0x0d 0x90 0x96 0x18 VCONV.bf16.fp32 x11, cml1 + 6658 0x12 0x1a 0xc0 0x2a 0x03 0x4e 0x00 0x00 0x61 0x3a VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 +.delay_slot + 6668 0x0b 0x10 0x56 0x18 VCONV.bf16.fp32 x6, cmh0 +.delay_slot + 6672 0x09 0x11 0x96 0x18 VCONV.bf16.fp32 x2, cml3 +.delay_slot + 6676 0x0b 0x91 0xd6 0x18 VCONV.bf16.fp32 x7, cmh3 +.delay_slot + 6680 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2 +.delay_slot + 6684 0x0c 0x11 0x56 0x18 VCONV.bf16.fp32 x8, cmh2 + 6688 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 6692 0x18 0x8c 0xec 0xf8 VMAX_LT.bf16 x1, r16, x1, x9 + 6696 0xac 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 + 6704 0x82 0x8a 0x60 0x00 0x5a 0x76 0x70 0x02 VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 + 6712 0xa0 0xd2 0x60 0x00 0x03 0x52 0x00 0x00 0x21 0x3a VST x10, [p5]; J #6800 +.delay_slot + 6722 0xa2 0x8a 0x60 0x02 0x8a 0x76 0x70 0x02 VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 +.delay_slot + 6730 0x18 0xbc 0xec 0xf8 VMAX_LT.bf16 x1, r16, x7, x9 +.delay_slot + 6734 0x6c 0x52 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 +.delay_slot + 6742 0x00 0x2c 0xf7 0x14 0x53 0x02 0x22 0x76 0x72 0xba NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 +.delay_slot + 6752 0x00 0x2c 0xf0 0x00 0x24 0xe2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 + 6768 0x0d 0x60 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p5, dj3] + 6772 0x0c 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p4, #64] + 6776 0x0d 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p5] + 6780 0x0d 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p5, #64] + 6784 0x0b 0x61 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p3, dj3] + 6788 0x0f 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p7, #64] + 6792 0x9c 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 + 6800 0x62 0xc2 0x6e 0xf5 0x40 0x5c VST x8, [p3, #64]; JNZD r29, r29, p2 +.delay_slot + 6806 0x3f 0x8b 0x90 0x18 PADDB [p7], m4 +.delay_slot +.swstall delay_slot + 6810 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6812 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6814 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6816 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6832 0x07 0xed 0xf1 0x18 LDA r15, [sp, #-20] + 6836 0x07 0xf1 0x91 0x18 LDA r12, [sp, #-16] + 6840 0x07 0xf5 0x31 0x18 LDA r9, [sp, #-12] + 6844 0x07 0xeb 0x19 0x18 LDA p6, [sp, #-24] + 6848 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 6852 0x07 0xfd 0xd1 0x18 LDA r14, [sp, #-4] + 6856 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 6860 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 6866 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6868 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6870 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6872 0x00 0x00 NOPX +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + +.text_segment PM 6880 +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 6880 0x00 0x20 0x00 0x00 0x01 0xf2 0x32 0x20 0x10 0xba MOVA r0, #1; MOVXM p4, #508992 + 6890 0x80 0xc2 0xd0 0x00 0x10 0x08 0x4b 0xd0 0x78 0xba LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 + 6900 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6906 0x0f 0xf0 0x55 0x98 ST r2, [sp, #-16] + 6910 0x00 0x00 NOPX + 6912 0x00 0x00 NOPX + 6914 0x00 0x00 NOPX + 6916 0x00 0x00 NOPX + 6918 0x80 0x0d 0xd8 0x40 0x01 0x84 JNZ r16, #7088 +.delay_slot + 6924 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 6928 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4] +.delay_slot + 6932 0x0f 0xed 0x9d 0x98 ST p3, [sp, #-20] +.delay_slot + 6936 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 6940 0x00 0x07 0xc7 0xac 0x00 0x44 MOVXM r15, #509440 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6946 0xd0 0x91 0x60 0x00 0x01 0xf3 0xb2 0x34 0x11 0x3a MOVS p6, p1; MOVXM p7, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6956 0xe0 0xc0 0xe1 0x8f 0x0b 0x00 0x01 0xf3 0xb2 0x32 0x10 0x76 ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6968 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6970 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6972 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6978 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6980 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 6984 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 6988 0xe0 0xc2 0x30 0x03 0xb0 0x60 0x70 0x02 ST r16, [p7]; MOV p7, p0 +.delay_slot + 6996 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x32 0x60 0x70 0xf6 NOPA; NOPB; NOPS; MOV p0, p2 +.return_address + 7008 0x1a 0x67 0x85 0x98 ADD.NC p2, r15, #11 + 7012 0x4f 0xc1 0x50 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA.u8 r16, [p2], #7; MOVXM p1, #508996 + 7022 0x43 0xcf 0x50 0x00 0x01 0xf0 0x32 0x30 0x10 0xba LDA.u16 r19, [p2], #2; MOVXM p0, #509024 + 7032 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 7036 0x00 0x00 NOPX + 7038 0x02 0x16 0x5a 0x98 LDA.u16 r18, [p2, #2] + 7042 0x00 0x00 NOPX + 7044 0x00 0x00 NOPX + 7046 0x20 0xc2 0x30 0x00 0xb6 0x60 0x70 0x02 ST r16, [p1]; MOV p1, p6 + 7054 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16 + 7058 0x00 0x00 NOPX + 7060 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 7064 0x00 0x00 NOPX + 7066 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16 + 7070 0x00 0x00 NOPX + 7072 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 7088 0xfd 0xbe 0x20 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r15, [sp, #-20]; MOVXM p6, #509000 + 7098 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x26 0x10 0xba LDA r16, [p6]; MOVXM p2, #509004 + 7108 0x40 0xc6 0xd0 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba LDA r17, [p2]; MOVXM p7, #508992 + 7118 0x07 0x06 0x56 0x98 LDA r18, [p7] + 7122 0x00 0x00 NOPX + 7124 0x00 0x00 NOPX + 7126 0x00 0x00 NOPX + 7128 0x00 0x00 NOPX + 7130 0x80 0x0e 0x18 0x40 0x01 0x84 JNZ r16, #7216 +.delay_slot + 7136 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 7140 0x40 0xc6 0x39 0x44 0x0e 0x5c ST r17, [p2]; ADD r17, r18, #1 +.delay_slot + 7146 0x14 0x26 0x07 0x18 ADD r19, r16, #1 +.delay_slot + 7150 0x0e 0x06 0x71 0x98 ST r19, [p6] +.delay_slot + 7154 0x0f 0x06 0x31 0x98 ST r17, [p7] + 7158 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 7162 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 7166 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 7170 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7174 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7176 0x02 0x46 0x16 0x98 LDA r16, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7180 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7182 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7184 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7186 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7188 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 0x0a 0x06 0x11 0x98 ST r16, [p2] + 7196 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 7200 0x00 0x00 NOPX + 7202 0x00 0x00 NOPX + 7204 0x00 0x00 NOPX + 7206 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17 +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.no_stack_arguments + 7216 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464 +.delay_slot + 7222 0x00 0x07 0xc6 0xcc 0x00 0x44 MOVXM p3, #509440 +.delay_slot +.swstall delay_slot + 7228 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7230 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7232 0x00 0x00 NOPX +.delay_slot + 7234 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x53 0x3d 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p2, r15; NOPV +.return_address + 7248 0xc0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r16, [p6]; MOVXM p1, #508996 + 7258 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7262 0x07 0xf0 0x11 0x18 LDA r0, [sp, #-16] + 7266 0x00 0x00 NOPX + 7268 0x00 0x00 NOPX + 7270 0x00 0x00 NOPX + 7272 0x00 0x00 NOPX + 7274 0x00 0x00 NOPX + 7276 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 7280 0x80 0x0e 0x60 0x40 0x01 0x84 JNZ r16, #7360 +.delay_slot + 7286 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 7290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7294 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7296 0x00 0x00 NOPX + 7298 0x04 0x00 0xa2 0xcf 0x14 0x24 MOVX r16, #1; ADD.NC p1, r15, #20 + 7304 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7308 0x00 0x00 NOPX + 7310 0x00 0x00 NOPX + 7312 0x00 0x00 NOPX + 7314 0x00 0x00 NOPX + 7316 0x00 0x00 NOPX + 7318 0x00 0x00 NOPX + 7320 0x14 0x51 0x08 0x18 REL r17, r16 + 7324 0x3c 0xc6 0xdc 0x0e 0x23 0x0c LDA r17, [p1, #-8]; ST r24, [p6] + 7330 0x00 0x00 NOPX + 7332 0x00 0x00 NOPX + 7334 0x00 0x00 NOPX + 7336 0x00 0x00 NOPX + 7338 0x00 0x00 NOPX + 7340 0x00 0x00 NOPX + 7342 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 7346 0x00 0x2c 0xf3 0xcc 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p1, #-8]; NOPM; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 + 7360 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p6, #509024 + 7370 0x06 0x06 0x36 0x98 LDA r17, [p6] + 7374 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8] + 7378 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 7382 0x00 0x00 NOPX + 7384 0x00 0x00 NOPX + 7386 0x00 0x00 NOPX + 7388 0x00 0x00 NOPX + 7390 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 7394 0x80 0x0e 0x80 0x40 0x01 0x84 JNZ r16, #7424 +.delay_slot +.swstall delay_slot + 7400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7404 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7406 0x00 0x00 NOPX +.delay_slot + 7408 0x1b 0xd0 0x20 0xf8 MOV r15, r0 + 7412 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7424 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] + 7428 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7432 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7442 0x00 0x00 NOPX +.delay_slot + 7444 0x0f 0x84 0x8b 0x18 MOVS p7, p1 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 7456 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function_start + 7456 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64 +.delay_slot + 7462 0x18 0x50 0xc0 0xf8 MOV r1, p0 +.delay_slot + 7466 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32 +.delay_slot + 7470 0x08 0x04 0x11 0x98 ST r0, [p0] +.delay_slot + 7474 0x08 0x14 0x11 0x98 ST r0, [p0, #4] +.delay_slot +.swstall delay_slot + 7478 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 7488 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 7488 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7492 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7498 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] + 7502 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4] + 7506 0x00 0x00 NOPX + 7508 0x00 0x00 NOPX + 7510 0x00 0x00 NOPX + 7512 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7516 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7520 0x00 0x00 NOPX + 7522 0x00 0x00 NOPX + 7524 0x00 0x00 NOPX + 7526 0x00 0x00 NOPX + 7528 0x00 0x00 NOPX + 7530 0x00 0x00 NOPX + 7532 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7536 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 7540 0x00 0x00 NOPX + 7542 0x00 0x00 NOPX + 7544 0x00 0x00 NOPX + 7546 0x00 0x00 NOPX + 7548 0x00 0x00 NOPX + 7550 0x00 0x00 NOPX + 7552 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7556 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 7560 0x00 0x00 NOPX + 7562 0x00 0x00 NOPX +.no_stack_arguments + 7564 0x00 0x0e 0x90 0x00 0x01 0x04 JL #7456 +.delay_slot +.swstall delay_slot + 7570 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7572 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7574 0x00 0x00 NOPX +.delay_slot + 7576 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 7580 0x1b 0xd0 0xc0 0xf8 MOV r15, p0 +.return_address + 7584 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 7594 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 7604 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 7614 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 7618 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7620 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7622 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7626 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7630 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7634 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7640 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7644 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 7648 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 7664 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function_start + 7664 0x02 0x80 0x80 0x00 0x10 0xc8 0x08 0x60 0x78 0xba MOVA m0, #20; MOVX r1, #6; MOV r0, p0 + 7674 0x00 0x00 0xa0 0xc0 0x0c 0x24 MOVX r0, #1; ADD.NC p0, r0, #12 + 7680 0x00 0x08 0x4a 0x98 LDA.u8 r2, [p0], m0 + 7684 0x00 0x00 NOPX + 7686 0x00 0x00 NOPX + 7688 0x00 0x00 NOPX + 7690 0x00 0x00 NOPX + 7692 0x00 0x00 NOPX + 7694 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7698 0x10 0x80 0x08 0x98 NE r0, r2, r0 +.delay_slot + 7702 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1 +.delay_slot + 7706 0x02 0x82 0x31 0x0d 0xe0 0x5c ST r0, [p0, #4]; NEZ r3, r2 +.delay_slot + 7712 0x10 0xc4 0x1d 0x98 LSHL r2, r3, r1 +.delay_slot + 7716 0x08 0x04 0x51 0x98 ST r2, [p0] +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 7728 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 7728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7734 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 7738 0x00 0x0e 0xa0 0x00 0x01 0x04 JL #7488 +.delay_slot + 7744 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 7748 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 7752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7754 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7756 0x00 0x01 0x67 0x98 NOPA +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7760 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7764 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7768 0x00 0x0e 0xf8 0x00 0x00 0x84 J #7664 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7774 0x18 0x6e 0xc0 0xf8 MOV p0, p7 +.delay_slot + 7778 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7786 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7788 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 7792 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function_start + 7792 0x67 0x82 0xd0 0x00 0x51 0x54 LDA r0, [p3], #12; MOV m0, #20 + 7798 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3 + 7804 0x00 0x00 NOPX + 7806 0x00 0x00 NOPX + 7808 0x00 0x00 NOPX + 7810 0x00 0x00 NOPX + 7812 0x00 0x00 NOPX + 7814 0x00 0x00 NOPX + 7816 0x08 0x0f 0x60 0x40 0x01 0x84 JNZ r1, #7872 +.delay_slot + 7822 0x17 0xc4 0xe9 0x18 MOVX r2, #-6 +.delay_slot + 7826 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2 +.delay_slot +.swstall delay_slot + 7830 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7832 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7834 0x00 0x00 NOPX + 7836 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0] + 7840 0x00 0x00 NOPX + 7842 0x00 0x00 NOPX + 7844 0x00 0x00 NOPX + 7846 0x00 0x0f 0x70 0x00 0x00 0x84 J #7904 +.delay_slot +.swstall delay_slot + 7852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7854 0x00 0x00 NOPX +.delay_slot + 7856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot +.swstall delay_slot + 7860 0x00 0x00 NOPX +.delay_slot + 7862 0x00 0x2c 0xf0 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p0]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 + 7872 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1] + 7876 0x00 0x00 NOPX + 7878 0x00 0x00 NOPX + 7880 0x00 0x00 NOPX + 7882 0x00 0x00 NOPX + 7884 0x00 0x00 NOPX + 7886 0x00 0x00 NOPX + 7888 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 + 7892 0x00 0x00 NOPX + 7894 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 + 7904 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 + 7914 0x62 0x90 0xd0 0x00 0x00 0x04 0x7f 0xa8 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #8016 + 7924 0x00 0x00 0x16 0xfe 0xe0 0x44 MOVXM le, #8048 + 7930 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032 + 7936 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 7940 0x00 0x00 NOPX + 7942 0x00 0x00 NOPX + 7944 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 + 7948 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1 + 7952 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7956 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7960 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7966 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7974 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7978 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7986 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7990 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7998 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8002 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8016 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8020 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8032 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8048 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8064 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8066 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8074 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8076 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8084 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8086 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr +.delay_slot +.swstall delay_slot + 8092 0x00 0x00 NOPX +.delay_slot + 8094 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64 +.delay_slot +.swstall delay_slot + 8098 0x00 0x00 NOPX +.delay_slot + 8100 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64 +.delay_slot +.swstall delay_slot + 8104 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 8112 +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function_start + 8112 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 8118 0xff 0x87 0xb0 0x02 0x0a 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p2 + 8126 0x50 0x91 0x60 0x01 0xb4 0x03 0x00 0x02 MOVS p2, p1; ADD.NC p3, r16, #12 + 8134 0x65 0xed 0x58 0x21 0x81 0xd4 LDA.u8 r27, [p3], #2; MOV r16, p0 + 8140 0x73 0xca 0x58 0xab 0xc1 0xd4 LDA.s16 r18, [p3], #-14; MOV r17, sp + 8146 0x18 0x68 0xc0 0x18 ADD.NC p0, r17, #-128 + 8150 0x08 0x07 0x2b 0x18 VST sfh, [p0] + 8154 0x00 0x06 0x57 0x18 ST.s16 r18, [p0] + 8158 0x00 0x00 NOPX + 8160 0x00 0x00 NOPX +.no_stack_arguments + 8162 0x00 0x0f 0x38 0x00 0x01 0x04 JL #7792 +.delay_slot + 8168 0x1c 0x50 0xc0 0xf8 MOV r17, p0 +.delay_slot +.swstall delay_slot + 8172 0x00 0x00 NOPX +.delay_slot + 8174 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27 +.delay_slot + 8178 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18 +.delay_slot + 8184 0x00 0x2b 0x60 0x00 0x34 0x10 0x70 0x02 NOPS; MOV p0, r16 +.return_address + 8192 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 8196 0x00 0x00 NOPX + 8198 0x00 0x00 NOPX + 8200 0x00 0x00 NOPX + 8202 0x00 0x00 NOPX + 8204 0x00 0x00 NOPX + 8206 0x00 0x00 NOPX + 8208 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8212 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 8218 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8220 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8222 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8224 0x00 0x00 NOPX +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 8240 +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 8240 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 8246 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 8252 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 8258 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 8266 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 8276 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 8280 0x00 0x00 NOPX + 8282 0x00 0x00 NOPX + 8284 0x80 0x10 0x80 0x40 0x01 0x84 JNZ r16, #8448 +.delay_slot + 8290 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 8294 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 8298 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 8302 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 8310 0x00 0x07 0xc0 0xc9 0x80 0x44 MOVXM p0, #509120 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8316 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8322 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8332 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8334 0x00 0x0f 0x18 0x00 0x01 0x04 JL #7728 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8340 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8342 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8344 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 8348 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 8352 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 8368 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 8374 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r16, [p2]; MOVXM p2, #509120 + 8384 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p2, #509120 + 8394 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 + 8404 0x00 0x00 NOPX + 8406 0x00 0x00 NOPX + 8408 0x00 0x10 0x88 0x00 0x00 0x84 J #8464 +.delay_slot + 8414 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024 +.delay_slot +.swstall delay_slot + 8420 0x00 0x00 NOPX +.delay_slot + 8422 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 8426 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 8432 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 8448 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 8464 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 8472 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 8482 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 8486 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 8490 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 8494 0x00 0x00 NOPX + 8496 0x00 0x00 NOPX + 8498 0x00 0x00 NOPX + 8500 0x00 0x00 NOPX + 8502 0x00 0x00 NOPX + 8504 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 8508 0x0f 0x06 0x11 0x98 ST r16, [p7] + 8512 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 8516 0x00 0x00 NOPX + 8518 0x00 0x00 NOPX + 8520 0x00 0x00 NOPX + 8522 0x14 0x93 0x08 0x18 ACQ r18, r16 + 8526 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 8532 0x00 0x00 NOPX + 8534 0x00 0x00 NOPX + 8536 0x00 0x06 0x36 0x98 LDA r17, [p0] + 8540 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 8546 0x01 0x06 0x76 0x98 LDA r19, [p1] + 8550 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 8554 0x00 0x00 NOPX +.no_stack_arguments + 8556 0x00 0x0f 0xd8 0x00 0x01 0x04 JL #8112 +.delay_slot +.swstall delay_slot + 8562 0x00 0x00 NOPX +.delay_slot + 8564 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 8568 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 8572 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 8576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 8592 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 8602 0x10 0x20 0x05 0x18 MOVX r16, #1 + 8606 0x00 0x00 NOPX + 8608 0x00 0x00 NOPX + 8610 0x00 0x00 NOPX + 8612 0x00 0x00 NOPX + 8614 0x00 0x00 NOPX + 8616 0x14 0x51 0x08 0x18 REL r17, r16 + 8620 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024 + 8630 0x06 0x06 0x36 0x98 LDA r17, [p6] + 8634 0x02 0x06 0x56 0x98 LDA r18, [p2] + 8638 0x00 0x00 NOPX + 8640 0x00 0x00 NOPX + 8642 0x00 0x00 NOPX + 8644 0x00 0x00 NOPX + 8646 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 8650 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 8654 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 8658 0x80 0x10 0xf8 0x40 0x01 0x84 JNZ r16, #8688 +.delay_slot +.swstall delay_slot + 8664 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8666 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8668 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8670 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8672 0x00 0x00 NOPX + 8674 0x10 0x20 0x01 0x18 MOVX r16, #0 + 8678 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 8688 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 8692 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 8696 0x00 0x00 NOPX + 8698 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8700 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8702 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8706 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8708 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8712 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 8716 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 8722 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8724 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8726 0x00 0x00 NOPX +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 8736 +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function_start + 8736 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8740 0x00 0x00 NOPX + 8742 0x00 0x00 NOPX + 8744 0x00 0x00 NOPX + 8746 0x00 0x00 NOPX + 8748 0x00 0x00 NOPX + 8750 0x00 0x00 NOPX + 8752 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 8756 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8760 0x00 0x00 NOPX + 8762 0x00 0x00 NOPX + 8764 0x00 0x00 NOPX + 8766 0x00 0x00 NOPX + 8768 0x00 0x00 NOPX + 8770 0x00 0x00 NOPX + 8772 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 8776 0x01 0x6c 0x2e 0x98 LDA el0, [p1], #24 + 8780 0x01 0x04 0x12 0x98 LDA.s16 r0, [p1] + 8784 0x00 0x00 NOPX + 8786 0x00 0x00 NOPX + 8788 0x00 0x00 NOPX + 8790 0x00 0x00 NOPX + 8792 0x00 0x00 NOPX + 8794 0x08 0x6c 0x29 0x98 ST el0, [p0], #24 + 8798 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] + 8802 0x00 0x00 NOPX + 8804 0x00 0x00 NOPX + 8806 0x00 0x00 NOPX + 8808 0x00 0x00 NOPX + 8810 0x00 0x00 NOPX + 8812 0x00 0x00 NOPX + 8814 0x01 0x24 0x12 0x98 LDA.s16 r0, [p1, #4] + 8818 0x00 0x14 0x17 0x18 ST.s16 r0, [p0, #2] + 8822 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 8826 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8828 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8830 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8832 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8834 0x00 0x00 NOPX +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + +.text_segment PM 8848 +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function_start + 8848 0xfb 0xc2 0x80 0x3a 0x68 0x00 0x00 0x08 0x79 0x88 0x10 0xb6 MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 + 8860 0xff 0x51 0x00 0x39 0x68 0x00 0x00 0x09 0xb9 0xa0 0x10 0xb6 MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 + 8872 0x18 0x14 0xc0 0xf8 MOV r0, p2 + 8876 0x1a 0x60 0x10 0x18 ADD.NC p2, r0, #32 + 8880 0x02 0x1c 0x52 0x98 LDA.s16 r2, [p2], #2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8884 0x02 0x00 0x16 0x98 LDA r0, [p2, dj0] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8888 0x40 0x86 0x50 0x3a 0x68 0x3c LDA.s16 r1, [p2]; VLDB x4, [p0], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8894 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8898 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8900 0x38 0x1c 0xb4 0x18 VLDB x2, [p0], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 0x18 0x09 0x72 0xf8 VBCST.16 x0, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8908 0x00 0x3a 0x68 0x01 0x18 0xed 0x50 0x36 0x78 0x3a VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8918 0x1d 0x78 0xfe 0x98 ADD.NC lc, r17, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8922 0x18 0x85 0x72 0xf8 VBCST.16 x1, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8926 0x19 0xa8 0xac 0xf8 VMIN_GE.bf16 x3, r16, x5, x1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 0x00 0x2c 0xf0 0x39 0x68 0x00 0x00 0x31 0x06 0xcf 0x00 0x2b 0x60 0x7e NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8944 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8976 0x00 0x2c 0xf0 0x3a 0x69 0x1d 0xd3 0x00 0x00 0x00 0xd4 0x56 0x78 0x00 0x00 0xe1 NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8992 0x00 0x2c 0xf0 0x39 0x68 0x01 0x5b 0x00 0x00 0x01 0x88 0x36 0x78 0x00 0x00 0xe1 NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9008 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9040 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9048 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0 + 9052 0x23 0x9a 0x60 0x01 0xd8 0x56 0x70 0x02 VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 + 9060 0x05 0x00 0x05 0x40 0xd9 0xe4 RET lr; VMAX_LT.bf16 x5, r16, x4, x0 +.delay_slot + 9066 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 +.delay_slot + 9074 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0 +.delay_slot + 9078 0x1b 0xb0 0xac 0xf8 VMIN_GE.bf16 x7, r16, x6, x1 +.delay_slot + 9082 0x09 0x1c 0xd3 0x18 VST x3, [p1], #64 +.delay_slot + 9086 0x09 0x1d 0xd3 0x18 VST x7, [p1], #64 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + +.text_segment PM 9104 +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 9104 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 9110 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 9116 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9122 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 9130 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 9140 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 9144 0x00 0x00 NOPX + 9146 0x00 0x00 NOPX + 9148 0x80 0x12 0x30 0x40 0x01 0x84 JNZ r16, #9312 +.delay_slot + 9154 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 9158 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 9162 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 9166 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 9174 0x00 0x07 0xc0 0xcb 0x80 0x44 MOVXM p0, #509376 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9180 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9186 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9196 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9198 0x00 0x11 0x10 0x00 0x01 0x04 JL #8736 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9204 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9206 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 9212 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 9216 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 9232 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 9238 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0xe0 0x10 0xba LDA r16, [p2]; MOVXM p2, #509376 + 9248 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0xe0 0x10 0xba LDA r17, [p2]; MOVXM p2, #509376 + 9258 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 + 9268 0x00 0x00 NOPX + 9270 0x00 0x00 NOPX + 9272 0x00 0x12 0x38 0x00 0x00 0x84 J #9328 +.delay_slot + 9278 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024 +.delay_slot +.swstall delay_slot + 9284 0x00 0x00 NOPX +.delay_slot + 9286 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 9290 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 9296 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 9312 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 9328 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 9336 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 9346 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 9350 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 9354 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 9358 0x00 0x00 NOPX + 9360 0x00 0x00 NOPX + 9362 0x00 0x00 NOPX + 9364 0x00 0x00 NOPX + 9366 0x00 0x00 NOPX + 9368 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 9372 0x0f 0x06 0x11 0x98 ST r16, [p7] + 9376 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 9380 0x00 0x00 NOPX + 9382 0x00 0x00 NOPX + 9384 0x00 0x00 NOPX + 9386 0x14 0x93 0x08 0x18 ACQ r18, r16 + 9390 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 9396 0x00 0x00 NOPX + 9398 0x00 0x00 NOPX + 9400 0x00 0x06 0x36 0x98 LDA r17, [p0] + 9404 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 9410 0x01 0x06 0x76 0x98 LDA r19, [p1] + 9414 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 9418 0x00 0x00 NOPX +.no_stack_arguments + 9420 0x00 0x11 0x48 0x00 0x01 0x04 JL #8848 +.delay_slot +.swstall delay_slot + 9426 0x00 0x00 NOPX +.delay_slot + 9428 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 9432 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 9436 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 9440 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 9456 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 9466 0x10 0x20 0x05 0x18 MOVX r16, #1 + 9470 0x00 0x00 NOPX + 9472 0x00 0x00 NOPX + 9474 0x00 0x00 NOPX + 9476 0x00 0x00 NOPX + 9478 0x00 0x00 NOPX + 9480 0x14 0x51 0x08 0x18 REL r17, r16 + 9484 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024 + 9494 0x06 0x06 0x36 0x98 LDA r17, [p6] + 9498 0x02 0x06 0x56 0x98 LDA r18, [p2] + 9502 0x00 0x00 NOPX + 9504 0x00 0x00 NOPX + 9506 0x00 0x00 NOPX + 9508 0x00 0x00 NOPX + 9510 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 9514 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 9518 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 9522 0x80 0x12 0xa8 0x40 0x01 0x84 JNZ r16, #9552 +.delay_slot +.swstall delay_slot + 9528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9530 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9534 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9536 0x00 0x00 NOPX + 9538 0x10 0x20 0x01 0x18 MOVX r16, #0 + 9542 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 9552 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 9556 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 9560 0x00 0x00 NOPX + 9562 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9564 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9566 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9570 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9572 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9576 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 9580 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 9586 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9588 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9590 0x00 0x00 NOPX +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 9600 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function_start + 9600 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 + 9610 0x17 0x80 0x01 0x18 MOVX r0, #-128 + 9614 0x00 0x00 NOPX + 9616 0x00 0x00 NOPX + 9618 0x00 0x00 NOPX + 9620 0x00 0x00 NOPX + 9622 0x00 0x00 NOPX + 9624 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9628 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9632 0x00 0x00 NOPX + 9634 0x00 0x00 NOPX + 9636 0x00 0x00 NOPX + 9638 0x00 0x00 NOPX + 9640 0x00 0x00 NOPX + 9642 0x00 0x00 NOPX + 9644 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9648 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 9652 0x00 0x00 NOPX + 9654 0x00 0x00 NOPX + 9656 0x00 0x00 NOPX + 9658 0x00 0x00 NOPX + 9660 0x00 0x00 NOPX + 9662 0x00 0x00 NOPX + 9664 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9668 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 9672 0x00 0x00 NOPX + 9674 0x00 0x00 NOPX + 9676 0x00 0x00 NOPX + 9678 0x00 0x00 NOPX + 9680 0x00 0x00 NOPX + 9682 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9684 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9688 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9692 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9696 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9702 0x10 0xc2 0x14 0x98 AND r1, r3, r1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9706 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9710 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 9714 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 9728 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 9728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9734 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 9738 0x00 0x12 0xc0 0x00 0x01 0x04 JL #9600 +.delay_slot + 9744 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] +.delay_slot + 9748 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 9752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9754 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9756 0x00 0x01 0x67 0x98 NOPA +.return_address + 9760 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 9764 0x00 0x00 NOPX + 9766 0x00 0x00 NOPX + 9768 0x00 0x00 NOPX + 9770 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9772 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9774 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9778 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9782 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9786 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9788 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9792 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 9808 +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function_start + 9808 0x18 0x16 0xc0 0xf8 MOV r0, p3 + 9812 0x1b 0x60 0x07 0x18 ADD.NC p3, r0, #14 + 9816 0x03 0x1c 0x52 0x98 LDA.s16 r2, [p3], #2 + 9820 0x03 0x04 0x96 0x98 LDA r4, [p3] + 9824 0x00 0x00 NOPX + 9826 0x00 0x00 NOPX + 9828 0x00 0x00 NOPX + 9830 0x00 0x00 NOPX + 9832 0x10 0x06 0x09 0x18 MOVX r3, #2 + 9836 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 9842 0x10 0xc6 0x4c 0x98 LTU r3, r3, r4 + 9846 0x00 0x01 0x00 0x06 0x04 0xe2 0x10 0x00 0x60 0xba MOVA r1, #0; JNZ r3, #10000 +.delay_slot + 9856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot + 9860 0x18 0x5e 0xc0 0xf8 MOV r1, p7 +.delay_slot + 9864 0x1f 0x65 0xe0 0xf8 MOV p7, sp +.delay_slot + 9868 0xff 0xf2 0x0a 0xdd 0x81 0xf4 PADDB [p7], #-64; MOV p5, p7 +.delay_slot + 9874 0x0f 0x04 0x13 0x18 VST x0, [p7] + 9878 0x01 0x82 0x84 0x80 0x0b 0x00 0x04 0xb9 0x72 0xba MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 + 9888 0x80 0x01 0x54 0x01 0x01 0x54 LDA.u8 r0, [p4, dj0]; MOV m2, #64 + 9894 0x00 0x00 NOPX + 9896 0x00 0x00 NOPX + 9898 0x00 0x00 NOPX + 9900 0x00 0x00 NOPX + 9902 0x00 0x00 NOPX + 9904 0x00 0x00 NOPX + 9906 0x00 0x13 0x70 0x40 0x01 0x84 JNZ r0, #9952 +.delay_slot + 9912 0x18 0x00 0x00 0xb8 MOV m0, #0 +.delay_slot + 9916 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032 +.delay_slot +.swstall delay_slot + 9922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9926 0x00 0x00 NOPX + 9928 0x00 0x04 0x80 0x00 0x04 0xde 0x00 0x00 0x20 0xba MOVA m1, #0; J #9968 +.delay_slot +.swstall delay_slot + 9938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9940 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9942 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9944 0x00 0x00 NOPX +.delay_slot + 9946 0x00 0x2c 0xf0 0x08 0x26 0x0c NOPA; VST x0, [p0] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 9952 0x19 0x00 0x80 0xb8 MOV m1, #64 + 9956 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0 +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 9968 0x00 0x13 0xc8 0x00 0x00 0x84 J #10128 +.delay_slot + 9974 0x13 0x91 0x60 0x03 0xb0 0x60 0x70 0x02 MOVS p0, p7; MOV p7, p0 +.delay_slot +.swstall delay_slot + 9982 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9984 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9986 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9988 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 + 10000 0x10 0x04 0x0d 0x18 MOVX r2, #3 + 10004 0x10 0x84 0x47 0x98 EQ r2, r2, r4 + 10008 0x10 0x13 0xa0 0x40 0x01 0x84 JNZ r2, #10048 +.delay_slot + 10014 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216 +.delay_slot + 10020 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032 +.delay_slot +.swstall delay_slot + 10026 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10028 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10030 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10032 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10048 0x80 0x80 0x50 0x02 0xd2 0x00 0x47 0xbe 0x58 0xba LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10058 0x18 0x00 0x80 0xb8 MOV m0, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10062 0x19 0x00 0x00 0xb8 MOV m1, #0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10066 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10070 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10072 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10076 0xa0 0x02 0xe2 0x01 0x25 0xd4 ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10082 0x10 0x3a 0x80 0x18 MOVX crRnd, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10086 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10090 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10092 0x18 0x01 0x01 0xb8 VEXTRACT.16 r0, x0, #0, vaddSign0 + 10096 0x00 0x00 NOPX + 10098 0x00 0x00 NOPX + 10100 0x05 0x00 0x12 0x98 LDA.s16 r0, [p5, dj0] + 10104 0x00 0x00 NOPX + 10106 0x00 0x00 NOPX + 10108 0x00 0x00 NOPX + 10110 0x00 0x00 NOPX + 10112 0x00 0x00 NOPX + 10114 0x00 0x00 NOPX + 10116 0x18 0x01 0x72 0xf8 VBCST.16 x0, r0 + 10120 0x00 0x00 NOPX + 10122 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 + 10128 0x78 0x8a 0xde 0x50 0xe8 0x00 0x00 0x08 0x7c 0x00 0x10 0xb6 LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10140 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x09 0xbc 0x18 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10152 0x01 0x05 0x7e 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 0x80 0x90 0x52 0x90 0x68 0x3c LDA.s8 r4, [p4]; VLDB x0, [p1], m2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10166 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10168 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10172 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10174 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10178 0x05 0x71 0x7e 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10186 0x29 0x03 0x7e 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p7], m1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x09 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10240 0x29 0x03 0x7e 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10304 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10310 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10314 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10316 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10320 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10322 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10326 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 10330 0x1f 0x60 0xa0 0xf8 MOV p7, r1 +.delay_slot +.swstall delay_slot + 10334 0x00 0x00 NOPX +.delay_slot + 10336 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 10340 0x00 0x00 NOPX +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + +.text_segment PM 10352 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function_start + 10352 0x01 0x82 0x83 0x88 0x8b 0x00 0x60 0xf0 0x72 0xba MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr + 10362 0x40 0x01 0x54 0xc5 0x81 0xd4 LDA.u8 r0, [p2, dj0]; MOV p2, p1 + 10368 0x00 0x00 NOPX + 10370 0x00 0x00 NOPX + 10372 0x00 0x00 NOPX + 10374 0x00 0x00 NOPX + 10376 0x00 0x00 NOPX + 10378 0x00 0x00 NOPX + 10380 0x00 0x14 0x68 0x00 0x01 0x84 JZ r0, #10448 +.delay_slot + 10386 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 10392 0x18 0x55 0xe0 0xf8 MOV r1, sp +.delay_slot + 10396 0x19 0x60 0xe0 0x18 ADD.NC p1, r1, #-64 +.delay_slot + 10400 0x09 0x07 0x2b 0x18 VST sfh, [p1] +.delay_slot +.swstall delay_slot + 10404 0x00 0x00 NOPX +.no_stack_arguments + 10406 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808 +.delay_slot +.swstall delay_slot + 10412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10416 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10418 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10420 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 10432 0x00 0x14 0x78 0x00 0x00 0x84 J #10480 +.delay_slot +.swstall delay_slot + 10438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10442 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10444 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10446 0x00 0x00 NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.no_stack_arguments + 10448 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808 +.delay_slot + 10454 0x10 0x91 0x60 0x00 0xb0 0x60 0x70 0x02 MOVS p0, p1; MOV p1, p0 +.delay_slot +.swstall delay_slot + 10462 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10464 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10468 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.return_address + 10480 0x1f 0x71 0x80 0xf8 MOV lr, dc0 + 10484 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10488 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 10494 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10500 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 10512 +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 10512 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 10518 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 10524 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 10530 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 10538 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 10548 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 10552 0x00 0x00 NOPX + 10554 0x00 0x00 NOPX + 10556 0x80 0x14 0xf0 0x40 0x01 0x84 JNZ r16, #10720 +.delay_slot + 10562 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 10566 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 10570 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 10574 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 10582 0x00 0x07 0xc0 0xca 0x00 0x44 MOVXM p0, #509184 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10588 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10594 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10604 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10606 0x00 0x13 0x00 0x00 0x01 0x04 JL #9728 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10612 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10614 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10616 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 10620 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 10624 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 10640 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 10646 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #509184 + 10656 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #509184 + 10666 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 + 10676 0x00 0x00 NOPX + 10678 0x00 0x00 NOPX + 10680 0x00 0x14 0xf8 0x00 0x00 0x84 J #10736 +.delay_slot + 10686 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024 +.delay_slot +.swstall delay_slot + 10692 0x00 0x00 NOPX +.delay_slot + 10694 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 10698 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 10704 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 10720 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 10736 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 10744 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 10754 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 10758 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 10762 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 10766 0x00 0x00 NOPX + 10768 0x00 0x00 NOPX + 10770 0x00 0x00 NOPX + 10772 0x00 0x00 NOPX + 10774 0x00 0x00 NOPX + 10776 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 10780 0x0f 0x06 0x11 0x98 ST r16, [p7] + 10784 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 10788 0x00 0x00 NOPX + 10790 0x00 0x00 NOPX + 10792 0x00 0x00 NOPX + 10794 0x14 0x93 0x08 0x18 ACQ r18, r16 + 10798 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 10804 0x00 0x00 NOPX + 10806 0x00 0x00 NOPX + 10808 0x00 0x06 0x36 0x98 LDA r17, [p0] + 10812 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 10818 0x01 0x06 0x76 0x98 LDA r19, [p1] + 10822 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 10826 0x00 0x00 NOPX +.no_stack_arguments + 10828 0x00 0x14 0x38 0x00 0x01 0x04 JL #10352 +.delay_slot +.swstall delay_slot + 10834 0x00 0x00 NOPX +.delay_slot + 10836 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 10840 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 10844 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 10848 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 10864 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 10874 0x10 0x20 0x05 0x18 MOVX r16, #1 + 10878 0x00 0x00 NOPX + 10880 0x00 0x00 NOPX + 10882 0x00 0x00 NOPX + 10884 0x00 0x00 NOPX + 10886 0x00 0x00 NOPX + 10888 0x14 0x51 0x08 0x18 REL r17, r16 + 10892 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024 + 10902 0x06 0x06 0x36 0x98 LDA r17, [p6] + 10906 0x02 0x06 0x56 0x98 LDA r18, [p2] + 10910 0x00 0x00 NOPX + 10912 0x00 0x00 NOPX + 10914 0x00 0x00 NOPX + 10916 0x00 0x00 NOPX + 10918 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 10922 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 10926 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 10930 0x80 0x15 0x68 0x40 0x01 0x84 JNZ r16, #10960 +.delay_slot +.swstall delay_slot + 10936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10940 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10942 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10944 0x00 0x00 NOPX + 10946 0x10 0x20 0x01 0x18 MOVX r16, #0 + 10950 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 10960 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 10964 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 10968 0x00 0x00 NOPX + 10970 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10972 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10974 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10978 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10980 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10984 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 10988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 10994 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10996 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10998 0x00 0x00 NOPX +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 11008 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function_start + 11008 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 + 11018 0x17 0x80 0x01 0x18 MOVX r0, #-128 + 11022 0x00 0x00 NOPX + 11024 0x00 0x00 NOPX + 11026 0x00 0x00 NOPX + 11028 0x00 0x00 NOPX + 11030 0x00 0x00 NOPX + 11032 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11036 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11040 0x00 0x00 NOPX + 11042 0x00 0x00 NOPX + 11044 0x00 0x00 NOPX + 11046 0x00 0x00 NOPX + 11048 0x00 0x00 NOPX + 11050 0x00 0x00 NOPX + 11052 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11056 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 11060 0x00 0x00 NOPX + 11062 0x00 0x00 NOPX + 11064 0x00 0x00 NOPX + 11066 0x00 0x00 NOPX + 11068 0x00 0x00 NOPX + 11070 0x00 0x00 NOPX + 11072 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11076 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 11080 0x00 0x00 NOPX + 11082 0x00 0x00 NOPX + 11084 0x00 0x00 NOPX + 11086 0x00 0x00 NOPX + 11088 0x00 0x00 NOPX + 11090 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11092 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11096 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11100 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11104 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11110 0x10 0xc2 0x14 0x98 AND r1, r3, r1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11114 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11118 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 11122 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 11136 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 11136 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11142 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 11146 0x00 0x15 0x80 0x00 0x01 0x04 JL #11008 +.delay_slot + 11152 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.delay_slot + 11156 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] +.delay_slot + 11160 0x1b 0xd0 0xc0 0xf8 MOV r15, p0 +.delay_slot +.swstall delay_slot + 11164 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11166 0x00 0x00 NOPX +.return_address + 11168 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 11178 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 11188 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 11198 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 11202 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11204 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11206 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11210 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11214 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11218 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11224 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11228 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 11232 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 11248 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.tail_call +.function_start + 11248 0x00 0x13 0x28 0x00 0x00 0x84 J #9808 +.delay_slot +.swstall delay_slot + 11254 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11256 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11258 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11260 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11262 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function_start + 11264 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64 +.delay_slot + 11270 0x18 0x50 0xc0 0xf8 MOV r1, p0 +.delay_slot + 11274 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32 +.delay_slot + 11278 0x08 0x04 0x11 0x98 ST r0, [p0] +.delay_slot + 11282 0x08 0x14 0x11 0x98 ST r0, [p0, #4] +.delay_slot +.swstall delay_slot + 11286 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 11296 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 11296 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11300 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11306 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 11310 0x00 0x00 NOPX + 11312 0x00 0x00 NOPX + 11314 0x00 0x00 NOPX + 11316 0x00 0x00 NOPX + 11318 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11322 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11326 0x00 0x00 NOPX + 11328 0x00 0x00 NOPX + 11330 0x00 0x00 NOPX + 11332 0x00 0x00 NOPX + 11334 0x00 0x00 NOPX + 11336 0x00 0x00 NOPX + 11338 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11342 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 11346 0x00 0x00 NOPX + 11348 0x00 0x00 NOPX + 11350 0x00 0x00 NOPX + 11352 0x00 0x00 NOPX + 11354 0x00 0x00 NOPX + 11356 0x00 0x00 NOPX + 11358 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11362 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 11366 0x00 0x00 NOPX + 11368 0x00 0x00 NOPX +.no_stack_arguments + 11370 0x00 0x16 0x00 0x00 0x01 0x04 JL #11264 +.delay_slot + 11376 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 11380 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11382 0x00 0x00 NOPX +.delay_slot + 11384 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 11388 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.return_address + 11392 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 11396 0x00 0x00 NOPX + 11398 0x00 0x00 NOPX + 11400 0x00 0x00 NOPX + 11402 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11404 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11406 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11410 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11416 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11418 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11420 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11424 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 11440 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function_start + 11440 0x04 0x00 0x80 0x00 0x00 0x08 0x7e 0xb0 0x10 0xba MOVA m0, #32; MOVXM ls, #11616 + 11450 0x61 0x0e 0xd0 0x00 0x00 0x09 0xbe 0xb8 0x10 0xba LDA r3, [p3], m0; MOVXM le, #11632 + 11460 0x60 0x90 0xd0 0x3e 0x17 0x48 0x0b 0x3c 0x58 0xba LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 + 11470 0x62 0x80 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m0, [p3, #4]; MOVXM p4, #509032 + 11480 0x04 0x04 0x42 0x98 LDA.s8 r2, [p4] + 11484 0x00 0x00 NOPX + 11486 0x00 0x00 NOPX + 11488 0x00 0x00 NOPX + 11490 0x10 0xc2 0x1d 0x98 LSHL r1, r3, r1 + 11494 0x05 0x0e 0x8a 0xe1 0xf9 0x34 VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11500 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11506 0x21 0x1b 0x70 0x50 0xe8 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11514 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11520 0x21 0x1b 0x70 0x50 0xe8 0x3c VLDA x3, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11526 0x01 0x08 0x9b 0x98 VLDA x2, [p1], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11530 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11536 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11542 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11552 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11562 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11572 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11582 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11592 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11602 0x21 0x1b 0x70 0x50 0x68 0x00 0x00 0x08 0x70 0x8c 0x00 0xe2 0x41 0x6e VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11616 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11632 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11648 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11656 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11664 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11672 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11680 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11688 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11696 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11704 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11708 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11714 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11718 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 11722 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 11726 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 11730 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 11744 +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 11744 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992 + 11750 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 11756 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11762 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 11772 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 11780 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 11784 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 11788 0x00 0x00 NOPX + 11790 0x80 0x17 0x50 0x40 0x01 0x84 JNZ r16, #11936 +.delay_slot + 11796 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 11800 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 +.delay_slot + 11806 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 11814 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 11818 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0xc0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509312 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11828 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x34 0x11 0x3a MOVS p0, p7; MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11838 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11848 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11850 0x00 0x16 0x10 0x00 0x01 0x04 JL #11296 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11856 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11858 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11860 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 11864 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 11868 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 11872 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x28 0x10 0xba LDA r16, [p7]; MOVXM p1, #509008 + 11882 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb2 0x2a 0x10 0xba LDA r17, [p1]; MOVXM p3, #509012 + 11892 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 + 11902 0x00 0x00 NOPX + 11904 0x00 0x00 NOPX + 11906 0x00 0x00 NOPX + 11908 0x00 0x17 0x58 0x00 0x00 0x84 J #11952 +.delay_slot + 11914 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024 +.delay_slot + 11920 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 11924 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 11928 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 11932 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 11936 0x00 0x07 0xc6 0xc8 0xa8 0x44 MOVXM p3, #509012 + 11942 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba NOPA; MOVXM p1, #509020 +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 11952 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 11956 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508992 + 11966 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 11970 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 11974 0x02 0x06 0x56 0x98 LDA r18, [p2] + 11978 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 11982 0x00 0x00 NOPX + 11984 0x00 0x00 NOPX + 11986 0x00 0x00 NOPX + 11988 0x00 0x00 NOPX + 11990 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 11994 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 12000 0x0a 0x06 0x11 0x98 ST r16, [p2] + 12004 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 12008 0x00 0x00 NOPX + 12010 0x00 0x00 NOPX + 12012 0x00 0x00 NOPX + 12014 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 12018 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 12022 0x00 0x00 NOPX + 12024 0x00 0x00 NOPX + 12026 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 12030 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 12034 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 12038 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 12042 0x00 0x00 NOPX + 12044 0x00 0x00 NOPX + 12046 0x00 0x00 NOPX + 12048 0x00 0x00 NOPX + 12050 0x00 0x00 NOPX + 12052 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 12056 0x0a 0x06 0x31 0x98 ST r17, [p2] + 12060 0x00 0x00 NOPX + 12062 0x00 0x00 NOPX + 12064 0x00 0x00 NOPX + 12066 0x00 0x00 NOPX + 12068 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 12072 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 12082 0x00 0x00 NOPX + 12084 0x00 0x00 NOPX + 12086 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 12090 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 12096 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12102 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12106 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12110 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12114 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12116 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12120 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12122 0x00 0x16 0x58 0x00 0x01 0x04 JL #11440 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12128 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 12132 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 12136 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 12140 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 12144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 12160 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 12170 0x00 0x07 0xcc 0xc8 0xc0 0x44 MOVXM p6, #509024 + 12176 0x00 0x00 NOPX + 12178 0x00 0x00 NOPX + 12180 0x00 0x00 NOPX + 12182 0x00 0x00 NOPX + 12184 0x00 0x00 NOPX + 12186 0x14 0x51 0x08 0x18 REL r17, r16 + 12190 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 12194 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 12198 0x00 0x00 NOPX + 12200 0x00 0x00 NOPX + 12202 0x00 0x00 NOPX + 12204 0x00 0x00 NOPX + 12206 0x00 0x00 NOPX + 12208 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 12212 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 12218 0x00 0x00 NOPX + 12220 0x00 0x00 NOPX + 12222 0x00 0x00 NOPX + 12224 0x00 0x00 NOPX + 12226 0x00 0x00 NOPX + 12228 0x00 0x00 NOPX + 12230 0x14 0x51 0x08 0x18 REL r17, r16 + 12234 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb2 0x20 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508992 + 12244 0x06 0x06 0x56 0x98 LDA r18, [p6] + 12248 0x01 0x06 0x36 0x98 LDA r17, [p1] + 12252 0x00 0x00 NOPX + 12254 0x00 0x00 NOPX + 12256 0x00 0x00 NOPX + 12258 0x00 0x00 NOPX + 12260 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 12264 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 12268 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 12272 0x80 0x18 0x08 0x40 0x01 0x84 JNZ r16, #12304 +.delay_slot +.swstall delay_slot + 12278 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12280 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12282 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12284 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12286 0x00 0x00 NOPX + 12288 0x10 0x20 0x01 0x18 MOVX r16, #0 + 12292 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 12304 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 12308 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 12312 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12316 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12318 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12322 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12324 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12326 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12330 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 12334 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 12340 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12342 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12344 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 12352 +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function_start + 12352 0x03 0x85 0xd0 0x00 0x01 0xf0 0xb3 0xe0 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #509888 + 12362 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 + 12372 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 12378 0xfe 0xf3 0xb0 0x00 0x01 0xf3 0xb3 0xe0 0x11 0x3a ST p7, [sp, #-12]; MOVXM p7, #509888 + 12388 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 12392 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8] + 12396 0x00 0x00 NOPX + 12398 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 12402 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 12406 0x00 0x04 0x2e 0x98 LDA el0, [p0] + 12410 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4] + 12414 0x00 0x00 NOPX + 12416 0x00 0x00 NOPX + 12418 0x00 0x00 NOPX + 12420 0x00 0x00 NOPX + 12422 0x00 0x00 NOPX + 12424 0x09 0x04 0x29 0x98 ST el0, [p1] + 12428 0x09 0x14 0x09 0x98 ST eh0, [p1, #4] + 12432 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5 + 12436 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2 + 12440 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2 + 12444 0x00 0x00 NOPX + 12446 0x00 0x00 NOPX + 12448 0x00 0x00 NOPX + 12450 0x00 0x00 NOPX +.no_stack_arguments + 12452 0x00 0x1e 0x98 0x00 0x01 0x04 JL #15664 +.delay_slot + 12458 0xfc 0xca 0xb8 0xbe 0x43 0x5c ST r18, [sp, #-28]; SUB r15, r17, r18 +.delay_slot + 12464 0xfd 0x86 0xb0 0xc2 0x11 0x5c ST r1, [sp, #-20]; NE r16, r1, r16 +.delay_slot + 12470 0xfe 0x42 0xb7 0xef 0x15 0x5c ST r16, [sp, #-16]; LT r27, r15, r24 +.delay_slot + 12476 0x16 0x22 0xf1 0x98 SUB r17, r24, r15 +.delay_slot + 12480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x1e 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV +.return_address + 12496 0xfd 0xd2 0x20 0x40 0x02 0x2c LDA r20, [sp, #-20]; MOVX r16, #0 + 12502 0xe7 0xc5 0x58 0x48 0x43 0x2c LDA.u8 r17, [p7], #3; SUB r18, r16, r2 + 12508 0x07 0xee 0x6a 0x98 LDA.u8 r19, [p7], #-2 + 12512 0x07 0xec 0x31 0x18 LDA r1, [sp, #-20] + 12516 0x00 0x00 NOPX + 12518 0x00 0x00 NOPX + 12520 0x00 0x00 NOPX + 12522 0x13 0xe9 0x46 0x98 XOR r20, r15, r20 + 12526 0x15 0x37 0x0a 0x98 LT r27, r20, r16 + 12530 0xfd 0x4e 0xb8 0xc6 0x63 0x5c ST r19, [sp, #-24]; SUB r17, r17, r19 +.no_stack_arguments + 12536 0xfc 0x46 0xb0 0x00 0x07 0xa6 0x00 0x00 0x41 0x3a ST r17, [sp, #-32]; JL #15664 +.delay_slot + 12546 0x10 0xa9 0x22 0x18 SEL.EQZ r20, r2, r18, r27 +.delay_slot + 12550 0x14 0x77 0x0a 0x98 LT r27, r17, r16 +.delay_slot + 12554 0x14 0x25 0x11 0x98 SUB r18, r16, r17 +.delay_slot + 12558 0x15 0x26 0x70 0x18 EXTEND.s16 r19, r20 +.delay_slot + 12562 0x00 0x2c 0xf0 0x00 0x24 0x41 0x22 0x3d 0x98 0x09 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 +.return_address + 12576 0xfc 0x0e 0x20 0x3f 0x37 0xc8 0x00 0x42 0x58 0xba LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 + 12586 0xfd 0xc2 0x20 0x01 0x80 0x08 0x29 0xfc 0x58 0xba LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 + 12596 0xfc 0xda 0x20 0x00 0x60 0x88 0x88 0x02 0x58 0xba LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 + 12606 0xe1 0x45 0x50 0x00 0x51 0x0b 0x88 0x17 0x58 0xba LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 + 12616 0xfd 0x56 0x20 0x3f 0x27 0x48 0x80 0x20 0x58 0xba LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 + 12626 0xfe 0x7a 0x20 0x01 0x70 0xcb 0x48 0x01 0x58 0xba LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 + 12636 0xe9 0xc0 0x80 0x05 0xd0 0x0b 0xef 0xc0 0x58 0xba MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 + 12646 0x16 0x28 0x21 0x98 SUB r20, r24, r2 + 12650 0x10 0xc7 0x06 0x98 XOR r3, r3, r16 + 12654 0x1e 0xf1 0x50 0x36 0x02 0x24 LT r27, r3, r24; ADD.NC r0, r22, #2 + 12660 0x15 0x28 0x4b 0x3f 0xf5 0x64 SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 + 12666 0x78 0xe1 0xf1 0x20 0x1d 0x64 MUL r3, r15, r16; MOV r2, #7 + 12672 0x15 0x28 0x70 0x18 EXTEND.s16 r20, r20 + 12676 0x08 0x00 0x90 0xa0 0x01 0x24 AND r0, r1, r0; ADD.NC r1, r0, #1 + 12682 0x0c 0xe7 0xbd 0xb4 0x01 0x24 LSHL r19, r1, r19; ADD.NC r27, r20, #1 + 12688 0x7d 0x0d 0xb0 0xa3 0x02 0xa4 LSHL r20, r15, r6; ADD.NC r1, r3, r0 + 12694 0x09 0xcd 0xb0 0x35 0xff 0x24 LSHL r7, r1, r6; ADD.NC r0, r21, #-1 + 12700 0x16 0xcd 0x0f 0x98 MUL r6, r27, r16 + 12704 0x13 0xdf 0x1f 0x98 MUL r15, r15, r17 + 12708 0x9d 0x6b 0xf9 0xb3 0xff 0x24 MUL r21, r19, r21; ADD.NC r19, r19, #-1 + 12714 0x11 0x37 0x07 0x98 EQ r27, r4, r16 + 12718 0xff 0xd6 0x37 0x90 0xdf 0x5c ST r21, [p7], #-4; MUL r4, r15, r6 + 12724 0x17 0x38 0x52 0x18 SEL.EQZ r28, r28, r5, r27 + 12728 0x11 0x25 0x2d 0x98 LSHL r18, r4, r18 + 12732 0xe5 0x4a 0x38 0xc8 0x3f 0x5c ST r18, [p7], m1; MUL r18, r17, r1 + 12738 0xf9 0xf2 0x3f 0x72 0xfb 0x5c ST r28, [p7], #-16; LSHL r28, r30, r23 + 12744 0xed 0xf2 0x39 0x70 0x1f 0x5c ST r28, [p7], #24; MUL r28, r18, r0 + 12750 0xe3 0xce 0x39 0xce 0xfb 0x5c ST r19, [p7], #4; LSHL r19, r19, r23 + 12756 0xe7 0x35 0xb9 0xb3 0xea 0xa4 LSHL r28, r28, r26; ADD.NC r19, r19, r29 + 12762 0xe3 0xfe 0x39 0x7b 0x5b 0x5c ST r31, [p7], #4; LSHL r30, r18, r26 + 12768 0x94 0x21 0xf9 0x33 0xe2 0xa4 MUL r16, r18, r16; ADD.NC r18, r19, r28 + 12774 0xe3 0x82 0x3f 0xf3 0x04 0x5c ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 + 12780 0x10 0xff 0x6d 0x98 LSHL r31, r3, r22 + 12784 0xf0 0x66 0x39 0xbf 0xff 0x24 SUB r1, r30, r19; ADD.NC r19, r31, #-1 + 12790 0xe3 0x86 0x38 0xc6 0xdb 0x5c ST r1, [p7], #4; LSHL r17, r17, r22 + 12796 0xc5 0xa4 0x39 0x31 0xff 0x24 SUB r22, r24, r18; ADD.NC r18, r17, #-1 + 12802 0xe3 0xda 0x33 0xdb 0xc3 0x5c ST r22, [p7], #4; SUB r22, r7, r30 + 12808 0xe3 0xca 0x38 0x43 0x5b 0x5c ST r18, [p7], #4; LSHL r16, r16, r26 + 12814 0xe3 0x9e 0x39 0xfc 0x5b 0x5c ST r7, [p7], #4; LSHL r31, r19, r2 + 12820 0xe3 0xce 0x3e 0xda 0xc1 0x5c ST r19, [p7], #4; ADD r22, r29, r22 + 12826 0x3c 0x20 0x1e 0xbf 0xf2 0xa4 ADD r16, r7, r16; ADD.NC r29, r31, r30 + 12832 0xe3 0xda 0x38 0x43 0xa3 0x5c ST r22, [p7], #4; SUB r16, r16, r29 + 12838 0xe3 0xc2 0x30 0x1f 0x6d 0x6e 0x0f 0xff 0x59 0x3a ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 + 12848 0xe3 0xca 0x3e 0x6a 0x81 0x5c ST r18, [p7], #4; ADD r26, r28, r20 + 12854 0xe3 0xea 0x3a 0x52 0xc3 0x5c ST r26, [p7], #4; SUB r20, r20, r22 + 12860 0x08 0x11 0x07 0x1e 0x71 0xab 0x08 0xb2 0x6d 0x10 0x08 0x76 MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 + 12872 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4 + 12876 0xe3 0xc6 0x38 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r16, r23 + 12882 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20 + 12888 0xe3 0xc6 0x39 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r18, r23 + 12894 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20 + 12900 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 + 12904 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4 + 12908 0x0f 0x0a 0x11 0x98 ST r16, [p7], m0 + 12912 0x07 0x06 0x0a 0x98 LDA.u8 r16, [p7] + 12916 0x00 0x00 NOPX + 12918 0x00 0x00 NOPX + 12920 0x00 0x00 NOPX + 12922 0x00 0x00 NOPX + 12924 0x00 0x00 NOPX + 12926 0x00 0x00 NOPX + 12928 0x80 0x19 0x50 0x00 0x01 0x84 JZ r16, #12960 +.delay_slot + 12934 0x19 0x3b 0x60 0xf8 MOV vaddSign0, crMCDEn +.delay_slot + 12938 0xff 0x7f 0x09 0xa0 0x00 0x44 MOVXM r19, #-8454144 +.delay_slot +.swstall delay_slot + 12944 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12946 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12948 0x00 0x00 NOPX + 12950 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0x26 0x01 0x7a NOPA; NOPS; MOVX r19, #0 +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 + 12960 0xff 0x87 0x20 0x00 0x01 0xf0 0x32 0x34 0x10 0xba LDA lr, [sp, #-4]; MOVXM p0, #509032 + 12970 0x00 0xc0 0x50 0x04 0xe2 0xd4 LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 + 12976 0xfe 0x83 0x21 0x02 0xe9 0x54 LDA p0, [sp, #-12]; MOV dj0, #186 + 12982 0xff 0x3e 0x20 0x01 0x25 0xd4 LDA r15, [sp, #-8]; VMOV bmll0, x0 + 12988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 12994 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12996 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12998 0x07 0x02 0x17 0x18 ST.s16 r16, [p7, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13002 0x05 0x00 0x0f 0x70 0x41 0xe4 RET lr; MOV crRnd, r16 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13008 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13012 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13016 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0 +.delay_slot +.swstall delay_slot + 13020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13022 0x00 0x00 NOPX +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function_start + 13024 0x1c 0x56 0xc0 0xf8 MOV r17, p3 + 13028 0x20 0x93 0xde 0x01 0xa9 0x54 LDA p1, [p1]; MOV m7, #106 + 13034 0x00 0x83 0xd6 0xd1 0x02 0x14 LDA p0, [p0]; ADD.NC p3, r17, #2 + 13040 0x03 0xe8 0x8a 0x98 LDA.u8 r4, [p3], m7 + 13044 0x03 0xfd 0x46 0x98 LDA dj2, [p3], #-4 + 13048 0x03 0x3d 0x26 0x98 LDA dn2, [p3], #12 + 13052 0x03 0xff 0x46 0x98 LDA dj6, [p3], #-4 + 13056 0x03 0x2f 0x26 0x98 LDA dn6, [p3], #8 + 13060 0x03 0x2d 0x06 0x98 LDA m2, [p3], #8 + 13064 0x03 0xfc 0x46 0x98 LDA dj0, [p3], #-4 + 13068 0x03 0x3c 0x26 0x98 LDA dn0, [p3], #12 + 13072 0x03 0xfe 0x46 0x98 LDA dj4, [p3], #-4 + 13076 0x03 0x2e 0x26 0x98 LDA dn4, [p3], #8 + 13080 0x03 0x2c 0x06 0x98 LDA m0, [p3], #8 + 13084 0x03 0xfc 0xc6 0x98 LDA dj1, [p3], #-4 + 13088 0x03 0x3c 0xa6 0x98 LDA dn1, [p3], #12 + 13092 0x03 0xfe 0xc6 0x98 LDA dj5, [p3], #-4 + 13096 0x03 0x2e 0xa6 0x98 LDA dn5, [p3], #8 + 13100 0x03 0x2c 0x86 0x98 LDA m1, [p3], #8 + 13104 0x03 0xff 0xc6 0x98 LDA dj7, [p3], #-4 + 13108 0x03 0x2f 0xa6 0x98 LDA dn7, [p3], #8 + 13112 0x65 0xf0 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m7, [p3], #8; MOVXM p4, #509032 + 13122 0x80 0x98 0x58 0xc5 0x81 0xd4 LDA.s8 r6, [p4]; MOV p4, p1 + 13128 0x1b 0x0f 0x10 0xb8 MOV m3, #-120 + 13132 0x80 0x85 0x70 0x3b 0x68 0x00 0x20 0x6a 0x60 0x00 0x58 0xb6 VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 + 13144 0x7f 0xb8 0xd0 0x38 0xe9 0x04 0x2d 0xe0 0x10 0x0b 0x62 0x09 0x60 0x7e LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 + 13158 0x65 0xb4 0xd1 0x0c 0x4b 0x02 0x80 0x90 0x72 0xba LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 + 13168 0x6d 0x30 0xd1 0xab 0x90 0x03 0xe1 0xc0 0x7e 0xba LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 + 13178 0x79 0x0a 0xd1 0xf0 0xf4 0x02 0x07 0x90 0x5e 0xba LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 + 13188 0x71 0x1e 0x50 0x00 0x82 0x2c LDA.s16 r7, [p3], m4; MOVX r0, #16 + 13194 0x69 0xc0 0xd6 0x10 0x4b 0x00 0x00 0x0c 0x79 0xf8 0x10 0x76 LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 + 13206 0x72 0x92 0xd2 0x10 0x4b 0x00 0x00 0x0d 0xba 0x28 0x10 0x76 LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 + 13218 0x0b 0x16 0x84 0x61 0x05 0xb4 VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 + 13224 0x1b 0x00 0x8a 0xf8 VMOV cml3, cml0 + 13228 0x60 0x96 0xd0 0x00 0x00 0x0d 0xb2 0x48 0x10 0xba LDA r5, [p3]; MOVXM p3, #13456 + 13238 0x00 0x2c 0xf0 0x00 0x14 0x0a 0x8e 0x01 0xa8 0xba NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 + 13248 0x07 0x91 0x00 0x00 0x20 0x01 0x5b 0x00 0x36 0x08 0x0e 0xb9 0x78 0x00 0x00 0xe1 MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV + 13264 0x00 0x2c 0xf0 0x00 0x20 0x10 0x4b 0x0d 0xd4 0x02 0x0e 0x03 0xac 0x63 0x6a 0x0b NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13280 0x40 0xa3 0xd0 0x00 0x25 0x10 0x4b 0x04 0x2f 0xda 0xb9 0x3f 0xcc 0x48 0x1a 0x0b LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13296 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13306 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13316 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13326 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13330 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13338 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13346 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 13350 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 + 13358 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 + 13366 0x00 0x2c 0xf0 0x00 0x10 0x01 0x18 0x41 0x6e 0xba NOPA; NOPB; VSHIFT x4, x6, x1, r0 + 13376 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x63 0x6a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 13392 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13408 0x03 0x0c 0xf4 0x73 0x90 0x02 0x84 0x81 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13418 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13426 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13434 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13438 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13446 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x8a 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432 +.loop_nesting 1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13456 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13462 0x00 0x00 0x01 0xb7 0x54 0x02 0x8b 0x92 0xe1 0x5a MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13472 0x80 0x85 0x70 0x00 0x01 0x8f 0x4f 0x02 0x88 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13484 0x1d 0x72 0x7f 0x98 ADD.NC lc, r4, #-1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13488 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13492 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13496 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13500 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13504 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13508 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13512 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13516 0x51 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13524 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13528 0x47 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p2], d1; VMOV cml3, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13536 0x04 0x1c 0x07 0x46 0x8c 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13544 0x02 0x30 0x82 0xc6 0x89 0x03 0x41 0x62 VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt + 13552 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13562 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13572 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13582 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13586 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13594 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13602 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 13606 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 + 13614 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 + 13622 0x00 0x2c 0xf4 0x61 0x05 0x94 NOPA; VSHIFT x4, x6, x1, r0 + 13628 0x8c 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r17 +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608 +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 +.loop_nesting 1 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13648 0x03 0x0c 0xf8 0xe7 0x20 0x04 0x27 0x02 0x84 0x81 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13660 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13668 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13676 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13680 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13688 0x04 0xb0 0x8e 0xc6 0x8a 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 +.loop_nesting 0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13696 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 13700 0x8b 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r17 + 13704 0x88 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r17 + 13708 0x00 0x00 NOPX + 13710 0x00 0x00 NOPX + 13712 0x00 0x00 NOPX + 13714 0x00 0x00 NOPX + 13716 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 + 13720 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr +.delay_slot + 13726 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.delay_slot + 13730 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5 +.delay_slot + 13734 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0 +.delay_slot + 13738 0x0a 0x8a 0x13 0x18 VST x8, [p2], m4 +.delay_slot + 13742 0x0a 0x3a 0x93 0x18 VST.3D x10, [p2], d1 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 13760 +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 13760 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992 + 13766 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15 + 13772 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 13778 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID + 13786 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr + 13794 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20] + 13798 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 13802 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2 + 13810 0x80 0x1b 0x38 0x40 0x01 0x84 JNZ r16, #13936 +.delay_slot + 13816 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 13820 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 13824 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 13828 0x00 0x07 0xc6 0xc8 0xa0 0x44 MOVXM p3, #509008 +.delay_slot + 13834 0x0b 0x06 0x31 0x98 ST r17, [p3] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13838 0xf0 0x91 0x60 0x00 0x01 0xf0 0xb2 0x34 0x11 0x3a MOVS p7, p1; MOVXM p1, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13848 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x01 0xf0 0xb2 0x32 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13860 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 13862 0x00 0x18 0x20 0x00 0x01 0x04 JL #12352 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13868 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13870 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13872 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 13876 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 13880 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM +.return_address + 13888 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8 + 13896 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 13900 0x44 0xc3 0x50 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 + 13910 0x00 0x00 NOPX + 13912 0x00 0x1b 0x40 0x00 0x00 0x84 J #13952 +.delay_slot + 13918 0x00 0x07 0xc6 0xc8 0xb0 0x44 MOVXM p3, #509016 +.delay_slot +.swstall delay_slot + 13924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13926 0x00 0x00 NOPX +.delay_slot + 13928 0x0b 0x06 0x31 0x98 ST r17, [p3] +.delay_slot + 13932 0x0a 0x06 0x11 0x98 ST r16, [p2] +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 + 13936 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0xb2 0x2c 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 + 13952 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 13956 0x5f 0xee 0xd0 0x00 0x01 0xf2 0x32 0x28 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #509008 + 13966 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 13970 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 13974 0x02 0x46 0x56 0x98 LDA r18, [p2, #16] + 13978 0x00 0x00 NOPX + 13980 0x00 0x00 NOPX + 13982 0x00 0x00 NOPX + 13984 0x00 0x00 NOPX + 13986 0x00 0x00 NOPX + 13988 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 13992 0x0a 0x06 0x11 0x98 ST r16, [p2] + 13996 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 14000 0x00 0x00 NOPX + 14002 0x00 0x00 NOPX + 14004 0x00 0x00 NOPX + 14006 0x14 0x93 0x08 0x18 ACQ r18, r16 + 14010 0x00 0x2f 0x00 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba MOVA r15, #1; MOVXM p7, #508992 + 14020 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp + 14026 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76 + 14030 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2 + 14036 0x04 0x06 0x36 0x98 LDA r17, [p4] + 14040 0x60 0xc2 0xd0 0x00 0x01 0xf1 0xb3 0xe0 0x10 0xba LDA r16, [p3]; MOVXM p3, #509888 + 14050 0x07 0x06 0x56 0x98 LDA r18, [p7] + 14054 0x00 0x00 NOPX + 14056 0x00 0x00 NOPX + 14058 0x00 0x00 NOPX + 14060 0x05 0x06 0x76 0x98 LDA r19, [p5] + 14064 0x00 0x00 NOPX + 14066 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 14070 0x14 0xa2 0x07 0x18 ADD r17, r18, #1 + 14074 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15 +.no_stack_arguments + 14078 0x00 0x19 0x70 0x00 0x01 0x04 JL #13024 +.delay_slot + 14084 0x0f 0x06 0x31 0x98 ST r17, [p7] +.delay_slot + 14088 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16 +.delay_slot + 14092 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76] +.delay_slot + 14096 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72] +.delay_slot + 14100 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX +.return_address + 14112 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20 + 14116 0x02 0x06 0x16 0x98 LDA r16, [p2] + 14120 0x00 0x00 NOPX + 14122 0x00 0x00 NOPX + 14124 0x00 0x00 NOPX + 14126 0x00 0x00 NOPX + 14128 0x00 0x00 NOPX + 14130 0x00 0x00 NOPX + 14132 0x14 0x10 0xf8 0x18 REL r16, r15 + 14136 0x5c 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #509024 + 14146 0x01 0x06 0x56 0x98 LDA r18, [p1] + 14150 0x07 0x06 0x36 0x98 LDA r17, [p7] + 14154 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12] + 14158 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] + 14162 0x00 0x00 NOPX + 14164 0x00 0x00 NOPX + 14166 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 14170 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8] + 14174 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 14178 0x80 0x1b 0xc0 0x40 0x01 0x84 JNZ r16, #14208 +.delay_slot + 14184 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 14188 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14190 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14192 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14194 0x00 0x00 NOPX + 14196 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 14208 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13 + 14214 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] + 14218 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 14222 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 14228 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14230 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14232 0x00 0x00 NOPX +.delay_slot + 14234 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 14240 +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0 +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.function_start + 14240 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 14246 0xff 0x73 0xb0 0x00 0x01 0xf3 0xb2 0x20 0x11 0x3a ST p7, [sp, #-8]; MOVXM p7, #508992 + 14256 0xe0 0xc2 0xd7 0xff 0x1d 0x82 0x2d 0x70 0x72 0xba LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID + 14266 0x0f 0xf6 0x1d 0x98 ST p4, [sp, #-12] + 14270 0x0f 0xf1 0x1d 0x98 ST p2, [sp, #-16] + 14274 0xfd 0x87 0xb0 0x03 0xb3 0x60 0x70 0x02 ST lr, [sp, #-20]; MOV p7, p3 + 14282 0x00 0x00 NOPX + 14284 0x00 0x00 NOPX + 14286 0x00 0x00 NOPX + 14288 0x80 0x1c 0xb0 0x40 0x01 0x84 JNZ r16, #14688 +.delay_slot + 14294 0x0f 0xe8 0x1d 0x98 ST p0, [sp, #-24] +.delay_slot + 14298 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 14302 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 14306 0x00 0x07 0xcc 0xc8 0xa0 0x44 MOVXM p6, #509008 +.delay_slot + 14312 0x0e 0x06 0x31 0x98 ST r17, [p6] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14316 0x00 0x20 0x00 0x00 0x01 0xf3 0x32 0x34 0x10 0xba MOVA r0, #1; MOVXM p6, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14326 0xc0 0xc0 0xe6 0x84 0x8b 0x00 0x01 0xf0 0x32 0x32 0x10 0x76 ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14338 0x00 0x01 0x00 0x00 0x01 0xf0 0xb3 0x00 0x10 0xba MOVA r1, #0; MOVXM p1, #509440 +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 14348 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14354 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14356 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14358 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 14362 0x00 0x2c 0xf0 0x40 0x0a 0x2c NOPA; MOVX r16, #1 +.delay_slot + 14368 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV +.return_address + 14384 0x04 0x00 0xa1 0x01 0x01 0x64 MOVX r16, #1; MOV dj0, #64 + 14390 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0] + 14394 0x00 0x00 NOPX + 14396 0x00 0x00 NOPX + 14398 0x00 0x00 NOPX + 14400 0x00 0x00 NOPX + 14402 0x00 0x00 NOPX + 14404 0x00 0x00 NOPX + 14406 0x14 0xa1 0x07 0x98 EQ r16, r18, r16 + 14410 0x80 0x1c 0x68 0x40 0x01 0x84 JNZ r16, #14544 +.delay_slot + 14416 0x1c 0x5e 0xc0 0xf8 MOV r17, p7 +.delay_slot + 14420 0x18 0xc8 0x90 0x18 ADD.NC dc0, r17, #32 +.delay_slot +.swstall delay_slot + 14424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14426 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14428 0x00 0x00 NOPX + 14430 0x90 0x1c 0x58 0x40 0x01 0x84 JNZ r18, #14512 +.delay_slot + 14436 0x00 0x07 0xc8 0x2c 0x00 0x44 MOVXM r16, #509440 +.delay_slot + 14442 0x10 0x22 0x01 0x18 MOVX r17, #0 +.delay_slot +.swstall delay_slot + 14446 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14448 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14450 0x00 0x00 NOPX +.no_stack_arguments + 14452 0xfc 0xe3 0xb0 0x00 0x05 0x70 0x00 0x00 0x41 0x3a ST p6, [sp, #-28]; JL #11136 +.delay_slot + 14462 0x00 0x07 0xcc 0xca 0x80 0x44 MOVXM p6, #509248 +.delay_slot + 14468 0x00 0x07 0xc0 0xca 0x80 0x44 MOVXM p0, #509248 +.delay_slot + 14474 0x19 0x61 0x80 0xf8 MOV p1, dc0 +.delay_slot +.swstall delay_slot + 14478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.return_address + 14496 0xc0 0xc6 0xd0 0x00 0x01 0xf2 0x0b 0x00 0x10 0xba LDA r17, [p6]; MOVXM r16, #509440 + 14506 0xfc 0xe3 0x20 0x00 0x20 0x3c LDA p6, [sp, #-28]; NOPB +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272 + 14512 0x00 0x00 NOPX + 14514 0x00 0x1c 0x80 0x00 0x00 0x84 J #14592 +.delay_slot +.swstall delay_slot + 14520 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14524 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14526 0x00 0x00 NOPX +.delay_slot + 14528 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xb6 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304 +.no_stack_arguments + 14544 0xfc 0xe3 0xb0 0x00 0x05 0x84 0x00 0x00 0x41 0x3a ST p6, [sp, #-28]; JL #11296 +.delay_slot + 14554 0x00 0x07 0xcc 0xcb 0x00 0x44 MOVXM p6, #509312 +.delay_slot + 14560 0x00 0x07 0xc0 0xcb 0x00 0x44 MOVXM p0, #509312 +.delay_slot + 14566 0x19 0x61 0x80 0xf8 MOV p1, dc0 +.delay_slot +.swstall delay_slot + 14570 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14572 0x00 0x01 0x67 0x98 NOPA +.return_address + 14576 0xc0 0xc6 0xd0 0x00 0x01 0xf2 0x0b 0x00 0x10 0xba LDA r17, [p6]; MOVXM r16, #509440 + 14586 0xfc 0x93 0x20 0x00 0x20 0x3c LDA p1, [sp, #-28]; NOPB +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352 + 14592 0x1b 0x68 0x05 0x98 ADD.NC p3, r16, #11 + 14596 0x6f 0xcd 0x50 0x00 0x01 0xf3 0x32 0x28 0x10 0xba LDA.u8 r19, [p3], #7; MOVXM p6, #509008 + 14606 0x06 0x06 0x56 0x98 LDA r18, [p6] + 14610 0x03 0x1e 0xba 0x98 LDA.u16 r21, [p3], #2 + 14614 0x03 0x06 0x1a 0x98 LDA.u16 r16, [p3] + 14618 0x00 0x00 NOPX + 14620 0x03 0x16 0x9a 0x98 LDA.u16 r20, [p3, #2] + 14624 0x00 0x00 NOPX + 14626 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 14628 0x00 0x07 0xc0 0xc8 0x88 0x44 MOVXM p0, #508996 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 14634 0x14 0xe7 0x5f 0x98 MUL r19, r19, r21 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14638 0x00 0xce 0x30 0x00 0x01 0xf1 0x32 0x2e 0x11 0x3a ST r19, [p0]; MOVXM p2, #509020 + 14648 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16 + 14652 0x14 0x63 0x2f 0x98 MUL r17, r17, r18 + 14656 0x15 0x21 0x0f 0x98 MUL r16, r20, r16 + 14660 0x00 0x2c 0xf2 0x06 0x31 0x80 0x01 0xf3 0x32 0x30 0x10 0x76 NOPA; ST r17, [p2]; MOVXM p6, #509024 + 14672 0x00 0x2c 0xf0 0x00 0x26 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448 + 14688 0x00 0x07 0xc0 0xc8 0x90 0x44 MOVXM p0, #509000 + 14694 0x00 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r16, [p0]; MOVXM p2, #508992 + 14704 0x40 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x26 0x10 0xba LDA r17, [p2]; MOVXM p6, #509004 + 14714 0x06 0x06 0x56 0x98 LDA r18, [p6] + 14718 0x00 0x00 NOPX + 14720 0x00 0x00 NOPX + 14722 0x00 0x00 NOPX + 14724 0x00 0x00 NOPX + 14726 0x80 0x1c 0xf8 0x40 0x01 0x84 JNZ r16, #14832 +.delay_slot + 14732 0x8c 0x40 0xe9 0xb0 0x01 0x24 ADD r17, r17, #1; ADD.NC r19, r16, #1 +.delay_slot + 14738 0x14 0xa4 0x07 0x18 ADD r18, r18, #1 +.delay_slot + 14742 0x0a 0x06 0x31 0x98 ST r17, [p2] +.delay_slot + 14746 0x0e 0x06 0x51 0x98 ST r18, [p6] +.delay_slot + 14750 0x08 0x06 0x71 0x98 ST r19, [p0] + 14754 0x07 0xf6 0x31 0x18 LDA r17, [sp, #-12] + 14758 0x00 0x00 NOPX + 14760 0x00 0x00 NOPX + 14762 0x00 0x00 NOPX + 14764 0x00 0x00 NOPX + 14766 0x00 0x00 NOPX + 14768 0x00 0x00 NOPX + 14770 0x1e 0x68 0x86 0x18 ADD.NC p6, r17, #12 + 14774 0x06 0xff 0x76 0x98 LDA r27, [p6], #-4 + 14778 0x06 0xfe 0x36 0x98 LDA r17, [p6], #-4 + 14782 0x06 0xfe 0x56 0x98 LDA r18, [p6], #-4 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 14786 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 14788 0x06 0x46 0x36 0x98 LDA r17, [p6, #16] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14792 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14794 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14796 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14798 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14800 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14804 0xc0 0xc6 0x3f 0xc1 0xfa 0x5c ST r17, [p6]; MOVX r16, #-1 + 14810 0x00 0x00 NOPX + 14812 0x00 0x00 NOPX + 14814 0x00 0x00 NOPX + 14816 0x00 0x00 NOPX + 14818 0x00 0x2c 0xf0 0x00 0x24 0x53 0x08 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; ACQ r17, r16; NOPM +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592 + 14832 0x00 0x00 NOPX + 14834 0x00 0x00 NOPX + 14836 0x00 0x00 NOPX + 14838 0x07 0xf5 0x19 0x18 LDA p2, [sp, #-12] + 14842 0x07 0xe8 0x19 0x18 LDA p0, [sp, #-24] +.no_stack_arguments + 14846 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464 +.delay_slot + 14852 0x00 0x07 0xc6 0xcc 0x00 0x44 MOVXM p3, #509440 +.delay_slot +.swstall delay_slot + 14858 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14860 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14862 0x00 0x00 NOPX +.delay_slot + 14864 0x00 0x2c 0xf0 0x00 0x26 0x88 0x8b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV +.return_address + 14880 0xfe 0x42 0x20 0x00 0x01 0xf0 0xb2 0x24 0x10 0xba LDA r16, [sp, #-16]; MOVXM p1, #509000 + 14890 0x20 0xc6 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r17, [p1]; MOVXM p1, #508996 + 14900 0x01 0x06 0x56 0x98 LDA r18, [p1] + 14904 0x00 0x00 NOPX + 14906 0x00 0x00 NOPX + 14908 0x00 0x00 NOPX + 14910 0x00 0x00 NOPX + 14912 0x00 0x00 NOPX + 14914 0x00 0x00 NOPX + 14916 0x14 0x63 0x28 0x98 NE r17, r17, r18 + 14920 0x88 0x1d 0xd0 0x40 0x01 0x84 JNZ r17, #15264 +.delay_slot +.swstall delay_slot + 14926 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14928 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14930 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14932 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14934 0x00 0x00 NOPX + 14936 0x08 0x02 0x80 0x3f 0x17 0xe8 0xb4 0x03 0x08 0xba MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 + 14946 0x3f 0xee 0xd0 0x00 0x01 0xf0 0x32 0x2e 0x10 0xba LDA r27, [p1], #-4; MOVXM p0, #509020 + 14956 0x01 0xfe 0x56 0x98 LDA r18, [p1], #-4 + 14960 0x01 0xfe 0x76 0x98 LDA r19, [p1], #-4 + 14964 0x01 0x56 0x96 0x98 LDA r20, [p1, #20] + 14968 0x00 0x00 NOPX + 14970 0x00 0x00 NOPX + 14972 0x00 0x00 NOPX + 14974 0x00 0x00 NOPX + 14976 0x00 0x00 NOPX + 14978 0x14 0xe5 0x22 0x18 SEL.EQZ r18, r19, r18, r27 + 14982 0x20 0xca 0x30 0x40 0x0a 0x5c ST r18, [p1]; MOVX r16, #1 + 14988 0x00 0x00 NOPX + 14990 0x00 0x00 NOPX + 14992 0x00 0x00 NOPX + 14994 0x00 0x00 NOPX + 14996 0x15 0x13 0x18 0x18 ACQ r20, r17 + 15000 0x00 0x00 NOPX + 15002 0x00 0x00 NOPX + 15004 0x00 0x00 NOPX + 15006 0x00 0x06 0x76 0x98 LDA r19, [p0] + 15010 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0] + 15014 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 15016 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.noswbrkpt + 15018 0x06 0x5c 0x1e 0x98 LDA p0, [p6], #20 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15022 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15024 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15026 0x29 0xc6 0xd0 0x27 0x38 0x6c 0x31 0x60 0x78 0xba LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15036 0x14 0xa1 0x07 0x98 EQ r16, r18, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15040 0x80 0x1d 0x88 0x40 0x01 0x84 JNZ r16, #15120 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15046 0x0f 0x80 0x8b 0x18 MOVS p7, p0 +.delay_slot +.swstall delay_slot + 15050 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15052 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15054 0x00 0x00 NOPX +.delay_slot + 15056 0xfe 0x13 0xb0 0x00 0xb4 0xe2 0xa0 0x02 ST p1, [sp, #-16]; ADD.NC p1, r19, r17 + 15064 0x90 0x1d 0x98 0x40 0x01 0x84 JNZ r18, #15152 +.delay_slot +.swstall delay_slot + 15070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15072 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15074 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15076 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15078 0x00 0x00 NOPX +.no_stack_arguments + 15080 0x00 0x15 0xf8 0x00 0x01 0x04 JL #11248 +.delay_slot + 15086 0x00 0x07 0xc6 0xca 0x80 0x44 MOVXM p3, #509248 +.delay_slot +.swstall delay_slot + 15092 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15094 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15096 0x00 0x00 NOPX +.delay_slot + 15098 0x00 0x2c 0xf4 0xc1 0x81 0xd4 NOPA; MOV p2, p0 +.return_address + 15104 0x00 0x1d 0x98 0x00 0x00 0x84 J #15152 +.delay_slot +.swstall delay_slot + 15110 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15112 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15114 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15116 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15118 0x00 0x00 NOPX +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880 +.no_stack_arguments + 15120 0x00 0x16 0x58 0x00 0x01 0x04 JL #11440 +.delay_slot + 15126 0x00 0x07 0xc6 0xcb 0x00 0x44 MOVXM p3, #509312 +.delay_slot + 15132 0x1a 0x60 0xc0 0xf8 MOV p2, p0 +.delay_slot +.swstall delay_slot + 15136 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15138 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15140 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912 +.return_address + 15152 0x07 0xf0 0x99 0x18 LDA p1, [sp, #-16] + 15156 0xfe 0x83 0x20 0x44 0x0a 0x2c LDA p0, [sp, #-12]; MOVX r17, #1 + 15162 0xe8 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x24 0x10 0xba LDA r16, [p7, #16]; MOVXM p7, #509000 + 15172 0x00 0x00 NOPX + 15174 0x00 0x00 NOPX + 15176 0x00 0x00 NOPX + 15178 0x00 0x00 NOPX + 15180 0x00 0x00 NOPX + 15182 0x00 0x00 NOPX + 15184 0x14 0x11 0x18 0x18 REL r16, r17 + 15188 0x01 0xf6 0x56 0x98 LDA r18, [p1, #-4] + 15192 0x00 0x56 0x16 0x98 LDA r16, [p0, #20] + 15196 0x00 0x00 NOPX + 15198 0x00 0x00 NOPX + 15200 0x00 0x00 NOPX + 15202 0x00 0x00 NOPX + 15204 0x00 0x00 NOPX + 15206 0x14 0x65 0x21 0x98 SUB r18, r17, r18 + 15210 0x09 0xf6 0x51 0x98 ST r18, [p1, #-4] + 15214 0x00 0x00 NOPX + 15216 0x00 0x00 NOPX + 15218 0x00 0x00 NOPX + 15220 0x00 0x00 NOPX + 15222 0x14 0x11 0x18 0x18 REL r16, r17 + 15226 0x06 0xe6 0x56 0x98 LDA r18, [p6, #-8] + 15230 0x00 0x00 NOPX + 15232 0x00 0x00 NOPX + 15234 0x00 0x1d 0xd8 0x00 0x00 0x84 J #15280 +.delay_slot +.swstall delay_slot + 15240 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15242 0x00 0x00 NOPX +.delay_slot + 15244 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot + 15248 0xe0 0xc2 0x38 0xc6 0x43 0x5c ST r16, [p7]; SUB r17, r17, r18 +.delay_slot + 15254 0x00 0x2c 0xf6 0xe6 0x31 0x80 0x00 0x00 0x00 0x7a NOPA; ST r17, [p6, #-8]; NOPX +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024 + 15264 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040 + 15280 0xfd 0x87 0x20 0x00 0x01 0xf3 0xb2 0x30 0x10 0xba LDA lr, [sp, #-20]; MOVXM p7, #509024 + 15290 0xe0 0xca 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r18, [p7]; MOVXM p6, #508992 + 15300 0x06 0x06 0x36 0x98 LDA r17, [p6] + 15304 0x00 0x00 NOPX + 15306 0x00 0x00 NOPX + 15308 0x00 0x00 NOPX + 15310 0x00 0x00 NOPX + 15312 0x00 0x00 NOPX + 15314 0x00 0x00 NOPX + 15316 0x14 0x63 0x28 0x98 NE r17, r17, r18 + 15320 0x88 0x1d 0xf8 0x40 0x01 0x84 JNZ r17, #15344 +.delay_slot + 15326 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 15330 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15332 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15334 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15336 0x00 0x00 NOPX + 15338 0x00 0x2c 0xfc 0x0c 0x23 0x0c NOPA; ST r16, [p6] +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104 + 15344 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] + 15348 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 15352 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 15358 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15360 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15362 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15364 0x00 0x00 NOPX +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0 + +.text_segment PM 15376 +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function_start + 15376 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 15380 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 15384 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 15388 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 15392 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 15396 0x00 0x0d 0x70 0x00 0x00 0x84 J #6880 +.delay_slot +.swstall delay_slot + 15402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15404 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15406 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15410 0x00 0x00 NOPX +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + +.text_segment PM 15424 +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function_start + 15424 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 15428 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 15432 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 15436 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 15440 0x00 0x10 0x18 0x00 0x00 0x84 J #8240 +.delay_slot +.swstall delay_slot + 15446 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15448 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15450 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15452 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15454 0x00 0x00 NOPX +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function_start + 15456 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 15460 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 15464 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 15468 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 15472 0x00 0x11 0xc8 0x00 0x00 0x84 J #9104 +.delay_slot +.swstall delay_slot + 15478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15480 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15484 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15486 0x00 0x00 NOPX +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function_start + 15488 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 15492 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 15496 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 15500 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 15504 0x00 0x14 0x88 0x00 0x00 0x84 J #10512 +.delay_slot +.swstall delay_slot + 15510 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15512 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15514 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15516 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15518 0x00 0x00 NOPX +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function_start + 15520 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 15524 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 15528 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 15532 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 15536 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 15540 0x00 0x16 0xf0 0x00 0x00 0x84 J #11744 +.delay_slot +.swstall delay_slot + 15546 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15548 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15550 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15552 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15554 0x00 0x00 NOPX +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + +.text_segment PM 15568 +.label __Z13_b924_wrapperPPv___func_begin0 +.label _Z13_b924_wrapperPPv +.function_start + 15568 0x1b 0x60 0xc0 0xf8 MOV p3, p0 + 15572 0x03 0x1c 0x1e 0x98 LDA p0, [p3], #4 + 15576 0x03 0x1c 0x9e 0x98 LDA p1, [p3], #4 + 15580 0x03 0x2d 0x1e 0x98 LDA p2, [p3], #8 + 15584 0x03 0xf6 0x1e 0x98 LDA p4, [p3, #-4] + 15588 0x03 0x05 0x9e 0x98 LDA p3, [p3] +.tail_call + 15592 0x00 0x1b 0xd0 0x00 0x00 0x84 J #14240 +.delay_slot +.swstall delay_slot + 15598 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15600 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15602 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15604 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15606 0x00 0x00 NOPX +.label _Z13_b924_wrapperPPv__end +.label __Z13_b924_wrapperPPv___func_end0 + +.text_segment PM 15616 +.label __Z13_b919_wrapperPPv___func_begin0 +.label _Z13_b919_wrapperPPv +.function_start + 15616 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 15620 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 15624 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 15628 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 15632 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 15636 0x00 0x1a 0xe0 0x00 0x00 0x84 J #13760 +.delay_slot +.swstall delay_slot + 15642 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15644 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15646 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15648 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15650 0x00 0x00 NOPX +.label _Z13_b919_wrapperPPv__end +.label __Z13_b919_wrapperPPv___func_end0 + +.text_segment PM 15664 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 15664 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 15670 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15674 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15678 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15682 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15686 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15690 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15694 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15698 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15702 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15706 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15710 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15714 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15718 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15722 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15726 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15730 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15734 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15738 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15742 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15746 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15750 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15754 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15758 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15762 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15766 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15770 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15774 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15778 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15782 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 15786 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15790 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15794 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15798 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15802 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.bss_segment DMb 508992 32 + +.data_segment DMb 509024 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 509028 4 + +.bss_segment DMb 509032 1 + +.rodata_segment DMb 509056 +.label _ZL20g_uniformKernelFuncs + 0x10 + 0x3c + 0x0 + 0x0 + 0x40 + 0x3c + 0x0 + 0x0 + 0x60 + 0x3c + 0x0 + 0x0 + 0x80 + 0x3c + 0x0 + 0x0 + 0xa0 + 0x3c + 0x0 + 0x0 + 0xd0 + 0x3c + 0x0 + 0x0 + 0x0 + 0x3d + 0x0 + 0x0 + +.bss_segment DMb 509120 1024 + +.stack DM_stack 506560 508928 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.map new file mode 100644 index 0000000000000000000000000000000000000000..b11a3b333f5cabeaaee231f81abbc9a33f2e051a --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.map @@ -0,0 +1,324 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:21 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 2368 + + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 3461 + + 0x00000000..0x0007babf ( 506560 items) : Reserved + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + 0x0007c400..0x0007c43f ( 64 items) : Reserved + 0x0007c440..0x0007c443 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c444..0x0007c447 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL14num_depth_iter (Data, Local, .bss.DMb.4) + 0x0007c448..0x0007c44b ( 4 items) : ../Release/0_0_reloadable5.o::_ZL10depth_iter (Data, Local, .bss.DMb.4) + 0x0007c44c..0x0007c44f ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11total_iters (Data, Local, .bss.DMb.4) + 0x0007c450..0x0007c453 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c454..0x0007c457 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c458..0x0007c45b ( 4 items) : ../Release/0_0_reloadable5.o::_ZL10ifmsv_size (Data, Local, .bss.DMb.4) + 0x0007c45c..0x0007c45f ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4) + 0x0007c460..0x0007c463 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c464..0x0007c467 ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c468..0x0007c468 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c480..0x0007c49b ( 28 items) : ../Release/0_0_reloadable5.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z13_b896_wrapperPPv + _Z13_b901_wrapperPPv + _Z13_b906_wrapperPPv + _Z13_b881_wrapperPPv + _Z13_b891_wrapperPPv + _Z13_b924_wrapperPPv + _Z13_b919_wrapperPPv + + 0x0007c4c0..0x0007c4ff ( 64 items) : ../Release/0_0_reloadable5.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable5.o::mul1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c540..0x0007c57f ( 64 items) : ../Release/0_0_reloadable5.o::add1d_params (Data, Global, .bss.DMb.64) + 0x0007c580..0x0007c5bf ( 64 items) : ../Release/0_0_reloadable5.o::mul1d_params (Data, Global, .bss.DMb.64) + 0x0007c5c0..0x0007c5ff ( 64 items) : ../Release/0_0_reloadable5.o::clip1d_params (Data, Global, .bss.DMb.64) + 0x0007c600..0x0007c7bf ( 448 items) : ../Release/0_0_reloadable5.o::conv2d_params (Data, Global, .bss.DMb.64) + 0x0007c7c0..0x0007c8bf ( 256 items) : ../Release/0_0_reloadable5.o::conv2d_dw_params (Data, Global, .bss.DMb.64) + 0x0007ccc0..0x000fffff ( 537408 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 13150 + + 0x00000000..0x0000092f ( 2352 items) : Reserved + 0x00000930..0x00000ab5 ( 390 items) : ../Release/0_0_reloadable5.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000ac0..0x00001055 ( 1430 items) : ../Release/0_0_reloadable5.o::_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (Function, Weak, .text) (stack frame size = 64) + 0x00001060..0x0000116d ( 270 items) : ../Release/0_0_reloadable5.o::_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001170..0x00001ad9 ( 2410 items) : ../Release/0_0_reloadable5.o::_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (Function, Weak, .text) (stack frame size = 128) + + Called functions : _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001ae0..0x00001d17 ( 568 items) : ../Release/0_0_reloadable5.o::_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + + Referenced symbols: _ZL9curr_iter + conv2d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL14num_depth_iter + _ZL8num_iter + _ZL10depth_iter + _ZL11total_iters + + 0x00001d20..0x00001d37 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + 0x00001d40..0x00001de1 ( 162 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + + 0x00001df0..0x00001e27 ( 56 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + 0x00001e30..0x00001e6d ( 62 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + + 0x00001e70..0x00001fa9 ( 314 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001fb0..0x00002021 ( 114 items) : ../Release/0_0_reloadable5.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + 0x00002030..0x00002217 ( 488 items) : ../Release/0_0_reloadable5.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002220..0x00002283 ( 100 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002290..0x00002381 ( 242 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + 0x00002390..0x00002577 ( 488 items) : ../Release/0_0_reloadable5.o::_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + clip1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002580..0x000025f3 ( 116 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002600..0x00002649 ( 74 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + + 0x00002650..0x00002865 ( 534 items) : ../Release/0_0_reloadable5.o::_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (Function, Local, .text) (stack frame size = 128) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002870..0x00002905 ( 150 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + 0x00002910..0x00002af7 ( 488 items) : ../Release/0_0_reloadable5.o::_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002b00..0x00002b73 ( 116 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002b80..0x00002be1 ( 98 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + + 0x00002bf0..0x00002bff ( 16 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + 0x00002c00..0x00002c17 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + 0x00002c20..0x00002ca9 ( 138 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + + 0x00002cb0..0x00002dd3 ( 292 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002de0..0x00003039 ( 602 items) : ../Release/0_0_reloadable5.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00003040..0x000032df ( 672 items) : ../Release/0_0_reloadable5.o::_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh (Function, Local, .text) (stack frame size = 64) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: conv2d_dw_params + _ZN12me_primitive11control_rndE + + 0x000032e0..0x000035b1 ( 722 items) : ../Release/0_0_reloadable5.o::_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x000035c0..0x0000379d ( 478 items) : ../Release/0_0_reloadable5.o::_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128) + + Called functions : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL8num_iter + _ZL10ifmsv_size + conv2d_dw_params + + 0x000037a0..0x00003c05 ( 1126 items) : ../Release/0_0_reloadable5.o::_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + conv2d_params + add1d_params + mul1d_params + _ZL14num_depth_iter + _ZL11ifm2_offset + _ZL8num_iter + _ZL10depth_iter + _ZL11total_iters + + 0x00003c10..0x00003c33 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b896_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003c40..0x00003c5f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b901_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003c60..0x00003c7f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b906_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003c80..0x00003c9f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b881_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003ca0..0x00003cc3 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b891_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00003cd0..0x00003cf7 ( 40 items) : ../Release/0_0_reloadable5.o::_Z13_b924_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE + + 0x00003d00..0x00003d23 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b919_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003d30..0x00003dbd ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x3dbe + _pc_start = 0x930 + _sp_end_DM_stack = 0x7c400 + _sp_start_DM_stack = 0x7bac0 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 2368 + ---------- ---------- + 2368 Total + +Section summary for memory 'DMb': + + .bss .data .rodata File + ---------- ---------- ---------- ---------- + 1056 4 28 ../Release/0_0_reloadable5.o + 5 0 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- ---------- + 1061 4 28 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 13008 ../Release/0_0_reloadable5.o + 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- + 13150 Total + +File summary: + +../Release/0_0_reloadable5.o + DMb 1088 + PM 13008 + +me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + PM 142 + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.sdr new file mode 100644 index 0000000000000000000000000000000000000000..029eac6b3129d1ccada1bf5bd7decb96296f96f7 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.sdr @@ -0,0 +1,129 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:21 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol _ZN12me_primitive11control_satE 0x0007c464 +_symbol _ZN12me_primitive11control_rndE 0x0007c468 +_symbol add1d_attribute_broadcasting_params 0x0007c4c0 +_symbol mul1d_attribute_broadcasting_params 0x0007c500 +_symbol add1d_params 0x0007c540 +_symbol mul1d_params 0x0007c580 +_symbol clip1d_params 0x0007c5c0 +_symbol conv2d_params 0x0007c600 +_symbol conv2d_dw_params 0x0007c7c0 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x00000930 +_symbol _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh 0x00000ac0 +_symbol _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams 0x00001060 +_symbol _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params 0x00001170 +_symbol _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001ae0 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00001d20 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001d40 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00001df0 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001e30 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00001e70 +_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00001fb0 +_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002030 +_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x00002220 +_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00002290 +_symbol _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002390 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv 0x00002580 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002600 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E 0x00002870 +_symbol _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002910 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv 0x00002b00 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00002b80 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00002bf0 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x00002c00 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002c20 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x00002cb0 +_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00002de0 +_symbol _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params 0x000032e0 +_symbol _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x000035c0 +_symbol _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE 0x000037a0 +_symbol _Z13_b896_wrapperPPv 0x00003c10 +_symbol _Z13_b901_wrapperPPv 0x00003c40 +_symbol _Z13_b906_wrapperPPv 0x00003c60 +_symbol _Z13_b881_wrapperPPv 0x00003c80 +_symbol _Z13_b891_wrapperPPv 0x00003ca0 +_symbol _Z13_b924_wrapperPPv 0x00003cd0 +_symbol _Z13_b919_wrapperPPv 0x00003d00 +_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x00003d30 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.srv new file mode 100644 index 0000000000000000000000000000000000000000..cc24263e196c609ab062129e37812e382b48d43f --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.srv @@ -0,0 +1,19187 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable5.cc" 94 first +.src_ref 0 "0_0_reloadable5.cc" 96 60 first +.src_ref 0 "0_0_reloadable5.cc" 96 110 +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.function_start + 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00100001" // /* MW 4 */ + 2355 "11010001" // /* MW 3 */ + 2356 "11000110" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 94 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ + 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "11010000" // /* MW 6 */ + 2367 "00101011" // /* MW 5 */ + 2368 "00000000" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "11110011" // /* MW 2 */ + 2371 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 96 110 +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2373 "01110000" // /* MW 7 */ + 2374 "10010000" // /* MW 6 */ + 2375 "11101000" // /* MW 5 */ + 2376 "00000001" // /* MW 4 */ + 2377 "10110000" // /* MW 3 */ + 2378 "10000111" // /* MW 2 */ + 2379 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 96 110 first + 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2381 "11100000" // /* MW 5 */ + 2382 "11000001" // /* MW 4 */ + 2383 "10110111" // /* MW 3 */ + 2384 "00000110" // /* MW 2 */ + 2385 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2387 "00100000" // /* MW 3 */ + 2388 "10011000" // /* MW 2 */ + 2389 "00011110" // /* MW 1 */ + 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2391 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2393 "10000010" // /* MW 3 */ + 2394 "01101000" // /* MW 2 */ + 2395 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2397 "00110110" // /* MW 3 */ + 2398 "00011110" // /* MW 2 */ + 2399 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2401 "01110110" // /* MW 3 */ + 2402 "00111110" // /* MW 2 */ + 2403 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2405 "01010110" // /* MW 3 */ + 2406 "11101110" // /* MW 2 */ + 2407 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2409 "01110110" // /* MW 3 */ + 2410 "00000111" // /* MW 2 */ + 2411 "00000111" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ + 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2423 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2425 "00110010" // /* MW 3 */ + 2426 "01100011" // /* MW 2 */ + 2427 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2429 "00110001" // /* MW 3 */ + 2430 "11010110" // /* MW 2 */ + 2431 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 + 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2433 "11111101" // /* MW 3 */ + 2434 "11100010" // /* MW 2 */ + 2435 "00010111" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ + 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2441 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2443 "00011000" // /* MW 3 */ + 2444 "10010111" // /* MW 2 */ + 2445 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2447 "00001001" // /* MW 3 */ + 2448 "00100100" // /* MW 2 */ + 2449 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 first + 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00101101" // /* MW 3 */ + 2452 "00101001" // /* MW 2 */ + 2453 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "00100000" // /* MW 3 */ + 2456 "10001010" // /* MW 2 */ + 2457 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2459 "10001011" // /* MW 5 */ + 2460 "11011000" // /* MW 4 */ + 2461 "11011111" // /* MW 3 */ + 2462 "01001110" // /* MW 2 */ + 2463 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2465 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2467 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2469 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2471 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2473 "00000101" // /* MW 3 */ + 2474 "00100110" // /* MW 2 */ + 2475 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2477 "11111100" // /* MW 3 */ + 2478 "11110100" // /* MW 2 */ + 2479 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2481 "00000000" // /* MW 7 */ + 2482 "11000001" // /* MW 6 */ + 2483 "10110100" // /* MW 5 */ + 2484 "00000011" // /* MW 4 */ + 2485 "10110000" // /* MW 3 */ + 2486 "01101010" // /* MW 2 */ + 2487 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2489 "01110110" // /* MW 3 */ + 2490 "00011110" // /* MW 2 */ + 2491 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2493 "10110110" // /* MW 3 */ + 2494 "00111110" // /* MW 2 */ + 2495 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2497 "10010110" // /* MW 3 */ + 2498 "11101110" // /* MW 2 */ + 2499 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2501 "01110110" // /* MW 3 */ + 2502 "00000111" // /* MW 2 */ + 2503 "00000111" // /* MW 1 */ + 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2505 "00000000" // /* MW 1 */ + 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2507 "00000000" // /* MW 1 */ + 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2509 "00000000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2517 "01010010" // /* MW 3 */ + 2518 "11100111" // /* MW 2 */ + 2519 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "01110001" // /* MW 3 */ + 2522 "11010110" // /* MW 2 */ + 2523 "00001111" // /* MW 1 */ + 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2525 "00000000" // /* MW 1 */ + 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2527 "00000000" // /* MW 1 */ + 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2529 "00000000" // /* MW 1 */ + 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2531 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2533 "00011000" // /* MW 3 */ + 2534 "00010111" // /* MW 2 */ + 2535 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 first + 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2537 "00101101" // /* MW 3 */ + 2538 "00100011" // /* MW 2 */ + 2539 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "10100000" // /* MW 3 */ + 2542 "10001000" // /* MW 2 */ + 2543 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000000" // /* MW 5 */ + 2546 "11001001" // /* MW 4 */ + 2547 "11001110" // /* MW 3 */ + 2548 "00000111" // /* MW 2 */ + 2549 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2551 "00101011" // /* MW 5 */ + 2552 "11010100" // /* MW 4 */ + 2553 "11011111" // /* MW 3 */ + 2554 "00010011" // /* MW 2 */ + 2555 "11100000" // /* MW 1 */ + 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2557 "00000000" // /* MW 1 */ + 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2559 "00000000" // /* MW 1 */ + 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2561 "00000000" // /* MW 1 */ + 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2563 "00000000" // /* MW 1 */ + 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2565 "00000000" // /* MW 1 */ + 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2567 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 4 +.no_stack_arguments + 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2569 "01000000" // /* MW 3 */ + 2570 "00110000" // /* MW 2 */ + 2571 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 105 60 +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.delay_slot + 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "11000000" // /* MW 3 */ + 2574 "01100000" // /* MW 2 */ + 2575 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2583 "01111110" // /* MW 9 */ + 2584 "10100101" // /* MW 8 */ + 2585 "00000001" // /* MW 7 */ + 2586 "00000000" // /* MW 6 */ + 2587 "00010000" // /* MW 5 */ + 2588 "00000000" // /* MW 4 */ + 2589 "11110000" // /* MW 3 */ + 2590 "00101100" // /* MW 2 */ + 2591 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 105 60 first +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.return_address + 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2593 "00001010" // /* MW 5 */ + 2594 "01000000" // /* MW 4 */ + 2595 "11010000" // /* MW 3 */ + 2596 "11000110" // /* MW 2 */ + 2597 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2599 "01010001" // /* MW 3 */ + 2600 "11101011" // /* MW 2 */ + 2601 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 + 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2603 "01000001" // /* MW 3 */ + 2604 "11101100" // /* MW 2 */ + 2605 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2607 "00101001" // /* MW 3 */ + 2608 "11110000" // /* MW 2 */ + 2609 "00000111" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ + 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2613 "00000000" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "10001000" // /* MW 3 */ + 2618 "01101000" // /* MW 2 */ + 2619 "00011001" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00110110" // /* MW 3 */ + 2622 "00000110" // /* MW 2 */ + 2623 "00000001" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ + 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2629 "00000000" // /* MW 1 */ + 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "00011100" // /* MW 3 */ + 2636 "10100000" // /* MW 2 */ + 2637 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "00001000" // /* MW 3 */ + 2640 "01010101" // /* MW 2 */ + 2641 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2643 "01000001" // /* MW 5 */ + 2644 "10101111" // /* MW 4 */ + 2645 "11011101" // /* MW 3 */ + 2646 "11000110" // /* MW 2 */ + 2647 "00111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 first + 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2649 "01010110" // /* MW 3 */ + 2650 "00000010" // /* MW 2 */ + 2651 "00000111" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ + 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2655 "00000000" // /* MW 1 */ + 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2657 "00000000" // /* MW 1 */ + 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2659 "00000000" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2663 "00010001" // /* MW 3 */ + 2664 "00100111" // /* MW 2 */ + 2665 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2667 "00010000" // /* MW 5 */ + 2668 "11010010" // /* MW 4 */ + 2669 "01000000" // /* MW 3 */ + 2670 "01100110" // /* MW 2 */ + 2671 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2673 "01100011" // /* MW 5 */ + 2674 "11101100" // /* MW 4 */ + 2675 "11010011" // /* MW 3 */ + 2676 "11000110" // /* MW 2 */ + 2677 "00000000" // /* MW 1 */ + 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2679 "00000000" // /* MW 1 */ + 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2681 "00000000" // /* MW 1 */ + 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2683 "00000000" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2691 "00001000" // /* MW 3 */ + 2692 "01010101" // /* MW 2 */ + 2693 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 + 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2695 "00111001" // /* MW 3 */ + 2696 "11111100" // /* MW 2 */ + 2697 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2699 "00110110" // /* MW 3 */ + 2700 "11110110" // /* MW 2 */ + 2701 "00000000" // /* MW 1 */ + 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "10011001" // /* MW 3 */ + 2704 "11110111" // /* MW 2 */ + 2705 "00000111" // /* MW 1 */ + 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11110001" // /* MW 3 */ + 2708 "11111001" // /* MW 2 */ + 2709 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 first + 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2711 "00000001" // /* MW 5 */ + 2712 "00000000" // /* MW 4 */ + 2713 "00000000" // /* MW 3 */ + 2714 "11111000" // /* MW 2 */ + 2715 "11111111" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ + 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2719 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 + 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2721 "00000000" // /* MW 3 */ + 2722 "00101000" // /* MW 2 */ + 2723 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "00011100" // /* MW 3 */ + 2726 "11100000" // /* MW 2 */ + 2727 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "00010001" // /* MW 3 */ + 2730 "00100001" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2733 "00000010" // /* MW 3 */ + 2734 "01100001" // /* MW 2 */ + 2735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2737 "00010001" // /* MW 3 */ + 2738 "11110110" // /* MW 2 */ + 2739 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2741 "00000000" // /* MW 1 */ +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 432 first +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.function_start + 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2753 "01111000" // /* MW 9 */ + 2754 "01100000" // /* MW 8 */ + 2755 "01001001" // /* MW 7 */ + 2756 "10001000" // /* MW 6 */ + 2757 "01000000" // /* MW 5 */ + 2758 "00000000" // /* MW 4 */ + 2759 "11010000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2763 "01001000" // /* MW 9 */ + 2764 "10000010" // /* MW 8 */ + 2765 "00110000" // /* MW 7 */ + 2766 "11101001" // /* MW 6 */ + 2767 "01010111" // /* MW 5 */ + 2768 "00111110" // /* MW 4 */ + 2769 "11010000" // /* MW 3 */ + 2770 "10000001" // /* MW 2 */ + 2771 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 432 +.src_ref 2 "conv2d_bf16_params.h" 444 52 + 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2773 "01110000" // /* MW 9 */ + 2774 "00000000" // /* MW 8 */ + 2775 "00000000" // /* MW 7 */ + 2776 "00000000" // /* MW 6 */ + 2777 "00000010" // /* MW 5 */ + 2778 "00000000" // /* MW 4 */ + 2779 "00000000" // /* MW 3 */ + 2780 "10000001" // /* MW 2 */ + 2781 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 30 +.src_ref 2 "conv2d_bf16_params.h" 458 30 + 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2783 "01011000" // /* MW 11 */ + 2784 "00010000" // /* MW 10 */ + 2785 "00000000" // /* MW 9 */ + 2786 "00101000" // /* MW 8 */ + 2787 "00000000" // /* MW 7 */ + 2788 "10000001" // /* MW 6 */ + 2789 "10110101" // /* MW 5 */ + 2790 "11111101" // /* MW 4 */ + 2791 "00000111" // /* MW 3 */ + 2792 "10000110" // /* MW 2 */ + 2793 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2795 "01011000" // /* MW 11 */ + 2796 "00001111" // /* MW 10 */ + 2797 "10001000" // /* MW 9 */ + 2798 "10101010" // /* MW 8 */ + 2799 "01010111" // /* MW 7 */ + 2800 "10111111" // /* MW 6 */ + 2801 "11010101" // /* MW 5 */ + 2802 "11111001" // /* MW 4 */ + 2803 "00000111" // /* MW 3 */ + 2804 "01100011" // /* MW 2 */ + 2805 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "00000010" // /* MW 5 */ + 2808 "01100000" // /* MW 4 */ + 2809 "10110000" // /* MW 3 */ + 2810 "10111110" // /* MW 2 */ + 2811 "11111110" // /* MW 1 */ + 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2815 "00101001" // /* MW 3 */ + 2816 "00011100" // /* MW 2 */ + 2817 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2819 "00001001" // /* MW 3 */ + 2820 "00011100" // /* MW 2 */ + 2821 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00101110" // /* MW 3 */ + 2824 "00011100" // /* MW 2 */ + 2825 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "00001110" // /* MW 3 */ + 2828 "00011100" // /* MW 2 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2841 "00101001" // /* MW 3 */ + 2842 "00011100" // /* MW 2 */ + 2843 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "00001001" // /* MW 3 */ + 2846 "00011100" // /* MW 2 */ + 2847 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00101110" // /* MW 3 */ + 2850 "00011100" // /* MW 2 */ + 2851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00001110" // /* MW 3 */ + 2854 "00011100" // /* MW 2 */ + 2855 "00000000" // /* MW 1 */ + 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2857 "00000000" // /* MW 1 */ + 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2859 "00000000" // /* MW 1 */ + 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2861 "00000000" // /* MW 1 */ + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2867 "00101001" // /* MW 3 */ + 2868 "00011100" // /* MW 2 */ + 2869 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001001" // /* MW 3 */ + 2872 "00011100" // /* MW 2 */ + 2873 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00001110" // /* MW 3 */ + 2876 "00000100" // /* MW 2 */ + 2877 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101110" // /* MW 3 */ + 2880 "00010100" // /* MW 2 */ + 2881 "00000000" // /* MW 1 */ + 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2883 "00000000" // /* MW 1 */ + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2893 "00001001" // /* MW 3 */ + 2894 "00000100" // /* MW 2 */ + 2895 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2897 "00101001" // /* MW 3 */ + 2898 "00010100" // /* MW 2 */ + 2899 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 40 first + 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "10101010" // /* MW 3 */ + 2902 "11011101" // /* MW 2 */ + 2903 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 447 34 first + 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2905 "00101010" // /* MW 3 */ + 2906 "00011110" // /* MW 2 */ + 2907 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 448 34 first + 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2909 "11001010" // /* MW 3 */ + 2910 "10111101" // /* MW 2 */ + 2911 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "11111010" // /* MW 3 */ + 2914 "11111101" // /* MW 2 */ + 2915 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first + 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "01101010" // /* MW 3 */ + 2918 "00001010" // /* MW 2 */ + 2919 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 20 first + 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2921 "11101010" // /* MW 3 */ + 2922 "10101100" // /* MW 2 */ + 2923 "00000010" // /* MW 1 */ + 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2925 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first + 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2927 "00011101" // /* MW 3 */ + 2928 "01000010" // /* MW 2 */ + 2929 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first +.src_ref 2 "conv2d_bf16_params.h" 462 7 first + 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2931 "00000001" // /* MW 5 */ + 2932 "00110001" // /* MW 4 */ + 2933 "11111001" // /* MW 3 */ + 2934 "00100000" // /* MW 2 */ + 2935 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "01011101" // /* MW 3 */ + 2938 "10100100" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2941 "01000111" // /* MW 3 */ + 2942 "11110110" // /* MW 2 */ + 2943 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2945 "00111001" // /* MW 5 */ + 2946 "10110111" // /* MW 4 */ + 2947 "01000000" // /* MW 3 */ + 2948 "01001010" // /* MW 2 */ + 2949 "11000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2951 "00100010" // /* MW 3 */ + 2952 "01111011" // /* MW 2 */ + 2953 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first + 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2955 "01100111" // /* MW 3 */ + 2956 "11001100" // /* MW 2 */ + 2957 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 + 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00000100" // /* MW 3 */ + 2960 "10110111" // /* MW 2 */ + 2961 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 first + 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2963 "01000001" // /* MW 5 */ + 2964 "10111011" // /* MW 4 */ + 2965 "10111100" // /* MW 3 */ + 2966 "11101011" // /* MW 2 */ + 2967 "01111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first + 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2969 "00000100" // /* MW 5 */ + 2970 "10011011" // /* MW 4 */ + 2971 "10110011" // /* MW 3 */ + 2972 "10111110" // /* MW 2 */ + 2973 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first + 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 2975 "00000001" // /* MW 5 */ + 2976 "01000000" // /* MW 4 */ + 2977 "11111000" // /* MW 3 */ + 2978 "00000101" // /* MW 2 */ + 2979 "11001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first +.delay_slot + 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2981 "01000111" // /* MW 3 */ + 2982 "10110110" // /* MW 2 */ + 2983 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first +.delay_slot + 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2985 "01000100" // /* MW 3 */ + 2986 "01110001" // /* MW 2 */ + 2987 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.delay_slot + 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "01011101" // /* MW 3 */ + 2990 "11111100" // /* MW 2 */ + 2991 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 first +.delay_slot + 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2993 "01001101" // /* MW 3 */ + 2994 "11101000" // /* MW 2 */ + 2995 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.delay_slot + 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2997 "00110010" // /* MW 3 */ + 2998 "10001100" // /* MW 2 */ + 2999 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 + 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 3001 "00000001" // /* MW 5 */ + 3002 "01000000" // /* MW 4 */ + 3003 "11111000" // /* MW 3 */ + 3004 "00000101" // /* MW 2 */ + 3005 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */ + 3017 "00100000" // /* MW 9 */ + 3018 "00000000" // /* MW 8 */ + 3019 "00000000" // /* MW 7 */ + 3020 "10000100" // /* MW 6 */ + 3021 "00000001" // /* MW 5 */ + 3022 "00000000" // /* MW 4 */ + 3023 "00000000" // /* MW 3 */ + 3024 "00101111" // /* MW 2 */ + 3025 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3027 "01011000" // /* MW 9 */ + 3028 "00001100" // /* MW 8 */ + 3029 "10001000" // /* MW 7 */ + 3030 "10101011" // /* MW 6 */ + 3031 "01010111" // /* MW 5 */ + 3032 "00111110" // /* MW 4 */ + 3033 "00000000" // /* MW 3 */ + 3034 "00011010" // /* MW 2 */ + 3035 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3037 "01000001" // /* MW 5 */ + 3038 "00100000" // /* MW 4 */ + 3039 "00100001" // /* MW 3 */ + 3040 "01000010" // /* MW 2 */ + 3041 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.delay_slot + 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "00001101" // /* MW 3 */ + 3044 "00011010" // /* MW 2 */ + 3045 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.delay_slot + 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3047 "00111101" // /* MW 3 */ + 3048 "00001110" // /* MW 2 */ + 3049 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3051 "11100010" // /* MW 5 */ + 3052 "10010001" // /* MW 4 */ + 3053 "11111111" // /* MW 3 */ + 3054 "00101100" // /* MW 2 */ + 3055 "00000000" // /* MW 1 */ +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3057 "01011000" // /* MW 11 */ + 3058 "11111100" // /* MW 10 */ + 3059 "10001111" // /* MW 9 */ + 3060 "10001000" // /* MW 8 */ + 3061 "01010000" // /* MW 7 */ + 3062 "00000001" // /* MW 6 */ + 3063 "00001011" // /* MW 5 */ + 3064 "10000010" // /* MW 4 */ + 3065 "10000001" // /* MW 3 */ + 3066 "00000010" // /* MW 2 */ + 3067 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3069 "01011000" // /* MW 9 */ + 3070 "00001100" // /* MW 8 */ + 3071 "10001000" // /* MW 7 */ + 3072 "00001011" // /* MW 6 */ + 3073 "10100000" // /* MW 5 */ + 3074 "00000001" // /* MW 4 */ + 3075 "11100000" // /* MW 3 */ + 3076 "00011000" // /* MW 2 */ + 3077 "00100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3079 "01011000" // /* MW 9 */ + 3080 "00000001" // /* MW 8 */ + 3081 "11101000" // /* MW 7 */ + 3082 "10101001" // /* MW 6 */ + 3083 "01010111" // /* MW 5 */ + 3084 "00111110" // /* MW 4 */ + 3085 "00000000" // /* MW 3 */ + 3086 "00000010" // /* MW 2 */ + 3087 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 87 + 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3089 "00000000" // /* MW 15 */ + 3090 "00000000" // /* MW 14 */ + 3091 "01011000" // /* MW 13 */ + 3092 "00000011" // /* MW 12 */ + 3093 "10101000" // /* MW 11 */ + 3094 "11101001" // /* MW 10 */ + 3095 "01110001" // /* MW 9 */ + 3096 "00000000" // /* MW 8 */ + 3097 "01011011" // /* MW 7 */ + 3098 "00000001" // /* MW 6 */ + 3099 "00100000" // /* MW 5 */ + 3100 "00000000" // /* MW 4 */ + 3101 "11110000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.src_ref 2 "conv2d_bf16_params.h" 495 68 first +.src_ref 2 "conv2d_bf16_params.h" 495 112 +.src_ref 2 "conv2d_bf16_params.h" 682 38 + 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00111100" // /* MW 8 */ + 3107 "00000000" // /* MW 7 */ + 3108 "00111100" // /* MW 6 */ + 3109 "10110011" // /* MW 5 */ + 3110 "00011011" // /* MW 4 */ + 3111 "01010000" // /* MW 3 */ + 3112 "11000101" // /* MW 2 */ + 3113 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 first +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 495 112 + 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01011000" // /* MW 9 */ + 3116 "11001101" // /* MW 8 */ + 3117 "10000111" // /* MW 7 */ + 3118 "00010010" // /* MW 6 */ + 3119 "00101101" // /* MW 5 */ + 3120 "00000011" // /* MW 4 */ + 3121 "01010000" // /* MW 3 */ + 3122 "00000101" // /* MW 2 */ + 3123 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 first +.src_ref 2 "conv2d_bf16_params.h" 496 68 +.src_ref 2 "conv2d_bf16_params.h" 504 35 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 578 47 + 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3125 "01011000" // /* MW 9 */ + 3126 "00110111" // /* MW 8 */ + 3127 "10000000" // /* MW 7 */ + 3128 "10010001" // /* MW 6 */ + 3129 "11011010" // /* MW 5 */ + 3130 "00111011" // /* MW 4 */ + 3131 "00000000" // /* MW 3 */ + 3132 "01010111" // /* MW 2 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.src_ref 2 "conv2d_bf16_params.h" 504 45 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 +.src_ref 2 "conv2d_bf16_params.h" 519 42 +.src_ref 2 "conv2d_bf16_params.h" 700 34 + 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3135 "01011000" // /* MW 9 */ + 3136 "10111100" // /* MW 8 */ + 3137 "00000111" // /* MW 7 */ + 3138 "00111101" // /* MW 6 */ + 3139 "10110000" // /* MW 5 */ + 3140 "00101011" // /* MW 4 */ + 3141 "00000000" // /* MW 3 */ + 3142 "00000011" // /* MW 2 */ + 3143 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 492 25 first +.src_ref 2 "conv2d_bf16_params.h" 497 46 +.src_ref 2 "conv2d_bf16_params.h" 509 50 + 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3145 "01011000" // /* MW 9 */ + 3146 "01110000" // /* MW 8 */ + 3147 "10000000" // /* MW 7 */ + 3148 "01101100" // /* MW 6 */ + 3149 "01101100" // /* MW 5 */ + 3150 "00011111" // /* MW 4 */ + 3151 "00000000" // /* MW 3 */ + 3152 "00010000" // /* MW 2 */ + 3153 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 520 34 first + 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3155 "01011101" // /* MW 5 */ + 3156 "00011110" // /* MW 4 */ + 3157 "00001000" // /* MW 3 */ + 3158 "10010010" // /* MW 2 */ + 3159 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 +.src_ref 2 "conv2d_bf16_params.h" 520 48 + 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3161 "01011001" // /* MW 9 */ + 3162 "00110001" // /* MW 8 */ + 3163 "10000000" // /* MW 7 */ + 3164 "01101111" // /* MW 6 */ + 3165 "01100001" // /* MW 5 */ + 3166 "00101101" // /* MW 4 */ + 3167 "10110000" // /* MW 3 */ + 3168 "01011010" // /* MW 2 */ + 3169 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 +.src_ref 2 "conv2d_bf16_params.h" 507 42 first + 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3171 "00000101" // /* MW 5 */ + 3172 "00011111" // /* MW 4 */ + 3173 "00111100" // /* MW 3 */ + 3174 "10111010" // /* MW 2 */ + 3175 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 99 first + 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3177 "00010001" // /* MW 3 */ + 3178 "11000010" // /* MW 2 */ + 3179 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 610 64 +.src_ref 2 "conv2d_bf16_params.h" 709 96 + 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3181 "00011101" // /* MW 5 */ + 3182 "10100000" // /* MW 4 */ + 3183 "11110000" // /* MW 3 */ + 3184 "11000011" // /* MW 2 */ + 3185 "10001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first + 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3187 "00100001" // /* MW 3 */ + 3188 "10100011" // /* MW 2 */ + 3189 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 first + 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00011101" // /* MW 3 */ + 3192 "11111110" // /* MW 2 */ + 3193 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 506 48 +.src_ref 2 "conv2d_bf16_params.h" 519 42 first + 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3195 "01011001" // /* MW 9 */ + 3196 "01010111" // /* MW 8 */ + 3197 "10000000" // /* MW 7 */ + 3198 "11101110" // /* MW 6 */ + 3199 "11110001" // /* MW 5 */ + 3200 "00111011" // /* MW 4 */ + 3201 "00110000" // /* MW 3 */ + 3202 "01111110" // /* MW 2 */ + 3203 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 68 first +.src_ref 2 "conv2d_bf16_params.h" 504 35 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 + 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3205 "01011000" // /* MW 9 */ + 3206 "10110010" // /* MW 8 */ + 3207 "10000111" // /* MW 7 */ + 3208 "00111101" // /* MW 6 */ + 3209 "00110000" // /* MW 5 */ + 3210 "00101111" // /* MW 4 */ + 3211 "01010000" // /* MW 3 */ + 3212 "01010101" // /* MW 2 */ + 3213 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3215 "01111011" // /* MW 5 */ + 3216 "11001100" // /* MW 4 */ + 3217 "10111001" // /* MW 3 */ + 3218 "01001110" // /* MW 2 */ + 3219 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 first +.src_ref 2 "conv2d_bf16_params.h" 520 19 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3221 "01011000" // /* MW 9 */ + 3222 "11110110" // /* MW 8 */ + 3223 "00000000" // /* MW 7 */ + 3224 "00101101" // /* MW 6 */ + 3225 "01101011" // /* MW 5 */ + 3226 "00111111" // /* MW 4 */ + 3227 "11100000" // /* MW 3 */ + 3228 "01010100" // /* MW 2 */ + 3229 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 46 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3231 "01011000" // /* MW 9 */ + 3232 "01010000" // /* MW 8 */ + 3233 "10000111" // /* MW 7 */ + 3234 "00010000" // /* MW 6 */ + 3235 "00111000" // /* MW 5 */ + 3236 "00100111" // /* MW 4 */ + 3237 "01010000" // /* MW 3 */ + 3238 "01000011" // /* MW 2 */ + 3239 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3241 "01100111" // /* MW 3 */ + 3242 "11111110" // /* MW 2 */ + 3243 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "01100111" // /* MW 3 */ + 3246 "11100000" // /* MW 2 */ + 3247 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "00000101" // /* MW 3 */ + 3250 "11110111" // /* MW 2 */ + 3251 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3253 "01010100" // /* MW 3 */ + 3254 "11101011" // /* MW 2 */ + 3255 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3257 "01100001" // /* MW 5 */ + 3258 "10100000" // /* MW 4 */ + 3259 "11011000" // /* MW 3 */ + 3260 "10100011" // /* MW 2 */ + 3261 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 first +.src_ref 2 "conv2d_bf16_params.h" 507 34 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3263 "01001001" // /* MW 9 */ + 3264 "10000000" // /* MW 8 */ + 3265 "11001111" // /* MW 7 */ + 3266 "01101111" // /* MW 6 */ + 3267 "00101001" // /* MW 5 */ + 3268 "00011111" // /* MW 4 */ + 3269 "10110000" // /* MW 3 */ + 3270 "01000010" // /* MW 2 */ + 3271 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 first + 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3273 "00111011" // /* MW 5 */ + 3274 "01000110" // /* MW 4 */ + 3275 "00111111" // /* MW 3 */ + 3276 "11101010" // /* MW 2 */ + 3277 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3279 "01010000" // /* MW 7 */ + 3280 "10101000" // /* MW 6 */ + 3281 "00000000" // /* MW 5 */ + 3282 "00000010" // /* MW 4 */ + 3283 "00110000" // /* MW 3 */ + 3284 "01101010" // /* MW 2 */ + 3285 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 first +.src_ref 2 "conv2d_bf16_params.h" 509 19 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3287 "01111000" // /* MW 11 */ + 3288 "11001110" // /* MW 10 */ + 3289 "00001101" // /* MW 9 */ + 3290 "00101100" // /* MW 8 */ + 3291 "10110000" // /* MW 7 */ + 3292 "10100111" // /* MW 6 */ + 3293 "11110101" // /* MW 5 */ + 3294 "11100111" // /* MW 4 */ + 3295 "01010111" // /* MW 3 */ + 3296 "01001001" // /* MW 2 */ + 3297 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 19 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3299 "00010101" // /* MW 3 */ + 3300 "11100011" // /* MW 2 */ + 3301 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3303 "10000001" // /* MW 3 */ + 3304 "10110111" // /* MW 2 */ + 3305 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 47 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3307 "10010000" // /* MW 3 */ + 3308 "10110000" // /* MW 2 */ + 3309 "00010100" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 57 first + 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3317 "00100001" // /* MW 3 */ + 3318 "11100101" // /* MW 2 */ + 3319 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 + 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3321 "01010001" // /* MW 3 */ + 3322 "11001010" // /* MW 2 */ + 3323 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 48 first + 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3325 "01001010" // /* MW 3 */ + 3326 "10101010" // /* MW 2 */ + 3327 "00000010" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 62 + 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3341 "11100001" // /* MW 3 */ + 3342 "10100100" // /* MW 2 */ + 3343 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 + 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3345 "10111110" // /* MW 3 */ + 3346 "10100101" // /* MW 2 */ + 3347 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 first + 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3349 "00101101" // /* MW 3 */ + 3350 "10100100" // /* MW 2 */ + 3351 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3353 "00000000" // /* MW 5 */ + 3354 "10100000" // /* MW 4 */ + 3355 "00001101" // /* MW 3 */ + 3356 "00000001" // /* MW 2 */ + 3357 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3359 "00100000" // /* MW 3 */ + 3360 "11100101" // /* MW 2 */ + 3361 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3363 "00000000" // /* MW 5 */ + 3364 "10100000" // /* MW 4 */ + 3365 "00001101" // /* MW 3 */ + 3366 "11111111" // /* MW 2 */ + 3367 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 642 99 + 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3369 "11000001" // /* MW 5 */ + 3370 "00111111" // /* MW 4 */ + 3371 "10011001" // /* MW 3 */ + 3372 "11100100" // /* MW 2 */ + 3373 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 19 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "11100001" // /* MW 5 */ + 3376 "10111111" // /* MW 4 */ + 3377 "10111000" // /* MW 3 */ + 3378 "11100010" // /* MW 2 */ + 3379 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 512 64 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 first + 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3381 "00111011" // /* MW 5 */ + 3382 "11001110" // /* MW 4 */ + 3383 "00111001" // /* MW 3 */ + 3384 "11101110" // /* MW 2 */ + 3385 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3387 "00110001" // /* MW 3 */ + 3388 "10110101" // /* MW 2 */ + 3389 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3391 "10101101" // /* MW 3 */ + 3392 "00101001" // /* MW 2 */ + 3393 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 520 19 first + 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3395 "01100101" // /* MW 3 */ + 3396 "10110101" // /* MW 2 */ + 3397 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 36 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 first + 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3399 "00100000" // /* MW 5 */ + 3400 "01101001" // /* MW 4 */ + 3401 "00111111" // /* MW 3 */ + 3402 "01101010" // /* MW 2 */ + 3403 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 65 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 first +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3405 "10101000" // /* MW 9 */ + 3406 "10101000" // /* MW 8 */ + 3407 "11001110" // /* MW 7 */ + 3408 "01101111" // /* MW 6 */ + 3409 "01001001" // /* MW 5 */ + 3410 "00110111" // /* MW 4 */ + 3411 "01010000" // /* MW 3 */ + 3412 "01100101" // /* MW 2 */ + 3413 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3415 "11111001" // /* MW 5 */ + 3416 "10100011" // /* MW 4 */ + 3417 "10111000" // /* MW 3 */ + 3418 "10100011" // /* MW 2 */ + 3419 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 45 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3421 "00011111" // /* MW 5 */ + 3422 "01101011" // /* MW 4 */ + 3423 "11101101" // /* MW 3 */ + 3424 "01100100" // /* MW 2 */ + 3425 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3427 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3429 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 48 first +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3435 "11001010" // /* MW 5 */ + 3436 "10110101" // /* MW 4 */ + 3437 "10111101" // /* MW 3 */ + 3438 "01011111" // /* MW 2 */ + 3439 "10000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3441 "00000001" // /* MW 5 */ + 3442 "01000000" // /* MW 4 */ + 3443 "11111000" // /* MW 3 */ + 3444 "00000110" // /* MW 2 */ + 3445 "11111000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 76 first +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3447 "11110010" // /* MW 5 */ + 3448 "10111011" // /* MW 4 */ + 3449 "11101101" // /* MW 3 */ + 3450 "01000001" // /* MW 2 */ + 3451 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3453 "01011101" // /* MW 3 */ + 3454 "11101011" // /* MW 2 */ + 3455 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 first +.delay_slot + 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3457 "00010100" // /* MW 3 */ + 3458 "01100011" // /* MW 2 */ + 3459 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.src_ref 2 "conv2d_bf16_params.h" 539 139 first +.src_ref 2 "conv2d_bf16_params.h" 555 59 +.src_ref 2 "conv2d_bf16_params.h" 559 59 +.src_ref 2 "conv2d_bf16_params.h" 700 17 +.delay_slot + 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3461 "01011001" // /* MW 9 */ + 3462 "00000001" // /* MW 8 */ + 3463 "00101000" // /* MW 7 */ + 3464 "00111110" // /* MW 6 */ + 3465 "10111110" // /* MW 5 */ + 3466 "00001101" // /* MW 4 */ + 3467 "00110000" // /* MW 3 */ + 3468 "01000110" // /* MW 2 */ + 3469 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3471 "10011100" // /* MW 3 */ + 3472 "10011011" // /* MW 2 */ + 3473 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3475 "10010001" // /* MW 3 */ + 3476 "11100011" // /* MW 2 */ + 3477 "00000111" // /* MW 1 */ + 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3479 "00000000" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3491 "00000001" // /* MW 5 */ + 3492 "01000000" // /* MW 4 */ + 3493 "11111000" // /* MW 3 */ + 3494 "00000110" // /* MW 2 */ + 3495 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3507 "01011000" // /* MW 9 */ + 3508 "01000000" // /* MW 8 */ + 3509 "00101000" // /* MW 7 */ + 3510 "10001011" // /* MW 6 */ + 3511 "00010000" // /* MW 5 */ + 3512 "00000001" // /* MW 4 */ + 3513 "00000000" // /* MW 3 */ + 3514 "10111100" // /* MW 2 */ + 3515 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "11010010" // /* MW 3 */ + 3518 "01111110" // /* MW 2 */ + 3519 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3521 "01100111" // /* MW 3 */ + 3522 "01110110" // /* MW 2 */ + 3523 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3525 "00000001" // /* MW 5 */ + 3526 "10100000" // /* MW 4 */ + 3527 "01001111" // /* MW 3 */ + 3528 "00111000" // /* MW 2 */ + 3529 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 46 + 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3531 "01010000" // /* MW 3 */ + 3532 "00110010" // /* MW 2 */ + 3533 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 44 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3535 "11101111" // /* MW 3 */ + 3536 "01111101" // /* MW 2 */ + 3537 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3539 "00111001" // /* MW 5 */ + 3540 "11000100" // /* MW 4 */ + 3541 "01011101" // /* MW 3 */ + 3542 "11100011" // /* MW 2 */ + 3543 "11001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3545 "10000010" // /* MW 3 */ + 3546 "11100011" // /* MW 2 */ + 3547 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 79 + 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3549 "11101111" // /* MW 3 */ + 3550 "01100011" // /* MW 2 */ + 3551 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "11000001" // /* MW 3 */ + 3554 "11111001" // /* MW 2 */ + 3555 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3557 "11001110" // /* MW 3 */ + 3558 "01100011" // /* MW 2 */ + 3559 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 55 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3561 "00011100" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00000000" // /* MW 5 */ + 3564 "10000001" // /* MW 4 */ + 3565 "00010100" // /* MW 3 */ + 3566 "00100011" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01111000" // /* MW 9 */ + 3570 "00001110" // /* MW 8 */ + 3571 "01110000" // /* MW 7 */ + 3572 "11101011" // /* MW 6 */ + 3573 "11000111" // /* MW 5 */ + 3574 "00111111" // /* MW 4 */ + 3575 "00000000" // /* MW 3 */ + 3576 "00011001" // /* MW 2 */ + 3577 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3579 "11000010" // /* MW 3 */ + 3580 "01111111" // /* MW 2 */ + 3581 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 34 first +.src_ref 2 "conv2d_bf16_params.h" 641 32 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3583 "10101000" // /* MW 9 */ + 3584 "01110100" // /* MW 8 */ + 3585 "01001111" // /* MW 7 */ + 3586 "10000011" // /* MW 6 */ + 3587 "00000100" // /* MW 5 */ + 3588 "00100001" // /* MW 4 */ + 3589 "00100000" // /* MW 3 */ + 3590 "01101110" // /* MW 2 */ + 3591 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 61 first +.src_ref 2 "conv2d_bf16_params.h" 640 16 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3593 "01011000" // /* MW 9 */ + 3594 "00001001" // /* MW 8 */ + 3595 "10101000" // /* MW 7 */ + 3596 "10000011" // /* MW 6 */ + 3597 "01000100" // /* MW 5 */ + 3598 "00101001" // /* MW 4 */ + 3599 "00000000" // /* MW 3 */ + 3600 "00011110" // /* MW 2 */ + 3601 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3603 "11100010" // /* MW 3 */ + 3604 "01110011" // /* MW 2 */ + 3605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 47 first + 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3607 "10001000" // /* MW 3 */ + 3608 "11111001" // /* MW 2 */ + 3609 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 640 16 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00111101" // /* MW 3 */ + 3612 "01111011" // /* MW 2 */ + 3613 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3615 "00010000" // /* MW 9 */ + 3616 "00000100" // /* MW 8 */ + 3617 "00001010" // /* MW 7 */ + 3618 "00000011" // /* MW 6 */ + 3619 "00000000" // /* MW 5 */ + 3620 "00000000" // /* MW 4 */ + 3621 "00100000" // /* MW 3 */ + 3622 "11011110" // /* MW 2 */ + 3623 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 641 44 first +.src_ref 2 "conv2d_bf16_params.h" 642 45 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "11111111" // /* MW 5 */ + 3626 "00111010" // /* MW 4 */ + 3627 "10111111" // /* MW 3 */ + 3628 "11100111" // /* MW 2 */ + 3629 "11001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "11100110" // /* MW 3 */ + 3632 "11001111" // /* MW 2 */ + 3633 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 55 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3635 "00101001" // /* MW 5 */ + 3636 "10101000" // /* MW 4 */ + 3637 "00001011" // /* MW 3 */ + 3638 "11010010" // /* MW 2 */ + 3639 "10110100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3641 "00000001" // /* MW 5 */ + 3642 "00100001" // /* MW 4 */ + 3643 "01001101" // /* MW 3 */ + 3644 "10110000" // /* MW 2 */ + 3645 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3647 "00111001" // /* MW 5 */ + 3648 "11000010" // /* MW 4 */ + 3649 "00011101" // /* MW 3 */ + 3650 "10110101" // /* MW 2 */ + 3651 "00110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 99 first + 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3653 "00100100" // /* MW 3 */ + 3654 "11001111" // /* MW 2 */ + 3655 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3657 "01000001" // /* MW 5 */ + 3658 "10100110" // /* MW 4 */ + 3659 "01001101" // /* MW 3 */ + 3660 "11011110" // /* MW 2 */ + 3661 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3663 "01111101" // /* MW 5 */ + 3664 "00100000" // /* MW 4 */ + 3665 "01001001" // /* MW 3 */ + 3666 "00001000" // /* MW 2 */ + 3667 "00101001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 first + 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3669 "00100100" // /* MW 3 */ + 3670 "11101111" // /* MW 2 */ + 3671 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 15 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3673 "01011000" // /* MW 9 */ + 3674 "01110000" // /* MW 8 */ + 3675 "01001111" // /* MW 7 */ + 3676 "01101110" // /* MW 6 */ + 3677 "01000010" // /* MW 5 */ + 3678 "00100000" // /* MW 4 */ + 3679 "00000000" // /* MW 3 */ + 3680 "00011110" // /* MW 2 */ + 3681 "11011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3683 "00100010" // /* MW 3 */ + 3684 "10111101" // /* MW 2 */ + 3685 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 85 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3687 "01011000" // /* MW 9 */ + 3688 "00100000" // /* MW 8 */ + 3689 "00001001" // /* MW 7 */ + 3690 "11111110" // /* MW 6 */ + 3691 "10101001" // /* MW 5 */ + 3692 "00101111" // /* MW 4 */ + 3693 "00000000" // /* MW 3 */ + 3694 "00000101" // /* MW 2 */ + 3695 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3697 "01010010" // /* MW 3 */ + 3698 "00100000" // /* MW 2 */ + 3699 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 559 59 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3701 "11110010" // /* MW 5 */ + 3702 "10111101" // /* MW 4 */ + 3703 "11111101" // /* MW 3 */ + 3704 "00001001" // /* MW 2 */ + 3705 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 41 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3707 "00111001" // /* MW 5 */ + 3708 "11000100" // /* MW 4 */ + 3709 "10111101" // /* MW 3 */ + 3710 "00111111" // /* MW 2 */ + 3711 "10000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 117 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3713 "01011111" // /* MW 5 */ + 3714 "01101011" // /* MW 4 */ + 3715 "10110111" // /* MW 3 */ + 3716 "11101110" // /* MW 2 */ + 3717 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 700 34 first + 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3719 "00110010" // /* MW 3 */ + 3720 "10000100" // /* MW 2 */ + 3721 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 52 first + 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3723 "00001100" // /* MW 3 */ + 3724 "01111110" // /* MW 2 */ + 3725 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 92 first + 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3727 "10001111" // /* MW 3 */ + 3728 "00110001" // /* MW 2 */ + 3729 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 36 first + 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3731 "11000101" // /* MW 3 */ + 3732 "11110111" // /* MW 2 */ + 3733 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 64 first +.src_ref 2 "conv2d_bf16_params.h" 611 47 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 629 82 + 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3735 "01011000" // /* MW 11 */ + 3736 "00000000" // /* MW 10 */ + 3737 "10001001" // /* MW 9 */ + 3738 "11101110" // /* MW 8 */ + 3739 "11000000" // /* MW 7 */ + 3740 "10110111" // /* MW 6 */ + 3741 "10010101" // /* MW 5 */ + 3742 "11101110" // /* MW 4 */ + 3743 "00000111" // /* MW 3 */ + 3744 "00000011" // /* MW 2 */ + 3745 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 649 41 + 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3747 "00111001" // /* MW 5 */ + 3748 "10110111" // /* MW 4 */ + 3749 "01000000" // /* MW 3 */ + 3750 "00101000" // /* MW 2 */ + 3751 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3753 "00001100" // /* MW 5 */ + 3754 "10101100" // /* MW 4 */ + 3755 "00001111" // /* MW 3 */ + 3756 "00000000" // /* MW 2 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 60 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first + 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3759 "11001001" // /* MW 9 */ + 3760 "00111111" // /* MW 8 */ + 3761 "10001001" // /* MW 7 */ + 3762 "00111100" // /* MW 6 */ + 3763 "10110000" // /* MW 5 */ + 3764 "00011111" // /* MW 4 */ + 3765 "10110000" // /* MW 3 */ + 3766 "00010010" // /* MW 2 */ + 3767 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 53 +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 555 59 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3769 "11001000" // /* MW 11 */ + 3770 "01111111" // /* MW 10 */ + 3771 "11001100" // /* MW 9 */ + 3772 "10010010" // /* MW 8 */ + 3773 "11111111" // /* MW 7 */ + 3774 "10101101" // /* MW 6 */ + 3775 "10010001" // /* MW 5 */ + 3776 "00011100" // /* MW 4 */ + 3777 "10000010" // /* MW 3 */ + 3778 "10001100" // /* MW 2 */ + 3779 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 240 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3781 "01111001" // /* MW 9 */ + 3782 "10001110" // /* MW 8 */ + 3783 "01110000" // /* MW 7 */ + 3784 "11101111" // /* MW 6 */ + 3785 "01010111" // /* MW 5 */ + 3786 "00101011" // /* MW 4 */ + 3787 "00110000" // /* MW 3 */ + 3788 "01011010" // /* MW 2 */ + 3789 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 53 first +.src_ref 2 "conv2d_bf16_params.h" 559 53 +.src_ref 2 "conv2d_bf16_params.h" 621 140 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3791 "01011000" // /* MW 11 */ + 3792 "01011000" // /* MW 10 */ + 3793 "00000000" // /* MW 9 */ + 3794 "00001110" // /* MW 8 */ + 3795 "01001110" // /* MW 7 */ + 3796 "10101001" // /* MW 6 */ + 3797 "01010001" // /* MW 5 */ + 3798 "00011111" // /* MW 4 */ + 3799 "00000010" // /* MW 3 */ + 3800 "11011001" // /* MW 2 */ + 3801 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 53 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3803 "00100100" // /* MW 5 */ + 3804 "11100011" // /* MW 4 */ + 3805 "00111111" // /* MW 3 */ + 3806 "01100010" // /* MW 2 */ + 3807 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 47 first +.src_ref 2 "conv2d_bf16_params.h" 621 222 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3809 "01111000" // /* MW 11 */ + 3810 "10010000" // /* MW 10 */ + 3811 "01101001" // /* MW 9 */ + 3812 "00001111" // /* MW 8 */ + 3813 "11001110" // /* MW 7 */ + 3814 "10101011" // /* MW 6 */ + 3815 "10010001" // /* MW 5 */ + 3816 "11101111" // /* MW 4 */ + 3817 "00100010" // /* MW 3 */ + 3818 "01101110" // /* MW 2 */ + 3819 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 661 61 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3821 "11001000" // /* MW 9 */ + 3822 "11111111" // /* MW 8 */ + 3823 "10001100" // /* MW 7 */ + 3824 "00010010" // /* MW 6 */ + 3825 "11001110" // /* MW 5 */ + 3826 "00101001" // /* MW 4 */ + 3827 "00000000" // /* MW 3 */ + 3828 "11110011" // /* MW 2 */ + 3829 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 710 60 +.src_ref 2 "conv2d_bf16_params.h" 710 65 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3831 "01111000" // /* MW 9 */ + 3832 "10001110" // /* MW 8 */ + 3833 "01110000" // /* MW 7 */ + 3834 "01110011" // /* MW 6 */ + 3835 "11101010" // /* MW 5 */ + 3836 "00111011" // /* MW 4 */ + 3837 "00000000" // /* MW 3 */ + 3838 "00011101" // /* MW 2 */ + 3839 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000100" // /* MW 5 */ + 3842 "11001010" // /* MW 4 */ + 3843 "00101110" // /* MW 3 */ + 3844 "11101110" // /* MW 2 */ + 3845 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 649 41 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111000" // /* MW 9 */ + 3848 "10010000" // /* MW 8 */ + 3849 "01101001" // /* MW 7 */ + 3850 "10010011" // /* MW 6 */ + 3851 "00111001" // /* MW 5 */ + 3852 "00111111" // /* MW 4 */ + 3853 "00000000" // /* MW 3 */ + 3854 "00011111" // /* MW 2 */ + 3855 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00100010" // /* MW 3 */ + 3858 "11000100" // /* MW 2 */ + 3859 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 82 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3861 "01010001" // /* MW 3 */ + 3862 "11101011" // /* MW 2 */ + 3863 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 611 47 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3865 "01011001" // /* MW 9 */ + 3866 "11000000" // /* MW 8 */ + 3867 "01101111" // /* MW 7 */ + 3868 "10010000" // /* MW 6 */ + 3869 "00100111" // /* MW 5 */ + 3870 "00000100" // /* MW 4 */ + 3871 "00110000" // /* MW 3 */ + 3872 "10001110" // /* MW 2 */ + 3873 "01000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3875 "00110010" // /* MW 3 */ + 3876 "00111000" // /* MW 2 */ + 3877 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 643 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3879 "01111111" // /* MW 3 */ + 3880 "11111110" // /* MW 2 */ + 3881 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3883 "01100100" // /* MW 5 */ + 3884 "00001100" // /* MW 4 */ + 3885 "00101110" // /* MW 3 */ + 3886 "11000110" // /* MW 2 */ + 3887 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 47 first +.src_ref 2 "conv2d_bf16_params.h" 629 45 +.src_ref 2 "conv2d_bf16_params.h" 684 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3889 "01011001" // /* MW 9 */ + 3890 "00101000" // /* MW 8 */ + 3891 "10000000" // /* MW 7 */ + 3892 "01111100" // /* MW 6 */ + 3893 "00101001" // /* MW 5 */ + 3894 "00110101" // /* MW 4 */ + 3895 "00110000" // /* MW 3 */ + 3896 "10001110" // /* MW 2 */ + 3897 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 45 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3899 "11100100" // /* MW 5 */ + 3900 "00001101" // /* MW 4 */ + 3901 "00110001" // /* MW 3 */ + 3902 "01010110" // /* MW 2 */ + 3903 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 644 22 +.src_ref 2 "conv2d_bf16_params.h" 700 17 first +.src_ref 2 "conv2d_bf16_params.h" 705 50 +.src_ref 2 "conv2d_bf16_params.h" 705 61 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "10101000" // /* MW 9 */ + 3906 "11111100" // /* MW 8 */ + 3907 "10101001" // /* MW 7 */ + 3908 "11111110" // /* MW 6 */ + 3909 "00111000" // /* MW 5 */ + 3910 "00000110" // /* MW 4 */ + 3911 "00100000" // /* MW 3 */ + 3912 "00000010" // /* MW 2 */ + 3913 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 38 first +.src_ref 2 "conv2d_bf16_params.h" 700 111 +.src_ref 2 "conv2d_bf16_params.h" 700 149 +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "00000110" // /* MW 9 */ + 3916 "00000110" // /* MW 8 */ + 3917 "00000101" // /* MW 7 */ + 3918 "10000000" // /* MW 6 */ + 3919 "00010001" // /* MW 5 */ + 3920 "00011111" // /* MW 4 */ + 3921 "00100010" // /* MW 3 */ + 3922 "11000110" // /* MW 2 */ + 3923 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 +.src_ref 2 "conv2d_bf16_params.h" 649 38 first +.src_ref 2 "conv2d_bf16_params.h" 674 24 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3925 "00010001" // /* MW 9 */ + 3926 "11111000" // /* MW 8 */ + 3927 "01101111" // /* MW 7 */ + 3928 "00111110" // /* MW 6 */ + 3929 "00000000" // /* MW 5 */ + 3930 "00000000" // /* MW 4 */ + 3931 "00110000" // /* MW 3 */ + 3932 "11001110" // /* MW 2 */ + 3933 "01001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 first +.src_ref 2 "conv2d_bf16_params.h" 662 61 +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3935 "11001001" // /* MW 9 */ + 3936 "10111111" // /* MW 8 */ + 3937 "01001011" // /* MW 7 */ + 3938 "10100100" // /* MW 6 */ + 3939 "01001001" // /* MW 5 */ + 3940 "00111111" // /* MW 4 */ + 3941 "00110000" // /* MW 3 */ + 3942 "11010010" // /* MW 2 */ + 3943 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 663 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3945 "10011100" // /* MW 5 */ + 3946 "01010110" // /* MW 4 */ + 3947 "00110001" // /* MW 3 */ + 3948 "11000110" // /* MW 2 */ + 3949 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first + 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3951 "10000001" // /* MW 5 */ + 3952 "01111010" // /* MW 4 */ + 3953 "00111111" // /* MW 3 */ + 3954 "10001010" // /* MW 2 */ + 3955 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3957 "11100011" // /* MW 5 */ + 3958 "01110011" // /* MW 4 */ + 3959 "00111000" // /* MW 3 */ + 3960 "11111010" // /* MW 2 */ + 3961 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3963 "01011001" // /* MW 9 */ + 3964 "00000000" // /* MW 8 */ + 3965 "01100000" // /* MW 7 */ + 3966 "00110000" // /* MW 6 */ + 3967 "11111000" // /* MW 5 */ + 3968 "00101101" // /* MW 4 */ + 3969 "00110000" // /* MW 3 */ + 3970 "11010110" // /* MW 2 */ + 3971 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3973 "11001001" // /* MW 9 */ + 3974 "01111111" // /* MW 8 */ + 3975 "00101100" // /* MW 7 */ + 3976 "01111110" // /* MW 6 */ + 3977 "00100000" // /* MW 5 */ + 3978 "00111110" // /* MW 4 */ + 3979 "00110000" // /* MW 3 */ + 3980 "10001100" // /* MW 2 */ + 3981 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 705 50 first +.src_ref 2 "conv2d_bf16_params.h" 705 61 first + 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3983 "00001100" // /* MW 5 */ + 3984 "10111000" // /* MW 4 */ + 3985 "00111000" // /* MW 3 */ + 3986 "10001100" // /* MW 2 */ + 3987 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 +.src_ref 2 "conv2d_bf16_params.h" 674 24 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 682 38 +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.src_ref 2 "conv2d_bf16_params.h" 720 50 + 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3989 "01011001" // /* MW 9 */ + 3990 "00000000" // /* MW 8 */ + 3991 "01001000" // /* MW 7 */ + 3992 "00100100" // /* MW 6 */ + 3993 "00000001" // /* MW 5 */ + 3994 "00100111" // /* MW 4 */ + 3995 "00110000" // /* MW 3 */ + 3996 "11011010" // /* MW 2 */ + 3997 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3999 "01111001" // /* MW 9 */ + 4000 "00001110" // /* MW 8 */ + 4001 "01110000" // /* MW 7 */ + 4002 "10001111" // /* MW 6 */ + 4003 "00011111" // /* MW 5 */ + 4004 "00000101" // /* MW 4 */ + 4005 "00110000" // /* MW 3 */ + 4006 "11110010" // /* MW 2 */ + 4007 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 707 61 first + 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4009 "11011111" // /* MW 5 */ + 4010 "10111001" // /* MW 4 */ + 4011 "00111011" // /* MW 3 */ + 4012 "10010010" // /* MW 2 */ + 4013 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 674 22 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4015 "01011001" // /* MW 9 */ + 4016 "00000110" // /* MW 8 */ + 4017 "00001000" // /* MW 7 */ + 4018 "10001100" // /* MW 6 */ + 4019 "00001111" // /* MW 5 */ + 4020 "00100001" // /* MW 4 */ + 4021 "00110000" // /* MW 3 */ + 4022 "11000110" // /* MW 2 */ + 4023 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4025 "01111000" // /* MW 11 */ + 4026 "10010000" // /* MW 10 */ + 4027 "01101001" // /* MW 9 */ + 4028 "00010011" // /* MW 8 */ + 4029 "00000000" // /* MW 7 */ + 4030 "10011011" // /* MW 6 */ + 4031 "00010001" // /* MW 5 */ + 4032 "00011110" // /* MW 4 */ + 4033 "00000010" // /* MW 3 */ + 4034 "00000000" // /* MW 2 */ + 4035 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4037 "10100100" // /* MW 5 */ + 4038 "00010100" // /* MW 4 */ + 4039 "00100000" // /* MW 3 */ + 4040 "00010110" // /* MW 2 */ + 4041 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 691 56 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4043 "10101111" // /* MW 3 */ + 4044 "01100011" // /* MW 2 */ + 4045 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 709 71 first + 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4047 "01011001" // /* MW 9 */ + 4048 "11001000" // /* MW 8 */ + 4049 "00000111" // /* MW 7 */ + 4050 "01101101" // /* MW 6 */ + 4051 "00001000" // /* MW 5 */ + 4052 "00000111" // /* MW 4 */ + 4053 "00110000" // /* MW 3 */ + 4054 "10001100" // /* MW 2 */ + 4055 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 706 23 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4057 "11001000" // /* MW 11 */ + 4058 "11000001" // /* MW 10 */ + 4059 "10101000" // /* MW 9 */ + 4060 "11101101" // /* MW 8 */ + 4061 "11110111" // /* MW 7 */ + 4062 "10100000" // /* MW 6 */ + 4063 "01100001" // /* MW 5 */ + 4064 "01001000" // /* MW 4 */ + 4065 "00000010" // /* MW 3 */ + 4066 "01100011" // /* MW 2 */ + 4067 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 682 38 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4069 "01111011" // /* MW 5 */ + 4070 "11000000" // /* MW 4 */ + 4071 "00110110" // /* MW 3 */ + 4072 "00001010" // /* MW 2 */ + 4073 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 first + 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4075 "01000001" // /* MW 5 */ + 4076 "10001110" // /* MW 4 */ + 4077 "00111000" // /* MW 3 */ + 4078 "11011010" // /* MW 2 */ + 4079 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 + 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4081 "10011100" // /* MW 5 */ + 4082 "11001000" // /* MW 4 */ + 4083 "00111000" // /* MW 3 */ + 4084 "11001010" // /* MW 2 */ + 4085 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4087 "11011011" // /* MW 5 */ + 4088 "10010100" // /* MW 4 */ + 4089 "00110010" // /* MW 3 */ + 4090 "10010010" // /* MW 2 */ + 4091 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 706 28 first + 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4093 "01011001" // /* MW 9 */ + 4094 "11111101" // /* MW 8 */ + 4095 "00001111" // /* MW 7 */ + 4096 "00000100" // /* MW 6 */ + 4097 "00111000" // /* MW 5 */ + 4098 "00011010" // /* MW 4 */ + 4099 "00110000" // /* MW 3 */ + 4100 "10001110" // /* MW 2 */ + 4101 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4103 "00001110" // /* MW 3 */ + 4104 "11000000" // /* MW 2 */ + 4105 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 707 66 first + 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4107 "00011111" // /* MW 5 */ + 4108 "00010000" // /* MW 4 */ + 4109 "00110111" // /* MW 3 */ + 4110 "11001010" // /* MW 2 */ + 4111 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 709 96 first + 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4113 "00111011" // /* MW 5 */ + 4114 "00001100" // /* MW 4 */ + 4115 "00110000" // /* MW 3 */ + 4116 "10001100" // /* MW 2 */ + 4117 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 709 90 + 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4119 "00110001" // /* MW 9 */ + 4120 "11000110" // /* MW 8 */ + 4121 "00000011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "01100001" // /* MW 5 */ + 4124 "00011100" // /* MW 4 */ + 4125 "00100010" // /* MW 3 */ + 4126 "10110110" // /* MW 2 */ + 4127 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 707 50 first +.src_ref 2 "conv2d_bf16_params.h" 708 59 +.src_ref 2 "conv2d_bf16_params.h" 710 60 first +.src_ref 2 "conv2d_bf16_params.h" 710 65 first + 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4129 "11001000" // /* MW 11 */ + 4130 "00111111" // /* MW 10 */ + 4131 "00101000" // /* MW 9 */ + 4132 "00110000" // /* MW 8 */ + 4133 "01110000" // /* MW 7 */ + 4134 "10111010" // /* MW 6 */ + 4135 "10010001" // /* MW 5 */ + 4136 "00011100" // /* MW 4 */ + 4137 "00100010" // /* MW 3 */ + 4138 "00111010" // /* MW 2 */ + 4139 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 708 48 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4141 "10101111" // /* MW 9 */ + 4142 "01000001" // /* MW 8 */ + 4143 "00000001" // /* MW 7 */ + 4144 "10000000" // /* MW 6 */ + 4145 "00110001" // /* MW 5 */ + 4146 "00011100" // /* MW 4 */ + 4147 "00100010" // /* MW 3 */ + 4148 "10111110" // /* MW 2 */ + 4149 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 709 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first + 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4151 "00000000" // /* MW 5 */ + 4152 "01010000" // /* MW 4 */ + 4153 "00110000" // /* MW 3 */ + 4154 "10001110" // /* MW 2 */ + 4155 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 710 50 first +.delay_slot + 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4157 "11110001" // /* MW 3 */ + 4158 "01011100" // /* MW 2 */ + 4159 "00001010" // /* MW 1 */ +.delay_slot + 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4161 "00010001" // /* MW 3 */ + 4162 "00011100" // /* MW 2 */ + 4163 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 first +.delay_slot + 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4165 "01010001" // /* MW 3 */ + 4166 "00011100" // /* MW 2 */ + 4167 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.delay_slot + 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "01010001" // /* MW 3 */ + 4170 "00000100" // /* MW 2 */ + 4171 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 720 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first +.delay_slot + 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4173 "01110001" // /* MW 9 */ + 4174 "00000000" // /* MW 8 */ + 4175 "00000000" // /* MW 7 */ + 4176 "00000000" // /* MW 6 */ + 4177 "11111110" // /* MW 5 */ + 4178 "00111111" // /* MW 4 */ + 4179 "00110000" // /* MW 3 */ + 4180 "10001010" // /* MW 2 */ +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + 4181 "01000010" // /* MW 1 */ +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 689 first +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 704 12 +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.function_start + 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4193 "01111000" // /* MW 11 */ + 4194 "01100000" // /* MW 10 */ + 4195 "00001010" // /* MW 9 */ + 4196 "00001000" // /* MW 8 */ + 4197 "10000000" // /* MW 7 */ + 4198 "00000001" // /* MW 6 */ + 4199 "10001011" // /* MW 5 */ + 4200 "10000100" // /* MW 4 */ + 4201 "10000010" // /* MW 3 */ + 4202 "00000011" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 526 11 +.src_ref 2 "conv2d_bf16.h" 698 28 first +.src_ref 2 "conv2d_bf16.h" 704 12 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 + 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4205 "01100000" // /* MW 13 */ + 4206 "00001001" // /* MW 12 */ + 4207 "00100000" // /* MW 11 */ + 4208 "00100001" // /* MW 10 */ + 4209 "00000000" // /* MW 9 */ + 4210 "00110110" // /* MW 8 */ + 4211 "00000001" // /* MW 7 */ + 4212 "00110100" // /* MW 6 */ + 4213 "00101000" // /* MW 5 */ + 4214 "00101000" // /* MW 4 */ + 4215 "10001000" // /* MW 3 */ + 4216 "00000110" // /* MW 2 */ + 4217 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 702 37 + 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4219 "00010000" // /* MW 9 */ + 4220 "00110100" // /* MW 8 */ + 4221 "00110010" // /* MW 7 */ + 4222 "11110010" // /* MW 6 */ + 4223 "00000001" // /* MW 5 */ + 4224 "00000000" // /* MW 4 */ + 4225 "11010000" // /* MW 3 */ + 4226 "10010100" // /* MW 2 */ + 4227 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 43 +.src_ref 2 "conv2d_bf16.h" 702 4 first + 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4229 "00010000" // /* MW 9 */ + 4230 "01111000" // /* MW 8 */ + 4231 "01111000" // /* MW 7 */ + 4232 "00000100" // /* MW 6 */ + 4233 "00000000" // /* MW 5 */ + 4234 "00000000" // /* MW 4 */ + 4235 "11010000" // /* MW 3 */ + 4236 "10010000" // /* MW 2 */ + 4237 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 699 43 first +.src_ref 2 "conv2d_bf16.h" 702 4 + 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4239 "00010000" // /* MW 9 */ + 4240 "10010000" // /* MW 8 */ + 4241 "10111000" // /* MW 7 */ + 4242 "00000101" // /* MW 6 */ + 4243 "00000000" // /* MW 5 */ + 4244 "00000000" // /* MW 4 */ + 4245 "11010000" // /* MW 3 */ + 4246 "10000000" // /* MW 2 */ + 4247 "01100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 702 37 first + 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4249 "00000001" // /* MW 5 */ + 4250 "00000000" // /* MW 4 */ + 4251 "11010001" // /* MW 3 */ + 4252 "10000010" // /* MW 2 */ + 4253 "01111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 37 + 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4255 "00100010" // /* MW 3 */ + 4256 "00000100" // /* MW 2 */ + 4257 "00000100" // /* MW 1 */ + 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4259 "00000000" // /* MW 1 */ + 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4261 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 705 66 first + 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4263 "00000001" // /* MW 5 */ + 4264 "10000101" // /* MW 4 */ + 4265 "10000000" // /* MW 3 */ + 4266 "00001010" // /* MW 2 */ + 4267 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first + 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4269 "00010100" // /* MW 3 */ + 4270 "00110000" // /* MW 2 */ + 4271 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "00010100" // /* MW 3 */ + 4274 "00010100" // /* MW 2 */ + 4275 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 4 first +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4277 "11111101" // /* MW 5 */ + 4278 "11100000" // /* MW 4 */ + 4279 "10001010" // /* MW 3 */ + 4280 "00001010" // /* MW 2 */ + 4281 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4283 "00000000" // /* MW 5 */ + 4284 "11110101" // /* MW 4 */ + 4285 "10000000" // /* MW 3 */ + 4286 "00000010" // /* MW 2 */ + 4287 "11000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4289 "00010100" // /* MW 3 */ + 4290 "00010100" // /* MW 2 */ + 4291 "00111100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4293 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4295 "01111110" // /* MW 9 */ + 4296 "10100101" // /* MW 8 */ + 4297 "00000001" // /* MW 7 */ + 4298 "00000000" // /* MW 6 */ + 4299 "01010100" // /* MW 5 */ + 4300 "00000000" // /* MW 4 */ + 4301 "11110000" // /* MW 3 */ + 4302 "00101100" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "11000101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00101000" // /* MW 5 */ + 4316 "01100000" // /* MW 4 */ + 4317 "11111100" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "11000101" // /* MW 12 */ + 4325 "01000000" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.src_ref 2 "conv2d_bf16.h" 704 12 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00101000" // /* MW 5 */ + 4348 "00101000" // /* MW 4 */ + 4349 "11111000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "00000011" // /* MW 7 */ + 4362 "10000000" // /* MW 6 */ + 4363 "10101101" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "11000101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "00000011" // /* MW 7 */ + 4378 "00000000" // /* MW 6 */ + 4379 "00101001" // /* MW 5 */ + 4380 "01100000" // /* MW 4 */ + 4381 "11111100" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 3 "utils.h" 531 4 first +.src_ref 2 "conv2d_bf16.h" 706 18 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "11000101" // /* MW 12 */ + 4389 "01000000" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "00000011" // /* MW 7 */ + 4394 "00000000" // /* MW 6 */ + 4395 "00100011" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4401 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4403 "00000011" // /* MW 3 */ + 4404 "10000000" // /* MW 2 */ + 4405 "00001101" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4407 "01110000" // /* MW 7 */ + 4408 "11000101" // /* MW 6 */ + 4409 "00000001" // /* MW 5 */ + 4410 "00000000" // /* MW 4 */ + 4411 "01100000" // /* MW 3 */ + 4412 "00000000" // /* MW 2 */ + 4413 "00100000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4415 "10001010" // /* MW 3 */ + 4416 "10000001" // /* MW 2 */ + 4417 "00011000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4419 "00000011" // /* MW 3 */ + 4420 "00000000" // /* MW 2 */ + 4421 "00001011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first + 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4423 "01110000" // /* MW 7 */ + 4424 "11000101" // /* MW 6 */ + 4425 "00000001" // /* MW 5 */ + 4426 "00000000" // /* MW 4 */ + 4427 "01100000" // /* MW 3 */ + 4428 "00000000" // /* MW 2 */ + 4429 "10110000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.src_ref 2 "conv2d_bf16.h" 708 12 first + 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4431 "01110000" // /* MW 7 */ + 4432 "11000101" // /* MW 6 */ + 4433 "01000000" // /* MW 5 */ + 4434 "00000000" // /* MW 4 */ + 4435 "01100000" // /* MW 3 */ + 4436 "00000000" // /* MW 2 */ + 4437 "00100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4439 "00000011" // /* MW 3 */ + 4440 "00000000" // /* MW 2 */ + 4441 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.src_ref 2 "conv2d_bf16.h" 723 first + 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4443 "00000000" // /* MW 5 */ + 4444 "01010000" // /* MW 4 */ + 4445 "01100000" // /* MW 3 */ + 4446 "00000000" // /* MW 2 */ + 4447 "10110000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 708 12 first +.delay_slot + 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "00000011" // /* MW 3 */ + 4450 "00000000" // /* MW 2 */ + 4451 "00001001" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first +.delay_slot + 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4453 "00000011" // /* MW 3 */ + 4454 "00000000" // /* MW 2 */ + 4455 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + 4461 "00000000" // /* MW 1 */ +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.function_start + 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4465 "01100000" // /* MW 13 */ + 4466 "00010001" // /* MW 12 */ + 4467 "10010001" // /* MW 11 */ + 4468 "00001110" // /* MW 10 */ + 4469 "00000000" // /* MW 9 */ + 4470 "00000000" // /* MW 8 */ + 4471 "10000000" // /* MW 7 */ + 4472 "00000000" // /* MW 6 */ + 4473 "00100000" // /* MW 5 */ + 4474 "00111111" // /* MW 4 */ + 4475 "10000110" // /* MW 3 */ + 4476 "11100000" // /* MW 2 */ + 4477 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 241 95 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4479 "01111000" // /* MW 11 */ + 4480 "01100000" // /* MW 10 */ + 4481 "00101011" // /* MW 9 */ + 4482 "00001010" // /* MW 8 */ + 4483 "11000101" // /* MW 7 */ + 4484 "10111111" // /* MW 6 */ + 4485 "10010101" // /* MW 5 */ + 4486 "11110001" // /* MW 4 */ + 4487 "00000111" // /* MW 3 */ + 4488 "01110011" // /* MW 2 */ + 4489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 94 +.src_ref 2 "conv2d_bf16_params.h" 242 100 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 245 28 +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4491 "00001000" // /* MW 11 */ + 4492 "01000111" // /* MW 10 */ + 4493 "00110100" // /* MW 9 */ + 4494 "00101001" // /* MW 8 */ + 4495 "00010000" // /* MW 7 */ + 4496 "10000001" // /* MW 6 */ + 4497 "00110101" // /* MW 5 */ + 4498 "11011010" // /* MW 4 */ + 4499 "00000111" // /* MW 3 */ + 4500 "00011001" // /* MW 2 */ + 4501 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 240 68 first + 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4503 "00010000" // /* MW 11 */ + 4504 "00000000" // /* MW 10 */ + 4505 "10101000" // /* MW 9 */ + 4506 "00000011" // /* MW 8 */ + 4507 "01000000" // /* MW 7 */ + 4508 "10000000" // /* MW 6 */ + 4509 "00110101" // /* MW 5 */ + 4510 "11110101" // /* MW 4 */ + 4511 "11010111" // /* MW 3 */ + 4512 "11001010" // /* MW 2 */ + 4513 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.src_ref 2 "conv2d_bf16_params.h" 245 20 + 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4515 "10010000" // /* MW 11 */ + 4516 "11111111" // /* MW 10 */ + 4517 "11101111" // /* MW 9 */ + 4518 "11111111" // /* MW 8 */ + 4519 "01111111" // /* MW 7 */ + 4520 "10000000" // /* MW 6 */ + 4521 "11010101" // /* MW 5 */ + 4522 "11111101" // /* MW 4 */ + 4523 "10000111" // /* MW 3 */ + 4524 "00011000" // /* MW 2 */ + 4525 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4527 "01011000" // /* MW 11 */ + 4528 "11101100" // /* MW 10 */ + 4529 "00000111" // /* MW 9 */ + 4530 "00001010" // /* MW 8 */ + 4531 "01100001" // /* MW 7 */ + 4532 "10000001" // /* MW 6 */ + 4533 "10110101" // /* MW 5 */ + 4534 "11100001" // /* MW 4 */ + 4535 "00000111" // /* MW 3 */ + 4536 "10110100" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 39 +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.src_ref 2 "conv2d_bf16_params.h" 250 71 + 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4539 "01011000" // /* MW 11 */ + 4540 "11000100" // /* MW 10 */ + 4541 "10000111" // /* MW 9 */ + 4542 "11001010" // /* MW 8 */ + 4543 "01110111" // /* MW 7 */ + 4544 "10000111" // /* MW 6 */ + 4545 "11110101" // /* MW 5 */ + 4546 "11101101" // /* MW 4 */ + 4547 "00000111" // /* MW 3 */ + 4548 "10010101" // /* MW 2 */ + 4549 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 + 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4551 "01010000" // /* MW 7 */ + 4552 "01000000" // /* MW 6 */ + 4553 "10000000" // /* MW 5 */ + 4554 "00000011" // /* MW 4 */ + 4555 "10110000" // /* MW 3 */ + 4556 "01110011" // /* MW 2 */ + 4557 "11111111" // /* MW 1 */ + 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4559 "00111101" // /* MW 3 */ + 4560 "11100100" // /* MW 2 */ + 4561 "00001111" // /* MW 1 */ + 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4563 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 + 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "00100000" // /* MW 3 */ + 4566 "01011001" // /* MW 2 */ + 4567 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 241 95 first + 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "10011011" // /* MW 5 */ + 4570 "01110111" // /* MW 4 */ + 4571 "00110110" // /* MW 3 */ + 4572 "00110010" // /* MW 2 */ + 4573 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 first +.src_ref 2 "conv2d_bf16_params.h" 242 94 first + 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4575 "00101111" // /* MW 5 */ + 4576 "11110010" // /* MW 4 */ + 4577 "01011110" // /* MW 3 */ + 4578 "11111001" // /* MW 2 */ + 4579 "01011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 20 first + 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4581 "00101010" // /* MW 3 */ + 4582 "11001001" // /* MW 2 */ + 4583 "00000010" // /* MW 1 */ + 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4585 "00000000" // /* MW 1 */ + 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4587 "00000000" // /* MW 1 */ + 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4589 "00000000" // /* MW 1 */ + 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4591 "00000000" // /* MW 1 */ + 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4593 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 174 first + 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4595 "11101100" // /* MW 3 */ + 4596 "01110111" // /* MW 2 */ + 4597 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 + 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4599 "00110010" // /* MW 3 */ + 4600 "01011101" // /* MW 2 */ + 4601 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4603 "11001100" // /* MW 3 */ + 4604 "11110110" // /* MW 2 */ + 4605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 241 95 first +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4607 "11001111" // /* MW 5 */ + 4608 "10110111" // /* MW 4 */ + 4609 "11101110" // /* MW 3 */ + 4610 "01110000" // /* MW 2 */ + 4611 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 100 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4613 "00011101" // /* MW 3 */ + 4614 "01111111" // /* MW 2 */ + 4615 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4617 "11100010" // /* MW 3 */ + 4618 "01011000" // /* MW 2 */ + 4619 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 98 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4621 "11000101" // /* MW 3 */ + 4622 "11111001" // /* MW 2 */ + 4623 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 151 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "01100010" // /* MW 5 */ + 4626 "00111100" // /* MW 4 */ + 4627 "10011110" // /* MW 3 */ + 4628 "11111101" // /* MW 2 */ + 4629 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "11000010" // /* MW 3 */ + 4632 "01111001" // /* MW 2 */ + 4633 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 80 first + 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11001100" // /* MW 3 */ + 4636 "01111111" // /* MW 2 */ + 4637 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 117 first +.src_ref 2 "conv2d_bf16_params.h" 243 39 + 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4639 "11010001" // /* MW 5 */ + 4640 "11110111" // /* MW 4 */ + 4641 "00111110" // /* MW 3 */ + 4642 "01111110" // /* MW 2 */ + 4643 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 first +.src_ref 2 "conv2d_bf16_params.h" 245 28 first + 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4645 "00110001" // /* MW 5 */ + 4646 "10110010" // /* MW 4 */ + 4647 "01010100" // /* MW 3 */ + 4648 "01111001" // /* MW 2 */ + 4649 "01011101" // /* MW 1 */ + 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4651 "00000000" // /* MW 1 */ + 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4653 "00000000" // /* MW 1 */ + 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4655 "00000000" // /* MW 1 */ + 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4657 "00000000" // /* MW 1 */ + 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4659 "00000000" // /* MW 1 */ + 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4661 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 87 + 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4663 "11111100" // /* MW 5 */ + 4664 "10111110" // /* MW 4 */ + 4665 "00011111" // /* MW 3 */ + 4666 "10101101" // /* MW 2 */ + 4667 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4669 "00000001" // /* MW 5 */ + 4670 "01000000" // /* MW 4 */ + 4671 "01000000" // /* MW 3 */ + 4672 "00001001" // /* MW 2 */ + 4673 "01100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.delay_slot + 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4675 "01001000" // /* MW 3 */ + 4676 "10010011" // /* MW 2 */ + 4677 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4679 "10010000" // /* MW 3 */ + 4680 "11111110" // /* MW 2 */ + 4681 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4683 "01100100" // /* MW 3 */ + 4684 "01101101" // /* MW 2 */ + 4685 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4687 "01111100" // /* MW 3 */ + 4688 "11101111" // /* MW 2 */ + 4689 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 132 +.delay_slot + 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4691 "01100100" // /* MW 3 */ + 4692 "11100001" // /* MW 2 */ + 4693 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4695 "00000001" // /* MW 5 */ + 4696 "01000000" // /* MW 4 */ + 4697 "01000000" // /* MW 3 */ + 4698 "00001001" // /* MW 2 */ + 4699 "11101000" // /* MW 1 */ +.delay_slot + 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4701 "00011101" // /* MW 3 */ + 4702 "11101011" // /* MW 2 */ + 4703 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4711 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 + 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */ + 4713 "00100000" // /* MW 9 */ + 4714 "00000000" // /* MW 8 */ + 4715 "00000000" // /* MW 7 */ + 4716 "01010110" // /* MW 6 */ + 4717 "00000010" // /* MW 5 */ + 4718 "00000000" // /* MW 4 */ + 4719 "00000000" // /* MW 3 */ + 4720 "00111011" // /* MW 2 */ + 4721 "00000000" // /* MW 1 */ +.delay_slot + 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4723 "10011100" // /* MW 3 */ + 4724 "00011001" // /* MW 2 */ + 4725 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1849 12 +.delay_slot + 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4727 "00000101" // /* MW 3 */ + 4728 "00100110" // /* MW 2 */ + 4729 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4735 "00000000" // /* MW 1 */ +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 2 "conv2d_bf16_params.h" 250 71 first +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4737 "01111000" // /* MW 11 */ + 4738 "11001110" // /* MW 10 */ + 4739 "00001100" // /* MW 9 */ + 4740 "00111100" // /* MW 8 */ + 4741 "10111111" // /* MW 7 */ + 4742 "10101011" // /* MW 6 */ + 4743 "00011101" // /* MW 5 */ + 4744 "11101011" // /* MW 4 */ + 4745 "00000111" // /* MW 3 */ + 4746 "10010101" // /* MW 2 */ + 4747 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4749 "01011101" // /* MW 3 */ + 4750 "10101011" // /* MW 2 */ + 4751 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 + 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4753 "10010010" // /* MW 3 */ + 4754 "01101011" // /* MW 2 */ + 4755 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4757 "11100111" // /* MW 3 */ + 4758 "11110111" // /* MW 2 */ + 4759 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4761 "01000001" // /* MW 5 */ + 4762 "10110000" // /* MW 4 */ + 4763 "01001101" // /* MW 3 */ + 4764 "11110010" // /* MW 2 */ + 4765 "10101100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4767 "00110010" // /* MW 3 */ + 4768 "01100111" // /* MW 2 */ + 4769 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 first + 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4771 "01000100" // /* MW 3 */ + 4772 "00101001" // /* MW 2 */ + 4773 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4775 "11110000" // /* MW 3 */ + 4776 "00110110" // /* MW 2 */ + 4777 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 152 first + 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4779 "10001011" // /* MW 5 */ + 4780 "11001111" // /* MW 4 */ + 4781 "11111001" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 +.src_ref 2 "conv2d_bf16_params.h" 258 8 first + 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4785 "01100000" // /* MW 11 */ + 4786 "00000000" // /* MW 10 */ + 4787 "00010000" // /* MW 9 */ + 4788 "01011100" // /* MW 8 */ + 4789 "00000010" // /* MW 7 */ + 4790 "10111010" // /* MW 6 */ + 4791 "01110001" // /* MW 5 */ + 4792 "01101111" // /* MW 4 */ + 4793 "10000010" // /* MW 3 */ + 4794 "10010000" // /* MW 2 */ + 4795 "00000001" // /* MW 1 */ +.delay_slot + 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100111" // /* MW 3 */ + 4798 "10001010" // /* MW 2 */ + 4799 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4807 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 + 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4809 "11111110" // /* MW 5 */ + 4810 "00111111" // /* MW 4 */ + 4811 "11111010" // /* MW 3 */ + 4812 "11111111" // /* MW 2 */ + 4813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 first + 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "01000100" // /* MW 3 */ + 4816 "10100101" // /* MW 2 */ + 4817 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4819 "00011100" // /* MW 13 */ + 4820 "00000000" // /* MW 12 */ + 4821 "00000000" // /* MW 11 */ + 4822 "01010111" // /* MW 10 */ + 4823 "00011010" // /* MW 9 */ + 4824 "01000000" // /* MW 8 */ + 4825 "00000000" // /* MW 7 */ + 4826 "00000000" // /* MW 6 */ + 4827 "10100011" // /* MW 5 */ + 4828 "11101100" // /* MW 4 */ + 4829 "11110110" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.src_ref 2 "conv2d_bf16.h" 1841 65 first +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16.h" 1849 12 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4833 "01011000" // /* MW 9 */ + 4834 "11111101" // /* MW 8 */ + 4835 "11001111" // /* MW 7 */ + 4836 "10000010" // /* MW 6 */ + 4837 "01000100" // /* MW 5 */ + 4838 "00100111" // /* MW 4 */ + 4839 "11010000" // /* MW 3 */ + 4840 "11010010" // /* MW 2 */ + 4841 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1841 34 +.src_ref 2 "conv2d_bf16.h" 1842 36 +.src_ref 2 "conv2d_bf16.h" 1842 67 +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4843 "01011000" // /* MW 9 */ + 4844 "00100100" // /* MW 8 */ + 4845 "00000000" // /* MW 7 */ + 4846 "11111010" // /* MW 6 */ + 4847 "01011111" // /* MW 5 */ + 4848 "00101001" // /* MW 4 */ + 4849 "00000000" // /* MW 3 */ + 4850 "01010010" // /* MW 2 */ + 4851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 67 first +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4853 "01011000" // /* MW 11 */ + 4854 "11001100" // /* MW 10 */ + 4855 "00000111" // /* MW 9 */ + 4856 "00100110" // /* MW 8 */ + 4857 "01101011" // /* MW 7 */ + 4858 "10101011" // /* MW 6 */ + 4859 "00101101" // /* MW 5 */ + 4860 "11010000" // /* MW 4 */ + 4861 "11010111" // /* MW 3 */ + 4862 "01011010" // /* MW 2 */ + 4863 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1845 80 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4865 "01011000" // /* MW 11 */ + 4866 "11000100" // /* MW 10 */ + 4867 "00000000" // /* MW 9 */ + 4868 "11101010" // /* MW 8 */ + 4869 "00110111" // /* MW 7 */ + 4870 "10111111" // /* MW 6 */ + 4871 "11010101" // /* MW 5 */ + 4872 "11011110" // /* MW 4 */ + 4873 "11010111" // /* MW 3 */ + 4874 "01011110" // /* MW 2 */ + 4875 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 63 first + 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4877 "10110110" // /* MW 3 */ + 4878 "11111111" // /* MW 2 */ + 4879 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 first + 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4881 "11110110" // /* MW 3 */ + 4882 "10001011" // /* MW 2 */ + 4883 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4885 "10110110" // /* MW 3 */ + 4886 "00000110" // /* MW 2 */ + 4887 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1841 34 first + 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4889 "01011011" // /* MW 5 */ + 4890 "00100110" // /* MW 4 */ + 4891 "11011010" // /* MW 3 */ + 4892 "11010010" // /* MW 2 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4895 "11010110" // /* MW 3 */ + 4896 "00000111" // /* MW 2 */ + 4897 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first + 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4899 "00101101" // /* MW 3 */ + 4900 "10101101" // /* MW 2 */ + 4901 "00010101" // /* MW 1 */ + 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4903 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 80 first + 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00111110" // /* MW 3 */ + 4906 "01100111" // /* MW 2 */ + 4907 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 first + 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4909 "00011000" // /* MW 3 */ + 4910 "11100011" // /* MW 2 */ + 4911 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 12 + 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */ + 4913 "00000001" // /* MW 5 */ + 4914 "01000000" // /* MW 4 */ + 4915 "11010000" // /* MW 3 */ + 4916 "00001001" // /* MW 2 */ + 4917 "10001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first +.src_ref 2 "conv2d_bf16.h" 1842 75 first +.delay_slot + 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4919 "10110010" // /* MW 5 */ + 4920 "10110101" // /* MW 4 */ + 4921 "10111010" // /* MW 3 */ + 4922 "10100101" // /* MW 2 */ + 4923 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4925 "10110010" // /* MW 5 */ + 4926 "10010101" // /* MW 4 */ + 4927 "10110000" // /* MW 3 */ + 4928 "01100101" // /* MW 2 */ + 4929 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1841 34 first +.delay_slot + 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4931 "10100000" // /* MW 7 */ + 4932 "01101000" // /* MW 6 */ + 4933 "11001010" // /* MW 5 */ + 4934 "00000001" // /* MW 4 */ + 4935 "10110000" // /* MW 3 */ + 4936 "10000100" // /* MW 2 */ + 4937 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4939 "10000000" // /* MW 3 */ + 4940 "11010000" // /* MW 2 */ + 4941 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4943 "11111001" // /* MW 3 */ + 4944 "01101010" // /* MW 2 */ + 4945 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4947 "11010000" // /* MW 5 */ + 4948 "11001000" // /* MW 4 */ + 4949 "11001110" // /* MW 3 */ + 4950 "00000111" // /* MW 2 */ + 4951 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 first + 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4953 "10000000" // /* MW 5 */ + 4954 "10110100" // /* MW 4 */ + 4955 "01010000" // /* MW 3 */ + 4956 "11000100" // /* MW 2 */ + 4957 "11100000" // /* MW 1 */ + 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4961 "00000000" // /* MW 5 */ + 4962 "00100000" // /* MW 4 */ + 4963 "00001010" // /* MW 3 */ + 4964 "01111111" // /* MW 2 */ + 4965 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4967 "10010001" // /* MW 3 */ + 4968 "00000010" // /* MW 2 */ + 4969 "00011000" // /* MW 1 */ + 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4971 "11100000" // /* MW 3 */ + 4972 "00010101" // /* MW 2 */ + 4973 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4975 "01011111" // /* MW 3 */ + 4976 "01101010" // /* MW 2 */ + 4977 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4979 "00100101" // /* MW 5 */ + 4980 "00000001" // /* MW 4 */ + 4981 "11100000" // /* MW 3 */ + 4982 "11000110" // /* MW 2 */ + 4983 "11100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4985 "10000000" // /* MW 3 */ + 4986 "01111010" // /* MW 2 */ + 4987 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4989 "00010110" // /* MW 3 */ + 4990 "01000000" // /* MW 2 */ + 4991 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4993 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4995 "00000001" // /* MW 3 */ + 4996 "01000001" // /* MW 2 */ + 4997 "00011100" // /* MW 1 */ + 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4999 "00000000" // /* MW 1 */ + 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5001 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5003 "00110010" // /* MW 3 */ + 5004 "00000110" // /* MW 2 */ + 5005 "00000111" // /* MW 1 */ + 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5007 "00000000" // /* MW 1 */ + 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5009 "00000000" // /* MW 1 */ + 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5011 "00000000" // /* MW 1 */ + 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5013 "00000000" // /* MW 1 */ + 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5015 "00000000" // /* MW 1 */ + 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5017 "00000000" // /* MW 1 */ + 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "01101011" // /* MW 5 */ + 5020 "10100100" // /* MW 4 */ + 5021 "11111111" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.src_ref 2 "conv2d_bf16.h" 881 76 +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5025 "00010000" // /* MW 11 */ + 5026 "00110100" // /* MW 10 */ + 5027 "10110010" // /* MW 9 */ + 5028 "11110001" // /* MW 8 */ + 5029 "00000001" // /* MW 7 */ + 5030 "00000000" // /* MW 6 */ + 5031 "00001011" // /* MW 5 */ + 5032 "10001110" // /* MW 4 */ + 5033 "10000001" // /* MW 3 */ + 5034 "10010000" // /* MW 2 */ + 5035 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.src_ref 2 "conv2d_bf16.h" 876 51 first +.src_ref 2 "conv2d_bf16.h" 881 76 first +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5037 "01011000" // /* MW 11 */ + 5038 "00001011" // /* MW 10 */ + 5039 "01101000" // /* MW 9 */ + 5040 "10010010" // /* MW 8 */ + 5041 "00011001" // /* MW 7 */ + 5042 "00110011" // /* MW 6 */ + 5043 "10001011" // /* MW 5 */ + 5044 "10000100" // /* MW 4 */ + 5045 "01010000" // /* MW 3 */ + 5046 "01000101" // /* MW 2 */ + 5047 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5049 "01111000" // /* MW 9 */ + 5050 "01100000" // /* MW 8 */ + 5051 "10101010" // /* MW 7 */ + 5052 "01100101" // /* MW 6 */ + 5053 "10111001" // /* MW 5 */ + 5054 "00111001" // /* MW 4 */ + 5055 "00000000" // /* MW 3 */ + 5056 "10010110" // /* MW 2 */ + 5057 "01100001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 883 4 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5059 "01100111" // /* MW 3 */ + 5060 "00000110" // /* MW 2 */ + 5061 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5063 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 884 4 first +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5065 "00000001" // /* MW 5 */ + 5066 "00000000" // /* MW 4 */ + 5067 "00110000" // /* MW 3 */ + 5068 "00001000" // /* MW 2 */ + 5069 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5071 "00101101" // /* MW 3 */ + 5072 "01101011" // /* MW 2 */ + 5073 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.delay_slot + 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5075 "11111001" // /* MW 3 */ + 5076 "01101010" // /* MW 2 */ + 5077 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 first +.delay_slot + 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5079 "00010001" // /* MW 3 */ + 5080 "01100011" // /* MW 2 */ + 5081 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.delay_slot + 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5083 "00110101" // /* MW 5 */ + 5084 "00101100" // /* MW 4 */ + 5085 "10111010" // /* MW 3 */ + 5086 "01100101" // /* MW 2 */ + 5087 "10001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.delay_slot + 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5089 "00000000" // /* MW 15 */ + 5090 "00000000" // /* MW 14 */ + 5091 "10101000" // /* MW 13 */ + 5092 "11100010" // /* MW 12 */ + 5093 "10001011" // /* MW 11 */ + 5094 "00010001" // /* MW 10 */ + 5095 "10011010" // /* MW 9 */ + 5096 "00101100" // /* MW 8 */ + 5097 "01011011" // /* MW 7 */ + 5098 "00000001" // /* MW 6 */ + 5099 "00100000" // /* MW 5 */ + 5100 "00000000" // /* MW 4 */ + 5101 "11110000" // /* MW 3 */ + 5102 "00101100" // /* MW 2 */ + 5103 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.return_address + 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5105 "10011001" // /* MW 3 */ + 5106 "11010100" // /* MW 2 */ + 5107 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 first +.no_stack_arguments + 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5109 "00000001" // /* MW 5 */ + 5110 "00000000" // /* MW 4 */ + 5111 "00110000" // /* MW 3 */ + 5112 "00001000" // /* MW 2 */ + 5113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5117 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.delay_slot + 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5119 "10010000" // /* MW 3 */ + 5120 "01010110" // /* MW 2 */ + 5121 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5123 "10100000" // /* MW 3 */ + 5124 "01100110" // /* MW 2 */ + 5125 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5127 "00000000" // /* MW 9 */ + 5128 "00000000" // /* MW 8 */ + 5129 "00000000" // /* MW 7 */ + 5130 "00000000" // /* MW 6 */ + 5131 "00001011" // /* MW 5 */ + 5132 "10001111" // /* MW 4 */ + 5133 "11110000" // /* MW 3 */ + 5134 "00101100" // /* MW 2 */ + 5135 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.return_address + 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5137 "00001000" // /* MW 9 */ + 5138 "01100011" // /* MW 8 */ + 5139 "00110011" // /* MW 7 */ + 5140 "11101010" // /* MW 6 */ + 5141 "00110111" // /* MW 5 */ + 5142 "00000001" // /* MW 4 */ + 5143 "10000000" // /* MW 3 */ + 5144 "10011010" // /* MW 2 */ + 5145 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 886 4 +.src_ref 2 "conv2d_bf16.h" 896 23 first +.src_ref 2 "conv2d_bf16.h" 1123 71 + 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5147 "01100010" // /* MW 5 */ + 5148 "00110100" // /* MW 4 */ + 5149 "11010000" // /* MW 3 */ + 5150 "10000100" // /* MW 2 */ + 5151 "10000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "01000110" // /* MW 3 */ + 5154 "00011100" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5157 "00100110" // /* MW 3 */ + 5158 "00011110" // /* MW 2 */ + 5159 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "01000110" // /* MW 3 */ + 5162 "00011110" // /* MW 2 */ + 5163 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5165 "00000110" // /* MW 3 */ + 5166 "00011100" // /* MW 2 */ + 5167 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5169 "01100110" // /* MW 3 */ + 5170 "00011100" // /* MW 2 */ + 5171 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5173 "01100110" // /* MW 3 */ + 5174 "00011110" // /* MW 2 */ + 5175 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 first + 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5177 "11010110" // /* MW 3 */ + 5178 "00011110" // /* MW 2 */ + 5179 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5181 "00110110" // /* MW 3 */ + 5182 "00011110" // /* MW 2 */ + 5183 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5185 "10010110" // /* MW 3 */ + 5186 "00011111" // /* MW 2 */ + 5187 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "10110110" // /* MW 3 */ + 5190 "00011110" // /* MW 2 */ + 5191 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "11110110" // /* MW 3 */ + 5194 "00011110" // /* MW 2 */ + 5195 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "10011110" // /* MW 3 */ + 5198 "00011101" // /* MW 2 */ + 5199 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5201 "00100110" // /* MW 3 */ + 5202 "00011101" // /* MW 2 */ + 5203 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5205 "10100110" // /* MW 3 */ + 5206 "00011100" // /* MW 2 */ + 5207 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5209 "11000110" // /* MW 3 */ + 5210 "00011100" // /* MW 2 */ + 5211 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5213 "10100110" // /* MW 3 */ + 5214 "00011110" // /* MW 2 */ + 5215 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5217 "11010110" // /* MW 3 */ + 5218 "00011111" // /* MW 2 */ + 5219 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5221 "10110110" // /* MW 3 */ + 5222 "00011111" // /* MW 2 */ + 5223 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5225 "11100110" // /* MW 3 */ + 5226 "00011100" // /* MW 2 */ + 5227 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5229 "01001010" // /* MW 3 */ + 5230 "11000010" // /* MW 2 */ + 5231 "00000100" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 + 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5233 "10010001" // /* MW 3 */ + 5234 "11010010" // /* MW 2 */ + 5235 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5237 "01010110" // /* MW 3 */ + 5238 "00000100" // /* MW 2 */ + 5239 "00000100" // /* MW 1 */ + 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5241 "00000000" // /* MW 1 */ + 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5243 "00000000" // /* MW 1 */ + 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5245 "00000000" // /* MW 1 */ + 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5247 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5249 "00101100" // /* MW 3 */ + 5250 "11100111" // /* MW 2 */ + 5251 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 12 + 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */ + 5253 "00000001" // /* MW 5 */ + 5254 "01000000" // /* MW 4 */ + 5255 "00010000" // /* MW 3 */ + 5256 "00001100" // /* MW 2 */ + 5257 "10011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 +.delay_slot + 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5259 "11010000" // /* MW 5 */ + 5260 "11001000" // /* MW 4 */ + 5261 "11000100" // /* MW 3 */ + 5262 "00000111" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 first +.delay_slot + 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "10100111" // /* MW 3 */ + 5266 "00000101" // /* MW 2 */ + 5267 "00000010" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5269 "01110010" // /* MW 3 */ + 5270 "11010001" // /* MW 2 */ + 5271 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 71 first + 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5277 "01011000" // /* MW 9 */ + 5278 "10000100" // /* MW 8 */ + 5279 "10000000" // /* MW 7 */ + 5280 "00111111" // /* MW 6 */ + 5281 "10111001" // /* MW 5 */ + 5282 "00011011" // /* MW 4 */ + 5283 "00100000" // /* MW 3 */ + 5284 "01000011" // /* MW 2 */ + 5285 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 +.src_ref 2 "conv2d_bf16.h" 1154 80 + 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5287 "01011000" // /* MW 9 */ + 5288 "00111100" // /* MW 8 */ + 5289 "00000000" // /* MW 7 */ + 5290 "00001010" // /* MW 6 */ + 5291 "00100000" // /* MW 5 */ + 5292 "00111101" // /* MW 4 */ + 5293 "00000000" // /* MW 3 */ + 5294 "00010011" // /* MW 2 */ + 5295 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5297 "01111000" // /* MW 9 */ + 5298 "11010000" // /* MW 8 */ + 5299 "11100100" // /* MW 7 */ + 5300 "00001011" // /* MW 6 */ + 5301 "10100000" // /* MW 5 */ + 5302 "00000001" // /* MW 4 */ + 5303 "10000000" // /* MW 3 */ + 5304 "00010100" // /* MW 2 */ + 5305 "11111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 746 83 + 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5307 "01111000" // /* MW 11 */ + 5308 "11000000" // /* MW 10 */ + 5309 "10100111" // /* MW 9 */ + 5310 "00000001" // /* MW 8 */ + 5311 "11010100" // /* MW 7 */ + 5312 "00011011" // /* MW 6 */ + 5313 "01001011" // /* MW 5 */ + 5314 "00011100" // /* MW 4 */ + 5315 "10000010" // /* MW 3 */ + 5316 "10011000" // /* MW 2 */ + 5317 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.src_ref 2 "conv2d_bf16.h" 1199 26 +.src_ref 2 "conv2d_bf16.h" 1200 26 +.src_ref 2 "conv2d_bf16.h" 1201 26 +.src_ref 2 "conv2d_bf16.h" 1202 26 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5319 "01011000" // /* MW 11 */ + 5320 "00000111" // /* MW 10 */ + 5321 "11101000" // /* MW 9 */ + 5322 "10001001" // /* MW 8 */ + 5323 "11110111" // /* MW 7 */ + 5324 "00000001" // /* MW 6 */ + 5325 "01001011" // /* MW 5 */ + 5326 "00011100" // /* MW 4 */ + 5327 "00100110" // /* MW 3 */ + 5328 "10010110" // /* MW 2 */ + 5329 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 12 +.src_ref 2 "conv2d_bf16.h" 1218 20 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5331 "00010000" // /* MW 9 */ + 5332 "10100000" // /* MW 8 */ + 5333 "00110010" // /* MW 7 */ + 5334 "00000101" // /* MW 6 */ + 5335 "00000000" // /* MW 5 */ + 5336 "00000000" // /* MW 4 */ + 5337 "00100000" // /* MW 3 */ + 5338 "11001010" // /* MW 2 */ + 5339 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 749 26 +.src_ref 2 "conv2d_bf16.h" 750 26 +.src_ref 2 "conv2d_bf16.h" 751 26 +.src_ref 2 "conv2d_bf16.h" 752 26 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5341 "01011000" // /* MW 9 */ + 5342 "00001100" // /* MW 8 */ + 5343 "10001011" // /* MW 7 */ + 5344 "00010010" // /* MW 6 */ + 5345 "01101001" // /* MW 5 */ + 5346 "00110100" // /* MW 4 */ + 5347 "00100000" // /* MW 3 */ + 5348 "00110110" // /* MW 2 */ + 5349 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1873 + 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5351 "01011000" // /* MW 11 */ + 5352 "00000000" // /* MW 10 */ + 5353 "00001000" // /* MW 9 */ + 5354 "00001011" // /* MW 8 */ + 5355 "10010000" // /* MW 7 */ + 5356 "00000001" // /* MW 6 */ + 5357 "00100000" // /* MW 5 */ + 5358 "11010111" // /* MW 4 */ + 5359 "00101001" // /* MW 3 */ + 5360 "10000111" // /* MW 2 */ + 5361 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5363 "00010110" // /* MW 3 */ + 5364 "10001000" // /* MW 2 */ + 5365 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5367 "00100110" // /* MW 3 */ + 5368 "10101011" // /* MW 2 */ + 5369 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "01110110" // /* MW 3 */ + 5372 "00101111" // /* MW 2 */ + 5373 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 80 first + 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "10000110" // /* MW 3 */ + 5376 "00011110" // /* MW 2 */ + 5377 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 80 first + 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "11000110" // /* MW 3 */ + 5380 "10001010" // /* MW 2 */ + 5381 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 87 first + 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5383 "00000110" // /* MW 3 */ + 5384 "10011110" // /* MW 2 */ + 5385 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 83 first + 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5387 "00110110" // /* MW 3 */ + 5388 "00011100" // /* MW 2 */ + 5389 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 83 first +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5391 "00000010" // /* MW 5 */ + 5392 "00000110" // /* MW 4 */ + 5393 "11011101" // /* MW 3 */ + 5394 "00000010" // /* MW 2 */ + 5395 "10011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 66 first + 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "01110110" // /* MW 3 */ + 5398 "00010100" // /* MW 2 */ + 5399 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1206 63 first + 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5401 "10010110" // /* MW 3 */ + 5402 "00000100" // /* MW 2 */ + 5403 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 + 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5405 "00000000" // /* MW 3 */ + 5406 "11011010" // /* MW 2 */ + 5407 "00011001" // /* MW 1 */ + 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "10011001" // /* MW 3 */ + 5410 "10000011" // /* MW 2 */ + 5411 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 + 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5413 "00000000" // /* MW 3 */ + 5414 "00011011" // /* MW 2 */ + 5415 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5417 "10011001" // /* MW 3 */ + 5418 "00001101" // /* MW 2 */ + 5419 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 first + 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "11100000" // /* MW 3 */ + 5422 "00000011" // /* MW 2 */ + 5423 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 first + 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5425 "11000000" // /* MW 5 */ + 5426 "00010000" // /* MW 4 */ + 5427 "11101110" // /* MW 3 */ + 5428 "11111111" // /* MW 2 */ + 5429 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5431 "01111110" // /* MW 9 */ + 5432 "10000000" // /* MW 8 */ + 5433 "10000010" // /* MW 7 */ + 5434 "00000000" // /* MW 6 */ + 5435 "00010000" // /* MW 5 */ + 5436 "00000000" // /* MW 4 */ + 5437 "11110000" // /* MW 3 */ + 5438 "00101100" // /* MW 2 */ + 5439 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1147 31 first +.src_ref 2 "conv2d_bf16.h" 1187 40 first +.loop_nesting 1 + 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5441 "01111000" // /* MW 11 */ + 5442 "10010000" // /* MW 10 */ + 5443 "00110011" // /* MW 9 */ + 5444 "11101100" // /* MW 8 */ + 5445 "11100111" // /* MW 7 */ + 5446 "00000100" // /* MW 6 */ + 5447 "00001011" // /* MW 5 */ + 5448 "10000101" // /* MW 4 */ + 5449 "01110001" // /* MW 3 */ + 5450 "10000101" // /* MW 2 */ + 5451 "11000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1188 50 first + 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5453 "10100000" // /* MW 11 */ + 5454 "10011000" // /* MW 10 */ + 5455 "00110011" // /* MW 9 */ + 5456 "00000010" // /* MW 8 */ + 5457 "01001011" // /* MW 7 */ + 5458 "00001110" // /* MW 6 */ + 5459 "00101011" // /* MW 5 */ + 5460 "00101000" // /* MW 4 */ + 5461 "01111000" // /* MW 3 */ + 5462 "10000001" // /* MW 2 */ + 5463 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first + 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5465 "01110000" // /* MW 11 */ + 5466 "10000000" // /* MW 10 */ + 5467 "11000110" // /* MW 9 */ + 5468 "00000011" // /* MW 8 */ + 5469 "01001011" // /* MW 7 */ + 5470 "01011010" // /* MW 6 */ + 5471 "00101111" // /* MW 5 */ + 5472 "00101000" // /* MW 4 */ + 5473 "01111000" // /* MW 3 */ + 5474 "00111001" // /* MW 2 */ + 5475 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1149 31 first + 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5477 "01110000" // /* MW 11 */ + 5478 "00000000" // /* MW 10 */ + 5479 "10000010" // /* MW 9 */ + 5480 "00000001" // /* MW 8 */ + 5481 "00001011" // /* MW 7 */ + 5482 "01010011" // /* MW 6 */ + 5483 "00101011" // /* MW 5 */ + 5484 "00000011" // /* MW 4 */ + 5485 "01110100" // /* MW 3 */ + 5486 "00001101" // /* MW 2 */ + 5487 "11011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 743 30 first + 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5489 "01011110" // /* MW 9 */ + 5490 "00000000" // /* MW 8 */ + 5491 "11000000" // /* MW 7 */ + 5492 "00000001" // /* MW 6 */ + 5493 "11010100" // /* MW 5 */ + 5494 "00010010" // /* MW 4 */ + 5495 "01110100" // /* MW 3 */ + 5496 "01000001" // /* MW 2 */ + 5497 "01110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1152 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first + 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5499 "00010000" // /* MW 11 */ + 5500 "01000000" // /* MW 10 */ + 5501 "10111011" // /* MW 9 */ + 5502 "00000101" // /* MW 8 */ + 5503 "00000000" // /* MW 7 */ + 5504 "00000000" // /* MW 6 */ + 5505 "00101000" // /* MW 5 */ + 5506 "00101000" // /* MW 4 */ + 5507 "01111000" // /* MW 3 */ + 5508 "10010101" // /* MW 2 */ + 5509 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 1154 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 + 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5511 "00010000" // /* MW 11 */ + 5512 "00101000" // /* MW 10 */ + 5513 "01111011" // /* MW 9 */ + 5514 "00000100" // /* MW 8 */ + 5515 "00000000" // /* MW 7 */ + 5516 "00000000" // /* MW 6 */ + 5517 "00101000" // /* MW 5 */ + 5518 "00101000" // /* MW 4 */ + 5519 "01111000" // /* MW 3 */ + 5520 "00011101" // /* MW 2 */ + 5521 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first + 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5523 "00101000" // /* MW 5 */ + 5524 "00000001" // /* MW 4 */ + 5525 "01110100" // /* MW 3 */ + 5526 "10110101" // /* MW 2 */ + 5527 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1157 31 first + 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5529 "00101000" // /* MW 5 */ + 5530 "00100010" // /* MW 4 */ + 5531 "01111000" // /* MW 3 */ + 5532 "10100101" // /* MW 2 */ + 5533 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1159 31 first + 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5535 "00101000" // /* MW 5 */ + 5536 "00101000" // /* MW 4 */ + 5537 "01111000" // /* MW 3 */ + 5538 "00101101" // /* MW 2 */ + 5539 "11011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5541 "00101000" // /* MW 5 */ + 5542 "00101000" // /* MW 4 */ + 5543 "01111000" // /* MW 3 */ + 5544 "10000001" // /* MW 2 */ + 5545 "00100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1192 29 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5547 "00101000" // /* MW 5 */ + 5548 "00000001" // /* MW 4 */ + 5549 "01110100" // /* MW 3 */ + 5550 "10111101" // /* MW 2 */ + 5551 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "11101110" // /* MW 9 */ + 5554 "11000011" // /* MW 8 */ + 5555 "10011010" // /* MW 7 */ + 5556 "00000010" // /* MW 6 */ + 5557 "00010100" // /* MW 5 */ + 5558 "00010001" // /* MW 4 */ + 5559 "01110100" // /* MW 3 */ + 5560 "11001101" // /* MW 2 */ + 5561 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1162 81 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5563 "11100000" // /* MW 11 */ + 5564 "11000001" // /* MW 10 */ + 5565 "10011010" // /* MW 9 */ + 5566 "00000001" // /* MW 8 */ + 5567 "10001011" // /* MW 7 */ + 5568 "10011000" // /* MW 6 */ + 5569 "00101100" // /* MW 5 */ + 5570 "00101000" // /* MW 4 */ + 5571 "01111000" // /* MW 3 */ + 5572 "11000101" // /* MW 2 */ + 5573 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5575 "11101001" // /* MW 9 */ + 5576 "00010100" // /* MW 8 */ + 5577 "01001000" // /* MW 7 */ + 5578 "00011101" // /* MW 6 */ + 5579 "01010100" // /* MW 5 */ + 5580 "00000000" // /* MW 4 */ + 5581 "01110011" // /* MW 3 */ + 5582 "10000001" // /* MW 2 */ + 5583 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5585 "11101001" // /* MW 13 */ + 5586 "00101100" // /* MW 12 */ + 5587 "01001001" // /* MW 11 */ + 5588 "00000111" // /* MW 10 */ + 5589 "01011000" // /* MW 9 */ + 5590 "01011100" // /* MW 8 */ + 5591 "00000000" // /* MW 7 */ + 5592 "00000000" // /* MW 6 */ + 5593 "10010110" // /* MW 5 */ + 5594 "10010100" // /* MW 4 */ + 5595 "01110110" // /* MW 3 */ + 5596 "00110101" // /* MW 2 */ + 5597 "11001111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1162 81 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5599 "00001001" // /* MW 13 */ + 5600 "01010101" // /* MW 12 */ + 5601 "01001010" // /* MW 11 */ + 5602 "00111110" // /* MW 10 */ + 5603 "10010000" // /* MW 9 */ + 5604 "01001100" // /* MW 8 */ + 5605 "00000000" // /* MW 7 */ + 5606 "00000000" // /* MW 6 */ + 5607 "10010110" // /* MW 5 */ + 5608 "00111000" // /* MW 4 */ + 5609 "01111010" // /* MW 3 */ + 5610 "10111101" // /* MW 2 */ + 5611 "10000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1199 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5613 "00111101" // /* MW 13 */ + 5614 "01100000" // /* MW 12 */ + 5615 "11111000" // /* MW 11 */ + 5616 "00011110" // /* MW 10 */ + 5617 "10010000" // /* MW 9 */ + 5618 "01010100" // /* MW 8 */ + 5619 "00000000" // /* MW 7 */ + 5620 "00000000" // /* MW 6 */ + 5621 "10010110" // /* MW 5 */ + 5622 "10011000" // /* MW 4 */ + 5623 "01110100" // /* MW 3 */ + 5624 "00000001" // /* MW 2 */ + 5625 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1200 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5627 "00111101" // /* MW 7 */ + 5628 "01100100" // /* MW 6 */ + 5629 "11111001" // /* MW 5 */ + 5630 "00000100" // /* MW 4 */ + 5631 "01110000" // /* MW 3 */ + 5632 "10000001" // /* MW 2 */ + 5633 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1201 26 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5635 "00111101" // /* MW 7 */ + 5636 "10001000" // /* MW 6 */ + 5637 "11111010" // /* MW 5 */ + 5638 "00000100" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00001001" // /* MW 2 */ + 5641 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5643 "00001001" // /* MW 7 */ + 5644 "01101101" // /* MW 6 */ + 5645 "01001011" // /* MW 5 */ + 5646 "00000100" // /* MW 4 */ + 5647 "01110000" // /* MW 3 */ + 5648 "00000001" // /* MW 2 */ + 5649 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5651 "00101000" // /* MW 5 */ + 5652 "00000001" // /* MW 4 */ + 5653 "01110100" // /* MW 3 */ + 5654 "10000001" // /* MW 2 */ + 5655 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "00010100" // /* MW 3 */ + 5658 "00010001" // /* MW 2 */ + 5659 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1202 26 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5661 "00111101" // /* MW 11 */ + 5662 "10001100" // /* MW 10 */ + 5663 "11111011" // /* MW 9 */ + 5664 "10000010" // /* MW 8 */ + 5665 "01111101" // /* MW 7 */ + 5666 "01110010" // /* MW 6 */ + 5667 "00101101" // /* MW 5 */ + 5668 "00101000" // /* MW 4 */ + 5669 "01111000" // /* MW 3 */ + 5670 "00001001" // /* MW 2 */ + 5671 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00101001" // /* MW 9 */ + 5674 "00000110" // /* MW 8 */ + 5675 "10100000" // /* MW 7 */ + 5676 "00011101" // /* MW 6 */ + 5677 "00010100" // /* MW 5 */ + 5678 "00010100" // /* MW 4 */ + 5679 "01110100" // /* MW 3 */ + 5680 "00000001" // /* MW 2 */ + 5681 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5683 "00001001" // /* MW 13 */ + 5684 "01000110" // /* MW 12 */ + 5685 "10100010" // /* MW 11 */ + 5686 "00001111" // /* MW 10 */ + 5687 "10101010" // /* MW 9 */ + 5688 "01011000" // /* MW 8 */ + 5689 "00000000" // /* MW 7 */ + 5690 "00000000" // /* MW 6 */ + 5691 "00101000" // /* MW 5 */ + 5692 "00000001" // /* MW 4 */ + 5693 "01110100" // /* MW 3 */ + 5694 "10000001" // /* MW 2 */ + 5695 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5697 "01010001" // /* MW 15 */ + 5698 "00001001" // /* MW 14 */ + 5699 "11101101" // /* MW 13 */ + 5700 "00000011" // /* MW 12 */ + 5701 "11001001" // /* MW 11 */ + 5702 "00000000" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "00000000" // /* MW 8 */ + 5705 "01011011" // /* MW 7 */ + 5706 "00000001" // /* MW 6 */ + 5707 "00101000" // /* MW 5 */ + 5708 "00100010" // /* MW 4 */ + 5709 "11111000" // /* MW 3 */ + 5710 "00101100" // /* MW 2 */ + 5711 "00000000" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "01010000" // /* MW 15 */ + 5714 "00011011" // /* MW 14 */ + 5715 "11101101" // /* MW 13 */ + 5716 "00000001" // /* MW 12 */ + 5717 "01001001" // /* MW 11 */ + 5718 "00000001" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "00101000" // /* MW 5 */ + 5724 "00101000" // /* MW 4 */ + 5725 "01111000" // /* MW 3 */ + 5726 "00001001" // /* MW 2 */ + 5727 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00110001" // /* MW 15 */ + 5730 "00000000" // /* MW 14 */ + 5731 "01111101" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "01011011" // /* MW 7 */ + 5738 "00000001" // /* MW 6 */ + 5739 "00101000" // /* MW 5 */ + 5740 "00101000" // /* MW 4 */ + 5741 "01111000" // /* MW 3 */ + 5742 "00000001" // /* MW 2 */ + 5743 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00110000" // /* MW 15 */ + 5746 "00010010" // /* MW 14 */ + 5747 "01111101" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "01011011" // /* MW 7 */ + 5754 "00000001" // /* MW 6 */ + 5755 "00101000" // /* MW 5 */ + 5756 "00000001" // /* MW 4 */ + 5757 "01110100" // /* MW 3 */ + 5758 "10000001" // /* MW 2 */ + 5759 "00100010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "01010001" // /* MW 15 */ + 5762 "00001001" // /* MW 14 */ + 5763 "11101101" // /* MW 13 */ + 5764 "00000011" // /* MW 12 */ + 5765 "11001001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "01011011" // /* MW 7 */ + 5770 "00000001" // /* MW 6 */ + 5771 "00101000" // /* MW 5 */ + 5772 "00100010" // /* MW 4 */ + 5773 "11111000" // /* MW 3 */ + 5774 "00101100" // /* MW 2 */ + 5775 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5777 "00001001" // /* MW 13 */ + 5778 "01101010" // /* MW 12 */ + 5779 "10100011" // /* MW 11 */ + 5780 "00011110" // /* MW 10 */ + 5781 "10010000" // /* MW 9 */ + 5782 "01010100" // /* MW 8 */ + 5783 "00000000" // /* MW 7 */ + 5784 "00000000" // /* MW 6 */ + 5785 "10010110" // /* MW 5 */ + 5786 "10111100" // /* MW 4 */ + 5787 "01111100" // /* MW 3 */ + 5788 "00001001" // /* MW 2 */ + 5789 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5791 "00101001" // /* MW 13 */ + 5792 "00000110" // /* MW 12 */ + 5793 "10100000" // /* MW 11 */ + 5794 "00000111" // /* MW 10 */ + 5795 "00111000" // /* MW 9 */ + 5796 "01111100" // /* MW 8 */ + 5797 "00000000" // /* MW 7 */ + 5798 "00000000" // /* MW 6 */ + 5799 "10010110" // /* MW 5 */ + 5800 "00011100" // /* MW 4 */ + 5801 "01111110" // /* MW 3 */ + 5802 "00000001" // /* MW 2 */ + 5803 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5805 "00001001" // /* MW 9 */ + 5806 "01000110" // /* MW 8 */ + 5807 "10100010" // /* MW 7 */ + 5808 "11100100" // /* MW 6 */ + 5809 "00000000" // /* MW 5 */ + 5810 "01010101" // /* MW 4 */ + 5811 "01100001" // /* MW 3 */ + 5812 "10010001" // /* MW 2 */ + 5813 "01100001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5815 "00101001" // /* MW 9 */ + 5816 "00101010" // /* MW 8 */ + 5817 "10100001" // /* MW 7 */ + 5818 "11000100" // /* MW 6 */ + 5819 "00000111" // /* MW 5 */ + 5820 "10010010" // /* MW 4 */ + 5821 "01100001" // /* MW 3 */ + 5822 "11000001" // /* MW 2 */ + 5823 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5825 "00001001" // /* MW 9 */ + 5826 "01101010" // /* MW 8 */ + 5827 "10100011" // /* MW 7 */ + 5828 "11000100" // /* MW 6 */ + 5829 "00000011" // /* MW 5 */ + 5830 "10010010" // /* MW 4 */ + 5831 "01100010" // /* MW 3 */ + 5832 "10000001" // /* MW 2 */ + 5833 "11101011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1285 32 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5835 "00101001" // /* MW 11 */ + 5836 "00000110" // /* MW 10 */ + 5837 "10100000" // /* MW 9 */ + 5838 "11100110" // /* MW 8 */ + 5839 "00000000" // /* MW 7 */ + 5840 "10001111" // /* MW 6 */ + 5841 "00100010" // /* MW 5 */ + 5842 "01010111" // /* MW 4 */ + 5843 "01101111" // /* MW 3 */ + 5844 "10010001" // /* MW 2 */ + 5845 "10110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5847 "00001001" // /* MW 9 */ + 5848 "01000110" // /* MW 8 */ + 5849 "10100010" // /* MW 7 */ + 5850 "11100100" // /* MW 6 */ + 5851 "00000000" // /* MW 5 */ + 5852 "00000110" // /* MW 4 */ + 5853 "01100010" // /* MW 3 */ + 5854 "10010001" // /* MW 2 */ + 5855 "10010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5857 "00101001" // /* MW 7 */ + 5858 "00101010" // /* MW 6 */ + 5859 "10100001" // /* MW 5 */ + 5860 "11000110" // /* MW 4 */ + 5861 "00000011" // /* MW 3 */ + 5862 "10010010" // /* MW 2 */ + 5863 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5865 "00001001" // /* MW 7 */ + 5866 "01101010" // /* MW 6 */ + 5867 "10100011" // /* MW 5 */ + 5868 "11000110" // /* MW 4 */ + 5869 "00000111" // /* MW 3 */ + 5870 "10010010" // /* MW 2 */ + 5871 "00000001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 + 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5873 "00000000" // /* MW 3 */ + 5874 "10001011" // /* MW 2 */ + 5875 "00011111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 750 26 first + 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5877 "00101001" // /* MW 7 */ + 5878 "00101010" // /* MW 6 */ + 5879 "10100001" // /* MW 5 */ + 5880 "11100110" // /* MW 4 */ + 5881 "10100000" // /* MW 3 */ + 5882 "00001011" // /* MW 2 */ + 5883 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 749 26 first + 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5885 "00101001" // /* MW 7 */ + 5886 "00000110" // /* MW 6 */ + 5887 "10100000" // /* MW 5 */ + 5888 "11100110" // /* MW 4 */ + 5889 "10100000" // /* MW 3 */ + 5890 "10001000" // /* MW 2 */ + 5891 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 + 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5893 "00001001" // /* MW 9 */ + 5894 "01101010" // /* MW 8 */ + 5895 "10100011" // /* MW 7 */ + 5896 "11100110" // /* MW 6 */ + 5897 "00000000" // /* MW 5 */ + 5898 "00000101" // /* MW 4 */ + 5899 "00100011" // /* MW 3 */ + 5900 "11110111" // /* MW 2 */ + 5901 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 first + 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5903 "00001001" // /* MW 11 */ + 5904 "01000110" // /* MW 10 */ + 5905 "10100010" // /* MW 9 */ + 5906 "11100110" // /* MW 8 */ + 5907 "10100000" // /* MW 7 */ + 5908 "10000010" // /* MW 6 */ + 5909 "00100101" // /* MW 5 */ + 5910 "11010111" // /* MW 4 */ + 5911 "01101110" // /* MW 3 */ + 5912 "10001001" // /* MW 2 */ + 5913 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 + 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5915 "01110000" // /* MW 7 */ + 5916 "10000000" // /* MW 6 */ + 5917 "11000101" // /* MW 5 */ + 5918 "00000011" // /* MW 4 */ + 5919 "01100000" // /* MW 3 */ + 5920 "10001001" // /* MW 2 */ + 5921 "01100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5923 "01110000" // /* MW 7 */ + 5924 "00000000" // /* MW 6 */ + 5925 "10000001" // /* MW 5 */ + 5926 "00000001" // /* MW 4 */ + 5927 "01100000" // /* MW 3 */ + 5928 "01000001" // /* MW 2 */ + 5929 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5931 "01110000" // /* MW 7 */ + 5932 "01010000" // /* MW 6 */ + 5933 "10000111" // /* MW 5 */ + 5934 "00000000" // /* MW 4 */ + 5935 "11000000" // /* MW 3 */ + 5936 "00010010" // /* MW 2 */ + 5937 "10110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5939 "01110000" // /* MW 7 */ + 5940 "10010000" // /* MW 6 */ + 5941 "11000111" // /* MW 5 */ + 5942 "00000010" // /* MW 4 */ + 5943 "11000000" // /* MW 3 */ + 5944 "00000010" // /* MW 2 */ + 5945 "10100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "01110110" // /* MW 9 */ + 5948 "01100000" // /* MW 8 */ + 5949 "11001000" // /* MW 7 */ + 5950 "00000001" // /* MW 6 */ + 5951 "10010000" // /* MW 5 */ + 5952 "00111011" // /* MW 4 */ + 5953 "01100001" // /* MW 3 */ + 5954 "10010001" // /* MW 2 */ + 5955 "00010011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5957 "01110000" // /* MW 7 */ + 5958 "00000000" // /* MW 6 */ + 5959 "10000011" // /* MW 5 */ + 5960 "00000000" // /* MW 4 */ + 5961 "11000000" // /* MW 3 */ + 5962 "00001010" // /* MW 2 */ + 5963 "01100010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1218 20 first +.src_ref 2 "conv2d_bf16.h" 1287 37 first + 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */ + 5965 "01100000" // /* MW 11 */ + 5966 "00000000" // /* MW 10 */ + 5967 "00000000" // /* MW 9 */ + 5968 "11111010" // /* MW 8 */ + 5969 "00000010" // /* MW 7 */ + 5970 "00100100" // /* MW 6 */ + 5971 "00100000" // /* MW 5 */ + 5972 "01010111" // /* MW 4 */ + 5973 "11000000" // /* MW 3 */ + 5974 "00100010" // /* MW 2 */ + 5975 "01010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 738 8 +.delay_slot + 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5977 "01110000" // /* MW 7 */ + 5978 "01100000" // /* MW 6 */ + 5979 "10101001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "11000000" // /* MW 3 */ + 5982 "00011010" // /* MW 2 */ + 5983 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5985 "01110000" // /* MW 7 */ + 5986 "11000000" // /* MW 6 */ + 5987 "10100111" // /* MW 5 */ + 5988 "00000011" // /* MW 4 */ + 5989 "11000000" // /* MW 3 */ + 5990 "00110010" // /* MW 2 */ + 5991 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5993 "01110110" // /* MW 9 */ + 5994 "01100000" // /* MW 8 */ + 5995 "10110101" // /* MW 7 */ + 5996 "00000000" // /* MW 6 */ + 5997 "10010000" // /* MW 5 */ + 5998 "00101011" // /* MW 4 */ + 5999 "11000101" // /* MW 3 */ + 6000 "00111010" // /* MW 2 */ + 6001 "00010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.delay_slot + 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6003 "01110000" // /* MW 7 */ + 6004 "10000000" // /* MW 6 */ + 6005 "11000010" // /* MW 5 */ + 6006 "00000010" // /* MW 4 */ + 6007 "11000000" // /* MW 3 */ + 6008 "00101010" // /* MW 2 */ + 6009 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.delay_slot + 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6011 "01110000" // /* MW 7 */ + 6012 "11000000" // /* MW 6 */ + 6013 "01001101" // /* MW 5 */ + 6014 "00000000" // /* MW 4 */ + 6015 "01100000" // /* MW 3 */ + 6016 "10001001" // /* MW 2 */ + 6017 "11100001" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "11101100" // /* MW 3 */ + 6020 "11011100" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "11101100" // /* MW 3 */ + 6024 "10111100" // /* MW 2 */ + 6025 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6027 "01110000" // /* MW 7 */ + 6028 "01110110" // /* MW 6 */ + 6029 "10101010" // /* MW 5 */ + 6030 "00000010" // /* MW 4 */ + 6031 "01100000" // /* MW 3 */ + 6032 "01011010" // /* MW 2 */ + 6033 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6035 "01110000" // /* MW 7 */ + 6036 "01110110" // /* MW 6 */ + 6037 "11011010" // /* MW 5 */ + 6038 "00000001" // /* MW 4 */ + 6039 "01100000" // /* MW 3 */ + 6040 "10111010" // /* MW 2 */ + 6041 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */ + 6043 "00100001" // /* MW 9 */ + 6044 "00000000" // /* MW 8 */ + 6045 "00000000" // /* MW 7 */ + 6046 "11111110" // /* MW 6 */ + 6047 "00000010" // /* MW 5 */ + 6048 "00000000" // /* MW 4 */ + 6049 "01100000" // /* MW 3 */ + 6050 "11010010" // /* MW 2 */ + 6051 "00100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6053 "01110000" // /* MW 7 */ + 6054 "01110110" // /* MW 6 */ + 6055 "10100010" // /* MW 5 */ + 6056 "00000010" // /* MW 4 */ + 6057 "01100000" // /* MW 3 */ + 6058 "10111010" // /* MW 2 */ + 6059 "00100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "11101100" // /* MW 3 */ + 6062 "10001100" // /* MW 2 */ + 6063 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6065 "01110000" // /* MW 7 */ + 6066 "01110110" // /* MW 6 */ + 6067 "10010110" // /* MW 5 */ + 6068 "00000010" // /* MW 4 */ + 6069 "01100000" // /* MW 3 */ + 6070 "11010010" // /* MW 2 */ + 6071 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "01110000" // /* MW 7 */ + 6074 "01110110" // /* MW 6 */ + 6075 "10001010" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "01100000" // /* MW 3 */ + 6078 "10111010" // /* MW 2 */ + 6079 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6081 "00000000" // /* MW 15 */ + 6082 "00000000" // /* MW 14 */ + 6083 "01111000" // /* MW 13 */ + 6084 "10100101" // /* MW 12 */ + 6085 "00000001" // /* MW 11 */ + 6086 "00000000" // /* MW 10 */ + 6087 "00000000" // /* MW 9 */ + 6088 "00000000" // /* MW 8 */ + 6089 "10010011" // /* MW 7 */ + 6090 "10100010" // /* MW 6 */ + 6091 "00100100" // /* MW 5 */ + 6092 "00000000" // /* MW 4 */ + 6093 "11110000" // /* MW 3 */ + 6094 "00101100" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 +.src_ref 4 "vector.hpp" 1152 43 + 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6097 "10100011" // /* MW 3 */ + 6098 "11100000" // /* MW 2 */ + 6099 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6101 "11100011" // /* MW 3 */ + 6102 "00010100" // /* MW 2 */ + 6103 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "00100011" // /* MW 3 */ + 6106 "00000100" // /* MW 2 */ + 6107 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6109 "01100011" // /* MW 3 */ + 6110 "00010100" // /* MW 2 */ + 6111 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6113 "00010011" // /* MW 3 */ + 6114 "00000110" // /* MW 2 */ + 6115 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6117 "11100011" // /* MW 3 */ + 6118 "00010101" // /* MW 2 */ + 6119 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6121 "01110000" // /* MW 7 */ + 6122 "10100101" // /* MW 6 */ + 6123 "00000001" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "01100000" // /* MW 3 */ + 6126 "00100100" // /* MW 2 */ + 6127 "10010100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1143 12 first + 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6129 "01011000" // /* MW 11 */ + 6130 "00000000" // /* MW 10 */ + 6131 "01000000" // /* MW 9 */ + 6132 "00000001" // /* MW 8 */ + 6133 "00110101" // /* MW 7 */ + 6134 "00000110" // /* MW 6 */ + 6135 "00100000" // /* MW 5 */ + 6136 "01010111" // /* MW 4 */ + 6137 "01101111" // /* MW 3 */ + 6138 "10010010" // /* MW 2 */ + 6139 "11100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.delay_slot + 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6141 "10000000" // /* MW 3 */ + 6142 "01000100" // /* MW 2 */ + 6143 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.delay_slot + 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6145 "10100000" // /* MW 3 */ + 6146 "01001001" // /* MW 2 */ + 6147 "00011010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6149 "00000001" // /* MW 5 */ + 6150 "00011110" // /* MW 4 */ + 6151 "00000101" // /* MW 3 */ + 6152 "01110010" // /* MW 2 */ + 6153 "11101011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.delay_slot + 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "10000000" // /* MW 3 */ + 6156 "01001110" // /* MW 2 */ + 6157 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */ + 6161 "00000000" // /* MW 5 */ + 6162 "00000000" // /* MW 4 */ + 6163 "01011000" // /* MW 3 */ + 6164 "00001101" // /* MW 2 */ + 6165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 1364 80 +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6177 "01111000" // /* MW 11 */ + 6178 "10010000" // /* MW 10 */ + 6179 "10110011" // /* MW 9 */ + 6180 "00001000" // /* MW 8 */ + 6181 "11100001" // /* MW 7 */ + 6182 "00000100" // /* MW 6 */ + 6183 "10001011" // /* MW 5 */ + 6184 "00001100" // /* MW 4 */ + 6185 "00100010" // /* MW 3 */ + 6186 "01111110" // /* MW 2 */ + 6187 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1369 80 + 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6189 "01111000" // /* MW 11 */ + 6190 "01000000" // /* MW 10 */ + 6191 "01100010" // /* MW 9 */ + 6192 "00000011" // /* MW 8 */ + 6193 "11010100" // /* MW 7 */ + 6194 "00011011" // /* MW 6 */ + 6195 "00001011" // /* MW 5 */ + 6196 "01010110" // /* MW 4 */ + 6197 "10000010" // /* MW 3 */ + 6198 "10010000" // /* MW 2 */ + 6199 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 807 26 +.src_ref 2 "conv2d_bf16.h" 808 26 +.src_ref 2 "conv2d_bf16.h" 809 26 +.src_ref 2 "conv2d_bf16.h" 810 26 +.src_ref 2 "conv2d_bf16.h" 1436 26 +.src_ref 2 "conv2d_bf16.h" 1437 26 +.src_ref 2 "conv2d_bf16.h" 1438 26 +.src_ref 2 "conv2d_bf16.h" 1439 26 + 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111000" // /* MW 9 */ + 6202 "11010000" // /* MW 8 */ + 6203 "00000101" // /* MW 7 */ + 6204 "10001001" // /* MW 6 */ + 6205 "00110001" // /* MW 5 */ + 6206 "00011001" // /* MW 4 */ + 6207 "00000000" // /* MW 3 */ + 6208 "10010100" // /* MW 2 */ + 6209 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 802 83 +.src_ref 2 "conv2d_bf16.h" 1428 39 + 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6211 "01111000" // /* MW 11 */ + 6212 "10010000" // /* MW 10 */ + 6213 "11000111" // /* MW 9 */ + 6214 "11001010" // /* MW 8 */ + 6215 "00100000" // /* MW 7 */ + 6216 "00000001" // /* MW 6 */ + 6217 "00001011" // /* MW 5 */ + 6218 "01011100" // /* MW 4 */ + 6219 "10000110" // /* MW 3 */ + 6220 "10011000" // /* MW 2 */ + 6221 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 794 8 + 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6223 "01111000" // /* MW 11 */ + 6224 "01010000" // /* MW 10 */ + 6225 "10000111" // /* MW 9 */ + 6226 "00001000" // /* MW 8 */ + 6227 "10010000" // /* MW 7 */ + 6228 "00000001" // /* MW 6 */ + 6229 "00001011" // /* MW 5 */ + 6230 "00000010" // /* MW 4 */ + 6231 "00100101" // /* MW 3 */ + 6232 "10000011" // /* MW 2 */ + 6233 "11111010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 794 8 +.src_ref 2 "conv2d_bf16.h" 1455 20 + 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6235 "01111000" // /* MW 9 */ + 6236 "01010000" // /* MW 8 */ + 6237 "01000101" // /* MW 7 */ + 6238 "00001011" // /* MW 6 */ + 6239 "10000000" // /* MW 5 */ + 6240 "00000001" // /* MW 4 */ + 6241 "00100000" // /* MW 3 */ + 6242 "11010110" // /* MW 2 */ + 6243 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 12 + 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6245 "00010000" // /* MW 9 */ + 6246 "01011000" // /* MW 8 */ + 6247 "00110100" // /* MW 7 */ + 6248 "00000101" // /* MW 6 */ + 6249 "00000000" // /* MW 5 */ + 6250 "00000000" // /* MW 4 */ + 6251 "00100000" // /* MW 3 */ + 6252 "00110110" // /* MW 2 */ + 6253 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 first +.src_ref 2 "conv2d_bf16.h" 1873 + 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6255 "01110010" // /* MW 5 */ + 6256 "11011111" // /* MW 4 */ + 6257 "00100110" // /* MW 3 */ + 6258 "10000111" // /* MW 2 */ + 6259 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "11000110" // /* MW 3 */ + 6262 "00011101" // /* MW 2 */ + 6263 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 80 first + 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00000110" // /* MW 3 */ + 6266 "10001010" // /* MW 2 */ + 6267 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 799 87 first + 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "10000110" // /* MW 3 */ + 6270 "10011110" // /* MW 2 */ + 6271 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 83 first + 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "11010110" // /* MW 3 */ + 6274 "00011110" // /* MW 2 */ + 6275 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 83 first + 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11110110" // /* MW 3 */ + 6278 "11001010" // /* MW 2 */ + 6279 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 66 first + 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6281 "10110110" // /* MW 3 */ + 6282 "00010111" // /* MW 2 */ + 6283 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1443 71 first + 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6285 "10010110" // /* MW 3 */ + 6286 "00000111" // /* MW 2 */ + 6287 "00000011" // /* MW 1 */ + 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6289 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1369 89 + 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6291 "00000000" // /* MW 3 */ + 6292 "10011000" // /* MW 2 */ + 6293 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 +.src_ref 2 "conv2d_bf16.h" 1518 37 + 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6295 "00000000" // /* MW 3 */ + 6296 "00000111" // /* MW 2 */ + 6297 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 + 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6299 "00000000" // /* MW 3 */ + 6300 "11011100" // /* MW 2 */ + 6301 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 first + 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6303 "11100000" // /* MW 3 */ + 6304 "00001111" // /* MW 2 */ + 6305 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 89 first + 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6307 "11000000" // /* MW 5 */ + 6308 "00011110" // /* MW 4 */ + 6309 "11101110" // /* MW 3 */ + 6310 "01111111" // /* MW 2 */ + 6311 "11101111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 + 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6313 "01110000" // /* MW 7 */ + 6314 "10010000" // /* MW 6 */ + 6315 "11000111" // /* MW 5 */ + 6316 "00000011" // /* MW 4 */ + 6317 "01100000" // /* MW 3 */ + 6318 "00101011" // /* MW 2 */ + 6319 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1362 31 first +.src_ref 2 "conv2d_bf16.h" 1429 50 +.src_ref 2 "conv2d_bf16.h" 1443 16 first +.loop_nesting 1 + 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6321 "01100000" // /* MW 13 */ + 6322 "10000001" // /* MW 12 */ + 6323 "01110001" // /* MW 11 */ + 6324 "00000010" // /* MW 10 */ + 6325 "10010110" // /* MW 9 */ + 6326 "10001111" // /* MW 8 */ + 6327 "00000000" // /* MW 7 */ + 6328 "00000000" // /* MW 6 */ + 6329 "00101000" // /* MW 5 */ + 6330 "00101000" // /* MW 4 */ + 6331 "01111010" // /* MW 3 */ + 6332 "10000101" // /* MW 2 */ + 6333 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1364 31 first +.src_ref 2 "conv2d_bf16.h" 1443 16 + 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6335 "00010000" // /* MW 11 */ + 6336 "11001000" // /* MW 10 */ + 6337 "10111100" // /* MW 9 */ + 6338 "00000101" // /* MW 8 */ + 6339 "00000000" // /* MW 7 */ + 6340 "00000000" // /* MW 6 */ + 6341 "00101000" // /* MW 5 */ + 6342 "00101000" // /* MW 4 */ + 6343 "01111010" // /* MW 3 */ + 6344 "00001101" // /* MW 2 */ + 6345 "11001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1428 39 first +.src_ref 2 "conv2d_bf16.h" 1443 16 first + 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6347 "01001000" // /* MW 11 */ + 6348 "00111111" // /* MW 10 */ + 6349 "10111111" // /* MW 9 */ + 6350 "01101110" // /* MW 8 */ + 6351 "11101001" // /* MW 7 */ + 6352 "00000101" // /* MW 6 */ + 6353 "00101000" // /* MW 5 */ + 6354 "00000101" // /* MW 4 */ + 6355 "01110110" // /* MW 3 */ + 6356 "10000001" // /* MW 2 */ + 6357 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6359 "01111110" // /* MW 9 */ + 6360 "10010000" // /* MW 8 */ + 6361 "01000111" // /* MW 7 */ + 6362 "00000001" // /* MW 6 */ + 6363 "00010100" // /* MW 5 */ + 6364 "00000001" // /* MW 4 */ + 6365 "01110011" // /* MW 3 */ + 6366 "01011001" // /* MW 2 */ + 6367 "01010101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1367 31 first + 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "00101000" // /* MW 5 */ + 6370 "00000001" // /* MW 4 */ + 6371 "01110110" // /* MW 3 */ + 6372 "10010101" // /* MW 2 */ + 6373 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1369 31 first + 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6375 "10101000" // /* MW 5 */ + 6376 "00100001" // /* MW 4 */ + 6377 "01111010" // /* MW 3 */ + 6378 "00011101" // /* MW 2 */ + 6379 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1372 31 first + 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6381 "00101000" // /* MW 5 */ + 6382 "00101000" // /* MW 4 */ + 6383 "01111010" // /* MW 3 */ + 6384 "10100101" // /* MW 2 */ + 6385 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1374 31 first + 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6387 "00101000" // /* MW 5 */ + 6388 "00101000" // /* MW 4 */ + 6389 "01111010" // /* MW 3 */ + 6390 "00101101" // /* MW 2 */ + 6391 "11001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1377 31 first + 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6393 "10101000" // /* MW 5 */ + 6394 "00000000" // /* MW 4 */ + 6395 "01110110" // /* MW 3 */ + 6396 "10110101" // /* MW 2 */ + 6397 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1379 31 first + 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6399 "00101000" // /* MW 5 */ + 6400 "00000011" // /* MW 4 */ + 6401 "01110110" // /* MW 3 */ + 6402 "00111101" // /* MW 2 */ + 6403 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 first + 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6405 "10101000" // /* MW 5 */ + 6406 "00000011" // /* MW 4 */ + 6407 "01110110" // /* MW 3 */ + 6408 "01000101" // /* MW 2 */ + 6409 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6411 "11101110" // /* MW 9 */ + 6412 "00101101" // /* MW 8 */ + 6413 "01101001" // /* MW 7 */ + 6414 "00000001" // /* MW 6 */ + 6415 "00010100" // /* MW 5 */ + 6416 "00010010" // /* MW 4 */ + 6417 "01110101" // /* MW 3 */ + 6418 "01001101" // /* MW 2 */ + 6419 "01101000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6421 "11101110" // /* MW 9 */ + 6422 "00101111" // /* MW 8 */ + 6423 "10101001" // /* MW 7 */ + 6424 "00000010" // /* MW 6 */ + 6425 "00010100" // /* MW 5 */ + 6426 "00010100" // /* MW 4 */ + 6427 "01110101" // /* MW 3 */ + 6428 "10000001" // /* MW 2 */ + 6429 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6431 "01101001" // /* MW 11 */ + 6432 "00001011" // /* MW 10 */ + 6433 "01001000" // /* MW 9 */ + 6434 "11000010" // /* MW 8 */ + 6435 "11011011" // /* MW 7 */ + 6436 "00010001" // /* MW 6 */ + 6437 "00101010" // /* MW 5 */ + 6438 "00101000" // /* MW 4 */ + 6439 "01111010" // /* MW 3 */ + 6440 "00000001" // /* MW 2 */ + 6441 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6443 "01101001" // /* MW 9 */ + 6444 "00110101" // /* MW 8 */ + 6445 "01001001" // /* MW 7 */ + 6446 "11000010" // /* MW 6 */ + 6447 "11011111" // /* MW 5 */ + 6448 "00010001" // /* MW 4 */ + 6449 "01110101" // /* MW 3 */ + 6450 "10000001" // /* MW 2 */ + 6451 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6453 "01101001" // /* MW 3 */ + 6454 "01001001" // /* MW 2 */ + 6455 "01001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6457 "01101001" // /* MW 3 */ + 6458 "01110101" // /* MW 2 */ + 6459 "01001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.src_ref 2 "conv2d_bf16.h" 1437 26 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6461 "00111101" // /* MW 9 */ + 6462 "10000100" // /* MW 8 */ + 6463 "10100001" // /* MW 7 */ + 6464 "11000110" // /* MW 6 */ + 6465 "01011111" // /* MW 5 */ + 6466 "10001011" // /* MW 4 */ + 6467 "10101010" // /* MW 3 */ + 6468 "00000000" // /* MW 2 */ + 6469 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1436 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6471 "00111101" // /* MW 7 */ + 6472 "10000000" // /* MW 6 */ + 6473 "10100000" // /* MW 5 */ + 6474 "00000000" // /* MW 4 */ + 6475 "10010100" // /* MW 3 */ + 6476 "00000001" // /* MW 2 */ + 6477 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1438 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6479 "00111101" // /* MW 7 */ + 6480 "10001000" // /* MW 6 */ + 6481 "10100010" // /* MW 5 */ + 6482 "00000000" // /* MW 4 */ + 6483 "11010100" // /* MW 3 */ + 6484 "00000001" // /* MW 2 */ + 6485 "00000011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1439 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6487 "00111101" // /* MW 9 */ + 6488 "10001100" // /* MW 8 */ + 6489 "10100011" // /* MW 7 */ + 6490 "00011101" // /* MW 6 */ + 6491 "00010100" // /* MW 5 */ + 6492 "00010010" // /* MW 4 */ + 6493 "01110101" // /* MW 3 */ + 6494 "00000001" // /* MW 2 */ + 6495 "01010101" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6497 "10110111" // /* MW 5 */ + 6498 "00010110" // /* MW 4 */ + 6499 "10000010" // /* MW 3 */ + 6500 "10000010" // /* MW 2 */ + 6501 "10100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6503 "00001001" // /* MW 9 */ + 6504 "00101010" // /* MW 8 */ + 6505 "10011001" // /* MW 7 */ + 6506 "11000110" // /* MW 6 */ + 6507 "01011111" // /* MW 5 */ + 6508 "00111100" // /* MW 4 */ + 6509 "00101010" // /* MW 3 */ + 6510 "00101000" // /* MW 2 */ + 6511 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6513 "00001001" // /* MW 9 */ + 6514 "00000100" // /* MW 8 */ + 6515 "10011000" // /* MW 7 */ + 6516 "11000110" // /* MW 6 */ + 6517 "01011011" // /* MW 5 */ + 6518 "10111100" // /* MW 4 */ + 6519 "10101001" // /* MW 3 */ + 6520 "00000000" // /* MW 2 */ + 6521 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6523 "00001001" // /* MW 7 */ + 6524 "01101000" // /* MW 6 */ + 6525 "10011011" // /* MW 5 */ + 6526 "00000000" // /* MW 4 */ + 6527 "10010100" // /* MW 3 */ + 6528 "00000001" // /* MW 2 */ + 6529 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6531 "00001001" // /* MW 13 */ + 6532 "01000110" // /* MW 12 */ + 6533 "10011010" // /* MW 11 */ + 6534 "01101100" // /* MW 10 */ + 6535 "00000101" // /* MW 9 */ + 6536 "00000000" // /* MW 8 */ + 6537 "00000000" // /* MW 7 */ + 6538 "00000000" // /* MW 6 */ + 6539 "10101000" // /* MW 5 */ + 6540 "00000011" // /* MW 4 */ + 6541 "01110110" // /* MW 3 */ + 6542 "10000001" // /* MW 2 */ + 6543 "00000010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6545 "00000000" // /* MW 15 */ + 6546 "00000000" // /* MW 14 */ + 6547 "11101000" // /* MW 13 */ + 6548 "10101111" // /* MW 12 */ + 6549 "01000101" // /* MW 11 */ + 6550 "00000001" // /* MW 10 */ + 6551 "00000000" // /* MW 9 */ + 6552 "00000000" // /* MW 8 */ + 6553 "01011011" // /* MW 7 */ + 6554 "00000001" // /* MW 6 */ + 6555 "00101000" // /* MW 5 */ + 6556 "00100100" // /* MW 4 */ + 6557 "01111010" // /* MW 3 */ + 6558 "00000001" // /* MW 2 */ + 6559 "01010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 801 30 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6561 "11100000" // /* MW 11 */ + 6562 "10101101" // /* MW 10 */ + 6563 "10000101" // /* MW 9 */ + 6564 "00000000" // /* MW 8 */ + 6565 "10001011" // /* MW 7 */ + 6566 "10011100" // /* MW 6 */ + 6567 "00100101" // /* MW 5 */ + 6568 "10010111" // /* MW 4 */ + 6569 "11111111" // /* MW 3 */ + 6570 "00001100" // /* MW 2 */ + 6571 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.src_ref 2 "conv2d_bf16.h" 1517 32 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6573 "00001001" // /* MW 11 */ + 6574 "00101010" // /* MW 10 */ + 6575 "10011001" // /* MW 9 */ + 6576 "11000110" // /* MW 8 */ + 6577 "01011111" // /* MW 7 */ + 6578 "00111100" // /* MW 6 */ + 6579 "00100010" // /* MW 5 */ + 6580 "00010111" // /* MW 4 */ + 6581 "01101111" // /* MW 3 */ + 6582 "10010001" // /* MW 2 */ + 6583 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.src_ref 2 "conv2d_bf16.h" 1518 37 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6585 "00001001" // /* MW 11 */ + 6586 "00000100" // /* MW 10 */ + 6587 "10011000" // /* MW 9 */ + 6588 "11000110" // /* MW 8 */ + 6589 "01011011" // /* MW 7 */ + 6590 "10111100" // /* MW 6 */ + 6591 "00100001" // /* MW 5 */ + 6592 "10010111" // /* MW 4 */ + 6593 "01101111" // /* MW 3 */ + 6594 "10010001" // /* MW 2 */ + 6595 "01110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6597 "00001001" // /* MW 7 */ + 6598 "01101000" // /* MW 6 */ + 6599 "10011011" // /* MW 5 */ + 6600 "11100110" // /* MW 4 */ + 6601 "10100000" // /* MW 3 */ + 6602 "10001000" // /* MW 2 */ + 6603 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.src_ref 2 "conv2d_bf16.h" 1428 39 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6605 "00001001" // /* MW 9 */ + 6606 "01000110" // /* MW 8 */ + 6607 "10011010" // /* MW 7 */ + 6608 "11100110" // /* MW 6 */ + 6609 "10000000" // /* MW 5 */ + 6610 "10011011" // /* MW 4 */ + 6611 "00100000" // /* MW 3 */ + 6612 "10110111" // /* MW 2 */ + 6613 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 30 first + 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6615 "01011011" // /* MW 3 */ + 6616 "00001011" // /* MW 2 */ + 6617 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6619 "01011111" // /* MW 3 */ + 6620 "10001011" // /* MW 2 */ + 6621 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6623 "00001001" // /* MW 7 */ + 6624 "00000100" // /* MW 6 */ + 6625 "10011000" // /* MW 5 */ + 6626 "11000110" // /* MW 4 */ + 6627 "01011011" // /* MW 3 */ + 6628 "10111100" // /* MW 2 */ + 6629 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6631 "00001001" // /* MW 7 */ + 6632 "00101010" // /* MW 6 */ + 6633 "10011001" // /* MW 5 */ + 6634 "11000110" // /* MW 4 */ + 6635 "01011111" // /* MW 3 */ + 6636 "00111100" // /* MW 2 */ + 6637 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6639 "00001001" // /* MW 3 */ + 6640 "01000110" // /* MW 2 */ + 6641 "10011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first + 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6643 "00001001" // /* MW 3 */ + 6644 "01101000" // /* MW 2 */ + 6645 "10011011" // /* MW 1 */ + 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6647 "00000000" // /* MW 1 */ + 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6649 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6651 "00010110" // /* MW 3 */ + 6652 "00010000" // /* MW 2 */ + 6653 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6655 "10010110" // /* MW 3 */ + 6656 "10010000" // /* MW 2 */ + 6657 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1455 20 first + 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */ + 6659 "01100001" // /* MW 9 */ + 6660 "00000000" // /* MW 8 */ + 6661 "00000000" // /* MW 7 */ + 6662 "01001110" // /* MW 6 */ + 6663 "00000011" // /* MW 5 */ + 6664 "00101010" // /* MW 4 */ + 6665 "11000000" // /* MW 3 */ + 6666 "00011010" // /* MW 2 */ + 6667 "00010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.delay_slot + 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6669 "01010110" // /* MW 3 */ + 6670 "00010000" // /* MW 2 */ + 6671 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6673 "10010110" // /* MW 3 */ + 6674 "00010001" // /* MW 2 */ + 6675 "00001001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6677 "11010110" // /* MW 3 */ + 6678 "10010001" // /* MW 2 */ + 6679 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6681 "00010110" // /* MW 3 */ + 6682 "10010001" // /* MW 2 */ + 6683 "00001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6685 "01010110" // /* MW 3 */ + 6686 "00010001" // /* MW 2 */ + 6687 "00001100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6689 "11101100" // /* MW 3 */ + 6690 "11011100" // /* MW 2 */ + 6691 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6693 "11101100" // /* MW 3 */ + 6694 "10001100" // /* MW 2 */ + 6695 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6697 "01110000" // /* MW 7 */ + 6698 "01110110" // /* MW 6 */ + 6699 "10101010" // /* MW 5 */ + 6700 "00000010" // /* MW 4 */ + 6701 "01100000" // /* MW 3 */ + 6702 "01011010" // /* MW 2 */ + 6703 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6705 "01110000" // /* MW 7 */ + 6706 "01110110" // /* MW 6 */ + 6707 "01011010" // /* MW 5 */ + 6708 "00000000" // /* MW 4 */ + 6709 "01100000" // /* MW 3 */ + 6710 "10001010" // /* MW 2 */ + 6711 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */ + 6713 "00100001" // /* MW 9 */ + 6714 "00000000" // /* MW 8 */ + 6715 "00000000" // /* MW 7 */ + 6716 "01010010" // /* MW 6 */ + 6717 "00000011" // /* MW 5 */ + 6718 "00000000" // /* MW 4 */ + 6719 "01100000" // /* MW 3 */ + 6720 "11010010" // /* MW 2 */ + 6721 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6723 "01110000" // /* MW 7 */ + 6724 "01110110" // /* MW 6 */ + 6725 "10001010" // /* MW 5 */ + 6726 "00000010" // /* MW 4 */ + 6727 "01100000" // /* MW 3 */ + 6728 "10001010" // /* MW 2 */ + 6729 "10100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6731 "11101100" // /* MW 3 */ + 6732 "10111100" // /* MW 2 */ + 6733 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6735 "01110000" // /* MW 7 */ + 6736 "01110110" // /* MW 6 */ + 6737 "10010110" // /* MW 5 */ + 6738 "00000010" // /* MW 4 */ + 6739 "01100000" // /* MW 3 */ + 6740 "01010010" // /* MW 2 */ + 6741 "01101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6743 "01110010" // /* MW 9 */ + 6744 "01110110" // /* MW 8 */ + 6745 "00100010" // /* MW 7 */ + 6746 "00000010" // /* MW 6 */ + 6747 "01010011" // /* MW 5 */ + 6748 "00010100" // /* MW 4 */ + 6749 "11110111" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "00000000" // /* MW 15 */ + 6754 "00000000" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "10010011" // /* MW 7 */ + 6762 "11100010" // /* MW 6 */ + 6763 "00100100" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 +.src_ref 4 "vector.hpp" 1152 43 + 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "10100011" // /* MW 3 */ + 6770 "01100000" // /* MW 2 */ + 6771 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6773 "11100011" // /* MW 3 */ + 6774 "00010100" // /* MW 2 */ + 6775 "00001100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6777 "00100011" // /* MW 3 */ + 6778 "00000100" // /* MW 2 */ + 6779 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6781 "01100011" // /* MW 3 */ + 6782 "00010100" // /* MW 2 */ + 6783 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6785 "10100011" // /* MW 3 */ + 6786 "01100001" // /* MW 2 */ + 6787 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6789 "11100011" // /* MW 3 */ + 6790 "00010101" // /* MW 2 */ + 6791 "00001111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6793 "01110000" // /* MW 7 */ + 6794 "10100101" // /* MW 6 */ + 6795 "00000001" // /* MW 5 */ + 6796 "00000000" // /* MW 4 */ + 6797 "01100000" // /* MW 3 */ + 6798 "00100100" // /* MW 2 */ + 6799 "10011100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1337 12 first + 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6801 "01000000" // /* MW 5 */ + 6802 "11110101" // /* MW 4 */ + 6803 "01101110" // /* MW 3 */ + 6804 "11000010" // /* MW 2 */ + 6805 "01100010" // /* MW 1 */ +.delay_slot + 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "10010000" // /* MW 3 */ + 6808 "10001011" // /* MW 2 */ + 6809 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6811 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6813 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6817 "00000000" // /* MW 15 */ + 6818 "00000000" // /* MW 14 */ + 6819 "01111000" // /* MW 13 */ + 6820 "10100101" // /* MW 12 */ + 6821 "00000001" // /* MW 11 */ + 6822 "00000000" // /* MW 10 */ + 6823 "00000000" // /* MW 9 */ + 6824 "00000000" // /* MW 8 */ + 6825 "01011011" // /* MW 7 */ + 6826 "00000001" // /* MW 6 */ + 6827 "00100000" // /* MW 5 */ + 6828 "00000000" // /* MW 4 */ + 6829 "11110000" // /* MW 3 */ + 6830 "00101100" // /* MW 2 */ + 6831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "11110001" // /* MW 3 */ + 6834 "11101101" // /* MW 2 */ + 6835 "00000111" // /* MW 1 */ + 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "10010001" // /* MW 3 */ + 6838 "11110001" // /* MW 2 */ + 6839 "00000111" // /* MW 1 */ + 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6841 "00110001" // /* MW 3 */ + 6842 "11110101" // /* MW 2 */ + 6843 "00000111" // /* MW 1 */ + 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6845 "00011001" // /* MW 3 */ + 6846 "11101011" // /* MW 2 */ + 6847 "00000111" // /* MW 1 */ + 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "10011001" // /* MW 3 */ + 6850 "11111011" // /* MW 2 */ + 6851 "00000111" // /* MW 1 */ + 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "11010001" // /* MW 3 */ + 6854 "11111101" // /* MW 2 */ + 6855 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 first + 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6857 "00000000" // /* MW 3 */ + 6858 "00101000" // /* MW 2 */ + 6859 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 +.delay_slot + 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6861 "00000001" // /* MW 5 */ + 6862 "00000000" // /* MW 4 */ + 6863 "00000000" // /* MW 3 */ + 6864 "11110000" // /* MW 2 */ + 6865 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6871 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + 6873 "00000000" // /* MW 1 */ +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 74 first +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 81 4 +.function_start + 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6881 "00010000" // /* MW 9 */ + 6882 "00100000" // /* MW 8 */ + 6883 "00110010" // /* MW 7 */ + 6884 "11110010" // /* MW 6 */ + 6885 "00000001" // /* MW 5 */ + 6886 "00000000" // /* MW 4 */ + 6887 "00000000" // /* MW 3 */ + 6888 "00100000" // /* MW 2 */ + 6889 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 first +.src_ref 7 "superkernels.cpp" 81 4 + 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6891 "01111000" // /* MW 9 */ + 6892 "11010000" // /* MW 8 */ + 6893 "01001011" // /* MW 7 */ + 6894 "00001000" // /* MW 6 */ + 6895 "00010000" // /* MW 5 */ + 6896 "00000000" // /* MW 4 */ + 6897 "11010000" // /* MW 3 */ + 6898 "11000010" // /* MW 2 */ + 6899 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 74 + 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6901 "00000001" // /* MW 5 */ + 6902 "00000000" // /* MW 4 */ + 6903 "00000000" // /* MW 3 */ + 6904 "00001000" // /* MW 2 */ + 6905 "00000000" // /* MW 1 */ + 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6907 "01010101" // /* MW 3 */ + 6908 "11110000" // /* MW 2 */ + 6909 "00001111" // /* MW 1 */ + 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6911 "00000000" // /* MW 1 */ + 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6913 "00000000" // /* MW 1 */ + 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6915 "00000000" // /* MW 1 */ + 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 79 16 + 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 6919 "00000001" // /* MW 5 */ + 6920 "01000000" // /* MW 4 */ + 6921 "11011000" // /* MW 3 */ + 6922 "00001101" // /* MW 2 */ + 6923 "10000000" // /* MW 1 */ +.delay_slot + 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10011101" // /* MW 3 */ + 6926 "11111011" // /* MW 2 */ + 6927 "00001111" // /* MW 1 */ +.delay_slot + 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "00011101" // /* MW 3 */ + 6930 "11111111" // /* MW 2 */ + 6931 "00001111" // /* MW 1 */ +.delay_slot + 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "10011101" // /* MW 3 */ + 6934 "11101101" // /* MW 2 */ + 6935 "00001111" // /* MW 1 */ +.delay_slot + 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6937 "00111101" // /* MW 3 */ + 6938 "11110100" // /* MW 2 */ + 6939 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6940 "01000100" // MOVXM r15, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6941 "00000000" // /* MW 5 */ + 6942 "10101100" // /* MW 4 */ + 6943 "11000111" // /* MW 3 */ + 6944 "00000111" // /* MW 2 */ + 6945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6947 "00010001" // /* MW 9 */ + 6948 "00110100" // /* MW 8 */ + 6949 "10110010" // /* MW 7 */ + 6950 "11110011" // /* MW 6 */ + 6951 "00000001" // /* MW 5 */ + 6952 "00000000" // /* MW 4 */ + 6953 "01100000" // /* MW 3 */ + 6954 "10010001" // /* MW 2 */ + 6955 "11010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6957 "00010000" // /* MW 11 */ + 6958 "00110010" // /* MW 10 */ + 6959 "10110010" // /* MW 9 */ + 6960 "11110011" // /* MW 8 */ + 6961 "00000001" // /* MW 7 */ + 6962 "00000000" // /* MW 6 */ + 6963 "00001011" // /* MW 5 */ + 6964 "10001111" // /* MW 4 */ + 6965 "11100001" // /* MW 3 */ + 6966 "11000000" // /* MW 2 */ + 6967 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6969 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 6973 "00000001" // /* MW 5 */ + 6974 "00000000" // /* MW 4 */ + 6975 "01100000" // /* MW 3 */ + 6976 "00000101" // /* MW 2 */ + 6977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6981 "00110001" // /* MW 3 */ + 6982 "00100000" // /* MW 2 */ + 6983 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6985 "00000101" // /* MW 3 */ + 6986 "00100000" // /* MW 2 */ + 6987 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6989 "01110000" // /* MW 7 */ + 6990 "01100000" // /* MW 6 */ + 6991 "10110000" // /* MW 5 */ + 6992 "00000011" // /* MW 4 */ + 6993 "00110000" // /* MW 3 */ + 6994 "11000010" // /* MW 2 */ + 6995 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6997 "01110000" // /* MW 11 */ + 6998 "01100000" // /* MW 10 */ + 6999 "00110010" // /* MW 9 */ + 7000 "00000000" // /* MW 8 */ + 7001 "01011011" // /* MW 7 */ + 7002 "00000001" // /* MW 6 */ + 7003 "00100000" // /* MW 5 */ + 7004 "00000000" // /* MW 4 */ + 7005 "11110000" // /* MW 3 */ + 7006 "00101100" // /* MW 2 */ + 7007 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.return_address + 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7009 "10000101" // /* MW 3 */ + 7010 "01100111" // /* MW 2 */ + 7011 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 +.src_ref 7 "superkernels.cpp" 87 35 first + 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7013 "00010000" // /* MW 9 */ + 7014 "00100010" // /* MW 8 */ + 7015 "10110010" // /* MW 7 */ + 7016 "11110000" // /* MW 6 */ + 7017 "00000001" // /* MW 5 */ + 7018 "00000000" // /* MW 4 */ + 7019 "01010000" // /* MW 3 */ + 7020 "11000001" // /* MW 2 */ + 7021 "01001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 37 first +.src_ref 7 "superkernels.cpp" 89 13 + 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7023 "00010000" // /* MW 9 */ + 7024 "00110000" // /* MW 8 */ + 7025 "00110010" // /* MW 7 */ + 7026 "11110000" // /* MW 6 */ + 7027 "00000001" // /* MW 5 */ + 7028 "00000000" // /* MW 4 */ + 7029 "01010000" // /* MW 3 */ + 7030 "11001111" // /* MW 2 */ + 7031 "01000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 73 + 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7033 "00111010" // /* MW 3 */ + 7034 "00000110" // /* MW 2 */ + 7035 "00000010" // /* MW 1 */ + 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7037 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 110 + 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "01011010" // /* MW 3 */ + 7040 "00010110" // /* MW 2 */ + 7041 "00000010" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 first +.src_ref 7 "superkernels.cpp" 113 2 + 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7047 "01110000" // /* MW 7 */ + 7048 "01100000" // /* MW 6 */ + 7049 "10110110" // /* MW 5 */ + 7050 "00000000" // /* MW 4 */ + 7051 "00110000" // /* MW 3 */ + 7052 "11000010" // /* MW 2 */ + 7053 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 57 first + 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7055 "00001111" // /* MW 3 */ + 7056 "11100001" // /* MW 2 */ + 7057 "00010100" // /* MW 1 */ + 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7059 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 94 + 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001111" // /* MW 3 */ + 7062 "01100001" // /* MW 2 */ + 7063 "00010100" // /* MW 1 */ + 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 28 first + 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7067 "00001111" // /* MW 3 */ + 7068 "10100001" // /* MW 2 */ + 7069 "00010100" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 13 +.src_ref 7 "superkernels.cpp" 113 2 + 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7073 "00000000" // /* MW 15 */ + 7074 "00000000" // /* MW 14 */ + 7075 "01111000" // /* MW 13 */ + 7076 "01100000" // /* MW 12 */ + 7077 "00110111" // /* MW 11 */ + 7078 "00000000" // /* MW 10 */ + 7079 "00000000" // /* MW 9 */ + 7080 "10000000" // /* MW 8 */ + 7081 "00010001" // /* MW 7 */ + 7082 "00000110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 106 12 +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 7 "superkernels.cpp" 117 6 +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00100100" // /* MW 8 */ + 7091 "00110010" // /* MW 7 */ + 7092 "11110011" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "00100000" // /* MW 3 */ + 7096 "10111110" // /* MW 2 */ + 7097 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.src_ref 7 "superkernels.cpp" 108 13 + 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "00010000" // /* MW 9 */ + 7100 "00100110" // /* MW 8 */ + 7101 "00110010" // /* MW 7 */ + 7102 "11110001" // /* MW 6 */ + 7103 "00000001" // /* MW 5 */ + 7104 "00000000" // /* MW 4 */ + 7105 "11010000" // /* MW 3 */ + 7106 "11000010" // /* MW 2 */ + 7107 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 +.src_ref 7 "superkernels.cpp" 108 13 first +.src_ref 7 "superkernels.cpp" 139 6 +.src_ref 7 "superkernels.cpp" 140 14 + 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "00010000" // /* MW 9 */ + 7110 "00100000" // /* MW 8 */ + 7111 "10110010" // /* MW 7 */ + 7112 "11110011" // /* MW 6 */ + 7113 "00000001" // /* MW 5 */ + 7114 "00000000" // /* MW 4 */ + 7115 "11010000" // /* MW 3 */ + 7116 "11000110" // /* MW 2 */ + 7117 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first + 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "01010110" // /* MW 3 */ + 7120 "00000110" // /* MW 2 */ + 7121 "00000111" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ + 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7125 "00000000" // /* MW 1 */ + 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7127 "00000000" // /* MW 1 */ + 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7129 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 110 6 first +.src_ref 7 "superkernels.cpp" 110 17 first + 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */ + 7131 "00000001" // /* MW 5 */ + 7132 "01000000" // /* MW 4 */ + 7133 "00011000" // /* MW 3 */ + 7134 "00001110" // /* MW 2 */ + 7135 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 108 13 first +.delay_slot + 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "00000111" // /* MW 3 */ + 7138 "01100010" // /* MW 2 */ + 7139 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.src_ref 7 "superkernels.cpp" 108 13 +.delay_slot + 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7141 "00001110" // /* MW 5 */ + 7142 "01000100" // /* MW 4 */ + 7143 "00111001" // /* MW 3 */ + 7144 "11000110" // /* MW 2 */ + 7145 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.delay_slot + 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00000111" // /* MW 3 */ + 7148 "00100110" // /* MW 2 */ + 7149 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 +.delay_slot + 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "01110001" // /* MW 3 */ + 7152 "00000110" // /* MW 2 */ + 7153 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.delay_slot + 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "00110001" // /* MW 3 */ + 7156 "00000110" // /* MW 2 */ + 7157 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7159 "10000110" // /* MW 3 */ + 7160 "01100111" // /* MW 2 */ + 7161 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7163 "01110110" // /* MW 3 */ + 7164 "11111111" // /* MW 2 */ + 7165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7167 "00010110" // /* MW 3 */ + 7168 "11111110" // /* MW 2 */ + 7169 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7171 "00110110" // /* MW 3 */ + 7172 "11111110" // /* MW 2 */ + 7173 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "00010110" // /* MW 3 */ + 7178 "01000110" // /* MW 2 */ + 7179 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7181 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00000010" // /* MW 3 */ + 7190 "01100001" // /* MW 2 */ + 7191 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010001" // /* MW 3 */ + 7194 "00000110" // /* MW 2 */ + 7195 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7197 "11111101" // /* MW 3 */ + 7198 "11100010" // /* MW 2 */ + 7199 "00010111" // /* MW 1 */ + 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7201 "00000000" // /* MW 1 */ + 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7203 "00000000" // /* MW 1 */ + 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7207 "00011000" // /* MW 9 */ + 7208 "00010011" // /* MW 8 */ + 7209 "00000100" // /* MW 7 */ + 7210 "00000000" // /* MW 6 */ + 7211 "01011011" // /* MW 5 */ + 7212 "00000001" // /* MW 4 */ + 7213 "11110000" // /* MW 3 */ + 7214 "00101100" // /* MW 2 */ + 7215 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.src_ref 7 "superkernels.cpp" 113 2 first +.no_stack_arguments + 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 7217 "00000001" // /* MW 5 */ + 7218 "00000000" // /* MW 4 */ + 7219 "10111000" // /* MW 3 */ + 7220 "00001000" // /* MW 2 */ + 7221 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7222 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7223 "00000000" // /* MW 5 */ + 7224 "11001100" // /* MW 4 */ + 7225 "11000110" // /* MW 3 */ + 7226 "00000111" // /* MW 2 */ + 7227 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7233 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7235 "00011100" // /* MW 13 */ + 7236 "00000000" // /* MW 12 */ + 7237 "00000000" // /* MW 11 */ + 7238 "00000111" // /* MW 10 */ + 7239 "00111101" // /* MW 9 */ + 7240 "01010011" // /* MW 8 */ + 7241 "00000000" // /* MW 7 */ + 7242 "00000000" // /* MW 6 */ + 7243 "10110110" // /* MW 5 */ + 7244 "00000010" // /* MW 4 */ + 7245 "11110000" // /* MW 3 */ + 7246 "00101100" // /* MW 2 */ + 7247 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 first +.src_ref 7 "superkernels.cpp" 117 20 +.return_address + 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7249 "00010000" // /* MW 9 */ + 7250 "00100010" // /* MW 8 */ + 7251 "10110010" // /* MW 7 */ + 7252 "11110000" // /* MW 6 */ + 7253 "00000001" // /* MW 5 */ + 7254 "00000000" // /* MW 4 */ + 7255 "11010000" // /* MW 3 */ + 7256 "11000010" // /* MW 2 */ + 7257 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 20 + 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7259 "00110110" // /* MW 3 */ + 7260 "00000110" // /* MW 2 */ + 7261 "00000001" // /* MW 1 */ + 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7263 "00010001" // /* MW 3 */ + 7264 "11110000" // /* MW 2 */ + 7265 "00000111" // /* MW 1 */ + 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7267 "00000000" // /* MW 1 */ + 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7269 "00000000" // /* MW 1 */ + 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7271 "00000000" // /* MW 1 */ + 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7273 "00000000" // /* MW 1 */ + 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7275 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 17 + 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7277 "00001000" // /* MW 3 */ + 7278 "01100001" // /* MW 2 */ + 7279 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 + 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */ + 7281 "00000001" // /* MW 5 */ + 7282 "01000000" // /* MW 4 */ + 7283 "01100000" // /* MW 3 */ + 7284 "00001110" // /* MW 2 */ + 7285 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 7 "superkernels.cpp" 140 14 +.delay_slot + 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7287 "00000001" // /* MW 3 */ + 7288 "00110000" // /* MW 2 */ + 7289 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7299 "00010100" // /* MW 5 */ + 7300 "11001111" // /* MW 4 */ + 7301 "10100010" // /* MW 3 */ + 7302 "00000000" // /* MW 2 */ + 7303 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00110110" // /* MW 3 */ + 7306 "00000110" // /* MW 2 */ + 7307 "00000001" // /* MW 1 */ + 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7309 "00000000" // /* MW 1 */ + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ + 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7313 "00000000" // /* MW 1 */ + 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7315 "00000000" // /* MW 1 */ + 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7317 "00000000" // /* MW 1 */ + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "00001000" // /* MW 3 */ + 7322 "01010001" // /* MW 2 */ + 7323 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 first +.src_ref 1 "io_buffer_main.h" 327 40 first + 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7325 "00100011" // /* MW 5 */ + 7326 "00001110" // /* MW 4 */ + 7327 "11011100" // /* MW 3 */ + 7328 "11000110" // /* MW 2 */ + 7329 "00111100" // /* MW 1 */ + 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7331 "00000000" // /* MW 1 */ + 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7333 "00000000" // /* MW 1 */ + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ + 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7337 "00000000" // /* MW 1 */ + 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7339 "00000000" // /* MW 1 */ + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7343 "00010001" // /* MW 3 */ + 7344 "00100001" // /* MW 2 */ + 7345 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7347 "00011100" // /* MW 13 */ + 7348 "00000000" // /* MW 12 */ + 7349 "00000000" // /* MW 11 */ + 7350 "01010111" // /* MW 10 */ + 7351 "00011010" // /* MW 9 */ + 7352 "01000000" // /* MW 8 */ + 7353 "00000000" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "00100011" // /* MW 5 */ + 7356 "11001100" // /* MW 4 */ + 7357 "11110011" // /* MW 3 */ + 7358 "00101100" // /* MW 2 */ + 7359 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 +.src_ref 7 "superkernels.cpp" 139 6 first +.src_ref 7 "superkernels.cpp" 139 19 + 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7361 "00010000" // /* MW 9 */ + 7362 "00110000" // /* MW 8 */ + 7363 "00110010" // /* MW 7 */ + 7364 "11110011" // /* MW 6 */ + 7365 "00000001" // /* MW 5 */ + 7366 "00000000" // /* MW 4 */ + 7367 "11010000" // /* MW 3 */ + 7368 "11000010" // /* MW 2 */ + 7369 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 19 + 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7371 "00110110" // /* MW 3 */ + 7372 "00000110" // /* MW 2 */ + 7373 "00000110" // /* MW 1 */ + 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7375 "10011001" // /* MW 3 */ + 7376 "11111000" // /* MW 2 */ + 7377 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 + 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7379 "00111001" // /* MW 3 */ + 7380 "11110100" // /* MW 2 */ + 7381 "00000111" // /* MW 1 */ + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 16 + 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7391 "00001000" // /* MW 3 */ + 7392 "01100001" // /* MW 2 */ + 7393 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 6 + 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7395 "00000001" // /* MW 5 */ + 7396 "01000000" // /* MW 4 */ + 7397 "10000000" // /* MW 3 */ + 7398 "00001110" // /* MW 2 */ + 7399 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7407 "00000000" // /* MW 1 */ +.delay_slot + 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00100000" // /* MW 3 */ + 7410 "11010000" // /* MW 2 */ + 7411 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 140 14 first + 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7413 "11000001" // /* MW 11 */ + 7414 "10001000" // /* MW 10 */ + 7415 "10000011" // /* MW 9 */ + 7416 "00000011" // /* MW 8 */ + 7417 "00000000" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00100000" // /* MW 5 */ + 7420 "00000000" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7425 "00011001" // /* MW 3 */ + 7426 "11111111" // /* MW 2 */ + 7427 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 first + 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7429 "00000000" // /* MW 3 */ + 7430 "00101000" // /* MW 2 */ + 7431 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 +.delay_slot + 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7433 "00000001" // /* MW 5 */ + 7434 "00000000" // /* MW 4 */ + 7435 "00000000" // /* MW 3 */ + 7436 "11111000" // /* MW 2 */ + 7437 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot + 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "10001011" // /* MW 3 */ + 7446 "10000100" // /* MW 2 */ +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 7447 "00001111" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7457 "00000001" // /* MW 5 */ + 7458 "00100001" // /* MW 4 */ + 7459 "00000000" // /* MW 3 */ + 7460 "00000000" // /* MW 2 */ + 7461 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000000" // /* MW 3 */ + 7464 "01010000" // /* MW 2 */ + 7465 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "10010000" // /* MW 3 */ + 7468 "01100000" // /* MW 2 */ + 7469 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7471 "00010001" // /* MW 3 */ + 7472 "00000100" // /* MW 2 */ + 7473 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7475 "00010001" // /* MW 3 */ + 7476 "00010100" // /* MW 2 */ + 7477 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7479 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "00101110" // /* MW 3 */ + 7490 "00011100" // /* MW 2 */ + 7491 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7493 "00000001" // /* MW 5 */ + 7494 "00000000" // /* MW 4 */ + 7495 "00000000" // /* MW 3 */ + 7496 "00001000" // /* MW 2 */ + 7497 "00000000" // /* MW 1 */ + 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7499 "00111101" // /* MW 3 */ + 7500 "11111000" // /* MW 2 */ + 7501 "00001111" // /* MW 1 */ + 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7503 "11110101" // /* MW 3 */ + 7504 "11111101" // /* MW 2 */ + 7505 "00001111" // /* MW 1 */ + 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7507 "00000000" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ + 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7511 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7513 "00101001" // /* MW 3 */ + 7514 "00011100" // /* MW 2 */ + 7515 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7517 "00101110" // /* MW 3 */ + 7518 "00011100" // /* MW 2 */ + 7519 "00000001" // /* MW 1 */ + 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7521 "00000000" // /* MW 1 */ + 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7523 "00000000" // /* MW 1 */ + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7527 "00000000" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7533 "00101001" // /* MW 3 */ + 7534 "00011100" // /* MW 2 */ + 7535 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7537 "00101110" // /* MW 3 */ + 7538 "00000100" // /* MW 2 */ + 7539 "00000001" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ + 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7551 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7553 "00101001" // /* MW 3 */ + 7554 "00011100" // /* MW 2 */ + 7555 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7557 "00101110" // /* MW 3 */ + 7558 "00010100" // /* MW 2 */ + 7559 "00000001" // /* MW 1 */ + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7565 "00000001" // /* MW 5 */ + 7566 "00000000" // /* MW 4 */ + 7567 "10010000" // /* MW 3 */ + 7568 "00001110" // /* MW 2 */ + 7569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7575 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00101001" // /* MW 3 */ + 7578 "11011100" // /* MW 2 */ + 7579 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.delay_slot + 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7581 "11000000" // /* MW 3 */ + 7582 "11010000" // /* MW 2 */ + 7583 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7585 "00001000" // /* MW 9 */ + 7586 "11000100" // /* MW 8 */ + 7587 "00110011" // /* MW 7 */ + 7588 "01101000" // /* MW 6 */ + 7589 "00000000" // /* MW 5 */ + 7590 "00000001" // /* MW 4 */ + 7591 "00100000" // /* MW 3 */ + 7592 "00000111" // /* MW 2 */ + 7593 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7595 "01011000" // /* MW 9 */ + 7596 "11111101" // /* MW 8 */ + 7597 "00000111" // /* MW 7 */ + 7598 "00001000" // /* MW 6 */ + 7599 "10000000" // /* MW 5 */ + 7600 "00000001" // /* MW 4 */ + 7601 "10000000" // /* MW 3 */ + 7602 "11100010" // /* MW 2 */ + 7603 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7605 "00000001" // /* MW 9 */ + 7606 "10100000" // /* MW 8 */ + 7607 "00000111" // /* MW 7 */ + 7608 "10000000" // /* MW 6 */ + 7609 "00010001" // /* MW 5 */ + 7610 "00001010" // /* MW 4 */ + 7611 "00100000" // /* MW 3 */ + 7612 "10111110" // /* MW 2 */ + 7613 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7615 "01001010" // /* MW 3 */ + 7616 "00000110" // /* MW 2 */ + 7617 "00000000" // /* MW 1 */ + 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7619 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7621 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "00010111" // /* MW 3 */ + 7624 "00000010" // /* MW 2 */ + 7625 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7627 "00000000" // /* MW 3 */ + 7628 "00101000" // /* MW 2 */ + 7629 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "00000101" // /* MW 3 */ + 7632 "00100010" // /* MW 2 */ + 7633 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7635 "00000001" // /* MW 5 */ + 7636 "00000000" // /* MW 4 */ + 7637 "00000000" // /* MW 3 */ + 7638 "11111000" // /* MW 2 */ + 7639 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7641 "00100111" // /* MW 3 */ + 7642 "01110111" // /* MW 2 */ + 7643 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7645 "10000010" // /* MW 3 */ + 7646 "00100001" // /* MW 2 */ + 7647 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7649 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 81 first +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 +.function_start + 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7665 "01111000" // /* MW 9 */ + 7666 "01100000" // /* MW 8 */ + 7667 "00001000" // /* MW 7 */ + 7668 "11001000" // /* MW 6 */ + 7669 "00010000" // /* MW 5 */ + 7670 "00000000" // /* MW 4 */ + 7671 "10000000" // /* MW 3 */ + 7672 "10000000" // /* MW 2 */ + 7673 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 + 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7675 "00001100" // /* MW 5 */ + 7676 "11000000" // /* MW 4 */ + 7677 "10100000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first + 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7681 "01001010" // /* MW 3 */ + 7682 "00001000" // /* MW 2 */ + 7683 "00000000" // /* MW 1 */ + 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7685 "00000000" // /* MW 1 */ + 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7687 "00000000" // /* MW 1 */ + 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7689 "00000000" // /* MW 1 */ + 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7691 "00000000" // /* MW 1 */ + 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7693 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first + 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7695 "00000000" // /* MW 3 */ + 7696 "00101000" // /* MW 2 */ + 7697 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.delay_slot + 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7699 "00001000" // /* MW 3 */ + 7700 "10000000" // /* MW 2 */ + 7701 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first +.delay_slot + 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7703 "00011101" // /* MW 3 */ + 7704 "00000000" // /* MW 2 */ + 7705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.src_ref 3 "elementwise_binary_broadcasting.h" 83 23 +.delay_slot + 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7707 "11100000" // /* MW 5 */ + 7708 "00001101" // /* MW 4 */ + 7709 "00110001" // /* MW 3 */ + 7710 "10000010" // /* MW 2 */ + 7711 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.delay_slot + 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00011101" // /* MW 3 */ + 7714 "11000100" // /* MW 2 */ + 7715 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 23 +.delay_slot + 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01010001" // /* MW 3 */ + 7718 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7719 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_broadcasting.h" 76 +.src_ref 3 "elementwise_binary_broadcasting.h" 76 first +.function_start + 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7729 "00000001" // /* MW 5 */ + 7730 "00000000" // /* MW 4 */ + 7731 "00000000" // /* MW 3 */ + 7732 "00001000" // /* MW 2 */ + 7733 "00000000" // /* MW 1 */ + 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7735 "00111101" // /* MW 3 */ + 7736 "11111100" // /* MW 2 */ + 7737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first +.no_stack_arguments + 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */ + 7739 "00000001" // /* MW 5 */ + 7740 "00000000" // /* MW 4 */ + 7741 "10100000" // /* MW 3 */ + 7742 "00001110" // /* MW 2 */ + 7743 "00000000" // /* MW 1 */ +.delay_slot + 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7745 "10011101" // /* MW 3 */ + 7746 "11111011" // /* MW 2 */ + 7747 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot + 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7749 "11000000" // /* MW 3 */ + 7750 "01100000" // /* MW 2 */ + 7751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7757 "01100111" // /* MW 3 */ + 7758 "00000001" // /* MW 2 */ + 7759 "00000000" // /* MW 1 */ +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7761 "10011001" // /* MW 3 */ + 7762 "11111011" // /* MW 2 */ + 7763 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7765 "00111001" // /* MW 3 */ + 7766 "11111100" // /* MW 2 */ + 7767 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */ + 7769 "00000000" // /* MW 5 */ + 7770 "00000000" // /* MW 4 */ + 7771 "11111000" // /* MW 3 */ + 7772 "00001110" // /* MW 2 */ + 7773 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7775 "11000000" // /* MW 3 */ + 7776 "01101110" // /* MW 2 */ + 7777 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first +.delay_slot + 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7779 "00000001" // /* MW 5 */ + 7780 "00000000" // /* MW 4 */ + 7781 "00000000" // /* MW 3 */ + 7782 "11111000" // /* MW 2 */ + 7783 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7789 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 89 first +.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 +.function_start + 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7793 "01010001" // /* MW 5 */ + 7794 "00000000" // /* MW 4 */ + 7795 "11010000" // /* MW 3 */ + 7796 "10000010" // /* MW 2 */ + 7797 "01100111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7799 "10000001" // /* MW 5 */ + 7800 "11001101" // /* MW 4 */ + 7801 "01011000" // /* MW 3 */ + 7802 "00000101" // /* MW 2 */ + 7803 "01100001" // /* MW 1 */ + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ + 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7807 "00000000" // /* MW 1 */ + 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7809 "00000000" // /* MW 1 */ + 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7811 "00000000" // /* MW 1 */ + 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7813 "00000000" // /* MW 1 */ + 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7815 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 12 +.src_ref 3 "elementwise_binary_broadcasting.h" 102 35 + 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */ + 7817 "00000001" // /* MW 5 */ + 7818 "01000000" // /* MW 4 */ + 7819 "01100000" // /* MW 3 */ + 7820 "00001111" // /* MW 2 */ + 7821 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 +.delay_slot + 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7823 "11101001" // /* MW 3 */ + 7824 "11000100" // /* MW 2 */ + 7825 "00010111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first +.delay_slot + 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7827 "00101101" // /* MW 3 */ + 7828 "00000000" // /* MW 2 */ + 7829 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first + 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "00110010" // /* MW 3 */ + 7838 "00000100" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ + 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7841 "00000000" // /* MW 1 */ + 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7843 "00000000" // /* MW 1 */ + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ + 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */ + 7847 "00000000" // /* MW 5 */ + 7848 "00000000" // /* MW 4 */ + 7849 "01110000" // /* MW 3 */ + 7850 "00001111" // /* MW 2 */ + 7851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7857 "01110010" // /* MW 3 */ + 7858 "00000101" // /* MW 2 */ + 7859 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7863 "00000000" // /* MW 9 */ + 7864 "00000000" // /* MW 8 */ + 7865 "00000000" // /* MW 7 */ + 7866 "00000000" // /* MW 6 */ + 7867 "00010011" // /* MW 5 */ + 7868 "00000100" // /* MW 4 */ + 7869 "11110000" // /* MW 3 */ + 7870 "00101100" // /* MW 2 */ + 7871 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 +.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first + 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00110010" // /* MW 3 */ + 7874 "00000100" // /* MW 2 */ + 7875 "00000001" // /* MW 1 */ + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ + 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7881 "00000000" // /* MW 1 */ + 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7883 "00000000" // /* MW 1 */ + 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7885 "00000000" // /* MW 1 */ + 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7887 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7889 "01110010" // /* MW 3 */ + 7890 "00000101" // /* MW 2 */ + 7891 "00011000" // /* MW 1 */ + 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7895 "00000000" // /* MW 9 */ + 7896 "00000000" // /* MW 8 */ + 7897 "00000000" // /* MW 7 */ + 7898 "00000000" // /* MW 6 */ + 7899 "00010011" // /* MW 5 */ + 7900 "00000100" // /* MW 4 */ + 7901 "11110001" // /* MW 3 */ + 7902 "00101100" // /* MW 2 */ + 7903 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first + 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7905 "01001000" // /* MW 9 */ + 7906 "00111111" // /* MW 8 */ + 7907 "10111000" // /* MW 7 */ + 7908 "10001010" // /* MW 6 */ + 7909 "00000111" // /* MW 5 */ + 7910 "00000000" // /* MW 4 */ + 7911 "11010000" // /* MW 3 */ + 7912 "10000000" // /* MW 2 */ + 7913 "10001010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7915 "00010000" // /* MW 9 */ + 7916 "10101000" // /* MW 8 */ + 7917 "01111111" // /* MW 7 */ + 7918 "00000100" // /* MW 6 */ + 7919 "00000000" // /* MW 5 */ + 7920 "00000000" // /* MW 4 */ + 7921 "11010000" // /* MW 3 */ + 7922 "10010000" // /* MW 2 */ + 7923 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7925 "11100000" // /* MW 5 */ + 7926 "11111110" // /* MW 4 */ + 7927 "00010110" // /* MW 3 */ + 7928 "00000000" // /* MW 2 */ + 7929 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7931 "11010000" // /* MW 5 */ + 7932 "11001000" // /* MW 4 */ + 7933 "11001000" // /* MW 3 */ + 7934 "00000111" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7937 "00100010" // /* MW 3 */ + 7938 "00000100" // /* MW 2 */ + 7939 "00000100" // /* MW 1 */ + 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7941 "00000000" // /* MW 1 */ + 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first + 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7945 "10101011" // /* MW 3 */ + 7946 "00001000" // /* MW 2 */ + 7947 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 189 20 first + 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7949 "00101011" // /* MW 3 */ + 7950 "00101001" // /* MW 2 */ + 7951 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first + 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7953 "00101011" // /* MW 3 */ + 7954 "00001000" // /* MW 2 */ + 7955 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7957 "00101011" // /* MW 3 */ + 7958 "00101010" // /* MW 2 */ + 7959 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7961 "00000000" // /* MW 5 */ + 7962 "11110101" // /* MW 4 */ + 7963 "01110000" // /* MW 3 */ + 7964 "00010101" // /* MW 2 */ + 7965 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7967 "00111101" // /* MW 7 */ + 7968 "00101000" // /* MW 6 */ + 7969 "00000011" // /* MW 5 */ + 7970 "00000100" // /* MW 4 */ + 7971 "01110000" // /* MW 3 */ + 7972 "00100101" // /* MW 2 */ + 7973 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "00101011" // /* MW 3 */ + 7976 "00001000" // /* MW 2 */ + 7977 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7979 "00111101" // /* MW 7 */ + 7980 "00010000" // /* MW 6 */ + 7981 "00000100" // /* MW 5 */ + 7982 "00000100" // /* MW 4 */ + 7983 "01110000" // /* MW 3 */ + 7984 "01000101" // /* MW 2 */ + 7985 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "10101011" // /* MW 3 */ + 7988 "00001000" // /* MW 2 */ + 7989 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7991 "00111101" // /* MW 7 */ + 7992 "00101000" // /* MW 6 */ + 7993 "00000011" // /* MW 5 */ + 7994 "00000100" // /* MW 4 */ + 7995 "01110000" // /* MW 3 */ + 7996 "00100101" // /* MW 2 */ + 7997 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7999 "00101011" // /* MW 3 */ + 8000 "00001000" // /* MW 2 */ + 8001 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8003 "00111101" // /* MW 13 */ + 8004 "00010000" // /* MW 12 */ + 8005 "00000100" // /* MW 11 */ + 8006 "01010111" // /* MW 10 */ + 8007 "00011010" // /* MW 9 */ + 8008 "01000000" // /* MW 8 */ + 8009 "00000000" // /* MW 7 */ + 8010 "00000000" // /* MW 6 */ + 8011 "01000110" // /* MW 5 */ + 8012 "00111011" // /* MW 4 */ + 8013 "01110100" // /* MW 3 */ + 8014 "01000101" // /* MW 2 */ + 8015 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "10101011" // /* MW 3 */ + 8018 "00001000" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8021 "00111101" // /* MW 11 */ + 8022 "00101000" // /* MW 10 */ + 8023 "00000011" // /* MW 9 */ + 8024 "10001110" // /* MW 8 */ + 8025 "00010001" // /* MW 7 */ + 8026 "00001111" // /* MW 6 */ + 8027 "00100001" // /* MW 5 */ + 8028 "00000000" // /* MW 4 */ + 8029 "01110000" // /* MW 3 */ + 8030 "00100101" // /* MW 2 */ + 8031 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8033 "00000000" // /* MW 15 */ + 8034 "00000000" // /* MW 14 */ + 8035 "01111000" // /* MW 13 */ + 8036 "10100101" // /* MW 12 */ + 8037 "00000001" // /* MW 11 */ + 8038 "00000000" // /* MW 10 */ + 8039 "00000000" // /* MW 9 */ + 8040 "00000000" // /* MW 8 */ + 8041 "01011011" // /* MW 7 */ + 8042 "00000001" // /* MW 6 */ + 8043 "00100000" // /* MW 5 */ + 8044 "00000000" // /* MW 4 */ + 8045 "01110000" // /* MW 3 */ + 8046 "00000101" // /* MW 2 */ + 8047 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8049 "10000001" // /* MW 15 */ + 8050 "00100000" // /* MW 14 */ + 8051 "01111000" // /* MW 13 */ + 8052 "10100101" // /* MW 12 */ + 8053 "00000001" // /* MW 11 */ + 8054 "00000000" // /* MW 10 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "10100011" // /* MW 7 */ + 8058 "00011101" // /* MW 6 */ + 8059 "00100010" // /* MW 5 */ + 8060 "00000000" // /* MW 4 */ + 8061 "01110000" // /* MW 3 */ + 8062 "01000101" // /* MW 2 */ + 8063 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8067 "00111101" // /* MW 7 */ + 8068 "00101000" // /* MW 6 */ + 8069 "00000011" // /* MW 5 */ + 8070 "00000010" // /* MW 4 */ + 8071 "01100000" // /* MW 3 */ + 8072 "11000100" // /* MW 2 */ + 8073 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8075 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8077 "00111101" // /* MW 7 */ + 8078 "00010000" // /* MW 6 */ + 8079 "00000100" // /* MW 5 */ + 8080 "00000010" // /* MW 4 */ + 8081 "01100000" // /* MW 3 */ + 8082 "10110100" // /* MW 2 */ + 8083 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8087 "00000000" // /* MW 5 */ + 8088 "01010000" // /* MW 4 */ + 8089 "01100000" // /* MW 3 */ + 8090 "11000100" // /* MW 2 */ + 8091 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "10100011" // /* MW 3 */ + 8096 "00011101" // /* MW 2 */ + 8097 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8099 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8101 "00100011" // /* MW 3 */ + 8102 "00011110" // /* MW 2 */ + 8103 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8105 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first +.function_start + 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8113 "00000001" // /* MW 5 */ + 8114 "00000000" // /* MW 4 */ + 8115 "00000000" // /* MW 3 */ + 8116 "00010000" // /* MW 2 */ + 8117 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 + 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8119 "01110000" // /* MW 7 */ + 8120 "01100000" // /* MW 6 */ + 8121 "00001010" // /* MW 5 */ + 8122 "00000010" // /* MW 4 */ + 8123 "10110000" // /* MW 3 */ + 8124 "10000111" // /* MW 2 */ + 8125 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 + 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8127 "00000000" // /* MW 7 */ + 8128 "00000011" // /* MW 6 */ + 8129 "10110100" // /* MW 5 */ + 8130 "00000001" // /* MW 4 */ + 8131 "01100000" // /* MW 3 */ + 8132 "10010001" // /* MW 2 */ + 8133 "01010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 + 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8135 "10000001" // /* MW 5 */ + 8136 "00100001" // /* MW 4 */ + 8137 "01011000" // /* MW 3 */ + 8138 "11101101" // /* MW 2 */ + 8139 "01100101" // /* MW 1 */ + 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8141 "11000001" // /* MW 5 */ + 8142 "10101011" // /* MW 4 */ + 8143 "01011000" // /* MW 3 */ + 8144 "11001010" // /* MW 2 */ + 8145 "01110011" // /* MW 1 */ + 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8147 "11000000" // /* MW 3 */ + 8148 "01101000" // /* MW 2 */ + 8149 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8151 "00101011" // /* MW 3 */ + 8152 "00000111" // /* MW 2 */ + 8153 "00001000" // /* MW 1 */ + 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8155 "01010111" // /* MW 3 */ + 8156 "00000110" // /* MW 2 */ + 8157 "00000000" // /* MW 1 */ + 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8159 "00000000" // /* MW 1 */ + 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8161 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first +.no_stack_arguments + 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */ + 8163 "00000001" // /* MW 5 */ + 8164 "00000000" // /* MW 4 */ + 8165 "00111000" // /* MW 3 */ + 8166 "00001111" // /* MW 2 */ + 8167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.delay_slot + 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8169 "11000000" // /* MW 3 */ + 8170 "01010000" // /* MW 2 */ + 8171 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first +.delay_slot + 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8175 "00010010" // /* MW 3 */ + 8176 "00100101" // /* MW 2 */ + 8177 "00010100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8179 "01000001" // /* MW 5 */ + 8180 "11010010" // /* MW 4 */ + 8181 "01000010" // /* MW 3 */ + 8182 "00100000" // /* MW 2 */ + 8183 "10001100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8185 "01110000" // /* MW 7 */ + 8186 "00010000" // /* MW 6 */ + 8187 "00110100" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "01100000" // /* MW 3 */ + 8190 "00101011" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.return_address + 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8193 "00111001" // /* MW 3 */ + 8194 "11111100" // /* MW 2 */ + 8195 "00000111" // /* MW 1 */ + 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8197 "00000000" // /* MW 1 */ + 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8199 "00000000" // /* MW 1 */ + 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8201 "00000000" // /* MW 1 */ + 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8203 "00000000" // /* MW 1 */ + 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8205 "00000000" // /* MW 1 */ + 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8207 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first + 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8209 "00000000" // /* MW 3 */ + 8210 "00101000" // /* MW 2 */ + 8211 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.delay_slot + 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8213 "00000001" // /* MW 5 */ + 8214 "00000000" // /* MW 4 */ + 8215 "00000000" // /* MW 3 */ + 8216 "11110000" // /* MW 2 */ + 8217 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8221 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8223 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8225 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 147 first +.src_ref 7 "superkernels.cpp" 152 6 +.function_start + 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8241 "10000000" // /* MW 5 */ + 8242 "11001000" // /* MW 4 */ + 8243 "11000110" // /* MW 3 */ + 8244 "00000111" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 first + 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8247 "11000001" // /* MW 5 */ + 8248 "10110101" // /* MW 4 */ + 8249 "11011000" // /* MW 3 */ + 8250 "11000010" // /* MW 2 */ + 8251 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 147 + 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8253 "00000001" // /* MW 5 */ + 8254 "00000000" // /* MW 4 */ + 8255 "00000000" // /* MW 3 */ + 8256 "00001000" // /* MW 2 */ + 8257 "00000000" // /* MW 1 */ + 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8259 "01110000" // /* MW 7 */ + 8260 "11010000" // /* MW 6 */ + 8261 "00001011" // /* MW 5 */ + 8262 "00000000" // /* MW 4 */ + 8263 "10110000" // /* MW 3 */ + 8264 "01100011" // /* MW 2 */ + 8265 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 + 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8267 "00010001" // /* MW 9 */ + 8268 "00101000" // /* MW 8 */ + 8269 "00110010" // /* MW 7 */ + 8270 "11110011" // /* MW 6 */ + 8271 "00000001" // /* MW 5 */ + 8272 "00000000" // /* MW 4 */ + 8273 "10110000" // /* MW 3 */ + 8274 "10000010" // /* MW 2 */ + 8275 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "11000000" // /* MW 3 */ + 8278 "11010100" // /* MW 2 */ + 8279 "00011011" // /* MW 1 */ + 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8281 "00000000" // /* MW 1 */ + 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8283 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 +.src_ref 7 "superkernels.cpp" 152 16 + 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */ + 8285 "00000001" // /* MW 5 */ + 8286 "01000000" // /* MW 4 */ + 8287 "10000000" // /* MW 3 */ + 8288 "00010000" // /* MW 2 */ + 8289 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 22 first +.delay_slot + 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8291 "10010000" // /* MW 3 */ + 8292 "01100010" // /* MW 2 */ + 8293 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 30 +.delay_slot + 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8295 "11111011" // /* MW 3 */ + 8296 "01100011" // /* MW 2 */ + 8297 "00010100" // /* MW 1 */ +.delay_slot + 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8299 "00111101" // /* MW 3 */ + 8300 "11110100" // /* MW 2 */ + 8301 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8303 "01110000" // /* MW 7 */ + 8304 "01100000" // /* MW 6 */ + 8305 "00110000" // /* MW 5 */ + 8306 "00000011" // /* MW 4 */ + 8307 "00110000" // /* MW 3 */ + 8308 "11000110" // /* MW 2 */ + 8309 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 +.src_ref 7 "superkernels.cpp" 166 2 +.delay_slot + 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8311 "10000000" // /* MW 5 */ + 8312 "11001001" // /* MW 4 */ + 8313 "11000000" // /* MW 3 */ + 8314 "00000111" // /* MW 2 */ + 8315 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8317 "11010000" // /* MW 5 */ + 8318 "11001000" // /* MW 4 */ + 8319 "11000100" // /* MW 3 */ + 8320 "00000111" // /* MW 2 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8323 "00010000" // /* MW 9 */ + 8324 "00110010" // /* MW 8 */ + 8325 "00110010" // /* MW 7 */ + 8326 "11110001" // /* MW 6 */ + 8327 "00000001" // /* MW 5 */ + 8328 "00000000" // /* MW 4 */ + 8329 "11100000" // /* MW 3 */ + 8330 "11000000" // /* MW 2 */ + 8331 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8333 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */ + 8335 "00000001" // /* MW 5 */ + 8336 "00000000" // /* MW 4 */ + 8337 "00011000" // /* MW 3 */ + 8338 "00001111" // /* MW 2 */ + 8339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "00110001" // /* MW 3 */ + 8346 "00100000" // /* MW 2 */ + 8347 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "00000101" // /* MW 3 */ + 8350 "00100000" // /* MW 2 */ + 8351 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8353 "00000000" // /* MW 15 */ + 8354 "00000000" // /* MW 14 */ + 8355 "01111000" // /* MW 13 */ + 8356 "10100101" // /* MW 12 */ + 8357 "00000001" // /* MW 11 */ + 8358 "00000000" // /* MW 10 */ + 8359 "00000000" // /* MW 9 */ + 8360 "10000000" // /* MW 8 */ + 8361 "00010001" // /* MW 7 */ + 8362 "00000110" // /* MW 6 */ + 8363 "00100010" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11110000" // /* MW 3 */ + 8366 "00101100" // /* MW 2 */ + 8367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 +.return_address + 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8369 "10100000" // /* MW 5 */ + 8370 "11001000" // /* MW 4 */ + 8371 "11000100" // /* MW 3 */ + 8372 "00000111" // /* MW 2 */ + 8373 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 first +.src_ref 7 "superkernels.cpp" 159 65 + 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8375 "00010000" // /* MW 9 */ + 8376 "01100000" // /* MW 8 */ + 8377 "00110010" // /* MW 7 */ + 8378 "11110001" // /* MW 6 */ + 8379 "00000001" // /* MW 5 */ + 8380 "00000000" // /* MW 4 */ + 8381 "11010000" // /* MW 3 */ + 8382 "11000010" // /* MW 2 */ + 8383 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 +.src_ref 7 "superkernels.cpp" 159 65 +.src_ref 7 "superkernels.cpp" 166 2 + 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8385 "00010000" // /* MW 9 */ + 8386 "01100000" // /* MW 8 */ + 8387 "00110010" // /* MW 7 */ + 8388 "11110001" // /* MW 6 */ + 8389 "00000001" // /* MW 5 */ + 8390 "00000000" // /* MW 4 */ + 8391 "11010000" // /* MW 3 */ + 8392 "11000110" // /* MW 2 */ + 8393 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 first +.src_ref 7 "superkernels.cpp" 159 16 +.src_ref 7 "superkernels.cpp" 164 47 + 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8395 "00010000" // /* MW 9 */ + 8396 "00101010" // /* MW 8 */ + 8397 "10110010" // /* MW 7 */ + 8398 "11110000" // /* MW 6 */ + 8399 "00000001" // /* MW 5 */ + 8400 "00000000" // /* MW 4 */ + 8401 "01010000" // /* MW 3 */ + 8402 "11001011" // /* MW 2 */ + 8403 "01001010" // /* MW 1 */ + 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8405 "00000000" // /* MW 1 */ + 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8407 "00000000" // /* MW 1 */ + 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */ + 8409 "00000000" // /* MW 5 */ + 8410 "00000000" // /* MW 4 */ + 8411 "10001000" // /* MW 3 */ + 8412 "00010000" // /* MW 2 */ + 8413 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 +.delay_slot + 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8415 "11000000" // /* MW 5 */ + 8416 "11001000" // /* MW 4 */ + 8417 "11000000" // /* MW 3 */ + 8418 "00000111" // /* MW 2 */ + 8419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8421 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 27 first +.delay_slot + 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001111" // /* MW 3 */ + 8424 "01100001" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 first +.delay_slot + 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8427 "10100011" // /* MW 5 */ + 8428 "00001100" // /* MW 4 */ + 8429 "11110000" // /* MW 3 */ + 8430 "00101100" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 16 first +.delay_slot + 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8433 "00000000" // /* MW 15 */ + 8434 "00000000" // /* MW 14 */ + 8435 "01111000" // /* MW 13 */ + 8436 "10100101" // /* MW 12 */ + 8437 "00000001" // /* MW 11 */ + 8438 "00000000" // /* MW 10 */ + 8439 "00000000" // /* MW 9 */ + 8440 "10000000" // /* MW 8 */ + 8441 "00010001" // /* MW 7 */ + 8442 "00000110" // /* MW 6 */ + 8443 "00100001" // /* MW 5 */ + 8444 "00000000" // /* MW 4 */ + 8445 "11110000" // /* MW 3 */ + 8446 "00101100" // /* MW 2 */ + 8447 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 164 47 +.src_ref 7 "superkernels.cpp" 166 2 + 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "00010000" // /* MW 13 */ + 8452 "00101010" // /* MW 12 */ + 8453 "10110010" // /* MW 11 */ + 8454 "11110000" // /* MW 10 */ + 8455 "00000001" // /* MW 9 */ + 8456 "00000000" // /* MW 8 */ + 8457 "10001011" // /* MW 7 */ + 8458 "10000000" // /* MW 6 */ + 8459 "00100010" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8465 "00000000" // /* MW 7 */ + 8466 "11000011" // /* MW 6 */ + 8467 "10110011" // /* MW 5 */ + 8468 "00000011" // /* MW 4 */ + 8469 "01100000" // /* MW 3 */ + 8470 "10010001" // /* MW 2 */ + 8471 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8473 "00010000" // /* MW 9 */ + 8474 "00100000" // /* MW 8 */ + 8475 "00110010" // /* MW 7 */ + 8476 "11110000" // /* MW 6 */ + 8477 "00000001" // /* MW 5 */ + 8478 "00000000" // /* MW 4 */ + 8479 "11010000" // /* MW 3 */ + 8480 "11101110" // /* MW 2 */ + 8481 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8483 "00010110" // /* MW 3 */ + 8484 "11111110" // /* MW 2 */ + 8485 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8487 "00110110" // /* MW 3 */ + 8488 "11111110" // /* MW 2 */ + 8489 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8491 "01010110" // /* MW 3 */ + 8492 "01000110" // /* MW 2 */ + 8493 "00000111" // /* MW 1 */ + 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8495 "00000000" // /* MW 1 */ + 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8497 "00000000" // /* MW 1 */ + 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8499 "00000000" // /* MW 1 */ + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ + 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8503 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "00000010" // /* MW 3 */ + 8506 "01100001" // /* MW 2 */ + 8507 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8509 "00010001" // /* MW 3 */ + 8510 "00000110" // /* MW 2 */ + 8511 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8513 "11111101" // /* MW 3 */ + 8514 "11100000" // /* MW 2 */ + 8515 "00010111" // /* MW 1 */ + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001000" // /* MW 3 */ + 8524 "10010011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 + 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8527 "10000001" // /* MW 5 */ + 8528 "10101101" // /* MW 4 */ + 8529 "10100111" // /* MW 3 */ + 8530 "00000000" // /* MW 2 */ + 8531 "00000100" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first + 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8537 "00110110" // /* MW 3 */ + 8538 "00000110" // /* MW 2 */ + 8539 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8541 "10000001" // /* MW 5 */ + 8542 "11011101" // /* MW 4 */ + 8543 "11011100" // /* MW 3 */ + 8544 "11001010" // /* MW 2 */ + 8545 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 47 first + 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8547 "01110110" // /* MW 3 */ + 8548 "00000110" // /* MW 2 */ + 8549 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8551 "10011110" // /* MW 3 */ + 8552 "01011100" // /* MW 2 */ + 8553 "00000111" // /* MW 1 */ + 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 166 2 first +.no_stack_arguments + 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */ + 8557 "00000001" // /* MW 5 */ + 8558 "00000000" // /* MW 4 */ + 8559 "11011000" // /* MW 3 */ + 8560 "00001111" // /* MW 2 */ + 8561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first +.delay_slot + 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8565 "00000111" // /* MW 3 */ + 8566 "01100010" // /* MW 2 */ + 8567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.delay_slot + 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8569 "00110001" // /* MW 3 */ + 8570 "00000110" // /* MW 2 */ + 8571 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 first +.delay_slot + 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8573 "00001101" // /* MW 3 */ + 8574 "11100001" // /* MW 2 */ + 8575 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 +.delay_slot + 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8577 "00000000" // /* MW 15 */ + 8578 "00000000" // /* MW 14 */ + 8579 "10101000" // /* MW 13 */ + 8580 "10100000" // /* MW 12 */ + 8581 "00110100" // /* MW 11 */ + 8582 "00000000" // /* MW 10 */ + 8583 "00000000" // /* MW 9 */ + 8584 "00000000" // /* MW 8 */ + 8585 "01011011" // /* MW 7 */ + 8586 "00000001" // /* MW 6 */ + 8587 "00100000" // /* MW 5 */ + 8588 "00000000" // /* MW 4 */ + 8589 "11110000" // /* MW 3 */ + 8590 "00101100" // /* MW 2 */ + 8591 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 +.src_ref 7 "superkernels.cpp" 169 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8593 "00010000" // /* MW 9 */ + 8594 "00100000" // /* MW 8 */ + 8595 "00110010" // /* MW 7 */ + 8596 "11110011" // /* MW 6 */ + 8597 "00000001" // /* MW 5 */ + 8598 "00000000" // /* MW 4 */ + 8599 "11010000" // /* MW 3 */ + 8600 "11000110" // /* MW 2 */ + 8601 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8603 "00000101" // /* MW 3 */ + 8604 "00100000" // /* MW 2 */ + 8605 "00010000" // /* MW 1 */ + 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8607 "00000000" // /* MW 1 */ + 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ + 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8617 "00001000" // /* MW 3 */ + 8618 "01010001" // /* MW 2 */ + 8619 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8621 "00010000" // /* MW 9 */ + 8622 "00110000" // /* MW 8 */ + 8623 "00110010" // /* MW 7 */ + 8624 "11110001" // /* MW 6 */ + 8625 "00000001" // /* MW 5 */ + 8626 "00000000" // /* MW 4 */ + 8627 "11010000" // /* MW 3 */ + 8628 "11001110" // /* MW 2 */ + 8629 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 first + 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "00110110" // /* MW 3 */ + 8632 "00000110" // /* MW 2 */ + 8633 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 + 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8635 "01010110" // /* MW 3 */ + 8636 "00000110" // /* MW 2 */ + 8637 "00000010" // /* MW 1 */ + 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8639 "00000000" // /* MW 1 */ + 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8641 "00000000" // /* MW 1 */ + 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8643 "00000000" // /* MW 1 */ + 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8645 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00110001" // /* MW 3 */ + 8648 "00100001" // /* MW 2 */ + 8649 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8651 "00010001" // /* MW 3 */ + 8652 "11100110" // /* MW 2 */ + 8653 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 16 first + 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8655 "00101000" // /* MW 3 */ + 8656 "01100001" // /* MW 2 */ + 8657 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 + 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */ + 8659 "00000001" // /* MW 5 */ + 8660 "01000000" // /* MW 4 */ + 8661 "11111000" // /* MW 3 */ + 8662 "00010000" // /* MW 2 */ + 8663 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8673 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 + 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8675 "00000001" // /* MW 3 */ + 8676 "00100000" // /* MW 2 */ + 8677 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 first + 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8679 "00000000" // /* MW 9 */ + 8680 "00000000" // /* MW 8 */ + 8681 "00000000" // /* MW 7 */ + 8682 "10000000" // /* MW 6 */ + 8683 "00010001" // /* MW 5 */ + 8684 "00000110" // /* MW 4 */ + 8685 "11110110" // /* MW 3 */ + 8686 "00101100" // /* MW 2 */ + 8687 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 171 + 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8689 "00111001" // /* MW 3 */ + 8690 "11110100" // /* MW 2 */ + 8691 "00000111" // /* MW 1 */ + 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8693 "00011001" // /* MW 3 */ + 8694 "11111011" // /* MW 2 */ + 8695 "00000111" // /* MW 1 */ + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ + 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8699 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "11110001" // /* MW 3 */ + 8704 "11111101" // /* MW 2 */ + 8705 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8709 "00000000" // /* MW 3 */ + 8710 "00101000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8713 "10100000" // /* MW 3 */ + 8714 "01100111" // /* MW 2 */ + 8715 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 +.delay_slot + 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8717 "00000001" // /* MW 5 */ + 8718 "00000000" // /* MW 4 */ + 8719 "00000000" // /* MW 3 */ + 8720 "11111000" // /* MW 2 */ + 8721 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 8727 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 3 "elementwise_unary.h" 124 first +.src_ref 3 "elementwise_unary.h" 126 24 first +.function_start + 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8737 "00101110" // /* MW 3 */ + 8738 "00011100" // /* MW 2 */ + 8739 "00000001" // /* MW 1 */ + 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8741 "00000000" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8745 "00000000" // /* MW 1 */ + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 126 22 first + 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8753 "00101001" // /* MW 3 */ + 8754 "00011100" // /* MW 2 */ + 8755 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 24 first + 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8757 "00101110" // /* MW 3 */ + 8758 "00011100" // /* MW 2 */ + 8759 "00000001" // /* MW 1 */ + 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8761 "00000000" // /* MW 1 */ + 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8763 "00000000" // /* MW 1 */ + 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8765 "00000000" // /* MW 1 */ + 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8767 "00000000" // /* MW 1 */ + 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8769 "00000000" // /* MW 1 */ + 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8771 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 22 + 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8773 "00101001" // /* MW 3 */ + 8774 "00011100" // /* MW 2 */ + 8775 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 24 first + 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8777 "00101110" // /* MW 3 */ + 8778 "01101100" // /* MW 2 */ + 8779 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8781 "00010010" // /* MW 3 */ + 8782 "00000100" // /* MW 2 */ + 8783 "00000001" // /* MW 1 */ + 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8785 "00000000" // /* MW 1 */ + 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8787 "00000000" // /* MW 1 */ + 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8789 "00000000" // /* MW 1 */ + 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8791 "00000000" // /* MW 1 */ + 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8793 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 22 first + 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8795 "00101001" // /* MW 3 */ + 8796 "01101100" // /* MW 2 */ + 8797 "00001000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010111" // /* MW 3 */ + 8800 "00000100" // /* MW 2 */ + 8801 "00000000" // /* MW 1 */ + 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8803 "00000000" // /* MW 1 */ + 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8805 "00000000" // /* MW 1 */ + 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8807 "00000000" // /* MW 1 */ + 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8809 "00000000" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 first + 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "00010010" // /* MW 3 */ + 8816 "00100100" // /* MW 2 */ + 8817 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 + 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8819 "00010111" // /* MW 3 */ + 8820 "00010100" // /* MW 2 */ + 8821 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 130 4 first + 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8823 "00000000" // /* MW 3 */ + 8824 "00101000" // /* MW 2 */ + 8825 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8829 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 8835 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 136 first +.src_ref 3 "elementwise_unary.h" 142 37 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 171 19 +.function_start + 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8849 "00010000" // /* MW 11 */ + 8850 "10001000" // /* MW 10 */ + 8851 "01111001" // /* MW 9 */ + 8852 "00001000" // /* MW 8 */ + 8853 "00000000" // /* MW 7 */ + 8854 "00000000" // /* MW 6 */ + 8855 "01101000" // /* MW 5 */ + 8856 "00111010" // /* MW 4 */ + 8857 "10000000" // /* MW 3 */ + 8858 "11000010" // /* MW 2 */ + 8859 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 142 78 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 190 19 first + 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8861 "00010000" // /* MW 11 */ + 8862 "10100000" // /* MW 10 */ + 8863 "10111001" // /* MW 9 */ + 8864 "00001001" // /* MW 8 */ + 8865 "00000000" // /* MW 7 */ + 8866 "00000000" // /* MW 6 */ + 8867 "01101000" // /* MW 5 */ + 8868 "00111001" // /* MW 4 */ + 8869 "00000000" // /* MW 3 */ + 8870 "01010001" // /* MW 2 */ + 8871 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 + 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8873 "11000000" // /* MW 3 */ + 8874 "00010100" // /* MW 2 */ + 8875 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 first + 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8877 "00010000" // /* MW 3 */ + 8878 "01100000" // /* MW 2 */ + 8879 "00011010" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 103 16 first + 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8881 "01010010" // /* MW 3 */ + 8882 "00011100" // /* MW 2 */ + 8883 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 142 37 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8885 "00010110" // /* MW 3 */ + 8886 "00000000" // /* MW 2 */ + 8887 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 8 "clip_impl.h" 104 16 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8889 "01101000" // /* MW 5 */ + 8890 "00111010" // /* MW 4 */ + 8891 "01010000" // /* MW 3 */ + 8892 "10000110" // /* MW 2 */ + 8893 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8895 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8901 "10110100" // /* MW 3 */ + 8902 "00011100" // /* MW 2 */ + 8903 "00111000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8905 "01110010" // /* MW 3 */ + 8906 "00001001" // /* MW 2 */ + 8907 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 142 78 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8909 "01111000" // /* MW 9 */ + 8910 "00110110" // /* MW 8 */ + 8911 "01010000" // /* MW 7 */ + 8912 "11101101" // /* MW 6 */ + 8913 "00011000" // /* MW 5 */ + 8914 "00000001" // /* MW 4 */ + 8915 "01101000" // /* MW 3 */ + 8916 "00111010" // /* MW 2 */ + 8917 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 154 8 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8919 "11111110" // /* MW 3 */ + 8920 "01111000" // /* MW 2 */ + 8921 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8923 "01110010" // /* MW 3 */ + 8924 "10000101" // /* MW 2 */ + 8925 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8927 "10101100" // /* MW 3 */ + 8928 "10101000" // /* MW 2 */ + 8929 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8931 "01100000" // /* MW 13 */ + 8932 "00101011" // /* MW 12 */ + 8933 "00000000" // /* MW 11 */ + 8934 "11001111" // /* MW 10 */ + 8935 "00000110" // /* MW 9 */ + 8936 "00110001" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "01101000" // /* MW 5 */ + 8940 "00111001" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8945 "00000000" // /* MW 15 */ + 8946 "00000000" // /* MW 14 */ + 8947 "01111000" // /* MW 13 */ + 8948 "01010110" // /* MW 12 */ + 8949 "11011000" // /* MW 11 */ + 8950 "00000001" // /* MW 10 */ + 8951 "00000000" // /* MW 9 */ + 8952 "00000000" // /* MW 8 */ + 8953 "11010011" // /* MW 7 */ + 8954 "00011100" // /* MW 6 */ + 8955 "00100001" // /* MW 5 */ + 8956 "00000000" // /* MW 4 */ + 8957 "11110000" // /* MW 3 */ + 8958 "00101100" // /* MW 2 */ + 8959 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8961 "00000000" // /* MW 15 */ + 8962 "00000000" // /* MW 14 */ + 8963 "01111000" // /* MW 13 */ + 8964 "00110110" // /* MW 12 */ + 8965 "01010000" // /* MW 11 */ + 8966 "00000001" // /* MW 10 */ + 8967 "00000000" // /* MW 9 */ + 8968 "00000000" // /* MW 8 */ + 8969 "01011011" // /* MW 7 */ + 8970 "00000001" // /* MW 6 */ + 8971 "00100000" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "11110000" // /* MW 3 */ + 8974 "00101100" // /* MW 2 */ + 8975 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8977 "00000000" // /* MW 15 */ + 8978 "00000000" // /* MW 14 */ + 8979 "01111000" // /* MW 13 */ + 8980 "01010110" // /* MW 12 */ + 8981 "11010100" // /* MW 11 */ + 8982 "00000000" // /* MW 10 */ + 8983 "00000000" // /* MW 9 */ + 8984 "00000000" // /* MW 8 */ + 8985 "11010011" // /* MW 7 */ + 8986 "00011101" // /* MW 6 */ + 8987 "01101001" // /* MW 5 */ + 8988 "00111010" // /* MW 4 */ + 8989 "11110000" // /* MW 3 */ + 8990 "00101100" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8993 "00000000" // /* MW 15 */ + 8994 "00000000" // /* MW 14 */ + 8995 "01111000" // /* MW 13 */ + 8996 "00110110" // /* MW 12 */ + 8997 "10001000" // /* MW 11 */ + 8998 "00000001" // /* MW 10 */ + 8999 "00000000" // /* MW 9 */ + 9000 "00000000" // /* MW 8 */ + 9001 "01011011" // /* MW 7 */ + 9002 "00000001" // /* MW 6 */ + 9003 "01101000" // /* MW 5 */ + 9004 "00111001" // /* MW 4 */ + 9005 "11110000" // /* MW 3 */ + 9006 "00101100" // /* MW 2 */ + 9007 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9009 "00000000" // /* MW 15 */ + 9010 "00000000" // /* MW 14 */ + 9011 "01111000" // /* MW 13 */ + 9012 "01010110" // /* MW 12 */ + 9013 "11011000" // /* MW 11 */ + 9014 "00000001" // /* MW 10 */ + 9015 "00000000" // /* MW 9 */ + 9016 "00000000" // /* MW 8 */ + 9017 "11010011" // /* MW 7 */ + 9018 "00011100" // /* MW 6 */ + 9019 "00100001" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "11110000" // /* MW 3 */ + 9022 "00101100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.src_ref 4 "max_min.hpp" 20 104 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9025 "00000000" // /* MW 15 */ + 9026 "00000000" // /* MW 14 */ + 9027 "01111000" // /* MW 13 */ + 9028 "00110110" // /* MW 12 */ + 9029 "01010000" // /* MW 11 */ + 9030 "00000001" // /* MW 10 */ + 9031 "00000000" // /* MW 9 */ + 9032 "00000000" // /* MW 8 */ + 9033 "01011011" // /* MW 7 */ + 9034 "00000001" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9041 "01110000" // /* MW 7 */ + 9042 "01010110" // /* MW 6 */ + 9043 "11010100" // /* MW 5 */ + 9044 "00000000" // /* MW 4 */ + 9045 "01100000" // /* MW 3 */ + 9046 "10111010" // /* MW 2 */ + 9047 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9049 "01101100" // /* MW 3 */ + 9050 "00010000" // /* MW 2 */ + 9051 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first + 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9053 "01110000" // /* MW 7 */ + 9054 "01010110" // /* MW 6 */ + 9055 "11011000" // /* MW 5 */ + 9056 "00000001" // /* MW 4 */ + 9057 "01100000" // /* MW 3 */ + 9058 "10011010" // /* MW 2 */ + 9059 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 158 4 first + 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9061 "11011001" // /* MW 5 */ + 9062 "01000000" // /* MW 4 */ + 9063 "00000101" // /* MW 3 */ + 9064 "00000000" // /* MW 2 */ + 9065 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9067 "01110000" // /* MW 7 */ + 9068 "01010110" // /* MW 6 */ + 9069 "11010100" // /* MW 5 */ + 9070 "00000000" // /* MW 4 */ + 9071 "01100000" // /* MW 3 */ + 9072 "10111010" // /* MW 2 */ + 9073 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9075 "01101100" // /* MW 3 */ + 9076 "00010000" // /* MW 2 */ + 9077 "00011011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.delay_slot + 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9079 "10101100" // /* MW 3 */ + 9080 "10110000" // /* MW 2 */ + 9081 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.delay_slot + 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9083 "11010011" // /* MW 3 */ + 9084 "00011100" // /* MW 2 */ + 9085 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9087 "11010011" // /* MW 3 */ + 9088 "00011101" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 9089 "00001001" // /* MW 1 */ +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 176 first +.src_ref 7 "superkernels.cpp" 181 6 +.function_start + 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9105 "10000000" // /* MW 5 */ + 9106 "11001000" // /* MW 4 */ + 9107 "11000110" // /* MW 3 */ + 9108 "00000111" // /* MW 2 */ + 9109 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 first + 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9111 "11000001" // /* MW 5 */ + 9112 "10110101" // /* MW 4 */ + 9113 "11011000" // /* MW 3 */ + 9114 "11000010" // /* MW 2 */ + 9115 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 176 + 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9117 "00000001" // /* MW 5 */ + 9118 "00000000" // /* MW 4 */ + 9119 "00000000" // /* MW 3 */ + 9120 "00001000" // /* MW 2 */ + 9121 "00000000" // /* MW 1 */ + 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9123 "01110000" // /* MW 7 */ + 9124 "11010000" // /* MW 6 */ + 9125 "00001011" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "10110000" // /* MW 3 */ + 9128 "01100011" // /* MW 2 */ + 9129 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 + 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9131 "00010001" // /* MW 9 */ + 9132 "00101000" // /* MW 8 */ + 9133 "00110010" // /* MW 7 */ + 9134 "11110011" // /* MW 6 */ + 9135 "00000001" // /* MW 5 */ + 9136 "00000000" // /* MW 4 */ + 9137 "10110000" // /* MW 3 */ + 9138 "10000010" // /* MW 2 */ + 9139 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9141 "11000000" // /* MW 3 */ + 9142 "11010100" // /* MW 2 */ + 9143 "00011011" // /* MW 1 */ + 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9145 "00000000" // /* MW 1 */ + 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9147 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 +.src_ref 7 "superkernels.cpp" 181 16 + 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */ + 9149 "00000001" // /* MW 5 */ + 9150 "01000000" // /* MW 4 */ + 9151 "00110000" // /* MW 3 */ + 9152 "00010010" // /* MW 2 */ + 9153 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 22 first +.delay_slot + 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "10010000" // /* MW 3 */ + 9156 "01100010" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 30 +.delay_slot + 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "11111011" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010100" // /* MW 1 */ +.delay_slot + 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00111101" // /* MW 3 */ + 9164 "11110100" // /* MW 2 */ + 9165 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9167 "01110000" // /* MW 7 */ + 9168 "01100000" // /* MW 6 */ + 9169 "00110000" // /* MW 5 */ + 9170 "00000011" // /* MW 4 */ + 9171 "00110000" // /* MW 3 */ + 9172 "11000110" // /* MW 2 */ + 9173 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 +.src_ref 7 "superkernels.cpp" 195 2 +.delay_slot + 9174 "01000100" // MOVXM p0, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9175 "10000000" // /* MW 5 */ + 9176 "11001011" // /* MW 4 */ + 9177 "11000000" // /* MW 3 */ + 9178 "00000111" // /* MW 2 */ + 9179 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9181 "11010000" // /* MW 5 */ + 9182 "11001000" // /* MW 4 */ + 9183 "11000100" // /* MW 3 */ + 9184 "00000111" // /* MW 2 */ + 9185 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9187 "00010000" // /* MW 9 */ + 9188 "00110010" // /* MW 8 */ + 9189 "00110010" // /* MW 7 */ + 9190 "11110001" // /* MW 6 */ + 9191 "00000001" // /* MW 5 */ + 9192 "00000000" // /* MW 4 */ + 9193 "11100000" // /* MW 3 */ + 9194 "11000000" // /* MW 2 */ + 9195 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */ + 9199 "00000001" // /* MW 5 */ + 9200 "00000000" // /* MW 4 */ + 9201 "00010000" // /* MW 3 */ + 9202 "00010001" // /* MW 2 */ + 9203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9205 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9207 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00110001" // /* MW 3 */ + 9210 "00100000" // /* MW 2 */ + 9211 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00000101" // /* MW 3 */ + 9214 "00100000" // /* MW 2 */ + 9215 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9217 "00000000" // /* MW 15 */ + 9218 "00000000" // /* MW 14 */ + 9219 "01111000" // /* MW 13 */ + 9220 "10100101" // /* MW 12 */ + 9221 "00000001" // /* MW 11 */ + 9222 "00000000" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "10000000" // /* MW 8 */ + 9225 "00010001" // /* MW 7 */ + 9226 "00000110" // /* MW 6 */ + 9227 "00100010" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 +.return_address + 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10100000" // /* MW 5 */ + 9234 "11001000" // /* MW 4 */ + 9235 "11000100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 first +.src_ref 7 "superkernels.cpp" 188 43 + 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "11100000" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110001" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11000010" // /* MW 2 */ + 9247 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 +.src_ref 7 "superkernels.cpp" 188 43 +.src_ref 7 "superkernels.cpp" 195 2 + 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "11100000" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110001" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000110" // /* MW 2 */ + 9257 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 first +.src_ref 7 "superkernels.cpp" 188 16 +.src_ref 7 "superkernels.cpp" 193 47 + 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9259 "00010000" // /* MW 9 */ + 9260 "00101010" // /* MW 8 */ + 9261 "10110010" // /* MW 7 */ + 9262 "11110000" // /* MW 6 */ + 9263 "00000001" // /* MW 5 */ + 9264 "00000000" // /* MW 4 */ + 9265 "01010000" // /* MW 3 */ + 9266 "11001011" // /* MW 2 */ + 9267 "01001000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ + 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */ + 9273 "00000000" // /* MW 5 */ + 9274 "00000000" // /* MW 4 */ + 9275 "00111000" // /* MW 3 */ + 9276 "00010010" // /* MW 2 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 +.delay_slot + 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "11000000" // /* MW 5 */ + 9280 "11001000" // /* MW 4 */ + 9281 "11000000" // /* MW 3 */ + 9282 "00000111" // /* MW 2 */ + 9283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 27 first +.delay_slot + 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9287 "00001111" // /* MW 3 */ + 9288 "01100001" // /* MW 2 */ + 9289 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 first +.delay_slot + 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9291 "10100011" // /* MW 5 */ + 9292 "00001100" // /* MW 4 */ + 9293 "11110000" // /* MW 3 */ + 9294 "00101100" // /* MW 2 */ + 9295 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 16 first +.delay_slot + 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9297 "00000000" // /* MW 15 */ + 9298 "00000000" // /* MW 14 */ + 9299 "01111000" // /* MW 13 */ + 9300 "10100101" // /* MW 12 */ + 9301 "00000001" // /* MW 11 */ + 9302 "00000000" // /* MW 10 */ + 9303 "00000000" // /* MW 9 */ + 9304 "10000000" // /* MW 8 */ + 9305 "00010001" // /* MW 7 */ + 9306 "00000110" // /* MW 6 */ + 9307 "00100001" // /* MW 5 */ + 9308 "00000000" // /* MW 4 */ + 9309 "11110000" // /* MW 3 */ + 9310 "00101100" // /* MW 2 */ + 9311 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 193 47 +.src_ref 7 "superkernels.cpp" 195 2 + 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9313 "00000000" // /* MW 15 */ + 9314 "00000000" // /* MW 14 */ + 9315 "00010000" // /* MW 13 */ + 9316 "00101010" // /* MW 12 */ + 9317 "10110010" // /* MW 11 */ + 9318 "11110000" // /* MW 10 */ + 9319 "00000001" // /* MW 9 */ + 9320 "00000000" // /* MW 8 */ + 9321 "10001011" // /* MW 7 */ + 9322 "10000000" // /* MW 6 */ + 9323 "00100010" // /* MW 5 */ + 9324 "00000000" // /* MW 4 */ + 9325 "11110000" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "00000000" // /* MW 7 */ + 9330 "11000011" // /* MW 6 */ + 9331 "10110011" // /* MW 5 */ + 9332 "00000011" // /* MW 4 */ + 9333 "01100000" // /* MW 3 */ + 9334 "10010001" // /* MW 2 */ + 9335 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9337 "00010000" // /* MW 9 */ + 9338 "00100000" // /* MW 8 */ + 9339 "00110010" // /* MW 7 */ + 9340 "11110000" // /* MW 6 */ + 9341 "00000001" // /* MW 5 */ + 9342 "00000000" // /* MW 4 */ + 9343 "11010000" // /* MW 3 */ + 9344 "11101110" // /* MW 2 */ + 9345 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9347 "00010110" // /* MW 3 */ + 9348 "11111110" // /* MW 2 */ + 9349 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9351 "00110110" // /* MW 3 */ + 9352 "11111110" // /* MW 2 */ + 9353 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9355 "01010110" // /* MW 3 */ + 9356 "01000110" // /* MW 2 */ + 9357 "00000111" // /* MW 1 */ + 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9359 "00000000" // /* MW 1 */ + 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9361 "00000000" // /* MW 1 */ + 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9363 "00000000" // /* MW 1 */ + 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9365 "00000000" // /* MW 1 */ + 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9367 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9369 "00000010" // /* MW 3 */ + 9370 "01100001" // /* MW 2 */ + 9371 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9373 "00010001" // /* MW 3 */ + 9374 "00000110" // /* MW 2 */ + 9375 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9377 "11111101" // /* MW 3 */ + 9378 "11100000" // /* MW 2 */ + 9379 "00010111" // /* MW 1 */ + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ + 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9383 "00000000" // /* MW 1 */ + 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9385 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9387 "00001000" // /* MW 3 */ + 9388 "10010011" // /* MW 2 */ + 9389 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 + 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9391 "10000001" // /* MW 5 */ + 9392 "10101101" // /* MW 4 */ + 9393 "10100111" // /* MW 3 */ + 9394 "00000000" // /* MW 2 */ + 9395 "00000100" // /* MW 1 */ + 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9397 "00000000" // /* MW 1 */ + 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first + 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "00110110" // /* MW 3 */ + 9402 "00000110" // /* MW 2 */ + 9403 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9405 "10000001" // /* MW 5 */ + 9406 "11011101" // /* MW 4 */ + 9407 "11011100" // /* MW 3 */ + 9408 "11001010" // /* MW 2 */ + 9409 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 47 first + 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9411 "01110110" // /* MW 3 */ + 9412 "00000110" // /* MW 2 */ + 9413 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9415 "10011110" // /* MW 3 */ + 9416 "01011100" // /* MW 2 */ + 9417 "00000111" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 195 2 first +.no_stack_arguments + 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */ + 9421 "00000001" // /* MW 5 */ + 9422 "00000000" // /* MW 4 */ + 9423 "01001000" // /* MW 3 */ + 9424 "00010001" // /* MW 2 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9427 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first +.delay_slot + 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9429 "00000111" // /* MW 3 */ + 9430 "01100010" // /* MW 2 */ + 9431 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.delay_slot + 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9433 "00110001" // /* MW 3 */ + 9434 "00000110" // /* MW 2 */ + 9435 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 first +.delay_slot + 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9437 "00001101" // /* MW 3 */ + 9438 "11100001" // /* MW 2 */ + 9439 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 +.delay_slot + 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9441 "00000000" // /* MW 15 */ + 9442 "00000000" // /* MW 14 */ + 9443 "10101000" // /* MW 13 */ + 9444 "10100000" // /* MW 12 */ + 9445 "00110100" // /* MW 11 */ + 9446 "00000000" // /* MW 10 */ + 9447 "00000000" // /* MW 9 */ + 9448 "00000000" // /* MW 8 */ + 9449 "01011011" // /* MW 7 */ + 9450 "00000001" // /* MW 6 */ + 9451 "00100000" // /* MW 5 */ + 9452 "00000000" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 +.src_ref 7 "superkernels.cpp" 198 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9457 "00010000" // /* MW 9 */ + 9458 "00100000" // /* MW 8 */ + 9459 "00110010" // /* MW 7 */ + 9460 "11110011" // /* MW 6 */ + 9461 "00000001" // /* MW 5 */ + 9462 "00000000" // /* MW 4 */ + 9463 "11010000" // /* MW 3 */ + 9464 "11000110" // /* MW 2 */ + 9465 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9467 "00000101" // /* MW 3 */ + 9468 "00100000" // /* MW 2 */ + 9469 "00010000" // /* MW 1 */ + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9481 "00001000" // /* MW 3 */ + 9482 "01010001" // /* MW 2 */ + 9483 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9485 "00010000" // /* MW 9 */ + 9486 "00110000" // /* MW 8 */ + 9487 "00110010" // /* MW 7 */ + 9488 "11110001" // /* MW 6 */ + 9489 "00000001" // /* MW 5 */ + 9490 "00000000" // /* MW 4 */ + 9491 "11010000" // /* MW 3 */ + 9492 "11001110" // /* MW 2 */ + 9493 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 first + 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9495 "00110110" // /* MW 3 */ + 9496 "00000110" // /* MW 2 */ + 9497 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 + 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9499 "01010110" // /* MW 3 */ + 9500 "00000110" // /* MW 2 */ + 9501 "00000010" // /* MW 1 */ + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ + 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9507 "00000000" // /* MW 1 */ + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00110001" // /* MW 3 */ + 9512 "00100001" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9515 "00010001" // /* MW 3 */ + 9516 "11100110" // /* MW 2 */ + 9517 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 16 first + 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9519 "00101000" // /* MW 3 */ + 9520 "01100001" // /* MW 2 */ + 9521 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 + 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */ + 9523 "00000001" // /* MW 5 */ + 9524 "01000000" // /* MW 4 */ + 9525 "10101000" // /* MW 3 */ + 9526 "00010010" // /* MW 2 */ + 9527 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9537 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 + 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9539 "00000001" // /* MW 3 */ + 9540 "00100000" // /* MW 2 */ + 9541 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 first + 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "00000000" // /* MW 7 */ + 9546 "10000000" // /* MW 6 */ + 9547 "00010001" // /* MW 5 */ + 9548 "00000110" // /* MW 4 */ + 9549 "11110110" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 200 + 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9553 "00111001" // /* MW 3 */ + 9554 "11110100" // /* MW 2 */ + 9555 "00000111" // /* MW 1 */ + 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00011001" // /* MW 3 */ + 9558 "11111011" // /* MW 2 */ + 9559 "00000111" // /* MW 1 */ + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9567 "11110001" // /* MW 3 */ + 9568 "11111101" // /* MW 2 */ + 9569 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9571 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9573 "00000000" // /* MW 3 */ + 9574 "00101000" // /* MW 2 */ + 9575 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9577 "10100000" // /* MW 3 */ + 9578 "01100111" // /* MW 2 */ + 9579 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 +.delay_slot + 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9581 "00000001" // /* MW 5 */ + 9582 "00000000" // /* MW 4 */ + 9583 "00000000" // /* MW 3 */ + 9584 "11111000" // /* MW 2 */ + 9585 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9587 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9591 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9601 "01011000" // /* MW 9 */ + 9602 "00000000" // /* MW 8 */ + 9603 "00001000" // /* MW 7 */ + 9604 "00001011" // /* MW 6 */ + 9605 "00100000" // /* MW 5 */ + 9606 "00001000" // /* MW 4 */ + 9607 "11010000" // /* MW 3 */ + 9608 "10000101" // /* MW 2 */ + 9609 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9611 "00000001" // /* MW 3 */ + 9612 "10000000" // /* MW 2 */ + 9613 "00010111" // /* MW 1 */ + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ + 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9617 "00000000" // /* MW 1 */ + 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9619 "00000000" // /* MW 1 */ + 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9621 "00000000" // /* MW 1 */ + 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9623 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9625 "00101001" // /* MW 3 */ + 9626 "00011100" // /* MW 2 */ + 9627 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9629 "00101110" // /* MW 3 */ + 9630 "00011100" // /* MW 2 */ + 9631 "00000001" // /* MW 1 */ + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ + 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9637 "00000000" // /* MW 1 */ + 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9639 "00000000" // /* MW 1 */ + 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9641 "00000000" // /* MW 1 */ + 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9643 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9645 "00101001" // /* MW 3 */ + 9646 "00011100" // /* MW 2 */ + 9647 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9649 "00101110" // /* MW 3 */ + 9650 "00000100" // /* MW 2 */ + 9651 "00000001" // /* MW 1 */ + 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9653 "00000000" // /* MW 1 */ + 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9655 "00000000" // /* MW 1 */ + 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9657 "00000000" // /* MW 1 */ + 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9659 "00000000" // /* MW 1 */ + 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9661 "00000000" // /* MW 1 */ + 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9663 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00101001" // /* MW 3 */ + 9666 "00011100" // /* MW 2 */ + 9667 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9669 "01110110" // /* MW 3 */ + 9670 "00010100" // /* MW 2 */ + 9671 "00000001" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ + 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9679 "00000000" // /* MW 1 */ + 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9681 "00000000" // /* MW 1 */ + 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9683 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9685 "01110001" // /* MW 3 */ + 9686 "01001100" // /* MW 2 */ + 9687 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9689 "00010111" // /* MW 3 */ + 9690 "00000100" // /* MW 2 */ + 9691 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9693 "00000000" // /* MW 3 */ + 9694 "00101000" // /* MW 2 */ + 9695 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9697 "00000000" // /* MW 5 */ + 9698 "10111110" // /* MW 4 */ + 9699 "11110000" // /* MW 3 */ + 9700 "00000000" // /* MW 2 */ + 9701 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9703 "00010100" // /* MW 3 */ + 9704 "11000010" // /* MW 2 */ + 9705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00100111" // /* MW 3 */ + 9708 "01110110" // /* MW 2 */ + 9709 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "10000010" // /* MW 3 */ + 9712 "00000001" // /* MW 2 */ + 9713 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9715 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9729 "00000001" // /* MW 5 */ + 9730 "00000000" // /* MW 4 */ + 9731 "00000000" // /* MW 3 */ + 9732 "00001000" // /* MW 2 */ + 9733 "00000000" // /* MW 1 */ + 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9735 "00111101" // /* MW 3 */ + 9736 "11111000" // /* MW 2 */ + 9737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */ + 9739 "00000001" // /* MW 5 */ + 9740 "00000000" // /* MW 4 */ + 9741 "11000000" // /* MW 3 */ + 9742 "00010010" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.delay_slot + 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "10011101" // /* MW 3 */ + 9746 "11111111" // /* MW 2 */ + 9747 "00001111" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot + 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9749 "11000000" // /* MW 3 */ + 9750 "01100000" // /* MW 2 */ + 9751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9757 "01100111" // /* MW 3 */ + 9758 "00000001" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.return_address + 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9761 "00111001" // /* MW 3 */ + 9762 "11111000" // /* MW 2 */ + 9763 "00000111" // /* MW 1 */ + 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9765 "00000000" // /* MW 1 */ + 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9767 "00000000" // /* MW 1 */ + 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9769 "00000000" // /* MW 1 */ + 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9771 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9773 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9775 "10011001" // /* MW 3 */ + 9776 "11111111" // /* MW 2 */ + 9777 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9779 "00000000" // /* MW 3 */ + 9780 "00101000" // /* MW 2 */ + 9781 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9789 "00001001" // /* MW 3 */ + 9790 "00100000" // /* MW 2 */ + 9791 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "mul_impl.h" 193 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9793 "01110001" // /* MW 9 */ + 9794 "00000000" // /* MW 8 */ + 9795 "00000000" // /* MW 7 */ + 9796 "00000000" // /* MW 6 */ + 9797 "11111110" // /* MW 5 */ + 9798 "00111111" // /* MW 4 */ + 9799 "00110000" // /* MW 3 */ + 9800 "11000010" // /* MW 2 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9801 "11101000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 3 "elementwise_binary_shared.h" 107 first +.src_ref 3 "elementwise_binary_shared.h" 119 37 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.function_start + 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9809 "11000000" // /* MW 3 */ + 9810 "00010110" // /* MW 2 */ + 9811 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first + 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "00000111" // /* MW 3 */ + 9814 "01100000" // /* MW 2 */ + 9815 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 122 22 first + 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "01010010" // /* MW 3 */ + 9818 "00011100" // /* MW 2 */ + 9819 "00000011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 15 first + 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9821 "10010110" // /* MW 3 */ + 9822 "00000100" // /* MW 2 */ + 9823 "00000011" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00001001" // /* MW 3 */ + 9834 "00000110" // /* MW 2 */ + 9835 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 107 + 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9837 "00000001" // /* MW 5 */ + 9838 "00000000" // /* MW 4 */ + 9839 "00000000" // /* MW 3 */ + 9840 "00010000" // /* MW 2 */ + 9841 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9843 "01001100" // /* MW 3 */ + 9844 "11000110" // /* MW 2 */ + 9845 "00010000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 +.src_ref 3 "elementwise_binary_shared.h" 124 8 + 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */ + 9847 "01100000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "00010000" // /* MW 7 */ + 9850 "11100010" // /* MW 6 */ + 9851 "00000100" // /* MW 5 */ + 9852 "00000110" // /* MW 4 */ + 9853 "00000000" // /* MW 3 */ + 9854 "00000001" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 first +.delay_slot + 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9857 "01110010" // /* MW 3 */ + 9858 "00000101" // /* MW 2 */ + 9859 "00011000" // /* MW 1 */ +.delay_slot + 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9861 "11000000" // /* MW 3 */ + 9862 "01011110" // /* MW 2 */ + 9863 "00011000" // /* MW 1 */ +.delay_slot + 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9865 "11100000" // /* MW 3 */ + 9866 "01100101" // /* MW 2 */ + 9867 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9869 "10000001" // /* MW 5 */ + 9870 "11011101" // /* MW 4 */ + 9871 "00001010" // /* MW 3 */ + 9872 "11110010" // /* MW 2 */ + 9873 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first +.delay_slot + 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9875 "00010011" // /* MW 3 */ + 9876 "00000100" // /* MW 2 */ + 9877 "00001111" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.src_ref 3 "elementwise_binary_shared.h" 131 19 + 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9879 "01110010" // /* MW 9 */ + 9880 "10111001" // /* MW 8 */ + 9881 "00000100" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "00001011" // /* MW 5 */ + 9884 "10000000" // /* MW 4 */ + 9885 "10000100" // /* MW 3 */ + 9886 "10000010" // /* MW 2 */ + 9887 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 126 34 first +.src_ref 3 "elementwise_binary_shared.h" 131 19 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000001" // /* MW 5 */ + 9890 "00000001" // /* MW 4 */ + 9891 "01010100" // /* MW 3 */ + 9892 "00000001" // /* MW 2 */ + 9893 "10000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 131 12 +.src_ref 3 "elementwise_binary_shared.h" 131 35 + 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */ + 9907 "00000001" // /* MW 5 */ + 9908 "01000000" // /* MW 4 */ + 9909 "01110000" // /* MW 3 */ + 9910 "00010011" // /* MW 2 */ + 9911 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9913 "00000000" // /* MW 3 */ + 9914 "00000000" // /* MW 2 */ + 9915 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11010000" // /* MW 5 */ + 9918 "11001000" // /* MW 4 */ + 9919 "11001000" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 9929 "00100000" // /* MW 9 */ + 9930 "00000000" // /* MW 8 */ + 9931 "00000000" // /* MW 7 */ + 9932 "11011110" // /* MW 6 */ + 9933 "00000100" // /* MW 5 */ + 9934 "00000000" // /* MW 4 */ + 9935 "10000000" // /* MW 3 */ + 9936 "00000100" // /* MW 2 */ + 9937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9947 "00100110" // /* MW 5 */ + 9948 "00001000" // /* MW 4 */ + 9949 "11110000" // /* MW 3 */ + 9950 "00101100" // /* MW 2 */ + 9951 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "10000000" // /* MW 3 */ + 9954 "00000000" // /* MW 2 */ + 9955 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01010000" // /* MW 11 */ + 9958 "00000000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "00000001" // /* MW 8 */ + 9961 "00010011" // /* MW 7 */ + 9962 "00000100" // /* MW 6 */ + 9963 "00100001" // /* MW 5 */ + 9964 "00000000" // /* MW 4 */ + 9965 "11110000" // /* MW 3 */ + 9966 "00101100" // /* MW 2 */ + 9967 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */ + 9969 "00000000" // /* MW 5 */ + 9970 "00000000" // /* MW 4 */ + 9971 "11001000" // /* MW 3 */ + 9972 "00010011" // /* MW 2 */ + 9973 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9975 "01110000" // /* MW 7 */ + 9976 "01100000" // /* MW 6 */ + 9977 "10110000" // /* MW 5 */ + 9978 "00000011" // /* MW 4 */ + 9979 "01100000" // /* MW 3 */ + 9980 "10010001" // /* MW 2 */ + 9981 "00010011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9989 "10000001" // /* MW 11 */ + 9990 "10101101" // /* MW 10 */ + 9991 "00000000" // /* MW 9 */ + 9992 "00000000" // /* MW 8 */ + 9993 "00000000" // /* MW 7 */ + 9994 "00000000" // /* MW 6 */ + 9995 "00100000" // /* MW 5 */ + 9996 "00000000" // /* MW 4 */ + 9997 "11110000" // /* MW 3 */ + 9998 "00101100" // /* MW 2 */ + 9999 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 +.src_ref 3 "elementwise_binary_shared.h" 150 97 + 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10001 "00001101" // /* MW 3 */ + 10002 "00000100" // /* MW 2 */ + 10003 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 97 first + 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10005 "01000111" // /* MW 3 */ + 10006 "10000100" // /* MW 2 */ + 10007 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */ + 10009 "00000001" // /* MW 5 */ + 10010 "01000000" // /* MW 4 */ + 10011 "10100000" // /* MW 3 */ + 10012 "00010011" // /* MW 2 */ + 10013 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "00000000" // /* MW 5 */ + 10016 "00100000" // /* MW 4 */ + 10017 "00000000" // /* MW 3 */ + 10018 "10000000" // /* MW 2 */ + 10019 "00111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10021 "11010000" // /* MW 5 */ + 10022 "11001000" // /* MW 4 */ + 10023 "11001000" // /* MW 3 */ + 10024 "00000111" // /* MW 2 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10033 "00000000" // /* MW 15 */ + 10034 "00000000" // /* MW 14 */ + 10035 "00010000" // /* MW 13 */ + 10036 "00000000" // /* MW 12 */ + 10037 "00001000" // /* MW 11 */ + 10038 "00000000" // /* MW 10 */ + 10039 "11100000" // /* MW 9 */ + 10040 "00101111" // /* MW 8 */ + 10041 "01011011" // /* MW 7 */ + 10042 "00000001" // /* MW 6 */ + 10043 "00100000" // /* MW 5 */ + 10044 "00000000" // /* MW 4 */ + 10045 "11110000" // /* MW 3 */ + 10046 "00101100" // /* MW 2 */ + 10047 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10049 "01011000" // /* MW 9 */ + 10050 "10111110" // /* MW 8 */ + 10051 "01000111" // /* MW 7 */ + 10052 "00000000" // /* MW 6 */ + 10053 "11010010" // /* MW 5 */ + 10054 "00000010" // /* MW 4 */ + 10055 "01010000" // /* MW 3 */ + 10056 "10000000" // /* MW 2 */ + 10057 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10059 "10000000" // /* MW 3 */ + 10060 "00000000" // /* MW 2 */ + 10061 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10063 "00000000" // /* MW 3 */ + 10064 "00000000" // /* MW 2 */ + 10065 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 171 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10067 "10000000" // /* MW 3 */ + 10068 "00000000" // /* MW 2 */ + 10069 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10073 "00010001" // /* MW 3 */ + 10074 "00000000" // /* MW 2 */ + 10075 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10077 "00100101" // /* MW 5 */ + 10078 "00000001" // /* MW 4 */ + 10079 "11100010" // /* MW 3 */ + 10080 "00000010" // /* MW 2 */ + 10081 "10100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10083 "10000000" // /* MW 3 */ + 10084 "00111010" // /* MW 2 */ + 10085 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10087 "10010110" // /* MW 3 */ + 10088 "01000000" // /* MW 2 */ + 10089 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10093 "00000001" // /* MW 3 */ + 10094 "00000001" // /* MW 2 */ + 10095 "00011000" // /* MW 1 */ + 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10097 "00000000" // /* MW 1 */ + 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10101 "00010010" // /* MW 3 */ + 10102 "00000000" // /* MW 2 */ + 10103 "00000101" // /* MW 1 */ + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10113 "00000000" // /* MW 1 */ + 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10115 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10117 "01110010" // /* MW 3 */ + 10118 "00000001" // /* MW 2 */ + 10119 "00011000" // /* MW 1 */ + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100110" // /* MW 5 */ + 10124 "11111000" // /* MW 4 */ + 10125 "11111111" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 166 4 first +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first + 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10129 "00010000" // /* MW 11 */ + 10130 "00000000" // /* MW 10 */ + 10131 "01111100" // /* MW 9 */ + 10132 "00001000" // /* MW 8 */ + 10133 "00000000" // /* MW 7 */ + 10134 "00000000" // /* MW 6 */ + 10135 "11101000" // /* MW 5 */ + 10136 "01010000" // /* MW 4 */ + 10137 "11011110" // /* MW 3 */ + 10138 "10001010" // /* MW 2 */ + 10139 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10141 "00010000" // /* MW 11 */ + 10142 "00011000" // /* MW 10 */ + 10143 "10111100" // /* MW 9 */ + 10144 "00001001" // /* MW 8 */ + 10145 "00000000" // /* MW 7 */ + 10146 "00000000" // /* MW 6 */ + 10147 "01101000" // /* MW 5 */ + 10148 "10010000" // /* MW 4 */ + 10149 "00000010" // /* MW 3 */ + 10150 "01100011" // /* MW 2 */ + 10151 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 177 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10153 "11110001" // /* MW 7 */ + 10154 "00000000" // /* MW 6 */ + 10155 "11101000" // /* MW 5 */ + 10156 "01010000" // /* MW 4 */ + 10157 "01111110" // /* MW 3 */ + 10158 "00000101" // /* MW 2 */ + 10159 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10161 "01101000" // /* MW 5 */ + 10162 "10010000" // /* MW 4 */ + 10163 "01010010" // /* MW 3 */ + 10164 "10010000" // /* MW 2 */ + 10165 "10000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10167 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10169 "00101011" // /* MW 3 */ + 10170 "00001000" // /* MW 2 */ + 10171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10175 "00111101" // /* MW 3 */ + 10176 "10000100" // /* MW 2 */ + 10177 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10179 "00000001" // /* MW 7 */ + 10180 "00000010" // /* MW 6 */ + 10181 "00000001" // /* MW 5 */ + 10182 "10000110" // /* MW 4 */ + 10183 "01111110" // /* MW 3 */ + 10184 "01110001" // /* MW 2 */ + 10185 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10187 "11101000" // /* MW 5 */ + 10188 "01010000" // /* MW 4 */ + 10189 "01111110" // /* MW 3 */ + 10190 "00000011" // /* MW 2 */ + 10191 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "11010100" // /* MW 9 */ + 10200 "00001001" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "00100000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "01110000" // /* MW 3 */ + 10206 "00000101" // /* MW 2 */ + 10207 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "00100000" // /* MW 5 */ + 10220 "00000000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00010000" // /* MW 15 */ + 10226 "00001000" // /* MW 14 */ + 10227 "01111000" // /* MW 13 */ + 10228 "10100101" // /* MW 12 */ + 10229 "00000001" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "01111000" // /* MW 13 */ + 10244 "10100101" // /* MW 12 */ + 10245 "00000001" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "01011011" // /* MW 7 */ + 10250 "00000001" // /* MW 6 */ + 10251 "11101000" // /* MW 5 */ + 10252 "01010000" // /* MW 4 */ + 10253 "01111110" // /* MW 3 */ + 10254 "00000011" // /* MW 2 */ + 10255 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "00000000" // /* MW 8 */ + 10265 "10100011" // /* MW 7 */ + 10266 "00011100" // /* MW 6 */ + 10267 "00100010" // /* MW 5 */ + 10268 "00000000" // /* MW 4 */ + 10269 "01110000" // /* MW 3 */ + 10270 "00000101" // /* MW 2 */ + 10271 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "01111000" // /* MW 13 */ + 10276 "10100101" // /* MW 12 */ + 10277 "00000001" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "00000000" // /* MW 8 */ + 10281 "01011011" // /* MW 7 */ + 10282 "00000001" // /* MW 6 */ + 10283 "00100000" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10289 "00010000" // /* MW 15 */ + 10290 "00001000" // /* MW 14 */ + 10291 "01111000" // /* MW 13 */ + 10292 "10100101" // /* MW 12 */ + 10293 "00000001" // /* MW 11 */ + 10294 "00000000" // /* MW 10 */ + 10295 "00000000" // /* MW 9 */ + 10296 "00000000" // /* MW 8 */ + 10297 "01011011" // /* MW 7 */ + 10298 "00000001" // /* MW 6 */ + 10299 "00100000" // /* MW 5 */ + 10300 "00000000" // /* MW 4 */ + 10301 "11110000" // /* MW 3 */ + 10302 "00101100" // /* MW 2 */ + 10303 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10305 "00000001" // /* MW 5 */ + 10306 "00000000" // /* MW 4 */ + 10307 "00000000" // /* MW 3 */ + 10308 "11110000" // /* MW 2 */ + 10309 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "10100011" // /* MW 3 */ + 10312 "00011100" // /* MW 2 */ + 10313 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10315 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10317 "00000001" // /* MW 3 */ + 10318 "00000010" // /* MW 2 */ + 10319 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10321 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10323 "00000000" // /* MW 3 */ + 10324 "00101000" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "10100011" // /* MW 3 */ + 10328 "00011100" // /* MW 2 */ + 10329 "00001010" // /* MW 1 */ +.delay_slot + 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "10100000" // /* MW 3 */ + 10332 "01100000" // /* MW 2 */ + 10333 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10335 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.delay_slot + 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10337 "10100011" // /* MW 3 */ + 10338 "00011100" // /* MW 2 */ + 10339 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 10341 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 4 "vector.hpp" 538 13 +.src_ref 3 "elementwise_binary_shared.h" 237 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.function_start + 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10353 "01110010" // /* MW 9 */ + 10354 "11110000" // /* MW 8 */ + 10355 "01100000" // /* MW 7 */ + 10356 "00000000" // /* MW 6 */ + 10357 "10001011" // /* MW 5 */ + 10358 "10001000" // /* MW 4 */ + 10359 "10000011" // /* MW 3 */ + 10360 "10000010" // /* MW 2 */ + 10361 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 first +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 + 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10363 "10000001" // /* MW 5 */ + 10364 "11000101" // /* MW 4 */ + 10365 "01010100" // /* MW 3 */ + 10366 "00000001" // /* MW 2 */ + 10367 "01000000" // /* MW 1 */ + 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10369 "00000000" // /* MW 1 */ + 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10371 "00000000" // /* MW 1 */ + 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10373 "00000000" // /* MW 1 */ + 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10375 "00000000" // /* MW 1 */ + 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10377 "00000000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 244 12 +.src_ref 3 "elementwise_binary_shared.h" 244 35 + 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */ + 10381 "00000001" // /* MW 5 */ + 10382 "00000000" // /* MW 4 */ + 10383 "01101000" // /* MW 3 */ + 10384 "00010100" // /* MW 2 */ + 10385 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 237 +.delay_slot + 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10387 "00000001" // /* MW 5 */ + 10388 "00000000" // /* MW 4 */ + 10389 "00000000" // /* MW 3 */ + 10390 "00001000" // /* MW 2 */ + 10391 "00000000" // /* MW 1 */ +.delay_slot + 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10393 "11100000" // /* MW 3 */ + 10394 "01010101" // /* MW 2 */ + 10395 "00011000" // /* MW 1 */ +.delay_slot + 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10397 "11100000" // /* MW 3 */ + 10398 "01100000" // /* MW 2 */ + 10399 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10401 "00101011" // /* MW 3 */ + 10402 "00000111" // /* MW 2 */ + 10403 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10405 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 247 12 first +.no_stack_arguments + 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10407 "00000001" // /* MW 5 */ + 10408 "00000000" // /* MW 4 */ + 10409 "00101000" // /* MW 3 */ + 10410 "00010011" // /* MW 2 */ + 10411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10421 "10000001" // /* MW 11 */ + 10422 "10101101" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "00000000" // /* MW 7 */ + 10426 "00000000" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.return_address + 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10433 "00000000" // /* MW 5 */ + 10434 "00000000" // /* MW 4 */ + 10435 "01111000" // /* MW 3 */ + 10436 "00010100" // /* MW 2 */ + 10437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10447 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 3 "elementwise_binary_shared.h" 245 12 first +.no_stack_arguments + 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10449 "00000001" // /* MW 5 */ + 10450 "00000000" // /* MW 4 */ + 10451 "00101000" // /* MW 3 */ + 10452 "00010011" // /* MW 2 */ + 10453 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.delay_slot + 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10455 "01110000" // /* MW 7 */ + 10456 "01100000" // /* MW 6 */ + 10457 "10110000" // /* MW 5 */ + 10458 "00000000" // /* MW 4 */ + 10459 "01100000" // /* MW 3 */ + 10460 "10010001" // /* MW 2 */ + 10461 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10469 "10000001" // /* MW 11 */ + 10470 "10101101" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00000000" // /* MW 7 */ + 10474 "00000000" // /* MW 6 */ + 10475 "00100000" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.return_address + 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10481 "10000000" // /* MW 3 */ + 10482 "01110001" // /* MW 2 */ + 10483 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 first + 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10485 "00000000" // /* MW 3 */ + 10486 "00101000" // /* MW 2 */ + 10487 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.delay_slot + 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10489 "00000001" // /* MW 5 */ + 10490 "00000000" // /* MW 4 */ + 10491 "00000000" // /* MW 3 */ + 10492 "11111000" // /* MW 2 */ + 10493 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 10501 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 205 first +.src_ref 7 "superkernels.cpp" 210 6 +.function_start + 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10513 "10000000" // /* MW 5 */ + 10514 "11001000" // /* MW 4 */ + 10515 "11000110" // /* MW 3 */ + 10516 "00000111" // /* MW 2 */ + 10517 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 first + 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10519 "11000001" // /* MW 5 */ + 10520 "10110101" // /* MW 4 */ + 10521 "11011000" // /* MW 3 */ + 10522 "11000010" // /* MW 2 */ + 10523 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 205 + 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10525 "00000001" // /* MW 5 */ + 10526 "00000000" // /* MW 4 */ + 10527 "00000000" // /* MW 3 */ + 10528 "00001000" // /* MW 2 */ + 10529 "00000000" // /* MW 1 */ + 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10531 "01110000" // /* MW 7 */ + 10532 "11010000" // /* MW 6 */ + 10533 "00001011" // /* MW 5 */ + 10534 "00000000" // /* MW 4 */ + 10535 "10110000" // /* MW 3 */ + 10536 "01100011" // /* MW 2 */ + 10537 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 + 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10539 "00010001" // /* MW 9 */ + 10540 "00101000" // /* MW 8 */ + 10541 "00110010" // /* MW 7 */ + 10542 "11110011" // /* MW 6 */ + 10543 "00000001" // /* MW 5 */ + 10544 "00000000" // /* MW 4 */ + 10545 "10110000" // /* MW 3 */ + 10546 "10000010" // /* MW 2 */ + 10547 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10549 "11000000" // /* MW 3 */ + 10550 "11010100" // /* MW 2 */ + 10551 "00011011" // /* MW 1 */ + 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10553 "00000000" // /* MW 1 */ + 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 +.src_ref 7 "superkernels.cpp" 210 16 + 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */ + 10557 "00000001" // /* MW 5 */ + 10558 "01000000" // /* MW 4 */ + 10559 "11110000" // /* MW 3 */ + 10560 "00010100" // /* MW 2 */ + 10561 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 22 first +.delay_slot + 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10563 "10010000" // /* MW 3 */ + 10564 "01100010" // /* MW 2 */ + 10565 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 30 +.delay_slot + 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10567 "11111011" // /* MW 3 */ + 10568 "01100011" // /* MW 2 */ + 10569 "00010100" // /* MW 1 */ +.delay_slot + 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10571 "00111101" // /* MW 3 */ + 10572 "11110100" // /* MW 2 */ + 10573 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10575 "01110000" // /* MW 7 */ + 10576 "01100000" // /* MW 6 */ + 10577 "00110000" // /* MW 5 */ + 10578 "00000011" // /* MW 4 */ + 10579 "00110000" // /* MW 3 */ + 10580 "11000110" // /* MW 2 */ + 10581 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 +.src_ref 7 "superkernels.cpp" 224 2 +.delay_slot + 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10583 "00000000" // /* MW 5 */ + 10584 "11001010" // /* MW 4 */ + 10585 "11000000" // /* MW 3 */ + 10586 "00000111" // /* MW 2 */ + 10587 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10589 "11010000" // /* MW 5 */ + 10590 "11001000" // /* MW 4 */ + 10591 "11000100" // /* MW 3 */ + 10592 "00000111" // /* MW 2 */ + 10593 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10595 "00010000" // /* MW 9 */ + 10596 "00110010" // /* MW 8 */ + 10597 "00110010" // /* MW 7 */ + 10598 "11110001" // /* MW 6 */ + 10599 "00000001" // /* MW 5 */ + 10600 "00000000" // /* MW 4 */ + 10601 "11100000" // /* MW 3 */ + 10602 "11000000" // /* MW 2 */ + 10603 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "00000000" // /* MW 3 */ + 10610 "00010011" // /* MW 2 */ + 10611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10615 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10617 "00110001" // /* MW 3 */ + 10618 "00100000" // /* MW 2 */ + 10619 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10621 "00000101" // /* MW 3 */ + 10622 "00100000" // /* MW 2 */ + 10623 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10625 "00000000" // /* MW 15 */ + 10626 "00000000" // /* MW 14 */ + 10627 "01111000" // /* MW 13 */ + 10628 "10100101" // /* MW 12 */ + 10629 "00000001" // /* MW 11 */ + 10630 "00000000" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "10000000" // /* MW 8 */ + 10633 "00010001" // /* MW 7 */ + 10634 "00000110" // /* MW 6 */ + 10635 "00100010" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 +.return_address + 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10641 "10100000" // /* MW 5 */ + 10642 "11001000" // /* MW 4 */ + 10643 "11000100" // /* MW 3 */ + 10644 "00000111" // /* MW 2 */ + 10645 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 first +.src_ref 7 "superkernels.cpp" 217 65 + 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10647 "00010000" // /* MW 9 */ + 10648 "10000000" // /* MW 8 */ + 10649 "00110010" // /* MW 7 */ + 10650 "11110001" // /* MW 6 */ + 10651 "00000001" // /* MW 5 */ + 10652 "00000000" // /* MW 4 */ + 10653 "11010000" // /* MW 3 */ + 10654 "11000010" // /* MW 2 */ + 10655 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 +.src_ref 7 "superkernels.cpp" 217 65 +.src_ref 7 "superkernels.cpp" 224 2 + 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10657 "00010000" // /* MW 9 */ + 10658 "10000000" // /* MW 8 */ + 10659 "00110010" // /* MW 7 */ + 10660 "11110001" // /* MW 6 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "11010000" // /* MW 3 */ + 10664 "11000110" // /* MW 2 */ + 10665 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 first +.src_ref 7 "superkernels.cpp" 217 16 +.src_ref 7 "superkernels.cpp" 222 47 + 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10667 "00010000" // /* MW 9 */ + 10668 "00101010" // /* MW 8 */ + 10669 "10110010" // /* MW 7 */ + 10670 "11110000" // /* MW 6 */ + 10671 "00000001" // /* MW 5 */ + 10672 "00000000" // /* MW 4 */ + 10673 "01010000" // /* MW 3 */ + 10674 "11001011" // /* MW 2 */ + 10675 "01001010" // /* MW 1 */ + 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10677 "00000000" // /* MW 1 */ + 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10679 "00000000" // /* MW 1 */ + 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */ + 10681 "00000000" // /* MW 5 */ + 10682 "00000000" // /* MW 4 */ + 10683 "11111000" // /* MW 3 */ + 10684 "00010100" // /* MW 2 */ + 10685 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 +.delay_slot + 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10687 "11000000" // /* MW 5 */ + 10688 "11001000" // /* MW 4 */ + 10689 "11000000" // /* MW 3 */ + 10690 "00000111" // /* MW 2 */ + 10691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 27 first +.delay_slot + 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10695 "00001111" // /* MW 3 */ + 10696 "01100001" // /* MW 2 */ + 10697 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 first +.delay_slot + 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "10100011" // /* MW 5 */ + 10700 "00001100" // /* MW 4 */ + 10701 "11110000" // /* MW 3 */ + 10702 "00101100" // /* MW 2 */ + 10703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 16 first +.delay_slot + 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10705 "00000000" // /* MW 15 */ + 10706 "00000000" // /* MW 14 */ + 10707 "01111000" // /* MW 13 */ + 10708 "10100101" // /* MW 12 */ + 10709 "00000001" // /* MW 11 */ + 10710 "00000000" // /* MW 10 */ + 10711 "00000000" // /* MW 9 */ + 10712 "10000000" // /* MW 8 */ + 10713 "00010001" // /* MW 7 */ + 10714 "00000110" // /* MW 6 */ + 10715 "00100001" // /* MW 5 */ + 10716 "00000000" // /* MW 4 */ + 10717 "11110000" // /* MW 3 */ + 10718 "00101100" // /* MW 2 */ + 10719 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 222 47 +.src_ref 7 "superkernels.cpp" 224 2 + 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10721 "00000000" // /* MW 15 */ + 10722 "00000000" // /* MW 14 */ + 10723 "00010000" // /* MW 13 */ + 10724 "00101010" // /* MW 12 */ + 10725 "10110010" // /* MW 11 */ + 10726 "11110000" // /* MW 10 */ + 10727 "00000001" // /* MW 9 */ + 10728 "00000000" // /* MW 8 */ + 10729 "10001011" // /* MW 7 */ + 10730 "10000000" // /* MW 6 */ + 10731 "00100010" // /* MW 5 */ + 10732 "00000000" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10737 "00000000" // /* MW 7 */ + 10738 "11000011" // /* MW 6 */ + 10739 "10110011" // /* MW 5 */ + 10740 "00000011" // /* MW 4 */ + 10741 "01100000" // /* MW 3 */ + 10742 "10010001" // /* MW 2 */ + 10743 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10745 "00010000" // /* MW 9 */ + 10746 "00100000" // /* MW 8 */ + 10747 "00110010" // /* MW 7 */ + 10748 "11110000" // /* MW 6 */ + 10749 "00000001" // /* MW 5 */ + 10750 "00000000" // /* MW 4 */ + 10751 "11010000" // /* MW 3 */ + 10752 "11101110" // /* MW 2 */ + 10753 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10755 "00010110" // /* MW 3 */ + 10756 "11111110" // /* MW 2 */ + 10757 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10759 "00110110" // /* MW 3 */ + 10760 "11111110" // /* MW 2 */ + 10761 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10763 "01010110" // /* MW 3 */ + 10764 "01000110" // /* MW 2 */ + 10765 "00000111" // /* MW 1 */ + 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10767 "00000000" // /* MW 1 */ + 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10769 "00000000" // /* MW 1 */ + 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10771 "00000000" // /* MW 1 */ + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00000010" // /* MW 3 */ + 10778 "01100001" // /* MW 2 */ + 10779 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00010001" // /* MW 3 */ + 10782 "00000110" // /* MW 2 */ + 10783 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10785 "11111101" // /* MW 3 */ + 10786 "11100000" // /* MW 2 */ + 10787 "00010111" // /* MW 1 */ + 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10789 "00000000" // /* MW 1 */ + 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10791 "00000000" // /* MW 1 */ + 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10793 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "00001000" // /* MW 3 */ + 10796 "10010011" // /* MW 2 */ + 10797 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 + 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10799 "10000001" // /* MW 5 */ + 10800 "10101101" // /* MW 4 */ + 10801 "10100111" // /* MW 3 */ + 10802 "00000000" // /* MW 2 */ + 10803 "00000100" // /* MW 1 */ + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first + 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00110110" // /* MW 3 */ + 10810 "00000110" // /* MW 2 */ + 10811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10813 "10000001" // /* MW 5 */ + 10814 "11011101" // /* MW 4 */ + 10815 "11011100" // /* MW 3 */ + 10816 "11001010" // /* MW 2 */ + 10817 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 47 first + 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10819 "01110110" // /* MW 3 */ + 10820 "00000110" // /* MW 2 */ + 10821 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10823 "10011110" // /* MW 3 */ + 10824 "01011100" // /* MW 2 */ + 10825 "00000111" // /* MW 1 */ + 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10827 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 224 2 first +.no_stack_arguments + 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */ + 10829 "00000001" // /* MW 5 */ + 10830 "00000000" // /* MW 4 */ + 10831 "00111000" // /* MW 3 */ + 10832 "00010100" // /* MW 2 */ + 10833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10835 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first +.delay_slot + 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00000111" // /* MW 3 */ + 10838 "01100010" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.delay_slot + 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00110001" // /* MW 3 */ + 10842 "00000110" // /* MW 2 */ + 10843 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 first +.delay_slot + 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10845 "00001101" // /* MW 3 */ + 10846 "11100001" // /* MW 2 */ + 10847 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 +.delay_slot + 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10849 "00000000" // /* MW 15 */ + 10850 "00000000" // /* MW 14 */ + 10851 "10101000" // /* MW 13 */ + 10852 "10100000" // /* MW 12 */ + 10853 "00110100" // /* MW 11 */ + 10854 "00000000" // /* MW 10 */ + 10855 "00000000" // /* MW 9 */ + 10856 "00000000" // /* MW 8 */ + 10857 "01011011" // /* MW 7 */ + 10858 "00000001" // /* MW 6 */ + 10859 "00100000" // /* MW 5 */ + 10860 "00000000" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 +.src_ref 7 "superkernels.cpp" 227 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10865 "00010000" // /* MW 9 */ + 10866 "00100000" // /* MW 8 */ + 10867 "00110010" // /* MW 7 */ + 10868 "11110011" // /* MW 6 */ + 10869 "00000001" // /* MW 5 */ + 10870 "00000000" // /* MW 4 */ + 10871 "11010000" // /* MW 3 */ + 10872 "11000110" // /* MW 2 */ + 10873 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10875 "00000101" // /* MW 3 */ + 10876 "00100000" // /* MW 2 */ + 10877 "00010000" // /* MW 1 */ + 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10879 "00000000" // /* MW 1 */ + 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10881 "00000000" // /* MW 1 */ + 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10883 "00000000" // /* MW 1 */ + 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10885 "00000000" // /* MW 1 */ + 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10887 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10889 "00001000" // /* MW 3 */ + 10890 "01010001" // /* MW 2 */ + 10891 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10893 "00010000" // /* MW 9 */ + 10894 "00110000" // /* MW 8 */ + 10895 "00110010" // /* MW 7 */ + 10896 "11110001" // /* MW 6 */ + 10897 "00000001" // /* MW 5 */ + 10898 "00000000" // /* MW 4 */ + 10899 "11010000" // /* MW 3 */ + 10900 "11001110" // /* MW 2 */ + 10901 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 first + 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10903 "00110110" // /* MW 3 */ + 10904 "00000110" // /* MW 2 */ + 10905 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 + 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10907 "01010110" // /* MW 3 */ + 10908 "00000110" // /* MW 2 */ + 10909 "00000010" // /* MW 1 */ + 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10911 "00000000" // /* MW 1 */ + 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10913 "00000000" // /* MW 1 */ + 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10915 "00000000" // /* MW 1 */ + 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10917 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00110001" // /* MW 3 */ + 10920 "00100001" // /* MW 2 */ + 10921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "00010001" // /* MW 3 */ + 10924 "11100110" // /* MW 2 */ + 10925 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 16 first + 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10927 "00101000" // /* MW 3 */ + 10928 "01100001" // /* MW 2 */ + 10929 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 + 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */ + 10931 "00000001" // /* MW 5 */ + 10932 "01000000" // /* MW 4 */ + 10933 "01101000" // /* MW 3 */ + 10934 "00010101" // /* MW 2 */ + 10935 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 + 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10947 "00000001" // /* MW 3 */ + 10948 "00100000" // /* MW 2 */ + 10949 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 first + 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10951 "00000000" // /* MW 9 */ + 10952 "00000000" // /* MW 8 */ + 10953 "00000000" // /* MW 7 */ + 10954 "10000000" // /* MW 6 */ + 10955 "00010001" // /* MW 5 */ + 10956 "00000110" // /* MW 4 */ + 10957 "11110110" // /* MW 3 */ + 10958 "00101100" // /* MW 2 */ + 10959 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 229 + 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10961 "00111001" // /* MW 3 */ + 10962 "11110100" // /* MW 2 */ + 10963 "00000111" // /* MW 1 */ + 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00011001" // /* MW 3 */ + 10966 "11111011" // /* MW 2 */ + 10967 "00000111" // /* MW 1 */ + 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10969 "00000000" // /* MW 1 */ + 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10971 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10973 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10975 "11110001" // /* MW 3 */ + 10976 "11111101" // /* MW 2 */ + 10977 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10981 "00000000" // /* MW 3 */ + 10982 "00101000" // /* MW 2 */ + 10983 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10985 "10100000" // /* MW 3 */ + 10986 "01100111" // /* MW 2 */ + 10987 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 +.delay_slot + 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10989 "00000001" // /* MW 5 */ + 10990 "00000000" // /* MW 4 */ + 10991 "00000000" // /* MW 3 */ + 10992 "11111000" // /* MW 2 */ + 10993 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10999 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 11008 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11009 "01011000" // /* MW 9 */ + 11010 "00000000" // /* MW 8 */ + 11011 "00001000" // /* MW 7 */ + 11012 "00001011" // /* MW 6 */ + 11013 "00100000" // /* MW 5 */ + 11014 "00001000" // /* MW 4 */ + 11015 "11010000" // /* MW 3 */ + 11016 "10000101" // /* MW 2 */ + 11017 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 11018 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11019 "00000001" // /* MW 3 */ + 11020 "10000000" // /* MW 2 */ + 11021 "00010111" // /* MW 1 */ + 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11023 "00000000" // /* MW 1 */ + 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11025 "00000000" // /* MW 1 */ + 11026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11027 "00000000" // /* MW 1 */ + 11028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11029 "00000000" // /* MW 1 */ + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 11032 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11033 "00101001" // /* MW 3 */ + 11034 "00011100" // /* MW 2 */ + 11035 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 11036 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11037 "00101110" // /* MW 3 */ + 11038 "00011100" // /* MW 2 */ + 11039 "00000001" // /* MW 1 */ + 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11041 "00000000" // /* MW 1 */ + 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11043 "00000000" // /* MW 1 */ + 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11045 "00000000" // /* MW 1 */ + 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11047 "00000000" // /* MW 1 */ + 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11049 "00000000" // /* MW 1 */ + 11050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11051 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 11052 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11053 "00101001" // /* MW 3 */ + 11054 "00011100" // /* MW 2 */ + 11055 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 11056 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11057 "00101110" // /* MW 3 */ + 11058 "00000100" // /* MW 2 */ + 11059 "00000001" // /* MW 1 */ + 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11061 "00000000" // /* MW 1 */ + 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11063 "00000000" // /* MW 1 */ + 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11065 "00000000" // /* MW 1 */ + 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11067 "00000000" // /* MW 1 */ + 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11069 "00000000" // /* MW 1 */ + 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 11072 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11073 "00101001" // /* MW 3 */ + 11074 "00011100" // /* MW 2 */ + 11075 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 11076 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11077 "01110110" // /* MW 3 */ + 11078 "00010100" // /* MW 2 */ + 11079 "00000001" // /* MW 1 */ + 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11081 "00000000" // /* MW 1 */ + 11082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11083 "00000000" // /* MW 1 */ + 11084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11085 "00000000" // /* MW 1 */ + 11086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11087 "00000000" // /* MW 1 */ + 11088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11089 "00000000" // /* MW 1 */ + 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11092 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11093 "01110001" // /* MW 3 */ + 11094 "01001100" // /* MW 2 */ + 11095 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11096 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11097 "00010111" // /* MW 3 */ + 11098 "00000100" // /* MW 2 */ + 11099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11100 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11101 "00000000" // /* MW 3 */ + 11102 "00101000" // /* MW 2 */ + 11103 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11104 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11105 "00000000" // /* MW 5 */ + 11106 "10111110" // /* MW 4 */ + 11107 "11110000" // /* MW 3 */ + 11108 "00000000" // /* MW 2 */ + 11109 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11110 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11111 "00010100" // /* MW 3 */ + 11112 "11000010" // /* MW 2 */ + 11113 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11114 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11115 "00100111" // /* MW 3 */ + 11116 "01110110" // /* MW 2 */ + 11117 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11118 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11119 "10000010" // /* MW 3 */ + 11120 "00000001" // /* MW 2 */ + 11121 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11123 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 11136 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11137 "00000001" // /* MW 5 */ + 11138 "00000000" // /* MW 4 */ + 11139 "00000000" // /* MW 3 */ + 11140 "00001000" // /* MW 2 */ + 11141 "00000000" // /* MW 1 */ + 11142 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11143 "00111101" // /* MW 3 */ + 11144 "11111000" // /* MW 2 */ + 11145 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 11146 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */ + 11147 "00000001" // /* MW 5 */ + 11148 "00000000" // /* MW 4 */ + 11149 "10000000" // /* MW 3 */ + 11150 "00010101" // /* MW 2 */ + 11151 "00000000" // /* MW 1 */ +.delay_slot + 11152 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11153 "10100000" // /* MW 3 */ + 11154 "00010111" // /* MW 2 */ + 11155 "00011000" // /* MW 1 */ +.delay_slot + 11156 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11157 "00010101" // /* MW 3 */ + 11158 "11111100" // /* MW 2 */ + 11159 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.delay_slot + 11160 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11161 "11000000" // /* MW 3 */ + 11162 "11010000" // /* MW 2 */ + 11163 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 11168 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11169 "00001000" // /* MW 9 */ + 11170 "11000100" // /* MW 8 */ + 11171 "00110011" // /* MW 7 */ + 11172 "01101000" // /* MW 6 */ + 11173 "00000000" // /* MW 5 */ + 11174 "00000001" // /* MW 4 */ + 11175 "00100000" // /* MW 3 */ + 11176 "00000111" // /* MW 2 */ + 11177 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 11178 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11179 "01011000" // /* MW 9 */ + 11180 "11111101" // /* MW 8 */ + 11181 "00000111" // /* MW 7 */ + 11182 "00001000" // /* MW 6 */ + 11183 "10000000" // /* MW 5 */ + 11184 "00000001" // /* MW 4 */ + 11185 "10000000" // /* MW 3 */ + 11186 "11100010" // /* MW 2 */ + 11187 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 11188 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11189 "00000001" // /* MW 9 */ + 11190 "10100000" // /* MW 8 */ + 11191 "00000111" // /* MW 7 */ + 11192 "10000000" // /* MW 6 */ + 11193 "00010001" // /* MW 5 */ + 11194 "00001010" // /* MW 4 */ + 11195 "00100000" // /* MW 3 */ + 11196 "10111110" // /* MW 2 */ + 11197 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 11198 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11199 "01001010" // /* MW 3 */ + 11200 "00000110" // /* MW 2 */ + 11201 "00000000" // /* MW 1 */ + 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11203 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11205 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11206 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11207 "00010111" // /* MW 3 */ + 11208 "00000010" // /* MW 2 */ + 11209 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11210 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11211 "00000000" // /* MW 3 */ + 11212 "00101000" // /* MW 2 */ + 11213 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11214 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11215 "00000101" // /* MW 3 */ + 11216 "00100010" // /* MW 2 */ + 11217 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11218 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11219 "00000001" // /* MW 5 */ + 11220 "00000000" // /* MW 4 */ + 11221 "00000000" // /* MW 3 */ + 11222 "11111000" // /* MW 2 */ + 11223 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11224 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11225 "00100111" // /* MW 3 */ + 11226 "01110111" // /* MW 2 */ + 11227 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11228 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11229 "10000010" // /* MW 3 */ + 11230 "00100001" // /* MW 2 */ + 11231 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11233 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_shared.h" 227 first +.src_ref 3 "elementwise_binary_shared.h" 232 8 first +.tail_call +.function_start + 11248 "10000100" // J #9808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 11249 "00000000" // /* MW 5 */ + 11250 "00000000" // /* MW 4 */ + 11251 "00101000" // /* MW 3 */ + 11252 "00010011" // /* MW 2 */ + 11253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11259 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 11263 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 11264 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11265 "00000001" // /* MW 5 */ + 11266 "00100001" // /* MW 4 */ + 11267 "00000000" // /* MW 3 */ + 11268 "00000000" // /* MW 2 */ + 11269 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11270 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11271 "11000000" // /* MW 3 */ + 11272 "01010000" // /* MW 2 */ + 11273 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11274 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11275 "10010000" // /* MW 3 */ + 11276 "01100000" // /* MW 2 */ + 11277 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 11278 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "00010001" // /* MW 3 */ + 11280 "00000100" // /* MW 2 */ + 11281 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 11282 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11283 "00010001" // /* MW 3 */ + 11284 "00010100" // /* MW 2 */ + 11285 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 11287 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 11296 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00101110" // /* MW 3 */ + 11298 "00011100" // /* MW 2 */ + 11299 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 11300 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11301 "00000001" // /* MW 5 */ + 11302 "00000000" // /* MW 4 */ + 11303 "00000000" // /* MW 3 */ + 11304 "00001000" // /* MW 2 */ + 11305 "00000000" // /* MW 1 */ + 11306 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11307 "00111101" // /* MW 3 */ + 11308 "11111100" // /* MW 2 */ + 11309 "00001111" // /* MW 1 */ + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11311 "00000000" // /* MW 1 */ + 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11313 "00000000" // /* MW 1 */ + 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11315 "00000000" // /* MW 1 */ + 11316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11317 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 11318 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11319 "00101001" // /* MW 3 */ + 11320 "00011100" // /* MW 2 */ + 11321 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 11322 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11323 "00101110" // /* MW 3 */ + 11324 "00011100" // /* MW 2 */ + 11325 "00000001" // /* MW 1 */ + 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11327 "00000000" // /* MW 1 */ + 11328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11329 "00000000" // /* MW 1 */ + 11330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11331 "00000000" // /* MW 1 */ + 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11333 "00000000" // /* MW 1 */ + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 11338 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11339 "00101001" // /* MW 3 */ + 11340 "00011100" // /* MW 2 */ + 11341 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 11342 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11343 "00101110" // /* MW 3 */ + 11344 "00000100" // /* MW 2 */ + 11345 "00000001" // /* MW 1 */ + 11346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11347 "00000000" // /* MW 1 */ + 11348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11349 "00000000" // /* MW 1 */ + 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11351 "00000000" // /* MW 1 */ + 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11353 "00000000" // /* MW 1 */ + 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11355 "00000000" // /* MW 1 */ + 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11357 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 11358 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11359 "00101001" // /* MW 3 */ + 11360 "00011100" // /* MW 2 */ + 11361 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 11362 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11363 "00101110" // /* MW 3 */ + 11364 "00010100" // /* MW 2 */ + 11365 "00000001" // /* MW 1 */ + 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11367 "00000000" // /* MW 1 */ + 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11369 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 11370 "00000100" // JL #11264 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11264 delay_slots=5 */ + 11371 "00000001" // /* MW 5 */ + 11372 "00000000" // /* MW 4 */ + 11373 "00000000" // /* MW 3 */ + 11374 "00010110" // /* MW 2 */ + 11375 "00000000" // /* MW 1 */ +.delay_slot + 11376 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "10011101" // /* MW 3 */ + 11378 "11111011" // /* MW 2 */ + 11379 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 11384 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "00101001" // /* MW 3 */ + 11386 "11011100" // /* MW 2 */ + 11387 "00001000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot + 11388 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "11000000" // /* MW 3 */ + 11390 "01100000" // /* MW 2 */ + 11391 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.return_address + 11392 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11393 "00111001" // /* MW 3 */ + 11394 "11111100" // /* MW 2 */ + 11395 "00000111" // /* MW 1 */ + 11396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11397 "00000000" // /* MW 1 */ + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11406 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11407 "10011001" // /* MW 3 */ + 11408 "11111011" // /* MW 2 */ + 11409 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11410 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11411 "00000000" // /* MW 3 */ + 11412 "00101000" // /* MW 2 */ + 11413 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11419 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11420 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11421 "00000001" // /* MW 3 */ + 11422 "00100000" // /* MW 2 */ + 11423 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "mul_impl.h" 134 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11424 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11425 "01110001" // /* MW 9 */ + 11426 "00000000" // /* MW 8 */ + 11427 "00000000" // /* MW 7 */ + 11428 "00000000" // /* MW 6 */ + 11429 "11111110" // /* MW 5 */ + 11430 "00111111" // /* MW 4 */ + 11431 "00110000" // /* MW 3 */ + 11432 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 11433 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 149 first +.src_ref 3 "elementwise_binary.h" 156 37 +.src_ref 3 "elementwise_binary.h" 168 8 first +.function_start + 11440 "10111010" // MOVA m0, #32; MOVXM ls, #11616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11441 "00010000" // /* MW 9 */ + 11442 "10110000" // /* MW 8 */ + 11443 "01111110" // /* MW 7 */ + 11444 "00001000" // /* MW 6 */ + 11445 "00000000" // /* MW 5 */ + 11446 "00000000" // /* MW 4 */ + 11447 "10000000" // /* MW 3 */ + 11448 "00000000" // /* MW 2 */ + 11449 "00000100" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 37 first +.src_ref 3 "elementwise_binary.h" 168 8 first + 11450 "10111010" // LDA r3, [p3], m0; MOVXM le, #11632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11451 "00010000" // /* MW 9 */ + 11452 "10111000" // /* MW 8 */ + 11453 "10111110" // /* MW 7 */ + 11454 "00001001" // /* MW 6 */ + 11455 "00000000" // /* MW 5 */ + 11456 "00000000" // /* MW 4 */ + 11457 "11010000" // /* MW 3 */ + 11458 "00001110" // /* MW 2 */ + 11459 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11460 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11461 "01011000" // /* MW 9 */ + 11462 "00111100" // /* MW 8 */ + 11463 "00001011" // /* MW 7 */ + 11464 "01001000" // /* MW 6 */ + 11465 "00010111" // /* MW 5 */ + 11466 "00111110" // /* MW 4 */ + 11467 "11010000" // /* MW 3 */ + 11468 "10010000" // /* MW 2 */ + 11469 "01100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11470 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11471 "00010000" // /* MW 9 */ + 11472 "00110100" // /* MW 8 */ + 11473 "00110010" // /* MW 7 */ + 11474 "11110010" // /* MW 6 */ + 11475 "00000001" // /* MW 5 */ + 11476 "00000000" // /* MW 4 */ + 11477 "11010000" // /* MW 3 */ + 11478 "10000000" // /* MW 2 */ + 11479 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11480 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11481 "01000010" // /* MW 3 */ + 11482 "00000100" // /* MW 2 */ + 11483 "00000100" // /* MW 1 */ + 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11485 "00000000" // /* MW 1 */ + 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11487 "00000000" // /* MW 1 */ + 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11489 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11490 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11491 "00011101" // /* MW 3 */ + 11492 "11000010" // /* MW 2 */ + 11493 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 168 8 +.src_ref 3 "elementwise_binary.h" 187 20 first + 11494 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11495 "11111001" // /* MW 5 */ + 11496 "11100001" // /* MW 4 */ + 11497 "10001010" // /* MW 3 */ + 11498 "00001110" // /* MW 2 */ + 11499 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11500 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11501 "01101000" // /* MW 5 */ + 11502 "01010000" // /* MW 4 */ + 11503 "01110000" // /* MW 3 */ + 11504 "00010011" // /* MW 2 */ + 11505 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11506 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11507 "10000000" // /* MW 7 */ + 11508 "10111010" // /* MW 6 */ + 11509 "11101000" // /* MW 5 */ + 11510 "01010000" // /* MW 4 */ + 11511 "01110000" // /* MW 3 */ + 11512 "00011011" // /* MW 2 */ + 11513 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11514 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11515 "01101000" // /* MW 5 */ + 11516 "01010000" // /* MW 4 */ + 11517 "01110000" // /* MW 3 */ + 11518 "00010011" // /* MW 2 */ + 11519 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11520 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11521 "11101000" // /* MW 5 */ + 11522 "01010000" // /* MW 4 */ + 11523 "01110000" // /* MW 3 */ + 11524 "00011011" // /* MW 2 */ + 11525 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11526 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11527 "10011011" // /* MW 3 */ + 11528 "00001000" // /* MW 2 */ + 11529 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11530 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11531 "01101000" // /* MW 5 */ + 11532 "01010000" // /* MW 4 */ + 11533 "01110000" // /* MW 3 */ + 11534 "00011011" // /* MW 2 */ + 11535 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11536 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11537 "11101000" // /* MW 5 */ + 11538 "01010000" // /* MW 4 */ + 11539 "01110000" // /* MW 3 */ + 11540 "00010011" // /* MW 2 */ + 11541 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11542 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11543 "01000001" // /* MW 9 */ + 11544 "11100010" // /* MW 8 */ + 11545 "00000000" // /* MW 7 */ + 11546 "00011101" // /* MW 6 */ + 11547 "00110100" // /* MW 5 */ + 11548 "00101000" // /* MW 4 */ + 11549 "01110000" // /* MW 3 */ + 11550 "00011011" // /* MW 2 */ + 11551 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11552 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11553 "01100001" // /* MW 9 */ + 11554 "11100000" // /* MW 8 */ + 11555 "00000001" // /* MW 7 */ + 11556 "00011101" // /* MW 6 */ + 11557 "01110100" // /* MW 5 */ + 11558 "00101000" // /* MW 4 */ + 11559 "01110000" // /* MW 3 */ + 11560 "00010011" // /* MW 2 */ + 11561 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11562 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11563 "01000001" // /* MW 9 */ + 11564 "11100010" // /* MW 8 */ + 11565 "00000000" // /* MW 7 */ + 11566 "00011101" // /* MW 6 */ + 11567 "00110100" // /* MW 5 */ + 11568 "00101000" // /* MW 4 */ + 11569 "01110000" // /* MW 3 */ + 11570 "00011011" // /* MW 2 */ + 11571 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11572 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "01100001" // /* MW 9 */ + 11574 "11100000" // /* MW 8 */ + 11575 "00000001" // /* MW 7 */ + 11576 "00011101" // /* MW 6 */ + 11577 "01110100" // /* MW 5 */ + 11578 "00101000" // /* MW 4 */ + 11579 "01110000" // /* MW 3 */ + 11580 "00010011" // /* MW 2 */ + 11581 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11582 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11583 "01000001" // /* MW 9 */ + 11584 "11100010" // /* MW 8 */ + 11585 "00000000" // /* MW 7 */ + 11586 "00011101" // /* MW 6 */ + 11587 "00110100" // /* MW 5 */ + 11588 "00101000" // /* MW 4 */ + 11589 "01110000" // /* MW 3 */ + 11590 "00011011" // /* MW 2 */ + 11591 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11592 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11593 "01100001" // /* MW 9 */ + 11594 "11100000" // /* MW 8 */ + 11595 "00000001" // /* MW 7 */ + 11596 "00011101" // /* MW 6 */ + 11597 "01110100" // /* MW 5 */ + 11598 "00101000" // /* MW 4 */ + 11599 "01110000" // /* MW 3 */ + 11600 "00010011" // /* MW 2 */ + 11601 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11602 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11603 "01000001" // /* MW 13 */ + 11604 "11100010" // /* MW 12 */ + 11605 "00000000" // /* MW 11 */ + 11606 "10001100" // /* MW 10 */ + 11607 "01110000" // /* MW 9 */ + 11608 "00001000" // /* MW 8 */ + 11609 "00000000" // /* MW 7 */ + 11610 "00000000" // /* MW 6 */ + 11611 "01101000" // /* MW 5 */ + 11612 "01010000" // /* MW 4 */ + 11613 "01110000" // /* MW 3 */ + 11614 "00011011" // /* MW 2 */ + 11615 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11616 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11617 "00000011" // /* MW 15 */ + 11618 "00001111" // /* MW 14 */ + 11619 "01111000" // /* MW 13 */ + 11620 "10100101" // /* MW 12 */ + 11621 "00000001" // /* MW 11 */ + 11622 "00000000" // /* MW 10 */ + 11623 "00000000" // /* MW 9 */ + 11624 "00000000" // /* MW 8 */ + 11625 "10100011" // /* MW 7 */ + 11626 "00011100" // /* MW 6 */ + 11627 "11101010" // /* MW 5 */ + 11628 "01010000" // /* MW 4 */ + 11629 "01110000" // /* MW 3 */ + 11630 "00010011" // /* MW 2 */ + 11631 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11632 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11633 "00010010" // /* MW 15 */ + 11634 "00000111" // /* MW 14 */ + 11635 "01111000" // /* MW 13 */ + 11636 "10100101" // /* MW 12 */ + 11637 "00000001" // /* MW 11 */ + 11638 "00000000" // /* MW 10 */ + 11639 "00000000" // /* MW 9 */ + 11640 "00000000" // /* MW 8 */ + 11641 "00100011" // /* MW 7 */ + 11642 "00011100" // /* MW 6 */ + 11643 "01101010" // /* MW 5 */ + 11644 "01010000" // /* MW 4 */ + 11645 "01110000" // /* MW 3 */ + 11646 "00011011" // /* MW 2 */ + 11647 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 11648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11649 "01100001" // /* MW 7 */ + 11650 "11100000" // /* MW 6 */ + 11651 "00000001" // /* MW 5 */ + 11652 "00000010" // /* MW 4 */ + 11653 "01100000" // /* MW 3 */ + 11654 "10010100" // /* MW 2 */ + 11655 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11657 "01000001" // /* MW 7 */ + 11658 "11100010" // /* MW 6 */ + 11659 "00000000" // /* MW 5 */ + 11660 "00000010" // /* MW 4 */ + 11661 "01100000" // /* MW 3 */ + 11662 "10000100" // /* MW 2 */ + 11663 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11665 "01100001" // /* MW 7 */ + 11666 "11100000" // /* MW 6 */ + 11667 "00000001" // /* MW 5 */ + 11668 "00000010" // /* MW 4 */ + 11669 "01100000" // /* MW 3 */ + 11670 "10010100" // /* MW 2 */ + 11671 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11672 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11673 "01000001" // /* MW 7 */ + 11674 "11100010" // /* MW 6 */ + 11675 "00000000" // /* MW 5 */ + 11676 "00000010" // /* MW 4 */ + 11677 "01100000" // /* MW 3 */ + 11678 "10000100" // /* MW 2 */ + 11679 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11680 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11681 "01100001" // /* MW 7 */ + 11682 "11100000" // /* MW 6 */ + 11683 "00000001" // /* MW 5 */ + 11684 "00000010" // /* MW 4 */ + 11685 "01100000" // /* MW 3 */ + 11686 "10010100" // /* MW 2 */ + 11687 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11688 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11689 "01000001" // /* MW 7 */ + 11690 "11100010" // /* MW 6 */ + 11691 "00000000" // /* MW 5 */ + 11692 "00000010" // /* MW 4 */ + 11693 "01100000" // /* MW 3 */ + 11694 "10000100" // /* MW 2 */ + 11695 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11696 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11697 "01100001" // /* MW 7 */ + 11698 "11100000" // /* MW 6 */ + 11699 "00000001" // /* MW 5 */ + 11700 "00000010" // /* MW 4 */ + 11701 "01100000" // /* MW 3 */ + 11702 "10010100" // /* MW 2 */ + 11703 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11704 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11705 "00100011" // /* MW 3 */ + 11706 "00011100" // /* MW 2 */ + 11707 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 172 4 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11708 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11709 "00000000" // /* MW 5 */ + 11710 "01010000" // /* MW 4 */ + 11711 "01100000" // /* MW 3 */ + 11712 "10010100" // /* MW 2 */ + 11713 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11714 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11715 "00100011" // /* MW 3 */ + 11716 "00011100" // /* MW 2 */ + 11717 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11718 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11719 "10100011" // /* MW 3 */ + 11720 "00011100" // /* MW 2 */ + 11721 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 11722 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11723 "00100011" // /* MW 3 */ + 11724 "00011100" // /* MW 2 */ + 11725 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 11726 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11727 "10100011" // /* MW 3 */ + 11728 "00011100" // /* MW 2 */ + 11729 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 11731 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 369 first +.src_ref 7 "superkernels.cpp" 374 6 +.function_start + 11744 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11745 "10000000" // /* MW 5 */ + 11746 "11001000" // /* MW 4 */ + 11747 "11001000" // /* MW 3 */ + 11748 "00000111" // /* MW 2 */ + 11749 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first + 11750 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11751 "11000001" // /* MW 5 */ + 11752 "10110101" // /* MW 4 */ + 11753 "11011000" // /* MW 3 */ + 11754 "11000010" // /* MW 2 */ + 11755 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 369 + 11756 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11757 "00000001" // /* MW 5 */ + 11758 "00000000" // /* MW 4 */ + 11759 "00000000" // /* MW 3 */ + 11760 "00001000" // /* MW 2 */ + 11761 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 11762 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11763 "01111001" // /* MW 9 */ + 11764 "01100000" // /* MW 8 */ + 11765 "11001010" // /* MW 7 */ + 11766 "10000001" // /* MW 6 */ + 11767 "00010100" // /* MW 5 */ + 11768 "00100011" // /* MW 4 */ + 11769 "10110000" // /* MW 3 */ + 11770 "00111010" // /* MW 2 */ + 11771 "11111111" // /* MW 1 */ + 11772 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11773 "01110000" // /* MW 7 */ + 11774 "11010000" // /* MW 6 */ + 11775 "00001011" // /* MW 5 */ + 11776 "00000000" // /* MW 4 */ + 11777 "10110000" // /* MW 3 */ + 11778 "10000011" // /* MW 2 */ + 11779 "11111101" // /* MW 1 */ + 11780 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11781 "00010101" // /* MW 3 */ + 11782 "11111100" // /* MW 2 */ + 11783 "00001111" // /* MW 1 */ + 11784 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11785 "00111101" // /* MW 3 */ + 11786 "11110000" // /* MW 2 */ + 11787 "00001111" // /* MW 1 */ + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first +.src_ref 7 "superkernels.cpp" 374 16 first + 11790 "10000100" // JNZ r16, #11936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11936 delay_slots=5 */ + 11791 "00000001" // /* MW 5 */ + 11792 "01000000" // /* MW 4 */ + 11793 "01010000" // /* MW 3 */ + 11794 "00010111" // /* MW 2 */ + 11795 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 30 first +.delay_slot + 11796 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11797 "11111011" // /* MW 3 */ + 11798 "01100011" // /* MW 2 */ + 11799 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11801 "10100000" // /* MW 5 */ + 11802 "11001000" // /* MW 4 */ + 11803 "11000100" // /* MW 3 */ + 11804 "00000111" // /* MW 2 */ + 11805 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11806 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11807 "01110000" // /* MW 7 */ + 11808 "01100000" // /* MW 6 */ + 11809 "00110111" // /* MW 5 */ + 11810 "00000001" // /* MW 4 */ + 11811 "00110000" // /* MW 3 */ + 11812 "11000110" // /* MW 2 */ + 11813 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 11814 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11815 "11000000" // /* MW 3 */ + 11816 "11010110" // /* MW 2 */ + 11817 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 7 "superkernels.cpp" 379 28 +.src_ref 7 "superkernels.cpp" 381 42 +.src_ref 7 "superkernels.cpp" 393 2 +.delay_slot + 11818 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11819 "00010001" // /* MW 9 */ + 11820 "11000000" // /* MW 8 */ + 11821 "10110010" // /* MW 7 */ + 11822 "11110011" // /* MW 6 */ + 11823 "00000001" // /* MW 5 */ + 11824 "00000000" // /* MW 4 */ + 11825 "10110000" // /* MW 3 */ + 11826 "10100011" // /* MW 2 */ + 11827 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11828 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11829 "00010001" // /* MW 9 */ + 11830 "00110100" // /* MW 8 */ + 11831 "00110010" // /* MW 7 */ + 11832 "11110001" // /* MW 6 */ + 11833 "00000001" // /* MW 5 */ + 11834 "00000000" // /* MW 4 */ + 11835 "01100000" // /* MW 3 */ + 11836 "10010001" // /* MW 2 */ + 11837 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11838 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11839 "00010000" // /* MW 9 */ + 11840 "00110010" // /* MW 8 */ + 11841 "00110010" // /* MW 7 */ + 11842 "11110001" // /* MW 6 */ + 11843 "00000001" // /* MW 5 */ + 11844 "00000000" // /* MW 4 */ + 11845 "11100000" // /* MW 3 */ + 11846 "11000000" // /* MW 2 */ + 11847 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11849 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11850 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 11851 "00000001" // /* MW 5 */ + 11852 "00000000" // /* MW 4 */ + 11853 "00010000" // /* MW 3 */ + 11854 "00010110" // /* MW 2 */ + 11855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11859 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11860 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11861 "00110001" // /* MW 3 */ + 11862 "00100000" // /* MW 2 */ + 11863 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11864 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11865 "00000101" // /* MW 3 */ + 11866 "00100000" // /* MW 2 */ + 11867 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11868 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11869 "00010001" // /* MW 3 */ + 11870 "00000110" // /* MW 2 */ + 11871 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 381 42 first +.return_address + 11872 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11873 "00010000" // /* MW 9 */ + 11874 "00101000" // /* MW 8 */ + 11875 "10110010" // /* MW 7 */ + 11876 "11110000" // /* MW 6 */ + 11877 "00000001" // /* MW 5 */ + 11878 "00000000" // /* MW 4 */ + 11879 "11010000" // /* MW 3 */ + 11880 "11000010" // /* MW 2 */ + 11881 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 390 48 + 11882 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11883 "00010000" // /* MW 9 */ + 11884 "00101010" // /* MW 8 */ + 11885 "10110010" // /* MW 7 */ + 11886 "11110001" // /* MW 6 */ + 11887 "00000001" // /* MW 5 */ + 11888 "00000000" // /* MW 4 */ + 11889 "11010000" // /* MW 3 */ + 11890 "11000110" // /* MW 2 */ + 11891 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 28 first +.src_ref 7 "superkernels.cpp" 382 16 +.src_ref 7 "superkernels.cpp" 391 48 + 11892 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11893 "00010000" // /* MW 9 */ + 11894 "00101110" // /* MW 8 */ + 11895 "10110010" // /* MW 7 */ + 11896 "11110000" // /* MW 6 */ + 11897 "00000001" // /* MW 5 */ + 11898 "00000000" // /* MW 4 */ + 11899 "01010000" // /* MW 3 */ + 11900 "11001011" // /* MW 2 */ + 11901 "11101010" // /* MW 1 */ + 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11903 "00000000" // /* MW 1 */ + 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11905 "00000000" // /* MW 1 */ + 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11907 "00000000" // /* MW 1 */ + 11908 "10000100" // J #11952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11952 delay_slots=5 */ + 11909 "00000000" // /* MW 5 */ + 11910 "00000000" // /* MW 4 */ + 11911 "01011000" // /* MW 3 */ + 11912 "00010111" // /* MW 2 */ + 11913 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 +.delay_slot + 11914 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11915 "11000000" // /* MW 5 */ + 11916 "11001000" // /* MW 4 */ + 11917 "11000100" // /* MW 3 */ + 11918 "00000111" // /* MW 2 */ + 11919 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 27 first +.delay_slot + 11920 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11921 "00001111" // /* MW 3 */ + 11922 "01100001" // /* MW 2 */ + 11923 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 first +.delay_slot + 11924 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "01010001" // /* MW 3 */ + 11926 "00000110" // /* MW 2 */ + 11927 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 first +.delay_slot + 11928 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "00010001" // /* MW 3 */ + 11930 "00000110" // /* MW 2 */ + 11931 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 382 16 first +.delay_slot + 11932 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11933 "00010001" // /* MW 3 */ + 11934 "00000110" // /* MW 2 */ + 11935 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 390 48 + 11936 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11937 "10101000" // /* MW 5 */ + 11938 "11001000" // /* MW 4 */ + 11939 "11000110" // /* MW 3 */ + 11940 "00000111" // /* MW 2 */ + 11941 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 + 11942 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11943 "00010000" // /* MW 9 */ + 11944 "00101110" // /* MW 8 */ + 11945 "10110010" // /* MW 7 */ + 11946 "11110000" // /* MW 6 */ + 11947 "00000001" // /* MW 5 */ + 11948 "00000000" // /* MW 4 */ + 11949 "11110000" // /* MW 3 */ + 11950 "00101100" // /* MW 2 */ + 11951 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 11952 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11953 "10000110" // /* MW 3 */ + 11954 "01100111" // /* MW 2 */ + 11955 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 11956 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11957 "00010000" // /* MW 9 */ + 11958 "00100000" // /* MW 8 */ + 11959 "00110010" // /* MW 7 */ + 11960 "11110001" // /* MW 6 */ + 11961 "00000001" // /* MW 5 */ + 11962 "00000000" // /* MW 4 */ + 11963 "11010000" // /* MW 3 */ + 11964 "11101110" // /* MW 2 */ + 11965 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11966 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11967 "00010110" // /* MW 3 */ + 11968 "11111110" // /* MW 2 */ + 11969 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11970 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11971 "00110110" // /* MW 3 */ + 11972 "11111110" // /* MW 2 */ + 11973 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first + 11974 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11975 "01010110" // /* MW 3 */ + 11976 "00000110" // /* MW 2 */ + 11977 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 11978 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11979 "01110110" // /* MW 3 */ + 11980 "01000110" // /* MW 2 */ + 11981 "00000000" // /* MW 1 */ + 11982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11983 "00000000" // /* MW 1 */ + 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11985 "00000000" // /* MW 1 */ + 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11987 "00000000" // /* MW 1 */ + 11988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11990 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11991 "00000010" // /* MW 3 */ + 11992 "01100001" // /* MW 2 */ + 11993 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 11994 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11995 "00001110" // /* MW 5 */ + 11996 "01000000" // /* MW 4 */ + 11997 "00111001" // /* MW 3 */ + 11998 "11000010" // /* MW 2 */ + 11999 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 + 12000 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12001 "00010001" // /* MW 3 */ + 12002 "00000110" // /* MW 2 */ + 12003 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 12004 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12005 "11111101" // /* MW 3 */ + 12006 "11100000" // /* MW 2 */ + 12007 "00010111" // /* MW 1 */ + 12008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12009 "00000000" // /* MW 1 */ + 12010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12011 "00000000" // /* MW 1 */ + 12012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12013 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12014 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12015 "00001000" // /* MW 3 */ + 12016 "11010011" // /* MW 2 */ + 12017 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 12018 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12019 "00000110" // /* MW 3 */ + 12020 "01100111" // /* MW 2 */ + 12021 "00011010" // /* MW 1 */ + 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12023 "00000000" // /* MW 1 */ + 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12025 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 12026 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12027 "01110110" // /* MW 3 */ + 12028 "11111111" // /* MW 2 */ + 12029 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 12030 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12031 "00110110" // /* MW 3 */ + 12032 "11111110" // /* MW 2 */ + 12033 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 12034 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12035 "01010110" // /* MW 3 */ + 12036 "11111110" // /* MW 2 */ + 12037 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 12038 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "01110110" // /* MW 3 */ + 12040 "01010110" // /* MW 2 */ + 12041 "00000010" // /* MW 1 */ + 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12043 "00000000" // /* MW 1 */ + 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12045 "00000000" // /* MW 1 */ + 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12047 "00000000" // /* MW 1 */ + 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12049 "00000000" // /* MW 1 */ + 12050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12051 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 12052 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12053 "00010010" // /* MW 3 */ + 12054 "10100011" // /* MW 2 */ + 12055 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 12056 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12057 "00110001" // /* MW 3 */ + 12058 "00000110" // /* MW 2 */ + 12059 "00001010" // /* MW 1 */ + 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12061 "00000000" // /* MW 1 */ + 12062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12063 "00000000" // /* MW 1 */ + 12064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12065 "00000000" // /* MW 1 */ + 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12067 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12068 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12069 "00001000" // /* MW 3 */ + 12070 "11010011" // /* MW 2 */ + 12071 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 +.src_ref 7 "superkernels.cpp" 391 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 12072 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12073 "01111001" // /* MW 9 */ + 12074 "01100000" // /* MW 8 */ + 12075 "11001110" // /* MW 7 */ + 12076 "00101001" // /* MW 6 */ + 12077 "00000000" // /* MW 5 */ + 12078 "00000001" // /* MW 4 */ + 12079 "01100000" // /* MW 3 */ + 12080 "00010001" // /* MW 2 */ + 12081 "11010001" // /* MW 1 */ + 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12083 "00000000" // /* MW 1 */ + 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12085 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 12086 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12087 "00011001" // /* MW 3 */ + 12088 "11101110" // /* MW 2 */ + 12089 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 48 first + 12090 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12091 "00111011" // /* MW 5 */ + 12092 "11011000" // /* MW 4 */ + 12093 "11011111" // /* MW 3 */ + 12094 "11000110" // /* MW 2 */ + 12095 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 first +.src_ref 7 "superkernels.cpp" 393 2 + 12096 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "10000001" // /* MW 5 */ + 12098 "11011101" // /* MW 4 */ + 12099 "11010110" // /* MW 3 */ + 12100 "11010010" // /* MW 2 */ + 12101 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12102 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12103 "01010110" // /* MW 3 */ + 12104 "01001110" // /* MW 2 */ + 12105 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12106 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00011110" // /* MW 3 */ + 12108 "01011101" // /* MW 2 */ + 12109 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12110 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "11000000" // /* MW 3 */ + 12112 "01100000" // /* MW 2 */ + 12113 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12115 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12116 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12117 "01110110" // /* MW 3 */ + 12118 "00000110" // /* MW 2 */ + 12119 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 393 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12122 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */ + 12123 "00000001" // /* MW 5 */ + 12124 "00000000" // /* MW 4 */ + 12125 "01011000" // /* MW 3 */ + 12126 "00010110" // /* MW 2 */ + 12127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12128 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12129 "11000000" // /* MW 3 */ + 12130 "11010100" // /* MW 2 */ + 12131 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12132 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12133 "00001101" // /* MW 3 */ + 12134 "01100011" // /* MW 2 */ + 12135 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 first +.delay_slot + 12136 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "00001101" // /* MW 3 */ + 12138 "00100001" // /* MW 2 */ + 12139 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 +.delay_slot + 12140 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12141 "01000001" // /* MW 3 */ + 12142 "01101001" // /* MW 2 */ + 12143 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12145 "00000000" // /* MW 15 */ + 12146 "00000000" // /* MW 14 */ + 12147 "10101000" // /* MW 13 */ + 12148 "11100010" // /* MW 12 */ + 12149 "00110100" // /* MW 11 */ + 12150 "00000000" // /* MW 10 */ + 12151 "00000000" // /* MW 9 */ + 12152 "00000000" // /* MW 8 */ + 12153 "01011011" // /* MW 7 */ + 12154 "00000001" // /* MW 6 */ + 12155 "00100000" // /* MW 5 */ + 12156 "00000000" // /* MW 4 */ + 12157 "11110000" // /* MW 3 */ + 12158 "00101100" // /* MW 2 */ + 12159 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 12160 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12161 "01111000" // /* MW 9 */ + 12162 "11010000" // /* MW 8 */ + 12163 "10110011" // /* MW 7 */ + 12164 "00101000" // /* MW 6 */ + 12165 "00000000" // /* MW 5 */ + 12166 "00000001" // /* MW 4 */ + 12167 "11010000" // /* MW 3 */ + 12168 "11000110" // /* MW 2 */ + 12169 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 + 12170 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12171 "11000000" // /* MW 5 */ + 12172 "11001000" // /* MW 4 */ + 12173 "11001100" // /* MW 3 */ + 12174 "00000111" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ + 12176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12177 "00000000" // /* MW 1 */ + 12178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12179 "00000000" // /* MW 1 */ + 12180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12181 "00000000" // /* MW 1 */ + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12186 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12187 "00001000" // /* MW 3 */ + 12188 "01010001" // /* MW 2 */ + 12189 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 12190 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12191 "00110110" // /* MW 3 */ + 12192 "11110110" // /* MW 2 */ + 12193 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 12194 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12195 "00011001" // /* MW 3 */ + 12196 "11101101" // /* MW 2 */ + 12197 "00000111" // /* MW 1 */ + 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12199 "00000000" // /* MW 1 */ + 12200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12201 "00000000" // /* MW 1 */ + 12202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12203 "00000000" // /* MW 1 */ + 12204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12205 "00000000" // /* MW 1 */ + 12206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12207 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 12208 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12209 "00010001" // /* MW 3 */ + 12210 "00100011" // /* MW 2 */ + 12211 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 12212 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12213 "01100011" // /* MW 5 */ + 12214 "11101100" // /* MW 4 */ + 12215 "11010011" // /* MW 3 */ + 12216 "11000110" // /* MW 2 */ + 12217 "01001010" // /* MW 1 */ + 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12219 "00000000" // /* MW 1 */ + 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12221 "00000000" // /* MW 1 */ + 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12223 "00000000" // /* MW 1 */ + 12224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12225 "00000000" // /* MW 1 */ + 12226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12227 "00000000" // /* MW 1 */ + 12228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12229 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12230 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12231 "00001000" // /* MW 3 */ + 12232 "01010001" // /* MW 2 */ + 12233 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 +.src_ref 7 "superkernels.cpp" 398 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 12234 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12235 "00010000" // /* MW 9 */ + 12236 "00100000" // /* MW 8 */ + 12237 "10110010" // /* MW 7 */ + 12238 "11110000" // /* MW 6 */ + 12239 "00000001" // /* MW 5 */ + 12240 "00000000" // /* MW 4 */ + 12241 "11010000" // /* MW 3 */ + 12242 "11001110" // /* MW 2 */ + 12243 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 first + 12244 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12245 "01010110" // /* MW 3 */ + 12246 "00000110" // /* MW 2 */ + 12247 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12248 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12249 "00110110" // /* MW 3 */ + 12250 "00000110" // /* MW 2 */ + 12251 "00000001" // /* MW 1 */ + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ + 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12257 "00000000" // /* MW 1 */ + 12258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12259 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 12260 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12261 "00110001" // /* MW 3 */ + 12262 "00100001" // /* MW 2 */ + 12263 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 12264 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12265 "00010001" // /* MW 3 */ + 12266 "11100110" // /* MW 2 */ + 12267 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 16 first + 12268 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12269 "00101000" // /* MW 3 */ + 12270 "01100001" // /* MW 2 */ + 12271 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12272 "10000100" // JNZ r16, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */ + 12273 "00000001" // /* MW 5 */ + 12274 "01000000" // /* MW 4 */ + 12275 "00001000" // /* MW 3 */ + 12276 "00011000" // /* MW 2 */ + 12277 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12285 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 + 12288 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12289 "00000001" // /* MW 3 */ + 12290 "00100000" // /* MW 2 */ + 12291 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 first + 12292 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12293 "11000001" // /* MW 11 */ + 12294 "00001000" // /* MW 10 */ + 12295 "10000011" // /* MW 9 */ + 12296 "00000000" // /* MW 8 */ + 12297 "00000000" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00100000" // /* MW 5 */ + 12300 "00000000" // /* MW 4 */ + 12301 "11110000" // /* MW 3 */ + 12302 "00101100" // /* MW 2 */ + 12303 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 400 + 12304 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12305 "00111001" // /* MW 3 */ + 12306 "11110000" // /* MW 2 */ + 12307 "00000111" // /* MW 1 */ + 12308 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12309 "11110001" // /* MW 3 */ + 12310 "11111101" // /* MW 2 */ + 12311 "00000111" // /* MW 1 */ + 12312 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12313 "10011001" // /* MW 3 */ + 12314 "11110111" // /* MW 2 */ + 12315 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12317 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12318 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12319 "11010001" // /* MW 3 */ + 12320 "11111001" // /* MW 2 */ + 12321 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12325 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12326 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12327 "00000000" // /* MW 3 */ + 12328 "00101000" // /* MW 2 */ + 12329 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12330 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12331 "00001011" // /* MW 3 */ + 12332 "10001110" // /* MW 2 */ + 12333 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 +.delay_slot + 12334 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12335 "00000001" // /* MW 5 */ + 12336 "00000000" // /* MW 4 */ + 12337 "00000000" // /* MW 3 */ + 12338 "11111000" // /* MW 2 */ + 12339 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12343 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 12345 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 2 "conv2d_dw_bf16_params.h" 211 first +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.function_start + 12352 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12353 "00010000" // /* MW 9 */ + 12354 "11100000" // /* MW 8 */ + 12355 "10110011" // /* MW 7 */ + 12356 "11110000" // /* MW 6 */ + 12357 "00000001" // /* MW 5 */ + 12358 "00000000" // /* MW 4 */ + 12359 "11010000" // /* MW 3 */ + 12360 "10000101" // /* MW 2 */ + 12361 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12362 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12363 "01011000" // /* MW 9 */ + 12364 "00000000" // /* MW 8 */ + 12365 "00001000" // /* MW 7 */ + 12366 "01001011" // /* MW 6 */ + 12367 "00000000" // /* MW 5 */ + 12368 "00000001" // /* MW 4 */ + 12369 "11010000" // /* MW 3 */ + 12370 "10000001" // /* MW 2 */ + 12371 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 211 + 12372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12373 "00000001" // /* MW 5 */ + 12374 "00000000" // /* MW 4 */ + 12375 "00000000" // /* MW 3 */ + 12376 "00001000" // /* MW 2 */ + 12377 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 + 12378 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12379 "00010001" // /* MW 9 */ + 12380 "11100000" // /* MW 8 */ + 12381 "10110011" // /* MW 7 */ + 12382 "11110011" // /* MW 6 */ + 12383 "00000001" // /* MW 5 */ + 12384 "00000000" // /* MW 4 */ + 12385 "10110000" // /* MW 3 */ + 12386 "11110011" // /* MW 2 */ + 12387 "11111110" // /* MW 1 */ + 12388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12389 "00111101" // /* MW 3 */ + 12390 "11111100" // /* MW 2 */ + 12391 "00001111" // /* MW 1 */ + 12392 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12393 "11110101" // /* MW 3 */ + 12394 "11111001" // /* MW 2 */ + 12395 "00001111" // /* MW 1 */ + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12398 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12399 "00101001" // /* MW 3 */ + 12400 "00011100" // /* MW 2 */ + 12401 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12402 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12403 "00001001" // /* MW 3 */ + 12404 "00011100" // /* MW 2 */ + 12405 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12406 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12407 "00101110" // /* MW 3 */ + 12408 "00000100" // /* MW 2 */ + 12409 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12410 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12411 "00001110" // /* MW 3 */ + 12412 "00010100" // /* MW 2 */ + 12413 "00000000" // /* MW 1 */ + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12415 "00000000" // /* MW 1 */ + 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12417 "00000000" // /* MW 1 */ + 12418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12419 "00000000" // /* MW 1 */ + 12420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12421 "00000000" // /* MW 1 */ + 12422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12423 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12424 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12425 "00101001" // /* MW 3 */ + 12426 "00000100" // /* MW 2 */ + 12427 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12428 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12429 "00001001" // /* MW 3 */ + 12430 "00010100" // /* MW 2 */ + 12431 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first + 12432 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12433 "00101010" // /* MW 3 */ + 12434 "01011110" // /* MW 2 */ + 12435 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 52 + 12436 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12437 "01001010" // /* MW 3 */ + 12438 "11101110" // /* MW 2 */ + 12439 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12440 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12441 "00101010" // /* MW 3 */ + 12442 "11101100" // /* MW 2 */ + 12443 "00000111" // /* MW 1 */ + 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12445 "00000000" // /* MW 1 */ + 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12447 "00000000" // /* MW 1 */ + 12448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12449 "00000000" // /* MW 1 */ + 12450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12451 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.no_stack_arguments + 12452 "00000100" // JL #15664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */ + 12453 "00000001" // /* MW 5 */ + 12454 "00000000" // /* MW 4 */ + 12455 "10011000" // /* MW 3 */ + 12456 "00011110" // /* MW 2 */ + 12457 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 38 +.delay_slot + 12458 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12459 "01000011" // /* MW 5 */ + 12460 "10111110" // /* MW 4 */ + 12461 "10111000" // /* MW 3 */ + 12462 "11001010" // /* MW 2 */ + 12463 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 +.delay_slot + 12464 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12465 "00010001" // /* MW 5 */ + 12466 "11000010" // /* MW 4 */ + 12467 "10110000" // /* MW 3 */ + 12468 "10000110" // /* MW 2 */ + 12469 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12470 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12471 "00010101" // /* MW 5 */ + 12472 "11101111" // /* MW 4 */ + 12473 "10110111" // /* MW 3 */ + 12474 "01000010" // /* MW 2 */ + 12475 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12476 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12477 "11110001" // /* MW 3 */ + 12478 "00100010" // /* MW 2 */ + 12479 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12480 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12481 "00000000" // /* MW 15 */ + 12482 "00000000" // /* MW 14 */ + 12483 "01111000" // /* MW 13 */ + 12484 "10100101" // /* MW 12 */ + 12485 "00000001" // /* MW 11 */ + 12486 "10010000" // /* MW 10 */ + 12487 "00001000" // /* MW 9 */ + 12488 "00011110" // /* MW 8 */ + 12489 "01011011" // /* MW 7 */ + 12490 "00000001" // /* MW 6 */ + 12491 "00100000" // /* MW 5 */ + 12492 "00000000" // /* MW 4 */ + 12493 "11110000" // /* MW 3 */ + 12494 "00101100" // /* MW 2 */ + 12495 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.return_address + 12496 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12497 "00000010" // /* MW 5 */ + 12498 "01000000" // /* MW 4 */ + 12499 "00100000" // /* MW 3 */ + 12500 "11010010" // /* MW 2 */ + 12501 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first + 12502 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12503 "01000011" // /* MW 5 */ + 12504 "01001000" // /* MW 4 */ + 12505 "01011000" // /* MW 3 */ + 12506 "11000101" // /* MW 2 */ + 12507 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 52 + 12508 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12509 "01101010" // /* MW 3 */ + 12510 "11101110" // /* MW 2 */ + 12511 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12512 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12513 "00110001" // /* MW 3 */ + 12514 "11101100" // /* MW 2 */ + 12515 "00000111" // /* MW 1 */ + 12516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12517 "00000000" // /* MW 1 */ + 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12519 "00000000" // /* MW 1 */ + 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12521 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first + 12522 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "01000110" // /* MW 3 */ + 12524 "11101001" // /* MW 2 */ + 12525 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 + 12526 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12527 "00001010" // /* MW 3 */ + 12528 "00110111" // /* MW 2 */ + 12529 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first + 12530 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12531 "01100011" // /* MW 5 */ + 12532 "11000110" // /* MW 4 */ + 12533 "10111000" // /* MW 3 */ + 12534 "01001110" // /* MW 2 */ + 12535 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.no_stack_arguments + 12536 "00111010" // ST r17, [sp, #-32]; JL #15664 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */ + 12537 "01000001" // /* MW 9 */ + 12538 "00000000" // /* MW 8 */ + 12539 "00000000" // /* MW 7 */ + 12540 "10100110" // /* MW 6 */ + 12541 "00000111" // /* MW 5 */ + 12542 "00000000" // /* MW 4 */ + 12543 "10110000" // /* MW 3 */ + 12544 "01000110" // /* MW 2 */ + 12545 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12546 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00100010" // /* MW 3 */ + 12548 "10101001" // /* MW 2 */ + 12549 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12550 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12551 "00001010" // /* MW 3 */ + 12552 "01110111" // /* MW 2 */ + 12553 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.delay_slot + 12554 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12555 "00010001" // /* MW 3 */ + 12556 "00100101" // /* MW 2 */ + 12557 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12558 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "01110000" // /* MW 3 */ + 12560 "00100110" // /* MW 2 */ + 12561 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 87 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12562 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12563 "01100000" // /* MW 13 */ + 12564 "00101011" // /* MW 12 */ + 12565 "00000000" // /* MW 11 */ + 12566 "00001001" // /* MW 10 */ + 12567 "10011000" // /* MW 9 */ + 12568 "00111101" // /* MW 8 */ + 12569 "00100010" // /* MW 7 */ + 12570 "01000001" // /* MW 6 */ + 12571 "00100100" // /* MW 5 */ + 12572 "00000000" // /* MW 4 */ + 12573 "11110000" // /* MW 3 */ + 12574 "00101100" // /* MW 2 */ + 12575 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 +.return_address + 12576 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12577 "01011000" // /* MW 9 */ + 12578 "01000010" // /* MW 8 */ + 12579 "00000000" // /* MW 7 */ + 12580 "11001000" // /* MW 6 */ + 12581 "00110111" // /* MW 5 */ + 12582 "00111111" // /* MW 4 */ + 12583 "00100000" // /* MW 3 */ + 12584 "00001110" // /* MW 2 */ + 12585 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12586 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12587 "01011000" // /* MW 9 */ + 12588 "11111100" // /* MW 8 */ + 12589 "00101001" // /* MW 7 */ + 12590 "00001000" // /* MW 6 */ + 12591 "10000000" // /* MW 5 */ + 12592 "00000001" // /* MW 4 */ + 12593 "00100000" // /* MW 3 */ + 12594 "11000010" // /* MW 2 */ + 12595 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 + 12596 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12597 "01011000" // /* MW 9 */ + 12598 "00000010" // /* MW 8 */ + 12599 "10001000" // /* MW 7 */ + 12600 "10001000" // /* MW 6 */ + 12601 "01100000" // /* MW 5 */ + 12602 "00000000" // /* MW 4 */ + 12603 "00100000" // /* MW 3 */ + 12604 "11011010" // /* MW 2 */ + 12605 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 + 12606 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12607 "01011000" // /* MW 9 */ + 12608 "00010111" // /* MW 8 */ + 12609 "10001000" // /* MW 7 */ + 12610 "00001011" // /* MW 6 */ + 12611 "01010001" // /* MW 5 */ + 12612 "00000000" // /* MW 4 */ + 12613 "01010000" // /* MW 3 */ + 12614 "01000101" // /* MW 2 */ + 12615 "11100001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 + 12616 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12617 "01011000" // /* MW 9 */ + 12618 "00100000" // /* MW 8 */ + 12619 "10000000" // /* MW 7 */ + 12620 "01001000" // /* MW 6 */ + 12621 "00100111" // /* MW 5 */ + 12622 "00111111" // /* MW 4 */ + 12623 "00100000" // /* MW 3 */ + 12624 "01010110" // /* MW 2 */ + 12625 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12626 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12627 "01011000" // /* MW 9 */ + 12628 "00000001" // /* MW 8 */ + 12629 "01001000" // /* MW 7 */ + 12630 "11001011" // /* MW 6 */ + 12631 "01110000" // /* MW 5 */ + 12632 "00000001" // /* MW 4 */ + 12633 "00100000" // /* MW 3 */ + 12634 "01111010" // /* MW 2 */ + 12635 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 + 12636 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12637 "01011000" // /* MW 9 */ + 12638 "11000000" // /* MW 8 */ + 12639 "11101111" // /* MW 7 */ + 12640 "00001011" // /* MW 6 */ + 12641 "11010000" // /* MW 5 */ + 12642 "00000101" // /* MW 4 */ + 12643 "10000000" // /* MW 3 */ + 12644 "11000000" // /* MW 2 */ + 12645 "11101001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12646 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12647 "00100001" // /* MW 3 */ + 12648 "00101000" // /* MW 2 */ + 12649 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12650 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12651 "00000110" // /* MW 3 */ + 12652 "11000111" // /* MW 2 */ + 12653 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 + 12654 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12655 "00000010" // /* MW 5 */ + 12656 "00110110" // /* MW 4 */ + 12657 "01010000" // /* MW 3 */ + 12658 "11110001" // /* MW 2 */ + 12659 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 + 12660 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12661 "11110101" // /* MW 5 */ + 12662 "00111111" // /* MW 4 */ + 12663 "01001011" // /* MW 3 */ + 12664 "00101000" // /* MW 2 */ + 12665 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12666 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12667 "00011101" // /* MW 5 */ + 12668 "00100000" // /* MW 4 */ + 12669 "11110001" // /* MW 3 */ + 12670 "11100001" // /* MW 2 */ + 12671 "01111000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12672 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12673 "01110000" // /* MW 3 */ + 12674 "00101000" // /* MW 2 */ + 12675 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 + 12676 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12677 "00000001" // /* MW 5 */ + 12678 "10100000" // /* MW 4 */ + 12679 "10010000" // /* MW 3 */ + 12680 "00000000" // /* MW 2 */ + 12681 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first + 12682 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12683 "00000001" // /* MW 5 */ + 12684 "10110100" // /* MW 4 */ + 12685 "10111101" // /* MW 3 */ + 12686 "11100111" // /* MW 2 */ + 12687 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first + 12688 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12689 "00000010" // /* MW 5 */ + 12690 "10100011" // /* MW 4 */ + 12691 "10110000" // /* MW 3 */ + 12692 "00001101" // /* MW 2 */ + 12693 "01111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 240 70 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first + 12694 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12695 "11111111" // /* MW 5 */ + 12696 "00110101" // /* MW 4 */ + 12697 "10110000" // /* MW 3 */ + 12698 "11001101" // /* MW 2 */ + 12699 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first + 12700 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12701 "00001111" // /* MW 3 */ + 12702 "11001101" // /* MW 2 */ + 12703 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first + 12704 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12705 "00011111" // /* MW 3 */ + 12706 "11011111" // /* MW 2 */ + 12707 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first +.src_ref 2 "conv2d_dw_bf16_params.h" 238 79 + 12708 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12709 "11111111" // /* MW 5 */ + 12710 "10110011" // /* MW 4 */ + 12711 "11111001" // /* MW 3 */ + 12712 "01101011" // /* MW 2 */ + 12713 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first + 12714 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12715 "00000111" // /* MW 3 */ + 12716 "00110111" // /* MW 2 */ + 12717 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first + 12718 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12719 "11011111" // /* MW 5 */ + 12720 "10010000" // /* MW 4 */ + 12721 "00110111" // /* MW 3 */ + 12722 "11010110" // /* MW 2 */ + 12723 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first + 12724 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12725 "01010010" // /* MW 3 */ + 12726 "00111000" // /* MW 2 */ + 12727 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first + 12728 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12729 "00101101" // /* MW 3 */ + 12730 "00100101" // /* MW 2 */ + 12731 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 + 12732 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12733 "00111111" // /* MW 5 */ + 12734 "11001000" // /* MW 4 */ + 12735 "00111000" // /* MW 3 */ + 12736 "01001010" // /* MW 2 */ + 12737 "11100101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first + 12738 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12739 "11111011" // /* MW 5 */ + 12740 "01110010" // /* MW 4 */ + 12741 "00111111" // /* MW 3 */ + 12742 "11110010" // /* MW 2 */ + 12743 "11111001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 47 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first + 12744 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12745 "00011111" // /* MW 5 */ + 12746 "01110000" // /* MW 4 */ + 12747 "00111001" // /* MW 3 */ + 12748 "11110010" // /* MW 2 */ + 12749 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first + 12750 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12751 "11111011" // /* MW 5 */ + 12752 "11001110" // /* MW 4 */ + 12753 "00111001" // /* MW 3 */ + 12754 "11001110" // /* MW 2 */ + 12755 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first + 12756 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12757 "11101010" // /* MW 5 */ + 12758 "10110011" // /* MW 4 */ + 12759 "10111001" // /* MW 3 */ + 12760 "00110101" // /* MW 2 */ + 12761 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first + 12762 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12763 "01011011" // /* MW 5 */ + 12764 "01111011" // /* MW 4 */ + 12765 "00111001" // /* MW 3 */ + 12766 "11111110" // /* MW 2 */ + 12767 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12768 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12769 "11100010" // /* MW 5 */ + 12770 "00110011" // /* MW 4 */ + 12771 "11111001" // /* MW 3 */ + 12772 "00100001" // /* MW 2 */ + 12773 "10010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first +.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first + 12774 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12775 "00000100" // /* MW 5 */ + 12776 "11110011" // /* MW 4 */ + 12777 "00111111" // /* MW 3 */ + 12778 "10000010" // /* MW 2 */ + 12779 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first + 12780 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12781 "01101101" // /* MW 3 */ + 12782 "11111111" // /* MW 2 */ + 12783 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 247 73 + 12784 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12785 "11111111" // /* MW 5 */ + 12786 "10111111" // /* MW 4 */ + 12787 "00111001" // /* MW 3 */ + 12788 "01100110" // /* MW 2 */ + 12789 "11110000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first + 12790 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12791 "11011011" // /* MW 5 */ + 12792 "11000110" // /* MW 4 */ + 12793 "00111000" // /* MW 3 */ + 12794 "10000110" // /* MW 2 */ + 12795 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first + 12796 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12797 "11111111" // /* MW 5 */ + 12798 "00110001" // /* MW 4 */ + 12799 "00111001" // /* MW 3 */ + 12800 "10100100" // /* MW 2 */ + 12801 "11000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12802 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12803 "11000011" // /* MW 5 */ + 12804 "11011011" // /* MW 4 */ + 12805 "00110011" // /* MW 3 */ + 12806 "11011010" // /* MW 2 */ + 12807 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12808 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12809 "01011011" // /* MW 5 */ + 12810 "01000011" // /* MW 4 */ + 12811 "00111000" // /* MW 3 */ + 12812 "11001010" // /* MW 2 */ + 12813 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12814 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12815 "01011011" // /* MW 5 */ + 12816 "11111100" // /* MW 4 */ + 12817 "00111001" // /* MW 3 */ + 12818 "10011110" // /* MW 2 */ + 12819 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12820 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12821 "11000001" // /* MW 5 */ + 12822 "11011010" // /* MW 4 */ + 12823 "00111110" // /* MW 3 */ + 12824 "11001110" // /* MW 2 */ + 12825 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first + 12826 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12827 "11110010" // /* MW 5 */ + 12828 "10111111" // /* MW 4 */ + 12829 "00011110" // /* MW 3 */ + 12830 "00100000" // /* MW 2 */ + 12831 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12832 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12833 "10100011" // /* MW 5 */ + 12834 "01000011" // /* MW 4 */ + 12835 "00111000" // /* MW 3 */ + 12836 "11011010" // /* MW 2 */ + 12837 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 + 12838 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12839 "01011001" // /* MW 9 */ + 12840 "11111111" // /* MW 8 */ + 12841 "00001111" // /* MW 7 */ + 12842 "01101110" // /* MW 6 */ + 12843 "01101101" // /* MW 5 */ + 12844 "00011111" // /* MW 4 */ + 12845 "00110000" // /* MW 3 */ + 12846 "11000010" // /* MW 2 */ + 12847 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first + 12848 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12849 "10000001" // /* MW 5 */ + 12850 "01101010" // /* MW 4 */ + 12851 "00111110" // /* MW 3 */ + 12852 "11001010" // /* MW 2 */ + 12853 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 253 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first + 12854 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12855 "11000011" // /* MW 5 */ + 12856 "01010010" // /* MW 4 */ + 12857 "00111010" // /* MW 3 */ + 12858 "11101010" // /* MW 2 */ + 12859 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 + 12860 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12861 "00001000" // /* MW 11 */ + 12862 "00010000" // /* MW 10 */ + 12863 "01101101" // /* MW 9 */ + 12864 "10110010" // /* MW 8 */ + 12865 "00001000" // /* MW 7 */ + 12866 "10101011" // /* MW 6 */ + 12867 "01110001" // /* MW 5 */ + 12868 "00011110" // /* MW 4 */ + 12869 "00000111" // /* MW 3 */ + 12870 "00010001" // /* MW 2 */ + 12871 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first + 12872 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12873 "01110001" // /* MW 3 */ + 12874 "00011110" // /* MW 2 */ + 12875 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first + 12876 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12877 "11111011" // /* MW 5 */ + 12878 "01010010" // /* MW 4 */ + 12879 "00111000" // /* MW 3 */ + 12880 "11000110" // /* MW 2 */ + 12881 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 + 12882 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12883 "10000011" // /* MW 5 */ + 12884 "01000010" // /* MW 4 */ + 12885 "00111100" // /* MW 3 */ + 12886 "11000010" // /* MW 2 */ + 12887 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first + 12888 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12889 "11111011" // /* MW 5 */ + 12890 "01010010" // /* MW 4 */ + 12891 "00111001" // /* MW 3 */ + 12892 "11000110" // /* MW 2 */ + 12893 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12894 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12895 "10000011" // /* MW 5 */ + 12896 "01000010" // /* MW 4 */ + 12897 "00111100" // /* MW 3 */ + 12898 "11000010" // /* MW 2 */ + 12899 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first + 12900 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12901 "01010001" // /* MW 3 */ + 12902 "00011110" // /* MW 2 */ + 12903 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first + 12904 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12905 "00110001" // /* MW 3 */ + 12906 "00011110" // /* MW 2 */ + 12907 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first + 12908 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12909 "00010001" // /* MW 3 */ + 12910 "00001010" // /* MW 2 */ + 12911 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first + 12912 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12913 "00001010" // /* MW 3 */ + 12914 "00000110" // /* MW 2 */ + 12915 "00000111" // /* MW 1 */ + 12916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12917 "00000000" // /* MW 1 */ + 12918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12919 "00000000" // /* MW 1 */ + 12920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12921 "00000000" // /* MW 1 */ + 12922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12923 "00000000" // /* MW 1 */ + 12924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12925 "00000000" // /* MW 1 */ + 12926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12927 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 58 + 12928 "10000100" // JZ r16, #12960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12960 delay_slots=5 */ + 12929 "00000001" // /* MW 5 */ + 12930 "00000000" // /* MW 4 */ + 12931 "01010000" // /* MW 3 */ + 12932 "00011001" // /* MW 2 */ + 12933 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12934 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12935 "01100000" // /* MW 3 */ + 12936 "00111011" // /* MW 2 */ + 12937 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12938 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12939 "00000000" // /* MW 5 */ + 12940 "10100000" // /* MW 4 */ + 12941 "00001001" // /* MW 3 */ + 12942 "01111111" // /* MW 2 */ + 12943 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12949 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12950 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12951 "00000001" // /* MW 9 */ + 12952 "00100110" // /* MW 8 */ + 12953 "00000000" // /* MW 7 */ + 12954 "00000000" // /* MW 6 */ + 12955 "01011011" // /* MW 5 */ + 12956 "00000001" // /* MW 4 */ + 12957 "11110000" // /* MW 3 */ + 12958 "00101100" // /* MW 2 */ + 12959 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 + 12960 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12961 "00010000" // /* MW 9 */ + 12962 "00110100" // /* MW 8 */ + 12963 "00110010" // /* MW 7 */ + 12964 "11110000" // /* MW 6 */ + 12965 "00000001" // /* MW 5 */ + 12966 "00000000" // /* MW 4 */ + 12967 "00100000" // /* MW 3 */ + 12968 "10000111" // /* MW 2 */ + 12969 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12970 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12971 "11100010" // /* MW 5 */ + 12972 "00000100" // /* MW 4 */ + 12973 "01010000" // /* MW 3 */ + 12974 "11000000" // /* MW 2 */ + 12975 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 + 12976 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12977 "11101001" // /* MW 5 */ + 12978 "00000010" // /* MW 4 */ + 12979 "00100001" // /* MW 3 */ + 12980 "10000011" // /* MW 2 */ + 12981 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12982 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12983 "00100101" // /* MW 5 */ + 12984 "00000001" // /* MW 4 */ + 12985 "00100000" // /* MW 3 */ + 12986 "00111110" // /* MW 2 */ + 12987 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first + 12988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12989 "00000001" // /* MW 5 */ + 12990 "00000000" // /* MW 4 */ + 12991 "00000000" // /* MW 3 */ + 12992 "11111000" // /* MW 2 */ + 12993 "11111111" // /* MW 1 */ + 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12995 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12997 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12998 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12999 "00010111" // /* MW 3 */ + 13000 "00000010" // /* MW 2 */ + 13001 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13002 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13003 "01000001" // /* MW 5 */ + 13004 "01110000" // /* MW 4 */ + 13005 "00001111" // /* MW 3 */ + 13006 "00000000" // /* MW 2 */ + 13007 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13008 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13009 "00010110" // /* MW 3 */ + 13010 "01000000" // /* MW 2 */ + 13011 "00001000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13012 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13013 "11000000" // /* MW 3 */ + 13014 "01100000" // /* MW 2 */ + 13015 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13016 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13017 "00000001" // /* MW 3 */ + 13018 "00000001" // /* MW 2 */ + 13019 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 13023 "00000000" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 2 "conv2d_dw_bf16.h" 199 first +.function_start + 13024 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13025 "11000000" // /* MW 3 */ + 13026 "01010110" // /* MW 2 */ + 13027 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 2 "conv2d_dw_bf16.h" 204 82 + 13028 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13029 "10101001" // /* MW 5 */ + 13030 "00000001" // /* MW 4 */ + 13031 "11011110" // /* MW 3 */ + 13032 "10010011" // /* MW 2 */ + 13033 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 1 "io_buffer_main.h" 125 25 + 13034 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13035 "00000010" // /* MW 5 */ + 13036 "11010001" // /* MW 4 */ + 13037 "11010110" // /* MW 3 */ + 13038 "10000011" // /* MW 2 */ + 13039 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 204 82 first + 13040 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13041 "10001010" // /* MW 3 */ + 13042 "11101000" // /* MW 2 */ + 13043 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 first + 13044 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13045 "01000110" // /* MW 3 */ + 13046 "11111101" // /* MW 2 */ + 13047 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13048 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13049 "00100110" // /* MW 3 */ + 13050 "00111101" // /* MW 2 */ + 13051 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13052 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13053 "01000110" // /* MW 3 */ + 13054 "11111111" // /* MW 2 */ + 13055 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13056 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13057 "00100110" // /* MW 3 */ + 13058 "00101111" // /* MW 2 */ + 13059 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13060 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13061 "00000110" // /* MW 3 */ + 13062 "00101101" // /* MW 2 */ + 13063 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 first + 13064 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13065 "01000110" // /* MW 3 */ + 13066 "11111100" // /* MW 2 */ + 13067 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13068 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13069 "00100110" // /* MW 3 */ + 13070 "00111100" // /* MW 2 */ + 13071 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13072 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13073 "01000110" // /* MW 3 */ + 13074 "11111110" // /* MW 2 */ + 13075 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13076 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13077 "00100110" // /* MW 3 */ + 13078 "00101110" // /* MW 2 */ + 13079 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13080 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13081 "00000110" // /* MW 3 */ + 13082 "00101100" // /* MW 2 */ + 13083 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 first + 13084 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13085 "11000110" // /* MW 3 */ + 13086 "11111100" // /* MW 2 */ + 13087 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13088 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13089 "10100110" // /* MW 3 */ + 13090 "00111100" // /* MW 2 */ + 13091 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13092 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13093 "11000110" // /* MW 3 */ + 13094 "11111110" // /* MW 2 */ + 13095 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13096 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13097 "10100110" // /* MW 3 */ + 13098 "00101110" // /* MW 2 */ + 13099 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13100 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13101 "10000110" // /* MW 3 */ + 13102 "00101100" // /* MW 2 */ + 13103 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 first + 13104 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13105 "11000110" // /* MW 3 */ + 13106 "11111111" // /* MW 2 */ + 13107 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 + 13108 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13109 "10100110" // /* MW 3 */ + 13110 "00101111" // /* MW 2 */ + 13111 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13112 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13113 "00010000" // /* MW 9 */ + 13114 "00110100" // /* MW 8 */ + 13115 "00110010" // /* MW 7 */ + 13116 "11110010" // /* MW 6 */ + 13117 "00000001" // /* MW 5 */ + 13118 "00000000" // /* MW 4 */ + 13119 "11010000" // /* MW 3 */ + 13120 "11110000" // /* MW 2 */ + 13121 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 13122 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13123 "10000001" // /* MW 5 */ + 13124 "11000101" // /* MW 4 */ + 13125 "01011000" // /* MW 3 */ + 13126 "10011000" // /* MW 2 */ + 13127 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13128 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13129 "00010000" // /* MW 3 */ + 13130 "00001111" // /* MW 2 */ + 13131 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 204 43 + 13132 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13133 "01011000" // /* MW 11 */ + 13134 "00000000" // /* MW 10 */ + 13135 "01100000" // /* MW 9 */ + 13136 "01101010" // /* MW 8 */ + 13137 "00100000" // /* MW 7 */ + 13138 "00000000" // /* MW 6 */ + 13139 "01101000" // /* MW 5 */ + 13140 "00111011" // /* MW 4 */ + 13141 "01110000" // /* MW 3 */ + 13142 "10000101" // /* MW 2 */ + 13143 "10000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 first +.src_ref 2 "conv2d_dw_bf16.h" 225 4 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13144 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 13145 "01100000" // /* MW 13 */ + 13146 "00001001" // /* MW 12 */ + 13147 "01100010" // /* MW 11 */ + 13148 "00001011" // /* MW 10 */ + 13149 "00010000" // /* MW 9 */ + 13150 "11100000" // /* MW 8 */ + 13151 "00101101" // /* MW 7 */ + 13152 "00000100" // /* MW 6 */ + 13153 "11101001" // /* MW 5 */ + 13154 "00111000" // /* MW 4 */ + 13155 "11010000" // /* MW 3 */ + 13156 "10111000" // /* MW 2 */ + 13157 "01111111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13158 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13159 "01110010" // /* MW 9 */ + 13160 "10010000" // /* MW 8 */ + 13161 "10000000" // /* MW 7 */ + 13162 "00000010" // /* MW 6 */ + 13163 "01001011" // /* MW 5 */ + 13164 "00001100" // /* MW 4 */ + 13165 "11010001" // /* MW 3 */ + 13166 "10110100" // /* MW 2 */ + 13167 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13168 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13169 "01111110" // /* MW 9 */ + 13170 "11000000" // /* MW 8 */ + 13171 "11100001" // /* MW 7 */ + 13172 "00000011" // /* MW 6 */ + 13173 "10010000" // /* MW 5 */ + 13174 "10101011" // /* MW 4 */ + 13175 "11010001" // /* MW 3 */ + 13176 "00110000" // /* MW 2 */ + 13177 "01101101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 13178 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13179 "01011110" // /* MW 9 */ + 13180 "10010000" // /* MW 8 */ + 13181 "00000111" // /* MW 7 */ + 13182 "00000010" // /* MW 6 */ + 13183 "11110100" // /* MW 5 */ + 13184 "11110000" // /* MW 4 */ + 13185 "11010001" // /* MW 3 */ + 13186 "00001010" // /* MW 2 */ + 13187 "01111001" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13188 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13189 "10000010" // /* MW 5 */ + 13190 "00000000" // /* MW 4 */ + 13191 "01010000" // /* MW 3 */ + 13192 "00011110" // /* MW 2 */ + 13193 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first + 13194 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13195 "00010000" // /* MW 11 */ + 13196 "11111000" // /* MW 10 */ + 13197 "01111001" // /* MW 9 */ + 13198 "00001100" // /* MW 8 */ + 13199 "00000000" // /* MW 7 */ + 13200 "00000000" // /* MW 6 */ + 13201 "01001011" // /* MW 5 */ + 13202 "00010000" // /* MW 4 */ + 13203 "11010110" // /* MW 3 */ + 13204 "11000000" // /* MW 2 */ + 13205 "01101001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 + 13206 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13207 "00010000" // /* MW 11 */ + 13208 "00101000" // /* MW 10 */ + 13209 "10111010" // /* MW 9 */ + 13210 "00001101" // /* MW 8 */ + 13211 "00000000" // /* MW 7 */ + 13212 "00000000" // /* MW 6 */ + 13213 "01001011" // /* MW 5 */ + 13214 "00010000" // /* MW 4 */ + 13215 "11010010" // /* MW 3 */ + 13216 "10010010" // /* MW 2 */ + 13217 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first + 13218 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13219 "00000101" // /* MW 5 */ + 13220 "01100001" // /* MW 4 */ + 13221 "10000100" // /* MW 3 */ + 13222 "00010110" // /* MW 2 */ + 13223 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 + 13224 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13225 "10001010" // /* MW 3 */ + 13226 "00000000" // /* MW 2 */ + 13227 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 244 4 + 13228 "10111010" // LDA r5, [p3]; MOVXM p3, #13456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13229 "00010000" // /* MW 9 */ + 13230 "01001000" // /* MW 8 */ + 13231 "10110010" // /* MW 7 */ + 13232 "00001101" // /* MW 6 */ + 13233 "00000000" // /* MW 5 */ + 13234 "00000000" // /* MW 4 */ + 13235 "11010000" // /* MW 3 */ + 13236 "10010110" // /* MW 2 */ + 13237 "01100000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first + 13238 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13239 "10101000" // /* MW 9 */ + 13240 "00000001" // /* MW 8 */ + 13241 "10001110" // /* MW 7 */ + 13242 "00001010" // /* MW 6 */ + 13243 "00010100" // /* MW 5 */ + 13244 "00000000" // /* MW 4 */ + 13245 "11110000" // /* MW 3 */ + 13246 "00101100" // /* MW 2 */ + 13247 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.src_ref 2 "conv2d_dw_bf16.h" 271 12 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 +.src_ref 2 "conv2d_dw_bf16.h" 273 12 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 +.src_ref 2 "conv2d_dw_bf16.h" 275 12 +.src_ref 2 "conv2d_dw_bf16.h" 276 12 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 + 13248 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13249 "00000000" // /* MW 15 */ + 13250 "00000000" // /* MW 14 */ + 13251 "01111000" // /* MW 13 */ + 13252 "10111001" // /* MW 12 */ + 13253 "00001110" // /* MW 11 */ + 13254 "00001000" // /* MW 10 */ + 13255 "00110110" // /* MW 9 */ + 13256 "00000000" // /* MW 8 */ + 13257 "01011011" // /* MW 7 */ + 13258 "00000001" // /* MW 6 */ + 13259 "00100000" // /* MW 5 */ + 13260 "00000000" // /* MW 4 */ + 13261 "00000000" // /* MW 3 */ + 13262 "10010001" // /* MW 2 */ + 13263 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13264 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13265 "01101010" // /* MW 15 */ + 13266 "01100011" // /* MW 14 */ + 13267 "10101100" // /* MW 13 */ + 13268 "00000011" // /* MW 12 */ + 13269 "00001110" // /* MW 11 */ + 13270 "00000010" // /* MW 10 */ + 13271 "11010100" // /* MW 9 */ + 13272 "00001101" // /* MW 8 */ + 13273 "01001011" // /* MW 7 */ + 13274 "00010000" // /* MW 6 */ + 13275 "00100000" // /* MW 5 */ + 13276 "00000000" // /* MW 4 */ + 13277 "11110000" // /* MW 3 */ + 13278 "00101100" // /* MW 2 */ + 13279 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13280 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13281 "00011010" // /* MW 15 */ + 13282 "01001000" // /* MW 14 */ + 13283 "11001100" // /* MW 13 */ + 13284 "00111111" // /* MW 12 */ + 13285 "10111001" // /* MW 11 */ + 13286 "11011010" // /* MW 10 */ + 13287 "00101111" // /* MW 9 */ + 13288 "00000100" // /* MW 8 */ + 13289 "01001011" // /* MW 7 */ + 13290 "00010000" // /* MW 6 */ + 13291 "00100101" // /* MW 5 */ + 13292 "00000000" // /* MW 4 */ + 13293 "11010000" // /* MW 3 */ + 13294 "10100011" // /* MW 2 */ + 13295 "01000000" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13297 "01101110" // /* MW 9 */ + 13298 "10000001" // /* MW 8 */ + 13299 "10000100" // /* MW 7 */ + 13300 "00000010" // /* MW 6 */ + 13301 "11110100" // /* MW 5 */ + 13302 "11110000" // /* MW 4 */ + 13303 "01110001" // /* MW 3 */ + 13304 "10110011" // /* MW 2 */ + 13305 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13307 "00000001" // /* MW 9 */ + 13308 "10001001" // /* MW 8 */ + 13309 "10001010" // /* MW 7 */ + 13310 "01000110" // /* MW 6 */ + 13311 "00001011" // /* MW 5 */ + 13312 "10011100" // /* MW 4 */ + 13313 "11101010" // /* MW 3 */ + 13314 "00111000" // /* MW 2 */ + 13315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13317 "00000001" // /* MW 9 */ + 13318 "00110101" // /* MW 8 */ + 13319 "10001001" // /* MW 7 */ + 13320 "11000110" // /* MW 6 */ + 13321 "10000110" // /* MW 5 */ + 13322 "00110000" // /* MW 4 */ + 13323 "01101010" // /* MW 3 */ + 13324 "10110001" // /* MW 2 */ + 13325 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13327 "00000110" // /* MW 3 */ + 13328 "10001001" // /* MW 2 */ + 13329 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13331 "10100001" // /* MW 7 */ + 13332 "01001000" // /* MW 6 */ + 13333 "10001100" // /* MW 5 */ + 13334 "11000110" // /* MW 4 */ + 13335 "10001110" // /* MW 3 */ + 13336 "10110000" // /* MW 2 */ + 13337 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13339 "10100001" // /* MW 7 */ + 13340 "00110110" // /* MW 6 */ + 13341 "10001010" // /* MW 5 */ + 13342 "01000110" // /* MW 4 */ + 13343 "00001111" // /* MW 3 */ + 13344 "10011100" // /* MW 2 */ + 13345 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13347 "00001110" // /* MW 3 */ + 13348 "10001001" // /* MW 2 */ + 13349 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13351 "11100001" // /* MW 7 */ + 13352 "10010010" // /* MW 6 */ + 13353 "10001011" // /* MW 5 */ + 13354 "01000110" // /* MW 4 */ + 13355 "00000011" // /* MW 3 */ + 13356 "00011100" // /* MW 2 */ + 13357 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13359 "11100001" // /* MW 7 */ + 13360 "01010110" // /* MW 6 */ + 13361 "10001000" // /* MW 5 */ + 13362 "01000110" // /* MW 4 */ + 13363 "00000111" // /* MW 3 */ + 13364 "00011100" // /* MW 2 */ + 13365 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13366 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13367 "01101110" // /* MW 9 */ + 13368 "01000001" // /* MW 8 */ + 13369 "00011000" // /* MW 7 */ + 13370 "00000001" // /* MW 6 */ + 13371 "00010000" // /* MW 5 */ + 13372 "00000000" // /* MW 4 */ + 13373 "11110000" // /* MW 3 */ + 13374 "00101100" // /* MW 2 */ + 13375 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13377 "01101010" // /* MW 15 */ + 13378 "01100011" // /* MW 14 */ + 13379 "01111100" // /* MW 13 */ + 13380 "10100101" // /* MW 12 */ + 13381 "00000001" // /* MW 11 */ + 13382 "00000000" // /* MW 10 */ + 13383 "00000000" // /* MW 9 */ + 13384 "00000000" // /* MW 8 */ + 13385 "01011011" // /* MW 7 */ + 13386 "00000001" // /* MW 6 */ + 13387 "00100000" // /* MW 5 */ + 13388 "00000000" // /* MW 4 */ + 13389 "11110000" // /* MW 3 */ + 13390 "00101100" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 13392 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13393 "00011010" // /* MW 15 */ + 13394 "01001000" // /* MW 14 */ + 13395 "01111100" // /* MW 13 */ + 13396 "10100101" // /* MW 12 */ + 13397 "00000001" // /* MW 11 */ + 13398 "00000000" // /* MW 10 */ + 13399 "00000000" // /* MW 9 */ + 13400 "00000000" // /* MW 8 */ + 13401 "01011011" // /* MW 7 */ + 13402 "00000001" // /* MW 6 */ + 13403 "00100000" // /* MW 5 */ + 13404 "00000000" // /* MW 4 */ + 13405 "11110000" // /* MW 3 */ + 13406 "00101100" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13408 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13409 "01101110" // /* MW 9 */ + 13410 "10000001" // /* MW 8 */ + 13411 "10000100" // /* MW 7 */ + 13412 "00000010" // /* MW 6 */ + 13413 "10010000" // /* MW 5 */ + 13414 "01110011" // /* MW 4 */ + 13415 "11110100" // /* MW 3 */ + 13416 "00001100" // /* MW 2 */ + 13417 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13418 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13419 "00000001" // /* MW 7 */ + 13420 "10001001" // /* MW 6 */ + 13421 "10001010" // /* MW 5 */ + 13422 "01000110" // /* MW 4 */ + 13423 "00001011" // /* MW 3 */ + 13424 "10011100" // /* MW 2 */ + 13425 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13426 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13427 "00000001" // /* MW 7 */ + 13428 "00110101" // /* MW 6 */ + 13429 "10001001" // /* MW 5 */ + 13430 "11000110" // /* MW 4 */ + 13431 "10000110" // /* MW 3 */ + 13432 "00110000" // /* MW 2 */ + 13433 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13434 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13435 "00000110" // /* MW 3 */ + 13436 "10001001" // /* MW 2 */ + 13437 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13438 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13439 "10100001" // /* MW 7 */ + 13440 "01001000" // /* MW 6 */ + 13441 "10001100" // /* MW 5 */ + 13442 "01000110" // /* MW 4 */ + 13443 "00001111" // /* MW 3 */ + 13444 "10011100" // /* MW 2 */ + 13445 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13446 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13447 "10100001" // /* MW 9 */ + 13448 "00110110" // /* MW 8 */ + 13449 "10001010" // /* MW 7 */ + 13450 "11000010" // /* MW 6 */ + 13451 "10001110" // /* MW 5 */ + 13452 "10110000" // /* MW 4 */ + 13453 "11110100" // /* MW 3 */ + 13454 "00101100" // /* MW 2 */ + 13455 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13456 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13457 "00011101" // /* MW 5 */ + 13458 "00010010" // /* MW 4 */ + 13459 "10001011" // /* MW 3 */ + 13460 "00011110" // /* MW 2 */ + 13461 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13462 "01011010" // MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13463 "11100001" // /* MW 9 */ + 13464 "10010010" // /* MW 8 */ + 13465 "10001011" // /* MW 7 */ + 13466 "00000010" // /* MW 6 */ + 13467 "01010100" // /* MW 5 */ + 13468 "10110111" // /* MW 4 */ + 13469 "00000001" // /* MW 3 */ + 13470 "00000000" // /* MW 2 */ + 13471 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13472 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13473 "11100001" // /* MW 11 */ + 13474 "01010110" // /* MW 10 */ + 13475 "10001000" // /* MW 9 */ + 13476 "00000010" // /* MW 8 */ + 13477 "01001111" // /* MW 7 */ + 13478 "10001111" // /* MW 6 */ + 13479 "00000001" // /* MW 5 */ + 13480 "00000000" // /* MW 4 */ + 13481 "01110000" // /* MW 3 */ + 13482 "10000101" // /* MW 2 */ + 13483 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13484 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13485 "01111111" // /* MW 3 */ + 13486 "01110010" // /* MW 2 */ + 13487 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13488 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13489 "10011011" // /* MW 3 */ + 13490 "00011101" // /* MW 2 */ + 13491 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13492 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13493 "01110100" // /* MW 3 */ + 13494 "00011100" // /* MW 2 */ + 13495 "00111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13496 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13497 "10110100" // /* MW 3 */ + 13498 "01011000" // /* MW 2 */ + 13499 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13500 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13501 "10010110" // /* MW 3 */ + 13502 "00010001" // /* MW 2 */ + 13503 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13504 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13505 "00010110" // /* MW 3 */ + 13506 "00010000" // /* MW 2 */ + 13507 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13508 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13509 "01101100" // /* MW 3 */ + 13510 "01010000" // /* MW 2 */ + 13511 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13512 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13513 "00010100" // /* MW 3 */ + 13514 "01010011" // /* MW 2 */ + 13515 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13516 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13517 "01110000" // /* MW 7 */ + 13518 "00110110" // /* MW 6 */ + 13519 "10101000" // /* MW 5 */ + 13520 "00000010" // /* MW 4 */ + 13521 "01100000" // /* MW 3 */ + 13522 "01000010" // /* MW 2 */ + 13523 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13524 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13525 "00000011" // /* MW 3 */ + 13526 "00011100" // /* MW 2 */ + 13527 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13528 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13529 "01110000" // /* MW 7 */ + 13530 "01000101" // /* MW 6 */ + 13531 "10000000" // /* MW 5 */ + 13532 "00000001" // /* MW 4 */ + 13533 "01100000" // /* MW 3 */ + 13534 "01010010" // /* MW 2 */ + 13535 "01000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13536 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13537 "01000001" // /* MW 7 */ + 13538 "01101101" // /* MW 6 */ + 13539 "10001100" // /* MW 5 */ + 13540 "01000110" // /* MW 4 */ + 13541 "00000111" // /* MW 3 */ + 13542 "00011100" // /* MW 2 */ + 13543 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13544 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13545 "01000001" // /* MW 7 */ + 13546 "00000011" // /* MW 6 */ + 13547 "10001001" // /* MW 5 */ + 13548 "11000110" // /* MW 4 */ + 13549 "10000010" // /* MW 3 */ + 13550 "00110000" // /* MW 2 */ + 13551 "00000010" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 13552 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13553 "01101110" // /* MW 9 */ + 13554 "10000001" // /* MW 8 */ + 13555 "10000100" // /* MW 7 */ + 13556 "00000010" // /* MW 6 */ + 13557 "11110100" // /* MW 5 */ + 13558 "11110000" // /* MW 4 */ + 13559 "01110001" // /* MW 3 */ + 13560 "10110011" // /* MW 2 */ + 13561 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13562 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13563 "00000001" // /* MW 9 */ + 13564 "10001001" // /* MW 8 */ + 13565 "10001010" // /* MW 7 */ + 13566 "01000110" // /* MW 6 */ + 13567 "00001011" // /* MW 5 */ + 13568 "10011100" // /* MW 4 */ + 13569 "11101010" // /* MW 3 */ + 13570 "00111000" // /* MW 2 */ + 13571 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13572 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13573 "00000001" // /* MW 9 */ + 13574 "00110101" // /* MW 8 */ + 13575 "10001001" // /* MW 7 */ + 13576 "11000110" // /* MW 6 */ + 13577 "10000110" // /* MW 5 */ + 13578 "00110000" // /* MW 4 */ + 13579 "01101010" // /* MW 3 */ + 13580 "10110001" // /* MW 2 */ + 13581 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13582 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13583 "00000110" // /* MW 3 */ + 13584 "10001001" // /* MW 2 */ + 13585 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13586 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13587 "10100001" // /* MW 7 */ + 13588 "01001000" // /* MW 6 */ + 13589 "10001100" // /* MW 5 */ + 13590 "11000110" // /* MW 4 */ + 13591 "10001110" // /* MW 3 */ + 13592 "10110000" // /* MW 2 */ + 13593 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13594 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13595 "10100001" // /* MW 7 */ + 13596 "00110110" // /* MW 6 */ + 13597 "10001010" // /* MW 5 */ + 13598 "01000110" // /* MW 4 */ + 13599 "00001111" // /* MW 3 */ + 13600 "10011100" // /* MW 2 */ + 13601 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13602 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13603 "00001110" // /* MW 3 */ + 13604 "10001001" // /* MW 2 */ + 13605 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13606 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13607 "11100001" // /* MW 7 */ + 13608 "10010010" // /* MW 6 */ + 13609 "10001011" // /* MW 5 */ + 13610 "01000110" // /* MW 4 */ + 13611 "00000011" // /* MW 3 */ + 13612 "00011100" // /* MW 2 */ + 13613 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13614 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13615 "11100001" // /* MW 7 */ + 13616 "01010110" // /* MW 6 */ + 13617 "10001000" // /* MW 5 */ + 13618 "01000110" // /* MW 4 */ + 13619 "00000111" // /* MW 3 */ + 13620 "00011100" // /* MW 2 */ + 13621 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13622 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13623 "00000101" // /* MW 5 */ + 13624 "01100001" // /* MW 4 */ + 13625 "11110100" // /* MW 3 */ + 13626 "00101100" // /* MW 2 */ + 13627 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13628 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13629 "01000001" // /* MW 3 */ + 13630 "01101101" // /* MW 2 */ + 13631 "10001100" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13632 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13633 "00011010" // /* MW 15 */ + 13634 "01001000" // /* MW 14 */ + 13635 "01111100" // /* MW 13 */ + 13636 "10100101" // /* MW 12 */ + 13637 "00000001" // /* MW 11 */ + 13638 "00000000" // /* MW 10 */ + 13639 "00000000" // /* MW 9 */ + 13640 "00000000" // /* MW 8 */ + 13641 "01011011" // /* MW 7 */ + 13642 "00000001" // /* MW 6 */ + 13643 "00100000" // /* MW 5 */ + 13644 "00000000" // /* MW 4 */ + 13645 "11110000" // /* MW 3 */ + 13646 "00101100" // /* MW 2 */ + 13647 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 244 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13648 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 13649 "01101000" // /* MW 11 */ + 13650 "10000001" // /* MW 10 */ + 13651 "10000100" // /* MW 9 */ + 13652 "00000010" // /* MW 8 */ + 13653 "00100111" // /* MW 7 */ + 13654 "00000100" // /* MW 6 */ + 13655 "00100000" // /* MW 5 */ + 13656 "11100111" // /* MW 4 */ + 13657 "11111000" // /* MW 3 */ + 13658 "00001100" // /* MW 2 */ + 13659 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13660 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13661 "00000001" // /* MW 7 */ + 13662 "10001001" // /* MW 6 */ + 13663 "10001010" // /* MW 5 */ + 13664 "01000110" // /* MW 4 */ + 13665 "00001011" // /* MW 3 */ + 13666 "10011100" // /* MW 2 */ + 13667 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13668 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13669 "00000001" // /* MW 7 */ + 13670 "00110101" // /* MW 6 */ + 13671 "10001001" // /* MW 5 */ + 13672 "11000110" // /* MW 4 */ + 13673 "10000110" // /* MW 3 */ + 13674 "00110000" // /* MW 2 */ + 13675 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13676 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13677 "00000110" // /* MW 3 */ + 13678 "10001001" // /* MW 2 */ + 13679 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13680 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13681 "10100001" // /* MW 7 */ + 13682 "01001000" // /* MW 6 */ + 13683 "10001100" // /* MW 5 */ + 13684 "01000110" // /* MW 4 */ + 13685 "00001111" // /* MW 3 */ + 13686 "10011100" // /* MW 2 */ + 13687 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13688 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13689 "10100001" // /* MW 7 */ + 13690 "00110110" // /* MW 6 */ + 13691 "10001010" // /* MW 5 */ + 13692 "11000110" // /* MW 4 */ + 13693 "10001110" // /* MW 3 */ + 13694 "10110000" // /* MW 2 */ + 13695 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13696 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13697 "00001110" // /* MW 3 */ + 13698 "10001001" // /* MW 2 */ + 13699 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13700 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13701 "11100001" // /* MW 3 */ + 13702 "10010010" // /* MW 2 */ + 13703 "10001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13704 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13705 "11100001" // /* MW 3 */ + 13706 "01010110" // /* MW 2 */ + 13707 "10001000" // /* MW 1 */ + 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13709 "00000000" // /* MW 1 */ + 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13711 "00000000" // /* MW 1 */ + 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13713 "00000000" // /* MW 1 */ + 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13715 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first + 13716 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13717 "10010110" // /* MW 3 */ + 13718 "00010001" // /* MW 2 */ + 13719 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 290 first + 13720 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13721 "00000000" // /* MW 5 */ + 13722 "01010000" // /* MW 4 */ + 13723 "11000000" // /* MW 3 */ + 13724 "00000010" // /* MW 2 */ + 13725 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13726 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13727 "01101100" // /* MW 3 */ + 13728 "01010000" // /* MW 2 */ + 13729 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.delay_slot + 13730 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13731 "00010100" // /* MW 3 */ + 13732 "01010011" // /* MW 2 */ + 13733 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13734 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13735 "01101100" // /* MW 3 */ + 13736 "01010000" // /* MW 2 */ + 13737 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.delay_slot + 13738 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13739 "00010011" // /* MW 3 */ + 13740 "10001010" // /* MW 2 */ + 13741 "00001010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 13742 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13743 "10010011" // /* MW 3 */ + 13744 "00111010" // /* MW 2 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 13745 "00001010" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 444 first +.src_ref 7 "superkernels.cpp" 449 6 +.function_start + 13760 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13761 "10000000" // /* MW 5 */ + 13762 "11001000" // /* MW 4 */ + 13763 "11001000" // /* MW 3 */ + 13764 "00000111" // /* MW 2 */ + 13765 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 first + 13766 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13767 "01000001" // /* MW 5 */ + 13768 "00101111" // /* MW 4 */ + 13769 "11010000" // /* MW 3 */ + 13770 "11000010" // /* MW 2 */ + 13771 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 444 + 13772 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13773 "00000001" // /* MW 5 */ + 13774 "00000000" // /* MW 4 */ + 13775 "00000000" // /* MW 3 */ + 13776 "00010000" // /* MW 2 */ + 13777 "00000000" // /* MW 1 */ + 13778 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13779 "01110000" // /* MW 7 */ + 13780 "01110000" // /* MW 6 */ + 13781 "00101101" // /* MW 5 */ + 13782 "00000010" // /* MW 4 */ + 13783 "10110000" // /* MW 3 */ + 13784 "00111010" // /* MW 2 */ + 13785 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 + 13786 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13787 "01110000" // /* MW 7 */ + 13788 "11110000" // /* MW 6 */ + 13789 "10101000" // /* MW 5 */ + 13790 "00000001" // /* MW 4 */ + 13791 "10110000" // /* MW 3 */ + 13792 "10110110" // /* MW 2 */ + 13793 "11111111" // /* MW 1 */ + 13794 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13795 "00011101" // /* MW 3 */ + 13796 "11101100" // /* MW 2 */ + 13797 "00001111" // /* MW 1 */ + 13798 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13799 "10011101" // /* MW 3 */ + 13800 "11110111" // /* MW 2 */ + 13801 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 + 13802 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13803 "01110000" // /* MW 7 */ + 13804 "01100000" // /* MW 6 */ + 13805 "11001010" // /* MW 5 */ + 13806 "00000001" // /* MW 4 */ + 13807 "10110000" // /* MW 3 */ + 13808 "00000010" // /* MW 2 */ + 13809 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 +.src_ref 7 "superkernels.cpp" 449 16 + 13810 "10000100" // JNZ r16, #13936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13936 delay_slots=5 */ + 13811 "00000001" // /* MW 5 */ + 13812 "01000000" // /* MW 4 */ + 13813 "00111000" // /* MW 3 */ + 13814 "00011011" // /* MW 2 */ + 13815 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 13816 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13817 "11000000" // /* MW 3 */ + 13818 "11010110" // /* MW 2 */ + 13819 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 22 first +.delay_slot + 13820 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13821 "10010000" // /* MW 3 */ + 13822 "01100010" // /* MW 2 */ + 13823 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 30 +.delay_slot + 13824 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13825 "11111011" // /* MW 3 */ + 13826 "01100011" // /* MW 2 */ + 13827 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13828 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13829 "10100000" // /* MW 5 */ + 13830 "11001000" // /* MW 4 */ + 13831 "11000110" // /* MW 3 */ + 13832 "00000111" // /* MW 2 */ + 13833 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13834 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13835 "00110001" // /* MW 3 */ + 13836 "00000110" // /* MW 2 */ + 13837 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13838 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13839 "00010001" // /* MW 9 */ + 13840 "00110100" // /* MW 8 */ + 13841 "10110010" // /* MW 7 */ + 13842 "11110000" // /* MW 6 */ + 13843 "00000001" // /* MW 5 */ + 13844 "00000000" // /* MW 4 */ + 13845 "01100000" // /* MW 3 */ + 13846 "10010001" // /* MW 2 */ + 13847 "11110000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13848 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13849 "00010000" // /* MW 11 */ + 13850 "00110010" // /* MW 10 */ + 13851 "10110010" // /* MW 9 */ + 13852 "11110000" // /* MW 8 */ + 13853 "00000001" // /* MW 7 */ + 13854 "00000000" // /* MW 6 */ + 13855 "10001011" // /* MW 5 */ + 13856 "10001000" // /* MW 4 */ + 13857 "11100000" // /* MW 3 */ + 13858 "11000000" // /* MW 2 */ + 13859 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13861 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 13862 "00000100" // JL #12352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12352 delay_slots=5 */ + 13863 "00000001" // /* MW 5 */ + 13864 "00000000" // /* MW 4 */ + 13865 "00100000" // /* MW 3 */ + 13866 "00011000" // /* MW 2 */ + 13867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13871 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13872 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13873 "00110001" // /* MW 3 */ + 13874 "00100000" // /* MW 2 */ + 13875 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 13876 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13877 "00000101" // /* MW 3 */ + 13878 "00100000" // /* MW 2 */ + 13879 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 13880 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13881 "01110000" // /* MW 7 */ + 13882 "10100101" // /* MW 6 */ + 13883 "00000001" // /* MW 5 */ + 13884 "00000000" // /* MW 4 */ + 13885 "00110000" // /* MW 3 */ + 13886 "11000010" // /* MW 2 */ + 13887 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 +.src_ref 7 "superkernels.cpp" 461 2 +.return_address + 13888 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13889 "00000000" // /* MW 7 */ + 13890 "10000010" // /* MW 6 */ + 13891 "00110011" // /* MW 5 */ + 13892 "00000001" // /* MW 4 */ + 13893 "01100000" // /* MW 3 */ + 13894 "10010001" // /* MW 2 */ + 13895 "00110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 17 first + 13896 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13897 "00111010" // /* MW 3 */ + 13898 "00000110" // /* MW 2 */ + 13899 "00000010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 +.src_ref 7 "superkernels.cpp" 453 15 first + 13900 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13901 "00010000" // /* MW 9 */ + 13902 "00110000" // /* MW 8 */ + 13903 "00110010" // /* MW 7 */ + 13904 "11110001" // /* MW 6 */ + 13905 "00000001" // /* MW 5 */ + 13906 "00000000" // /* MW 4 */ + 13907 "01010000" // /* MW 3 */ + 13908 "11000011" // /* MW 2 */ + 13909 "01000100" // /* MW 1 */ + 13910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13911 "00000000" // /* MW 1 */ + 13912 "10000100" // J #13952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13952 delay_slots=5 */ + 13913 "00000000" // /* MW 5 */ + 13914 "00000000" // /* MW 4 */ + 13915 "01000000" // /* MW 3 */ + 13916 "00011011" // /* MW 2 */ + 13917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 +.src_ref 7 "superkernels.cpp" 457 26 +.delay_slot + 13918 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13919 "10110000" // /* MW 5 */ + 13920 "11001000" // /* MW 4 */ + 13921 "11000110" // /* MW 3 */ + 13922 "00000111" // /* MW 2 */ + 13923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13927 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 first +.delay_slot + 13928 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13929 "00110001" // /* MW 3 */ + 13930 "00000110" // /* MW 2 */ + 13931 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 first +.delay_slot + 13932 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13933 "00010001" // /* MW 3 */ + 13934 "00000110" // /* MW 2 */ + 13935 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 7 "superkernels.cpp" 457 26 + 13936 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13937 "00000000" // /* MW 15 */ + 13938 "00000000" // /* MW 14 */ + 13939 "00010000" // /* MW 13 */ + 13940 "00101100" // /* MW 12 */ + 13941 "10110010" // /* MW 11 */ + 13942 "11110001" // /* MW 10 */ + 13943 "00000001" // /* MW 9 */ + 13944 "00000000" // /* MW 8 */ + 13945 "01011011" // /* MW 7 */ + 13946 "00000001" // /* MW 6 */ + 13947 "00100000" // /* MW 5 */ + 13948 "00000000" // /* MW 4 */ + 13949 "11110000" // /* MW 3 */ + 13950 "00101100" // /* MW 2 */ + 13951 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 218 49 first + 13952 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13953 "10000110" // /* MW 3 */ + 13954 "01100111" // /* MW 2 */ + 13955 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 +.src_ref 1 "io_buffer_main.h" 218 49 + 13956 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13957 "00010000" // /* MW 9 */ + 13958 "00101000" // /* MW 8 */ + 13959 "00110010" // /* MW 7 */ + 13960 "11110010" // /* MW 6 */ + 13961 "00000001" // /* MW 5 */ + 13962 "00000000" // /* MW 4 */ + 13963 "11010000" // /* MW 3 */ + 13964 "11101110" // /* MW 2 */ + 13965 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 13966 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13967 "00010110" // /* MW 3 */ + 13968 "11111110" // /* MW 2 */ + 13969 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 13970 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13971 "00110110" // /* MW 3 */ + 13972 "11111110" // /* MW 2 */ + 13973 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 13974 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13975 "01010110" // /* MW 3 */ + 13976 "01000110" // /* MW 2 */ + 13977 "00000010" // /* MW 1 */ + 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13979 "00000000" // /* MW 1 */ + 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13981 "00000000" // /* MW 1 */ + 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13983 "00000000" // /* MW 1 */ + 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13985 "00000000" // /* MW 1 */ + 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13987 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 13988 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13989 "00000010" // /* MW 3 */ + 13990 "01100001" // /* MW 2 */ + 13991 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 13992 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13993 "00010001" // /* MW 3 */ + 13994 "00000110" // /* MW 2 */ + 13995 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 13996 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13997 "11111101" // /* MW 3 */ + 13998 "11100000" // /* MW 2 */ + 13999 "00010111" // /* MW 1 */ + 14000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14001 "00000000" // /* MW 1 */ + 14002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14003 "00000000" // /* MW 1 */ + 14004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14005 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14006 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14007 "00001000" // /* MW 3 */ + 14008 "10010011" // /* MW 2 */ + 14009 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 +.src_ref 7 "superkernels.cpp" 459 47 +.src_ref 7 "superkernels.cpp" 464 6 +.src_ref 7 "superkernels.cpp" 465 16 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 14010 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14011 "00010000" // /* MW 9 */ + 14012 "00100000" // /* MW 8 */ + 14013 "10110010" // /* MW 7 */ + 14014 "11110011" // /* MW 6 */ + 14015 "00000001" // /* MW 5 */ + 14016 "00000000" // /* MW 4 */ + 14017 "00000000" // /* MW 3 */ + 14018 "00101111" // /* MW 2 */ + 14019 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 + 14020 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14021 "11000001" // /* MW 5 */ + 14022 "00101011" // /* MW 4 */ + 14023 "00101000" // /* MW 3 */ + 14024 "00000000" // /* MW 2 */ + 14025 "00000110" // /* MW 1 */ + 14026 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14027 "01011010" // /* MW 3 */ + 14028 "01101000" // /* MW 2 */ + 14029 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 14030 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14031 "10000001" // /* MW 5 */ + 14032 "00101001" // /* MW 4 */ + 14033 "00100111" // /* MW 3 */ + 14034 "11010011" // /* MW 2 */ + 14035 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 first + 14036 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14037 "00110110" // /* MW 3 */ + 14038 "00000110" // /* MW 2 */ + 14039 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 26 +.src_ref 7 "superkernels.cpp" 461 2 + 14040 "10111010" // LDA r16, [p3]; MOVXM p3, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14041 "00010000" // /* MW 9 */ + 14042 "11100000" // /* MW 8 */ + 14043 "10110011" // /* MW 7 */ + 14044 "11110001" // /* MW 6 */ + 14045 "00000001" // /* MW 5 */ + 14046 "00000000" // /* MW 4 */ + 14047 "11010000" // /* MW 3 */ + 14048 "11000010" // /* MW 2 */ + 14049 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 14050 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14051 "01010110" // /* MW 3 */ + 14052 "00000110" // /* MW 2 */ + 14053 "00000111" // /* MW 1 */ + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ + 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14057 "00000000" // /* MW 1 */ + 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14059 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 14060 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14061 "01110110" // /* MW 3 */ + 14062 "00000110" // /* MW 2 */ + 14063 "00000101" // /* MW 1 */ + 14064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 24 first + 14066 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14067 "00001111" // /* MW 3 */ + 14068 "01100001" // /* MW 2 */ + 14069 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 14070 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14071 "00000111" // /* MW 3 */ + 14072 "10100010" // /* MW 2 */ + 14073 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first + 14074 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14075 "11111101" // /* MW 3 */ + 14076 "00100000" // /* MW 2 */ + 14077 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 first +.no_stack_arguments + 14078 "00000100" // JL #13024 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */ + 14079 "00000001" // /* MW 5 */ + 14080 "00000000" // /* MW 4 */ + 14081 "01110000" // /* MW 3 */ + 14082 "00011001" // /* MW 2 */ + 14083 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first +.delay_slot + 14084 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14085 "00110001" // /* MW 3 */ + 14086 "00000110" // /* MW 2 */ + 14087 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first +.delay_slot + 14088 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14089 "11000001" // /* MW 3 */ + 14090 "01001001" // /* MW 2 */ + 14091 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 201 10 first +.delay_slot + 14092 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14093 "00100101" // /* MW 3 */ + 14094 "10110100" // /* MW 2 */ + 14095 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 first +.delay_slot + 14096 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14097 "00010101" // /* MW 3 */ + 14098 "10111011" // /* MW 2 */ + 14099 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 +.delay_slot + 14100 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14101 "11000001" // /* MW 11 */ + 14102 "10001010" // /* MW 10 */ + 14103 "11011111" // /* MW 9 */ + 14104 "00000011" // /* MW 8 */ + 14105 "00000000" // /* MW 7 */ + 14106 "00000000" // /* MW 6 */ + 14107 "00100000" // /* MW 5 */ + 14108 "00000000" // /* MW 4 */ + 14109 "11110000" // /* MW 3 */ + 14110 "00101100" // /* MW 2 */ + 14111 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 14112 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14113 "00001010" // /* MW 3 */ + 14114 "01100111" // /* MW 2 */ + 14115 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 14116 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14117 "00010110" // /* MW 3 */ + 14118 "00000110" // /* MW 2 */ + 14119 "00000010" // /* MW 1 */ + 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14121 "00000000" // /* MW 1 */ + 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14123 "00000000" // /* MW 1 */ + 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14125 "00000000" // /* MW 1 */ + 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14127 "00000000" // /* MW 1 */ + 14128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14129 "00000000" // /* MW 1 */ + 14130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14131 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 14132 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14133 "11111000" // /* MW 3 */ + 14134 "00010000" // /* MW 2 */ + 14135 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 14136 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14137 "00010000" // /* MW 9 */ + 14138 "00110000" // /* MW 8 */ + 14139 "10110010" // /* MW 7 */ + 14140 "11110000" // /* MW 6 */ + 14141 "00000001" // /* MW 5 */ + 14142 "00000000" // /* MW 4 */ + 14143 "11010000" // /* MW 3 */ + 14144 "11000010" // /* MW 2 */ + 14145 "01011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 first + 14146 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14147 "01010110" // /* MW 3 */ + 14148 "00000110" // /* MW 2 */ + 14149 "00000001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 14150 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14151 "00110110" // /* MW 3 */ + 14152 "00000110" // /* MW 2 */ + 14153 "00000111" // /* MW 1 */ + 14154 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14155 "10011001" // /* MW 3 */ + 14156 "11110100" // /* MW 2 */ + 14157 "00000111" // /* MW 1 */ + 14158 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14159 "11010001" // /* MW 3 */ + 14160 "11111001" // /* MW 2 */ + 14161 "00000111" // /* MW 1 */ + 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14163 "00000000" // /* MW 1 */ + 14164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14165 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 14166 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14167 "00000001" // /* MW 3 */ + 14168 "11100001" // /* MW 2 */ + 14169 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 14170 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14171 "00010001" // /* MW 3 */ + 14172 "11100110" // /* MW 2 */ + 14173 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 16 first + 14174 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14175 "00101000" // /* MW 3 */ + 14176 "01100001" // /* MW 2 */ + 14177 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 14178 "10000100" // JNZ r16, #14208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14208 delay_slots=5 */ + 14179 "00000001" // /* MW 5 */ + 14180 "01000000" // /* MW 4 */ + 14181 "11000000" // /* MW 3 */ + 14182 "00011011" // /* MW 2 */ + 14183 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 +.delay_slot + 14184 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14185 "00000001" // /* MW 3 */ + 14186 "00110000" // /* MW 2 */ + 14187 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14195 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 first + 14196 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14197 "11000001" // /* MW 11 */ + 14198 "10001000" // /* MW 10 */ + 14199 "10000011" // /* MW 9 */ + 14200 "00000011" // /* MW 8 */ + 14201 "00000000" // /* MW 7 */ + 14202 "00000000" // /* MW 6 */ + 14203 "00100000" // /* MW 5 */ + 14204 "00000000" // /* MW 4 */ + 14205 "11110000" // /* MW 3 */ + 14206 "00101100" // /* MW 2 */ + 14207 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 467 + 14208 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14209 "01000001" // /* MW 5 */ + 14210 "11101101" // /* MW 4 */ + 14211 "00101110" // /* MW 3 */ + 14212 "10110110" // /* MW 2 */ + 14213 "11111111" // /* MW 1 */ + 14214 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14215 "11110001" // /* MW 3 */ + 14216 "11110001" // /* MW 2 */ + 14217 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 first + 14218 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 14219 "00000000" // /* MW 3 */ + 14220 "00101000" // /* MW 2 */ + 14221 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 +.delay_slot + 14222 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14223 "00000001" // /* MW 5 */ + 14224 "00000000" // /* MW 4 */ + 14225 "00000000" // /* MW 3 */ + 14226 "11110000" // /* MW 2 */ + 14227 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14233 "00000000" // /* MW 1 */ +.delay_slot + 14234 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14235 "11000000" // /* MW 3 */ + 14236 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 14237 "00011111" // /* MW 1 */ +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0 +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.function superkernel_conv_eltbinary _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.src_ref 7 "superkernels.cpp" 578 +.src_ref 7 "superkernels.cpp" 578 first +.function_start + 14240 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14241 "00000001" // /* MW 5 */ + 14242 "00000000" // /* MW 4 */ + 14243 "00000000" // /* MW 3 */ + 14244 "00001000" // /* MW 2 */ + 14245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 + 14246 "00111010" // ST p7, [sp, #-8]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14247 "00010001" // /* MW 9 */ + 14248 "00100000" // /* MW 8 */ + 14249 "10110010" // /* MW 7 */ + 14250 "11110011" // /* MW 6 */ + 14251 "00000001" // /* MW 5 */ + 14252 "00000000" // /* MW 4 */ + 14253 "10110000" // /* MW 3 */ + 14254 "01110011" // /* MW 2 */ + 14255 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 first + 14256 "10111010" // LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14257 "01110010" // /* MW 9 */ + 14258 "01110000" // /* MW 8 */ + 14259 "00101101" // /* MW 7 */ + 14260 "10000010" // /* MW 6 */ + 14261 "00011101" // /* MW 5 */ + 14262 "11111111" // /* MW 4 */ + 14263 "11010111" // /* MW 3 */ + 14264 "11000010" // /* MW 2 */ + 14265 "11100000" // /* MW 1 */ + 14266 "10011000" // ST p4, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14267 "00011101" // /* MW 3 */ + 14268 "11110110" // /* MW 2 */ + 14269 "00001111" // /* MW 1 */ + 14270 "10011000" // ST p2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14271 "00011101" // /* MW 3 */ + 14272 "11110001" // /* MW 2 */ + 14273 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 7 "superkernels.cpp" 590 35 +.src_ref 7 "superkernels.cpp" 599 105 +.src_ref 7 "superkernels.cpp" 629 34 + 14274 "00000010" // ST lr, [sp, #-20]; MOV p7, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 14275 "01110000" // /* MW 7 */ + 14276 "01100000" // /* MW 6 */ + 14277 "10110011" // /* MW 5 */ + 14278 "00000011" // /* MW 4 */ + 14279 "10110000" // /* MW 3 */ + 14280 "10000111" // /* MW 2 */ + 14281 "11111101" // /* MW 1 */ + 14282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14283 "00000000" // /* MW 1 */ + 14284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14285 "00000000" // /* MW 1 */ + 14286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14287 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 +.src_ref 7 "superkernels.cpp" 583 16 + 14288 "10000100" // JNZ r16, #14688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14688 delay_slots=5 */ + 14289 "00000001" // /* MW 5 */ + 14290 "01000000" // /* MW 4 */ + 14291 "10110000" // /* MW 3 */ + 14292 "00011100" // /* MW 2 */ + 14293 "10000000" // /* MW 1 */ +.delay_slot + 14294 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14295 "00011101" // /* MW 3 */ + 14296 "11101000" // /* MW 2 */ + 14297 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 22 first +.delay_slot + 14298 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14299 "10010000" // /* MW 3 */ + 14300 "01100010" // /* MW 2 */ + 14301 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 30 +.delay_slot + 14302 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14303 "11111011" // /* MW 3 */ + 14304 "01100011" // /* MW 2 */ + 14305 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 11 +.delay_slot + 14306 "01000100" // MOVXM p6, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14307 "10100000" // /* MW 5 */ + 14308 "11001000" // /* MW 4 */ + 14309 "11001100" // /* MW 3 */ + 14310 "00000111" // /* MW 2 */ + 14311 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 11 +.delay_slot + 14312 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14313 "00110001" // /* MW 3 */ + 14314 "00000110" // /* MW 2 */ + 14315 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14316 "10111010" // MOVA r0, #1; MOVXM p6, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14317 "00010000" // /* MW 9 */ + 14318 "00110100" // /* MW 8 */ + 14319 "00110010" // /* MW 7 */ + 14320 "11110011" // /* MW 6 */ + 14321 "00000001" // /* MW 5 */ + 14322 "00000000" // /* MW 4 */ + 14323 "00000000" // /* MW 3 */ + 14324 "00100000" // /* MW 2 */ + 14325 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14326 "01110110" // ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14327 "00010000" // /* MW 11 */ + 14328 "00110010" // /* MW 10 */ + 14329 "00110010" // /* MW 9 */ + 14330 "11110000" // /* MW 8 */ + 14331 "00000001" // /* MW 7 */ + 14332 "00000000" // /* MW 6 */ + 14333 "10001011" // /* MW 5 */ + 14334 "10000100" // /* MW 4 */ + 14335 "11100110" // /* MW 3 */ + 14336 "11000000" // /* MW 2 */ + 14337 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 7 "superkernels.cpp" 587 4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14338 "10111010" // MOVA r1, #0; MOVXM p1, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14339 "00010000" // /* MW 9 */ + 14340 "00000000" // /* MW 8 */ + 14341 "10110011" // /* MW 7 */ + 14342 "11110000" // /* MW 6 */ + 14343 "00000001" // /* MW 5 */ + 14344 "00000000" // /* MW 4 */ + 14345 "00000000" // /* MW 3 */ + 14346 "00000001" // /* MW 2 */ + 14347 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 14348 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 14349 "00000001" // /* MW 5 */ + 14350 "00000000" // /* MW 4 */ + 14351 "01100000" // /* MW 3 */ + 14352 "00000101" // /* MW 2 */ + 14353 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14357 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14358 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14359 "00110001" // /* MW 3 */ + 14360 "00100000" // /* MW 2 */ + 14361 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 14362 "00101100" // NOPA; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14363 "00001010" // /* MW 5 */ + 14364 "01000000" // /* MW 4 */ + 14365 "11110000" // /* MW 3 */ + 14366 "00101100" // /* MW 2 */ + 14367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 14368 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14369 "00000000" // /* MW 15 */ + 14370 "00000000" // /* MW 14 */ + 14371 "01111000" // /* MW 13 */ + 14372 "01100000" // /* MW 12 */ + 14373 "00110111" // /* MW 11 */ + 14374 "00000000" // /* MW 10 */ + 14375 "00000000" // /* MW 9 */ + 14376 "10000000" // /* MW 8 */ + 14377 "00010001" // /* MW 7 */ + 14378 "00000110" // /* MW 6 */ + 14379 "00100000" // /* MW 5 */ + 14380 "00000000" // /* MW 4 */ + 14381 "11110000" // /* MW 3 */ + 14382 "00101100" // /* MW 2 */ + 14383 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 590 35 +.src_ref 7 "superkernels.cpp" 591 4 +.return_address + 14384 "01100100" // MOVX r16, #1; MOV dj0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14385 "00000001" // /* MW 5 */ + 14386 "00000001" // /* MW 4 */ + 14387 "10100001" // /* MW 3 */ + 14388 "00000000" // /* MW 2 */ + 14389 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 590 35 first + 14390 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14391 "01010110" // /* MW 3 */ + 14392 "00000010" // /* MW 2 */ + 14393 "00000111" // /* MW 1 */ + 14394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14395 "00000000" // /* MW 1 */ + 14396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14397 "00000000" // /* MW 1 */ + 14398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14399 "00000000" // /* MW 1 */ + 14400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14401 "00000000" // /* MW 1 */ + 14402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14403 "00000000" // /* MW 1 */ + 14404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14405 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 first + 14406 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14407 "00000111" // /* MW 3 */ + 14408 "10100001" // /* MW 2 */ + 14409 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 + 14410 "10000100" // JNZ r16, #14544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14544 delay_slots=5 */ + 14411 "00000001" // /* MW 5 */ + 14412 "01000000" // /* MW 4 */ + 14413 "01101000" // /* MW 3 */ + 14414 "00011100" // /* MW 2 */ + 14415 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 105 +.delay_slot + 14416 "11111000" // MOV r17, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14417 "11000000" // /* MW 3 */ + 14418 "01011110" // /* MW 2 */ + 14419 "00011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 105 first +.delay_slot + 14420 "00011000" // ADD.NC dc0, r17, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14421 "10010000" // /* MW 3 */ + 14422 "11001000" // /* MW 2 */ + 14423 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14427 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14429 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 first + 14430 "10000100" // JNZ r18, #14512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14512 delay_slots=5 */ + 14431 "00000001" // /* MW 5 */ + 14432 "01000000" // /* MW 4 */ + 14433 "01011000" // /* MW 3 */ + 14434 "00011100" // /* MW 2 */ + 14435 "10010000" // /* MW 1 */ +.delay_slot + 14436 "01000100" // MOVXM r16, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14437 "00000000" // /* MW 5 */ + 14438 "00101100" // /* MW 4 */ + 14439 "11001000" // /* MW 3 */ + 14440 "00000111" // /* MW 2 */ + 14441 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 27 +.delay_slot + 14442 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14443 "00000001" // /* MW 3 */ + 14444 "00100010" // /* MW 2 */ + 14445 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14451 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 first +.no_stack_arguments + 14452 "00111010" // ST p6, [sp, #-28]; JL #11136 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11136 delay_slots=5 */ + 14453 "01000001" // /* MW 9 */ + 14454 "00000000" // /* MW 8 */ + 14455 "00000000" // /* MW 7 */ + 14456 "01110000" // /* MW 6 */ + 14457 "00000101" // /* MW 5 */ + 14458 "00000000" // /* MW 4 */ + 14459 "10110000" // /* MW 3 */ + 14460 "11100011" // /* MW 2 */ + 14461 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 595 38 +.delay_slot + 14462 "01000100" // MOVXM p6, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14463 "10000000" // /* MW 5 */ + 14464 "11001010" // /* MW 4 */ + 14465 "11001100" // /* MW 3 */ + 14466 "00000111" // /* MW 2 */ + 14467 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 +.delay_slot + 14468 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14469 "10000000" // /* MW 5 */ + 14470 "11001010" // /* MW 4 */ + 14471 "11000000" // /* MW 3 */ + 14472 "00000111" // /* MW 2 */ + 14473 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 +.delay_slot + 14474 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14475 "10000000" // /* MW 3 */ + 14476 "01100001" // /* MW 2 */ + 14477 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14481 "00000000" // /* MW 15 */ + 14482 "00000000" // /* MW 14 */ + 14483 "01111000" // /* MW 13 */ + 14484 "10100101" // /* MW 12 */ + 14485 "00000001" // /* MW 11 */ + 14486 "00000000" // /* MW 10 */ + 14487 "00000000" // /* MW 9 */ + 14488 "00000000" // /* MW 8 */ + 14489 "01011011" // /* MW 7 */ + 14490 "00000001" // /* MW 6 */ + 14491 "00100000" // /* MW 5 */ + 14492 "00000000" // /* MW 4 */ + 14493 "11110000" // /* MW 3 */ + 14494 "00101100" // /* MW 2 */ + 14495 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 595 38 first +.return_address + 14496 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14497 "00010000" // /* MW 9 */ + 14498 "00000000" // /* MW 8 */ + 14499 "00001011" // /* MW 7 */ + 14500 "11110010" // /* MW 6 */ + 14501 "00000001" // /* MW 5 */ + 14502 "00000000" // /* MW 4 */ + 14503 "11010000" // /* MW 3 */ + 14504 "11000110" // /* MW 2 */ + 14505 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14506 "00111100" // LDA p6, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14507 "00100000" // /* MW 5 */ + 14508 "00000000" // /* MW 4 */ + 14509 "00100000" // /* MW 3 */ + 14510 "11100011" // /* MW 2 */ + 14511 "11111100" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272 + 14512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14513 "00000000" // /* MW 1 */ + 14514 "10000100" // J #14592 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=14592 delay_slots=5 */ + 14515 "00000000" // /* MW 5 */ + 14516 "00000000" // /* MW 4 */ + 14517 "10000000" // /* MW 3 */ + 14518 "00011100" // /* MW 2 */ + 14519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14527 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.delay_slot + 14528 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14529 "00000000" // /* MW 15 */ + 14530 "00000000" // /* MW 14 */ + 14531 "01111000" // /* MW 13 */ + 14532 "01100000" // /* MW 12 */ + 14533 "10110110" // /* MW 11 */ + 14534 "00000000" // /* MW 10 */ + 14535 "00000000" // /* MW 9 */ + 14536 "00000000" // /* MW 8 */ + 14537 "01011011" // /* MW 7 */ + 14538 "00000001" // /* MW 6 */ + 14539 "00100000" // /* MW 5 */ + 14540 "00000000" // /* MW 4 */ + 14541 "11110000" // /* MW 3 */ + 14542 "00101100" // /* MW 2 */ + 14543 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304 +.src_ref 7 "superkernels.cpp" 599 8 first +.no_stack_arguments + 14544 "00111010" // ST p6, [sp, #-28]; JL #11296 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 14545 "01000001" // /* MW 9 */ + 14546 "00000000" // /* MW 8 */ + 14547 "00000000" // /* MW 7 */ + 14548 "10000100" // /* MW 6 */ + 14549 "00000101" // /* MW 5 */ + 14550 "00000000" // /* MW 4 */ + 14551 "10110000" // /* MW 3 */ + 14552 "11100011" // /* MW 2 */ + 14553 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 600 38 +.delay_slot + 14554 "01000100" // MOVXM p6, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14555 "00000000" // /* MW 5 */ + 14556 "11001011" // /* MW 4 */ + 14557 "11001100" // /* MW 3 */ + 14558 "00000111" // /* MW 2 */ + 14559 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 8 +.delay_slot + 14560 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14561 "00000000" // /* MW 5 */ + 14562 "11001011" // /* MW 4 */ + 14563 "11000000" // /* MW 3 */ + 14564 "00000111" // /* MW 2 */ + 14565 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 8 +.delay_slot + 14566 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14567 "10000000" // /* MW 3 */ + 14568 "01100001" // /* MW 2 */ + 14569 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14572 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14573 "01100111" // /* MW 3 */ + 14574 "00000001" // /* MW 2 */ + 14575 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 600 38 first +.return_address + 14576 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14577 "00010000" // /* MW 9 */ + 14578 "00000000" // /* MW 8 */ + 14579 "00001011" // /* MW 7 */ + 14580 "11110010" // /* MW 6 */ + 14581 "00000001" // /* MW 5 */ + 14582 "00000000" // /* MW 4 */ + 14583 "11010000" // /* MW 3 */ + 14584 "11000110" // /* MW 2 */ + 14585 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14586 "00111100" // LDA p1, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14587 "00100000" // /* MW 5 */ + 14588 "00000000" // /* MW 4 */ + 14589 "00100000" // /* MW 3 */ + 14590 "10010011" // /* MW 2 */ + 14591 "11111100" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352 + 14592 "10011000" // ADD.NC p3, r16, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14593 "00000101" // /* MW 3 */ + 14594 "01101000" // /* MW 2 */ + 14595 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 35 first +.src_ref 7 "superkernels.cpp" 611 18 + 14596 "10111010" // LDA.u8 r19, [p3], #7; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14597 "00010000" // /* MW 9 */ + 14598 "00101000" // /* MW 8 */ + 14599 "00110010" // /* MW 7 */ + 14600 "11110011" // /* MW 6 */ + 14601 "00000001" // /* MW 5 */ + 14602 "00000000" // /* MW 4 */ + 14603 "01010000" // /* MW 3 */ + 14604 "11001101" // /* MW 2 */ + 14605 "01101111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 18 first + 14606 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14607 "01010110" // /* MW 3 */ + 14608 "00000110" // /* MW 2 */ + 14609 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 37 first + 14610 "10011000" // LDA.u16 r21, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14611 "10111010" // /* MW 3 */ + 14612 "00011110" // /* MW 2 */ + 14613 "00000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 73 + 14614 "10011000" // LDA.u16 r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14615 "00011010" // /* MW 3 */ + 14616 "00000110" // /* MW 2 */ + 14617 "00000011" // /* MW 1 */ + 14618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14619 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 110 + 14620 "10011000" // LDA.u16 r20, [p3, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14621 "10011010" // /* MW 3 */ + 14622 "00010110" // /* MW 2 */ + 14623 "00000011" // /* MW 1 */ + 14624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14625 "00000000" // /* MW 1 */ + 14626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14627 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 19 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 14628 "01000100" // MOVXM p0, #508996 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14629 "10001000" // /* MW 5 */ + 14630 "11001000" // /* MW 4 */ + 14631 "11000000" // /* MW 3 */ + 14632 "00000111" // /* MW 2 */ + 14633 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 57 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 14634 "10011000" // MUL r19, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14635 "01011111" // /* MW 3 */ + 14636 "11100111" // /* MW 2 */ + 14637 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 19 first +.src_ref 7 "superkernels.cpp" 611 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14638 "00111010" // ST r19, [p0]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14639 "00010001" // /* MW 9 */ + 14640 "00101110" // /* MW 8 */ + 14641 "00110010" // /* MW 7 */ + 14642 "11110001" // /* MW 6 */ + 14643 "00000001" // /* MW 5 */ + 14644 "00000000" // /* MW 4 */ + 14645 "00110000" // /* MW 3 */ + 14646 "11001110" // /* MW 2 */ + 14647 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 94 first + 14648 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14649 "00001111" // /* MW 3 */ + 14650 "11100001" // /* MW 2 */ + 14651 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 27 first + 14652 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14653 "00101111" // /* MW 3 */ + 14654 "01100011" // /* MW 2 */ + 14655 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 28 first + 14656 "10011000" // MUL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14657 "00001111" // /* MW 3 */ + 14658 "00100001" // /* MW 2 */ + 14659 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 13 +.src_ref 7 "superkernels.cpp" 611 16 first + 14660 "01110110" // NOPA; ST r17, [p2]; MOVXM p6, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14661 "00010000" // /* MW 11 */ + 14662 "00110000" // /* MW 10 */ + 14663 "00110010" // /* MW 9 */ + 14664 "11110011" // /* MW 8 */ + 14665 "00000001" // /* MW 7 */ + 14666 "10000000" // /* MW 6 */ + 14667 "00110001" // /* MW 5 */ + 14668 "00000110" // /* MW 4 */ + 14669 "11110010" // /* MW 3 */ + 14670 "00101100" // /* MW 2 */ + 14671 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 13 first + 14672 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14673 "00000000" // /* MW 15 */ + 14674 "00000000" // /* MW 14 */ + 14675 "01111000" // /* MW 13 */ + 14676 "10100101" // /* MW 12 */ + 14677 "00000001" // /* MW 11 */ + 14678 "00000000" // /* MW 10 */ + 14679 "00000000" // /* MW 9 */ + 14680 "10000000" // /* MW 8 */ + 14681 "00010001" // /* MW 7 */ + 14682 "00000110" // /* MW 6 */ + 14683 "00100110" // /* MW 5 */ + 14684 "00000000" // /* MW 4 */ + 14685 "11110000" // /* MW 3 */ + 14686 "00101100" // /* MW 2 */ + 14687 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448 +.src_ref 7 "superkernels.cpp" 614 12 + 14688 "01000100" // MOVXM p0, #509000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14689 "10010000" // /* MW 5 */ + 14690 "11001000" // /* MW 4 */ + 14691 "11000000" // /* MW 3 */ + 14692 "00000111" // /* MW 2 */ + 14693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.src_ref 7 "superkernels.cpp" 616 11 + 14694 "10111010" // LDA r16, [p0]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14695 "00010000" // /* MW 9 */ + 14696 "00100000" // /* MW 8 */ + 14697 "00110010" // /* MW 7 */ + 14698 "11110001" // /* MW 6 */ + 14699 "00000001" // /* MW 5 */ + 14700 "00000000" // /* MW 4 */ + 14701 "11010000" // /* MW 3 */ + 14702 "11000010" // /* MW 2 */ + 14703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 +.src_ref 7 "superkernels.cpp" 616 11 first + 14704 "10111010" // LDA r17, [p2]; MOVXM p6, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14705 "00010000" // /* MW 9 */ + 14706 "00100110" // /* MW 8 */ + 14707 "00110010" // /* MW 7 */ + 14708 "11110011" // /* MW 6 */ + 14709 "00000001" // /* MW 5 */ + 14710 "00000000" // /* MW 4 */ + 14711 "11010000" // /* MW 3 */ + 14712 "11000110" // /* MW 2 */ + 14713 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first + 14714 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14715 "01010110" // /* MW 3 */ + 14716 "00000110" // /* MW 2 */ + 14717 "00000110" // /* MW 1 */ + 14718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14719 "00000000" // /* MW 1 */ + 14720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14721 "00000000" // /* MW 1 */ + 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14723 "00000000" // /* MW 1 */ + 14724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14725 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 618 6 first +.src_ref 7 "superkernels.cpp" 618 17 first + 14726 "10000100" // JNZ r16, #14832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14832 delay_slots=5 */ + 14727 "00000001" // /* MW 5 */ + 14728 "01000000" // /* MW 4 */ + 14729 "11111000" // /* MW 3 */ + 14730 "00011100" // /* MW 2 */ + 14731 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.src_ref 7 "superkernels.cpp" 616 11 first +.delay_slot + 14732 "00100100" // ADD r17, r17, #1; ADD.NC r19, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14733 "00000001" // /* MW 5 */ + 14734 "10110000" // /* MW 4 */ + 14735 "11101001" // /* MW 3 */ + 14736 "01000000" // /* MW 2 */ + 14737 "10001100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first +.delay_slot + 14738 "00011000" // ADD r18, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14739 "00000111" // /* MW 3 */ + 14740 "10100100" // /* MW 2 */ + 14741 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 616 11 first +.delay_slot + 14742 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14743 "00110001" // /* MW 3 */ + 14744 "00000110" // /* MW 2 */ + 14745 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first +.delay_slot + 14746 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14747 "01010001" // /* MW 3 */ + 14748 "00000110" // /* MW 2 */ + 14749 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.delay_slot + 14750 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14751 "01110001" // /* MW 3 */ + 14752 "00000110" // /* MW 2 */ + 14753 "00001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 14754 "00011000" // LDA r17, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14755 "00110001" // /* MW 3 */ + 14756 "11110110" // /* MW 2 */ + 14757 "00000111" // /* MW 1 */ + 14758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14759 "00000000" // /* MW 1 */ + 14760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14761 "00000000" // /* MW 1 */ + 14762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14763 "00000000" // /* MW 1 */ + 14764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14765 "00000000" // /* MW 1 */ + 14766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14767 "00000000" // /* MW 1 */ + 14768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14769 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 14770 "00011000" // ADD.NC p6, r17, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14771 "10000110" // /* MW 3 */ + 14772 "01101000" // /* MW 2 */ + 14773 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 14774 "10011000" // LDA r27, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14775 "01110110" // /* MW 3 */ + 14776 "11111111" // /* MW 2 */ + 14777 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 14778 "10011000" // LDA r17, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14779 "00110110" // /* MW 3 */ + 14780 "11111110" // /* MW 2 */ + 14781 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 14782 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14783 "01010110" // /* MW 3 */ + 14784 "11111110" // /* MW 2 */ + 14785 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 14786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 14788 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14789 "00110110" // /* MW 3 */ + 14790 "01000110" // /* MW 2 */ + 14791 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14793 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14795 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14797 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14799 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14800 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14801 "00010010" // /* MW 3 */ + 14802 "10100011" // /* MW 2 */ + 14803 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.src_ref 1 "io_buffer_main.h" 395 8 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14804 "01011100" // ST r17, [p6]; MOVX r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14805 "11111010" // /* MW 5 */ + 14806 "11000001" // /* MW 4 */ + 14807 "00111111" // /* MW 3 */ + 14808 "11000110" // /* MW 2 */ + 14809 "11000000" // /* MW 1 */ + 14810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14811 "00000000" // /* MW 1 */ + 14812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14813 "00000000" // /* MW 1 */ + 14814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14815 "00000000" // /* MW 1 */ + 14816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14817 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14818 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 14819 "01100000" // /* MW 13 */ + 14820 "00101011" // /* MW 12 */ + 14821 "00000000" // /* MW 11 */ + 14822 "10101111" // /* MW 10 */ + 14823 "00110100" // /* MW 9 */ + 14824 "00000000" // /* MW 8 */ + 14825 "00001000" // /* MW 7 */ + 14826 "01010011" // /* MW 6 */ + 14827 "00100100" // /* MW 5 */ + 14828 "00000000" // /* MW 4 */ + 14829 "11110000" // /* MW 3 */ + 14830 "00101100" // /* MW 2 */ + 14831 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592 + 14832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14833 "00000000" // /* MW 1 */ + 14834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14835 "00000000" // /* MW 1 */ + 14836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14837 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.src_ref 1 "io_buffer_main.h" 125 25 + 14838 "00011000" // LDA p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14839 "00011001" // /* MW 3 */ + 14840 "11110101" // /* MW 2 */ + 14841 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14842 "00011000" // LDA p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14843 "00011001" // /* MW 3 */ + 14844 "11101000" // /* MW 2 */ + 14845 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 first +.no_stack_arguments + 14846 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 14847 "00000001" // /* MW 5 */ + 14848 "00000000" // /* MW 4 */ + 14849 "10111000" // /* MW 3 */ + 14850 "00001000" // /* MW 2 */ + 14851 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.delay_slot + 14852 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14853 "00000000" // /* MW 5 */ + 14854 "11001100" // /* MW 4 */ + 14855 "11000110" // /* MW 3 */ + 14856 "00000111" // /* MW 2 */ + 14857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 14864 "11100001" // NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14865 "00000000" // /* MW 15 */ + 14866 "00000000" // /* MW 14 */ + 14867 "01111000" // /* MW 13 */ + 14868 "10100101" // /* MW 12 */ + 14869 "00000001" // /* MW 11 */ + 14870 "00000000" // /* MW 10 */ + 14871 "00000000" // /* MW 9 */ + 14872 "00000000" // /* MW 8 */ + 14873 "10001011" // /* MW 7 */ + 14874 "10001000" // /* MW 6 */ + 14875 "00100110" // /* MW 5 */ + 14876 "00000000" // /* MW 4 */ + 14877 "11110000" // /* MW 3 */ + 14878 "00101100" // /* MW 2 */ + 14879 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 +.src_ref 1 "io_buffer_main.h" 218 49 +.return_address + 14880 "10111010" // LDA r16, [sp, #-16]; MOVXM p1, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14881 "00010000" // /* MW 9 */ + 14882 "00100100" // /* MW 8 */ + 14883 "10110010" // /* MW 7 */ + 14884 "11110000" // /* MW 6 */ + 14885 "00000001" // /* MW 5 */ + 14886 "00000000" // /* MW 4 */ + 14887 "00100000" // /* MW 3 */ + 14888 "01000010" // /* MW 2 */ + 14889 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 first +.src_ref 7 "superkernels.cpp" 623 20 + 14890 "10111010" // LDA r17, [p1]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14891 "00010000" // /* MW 9 */ + 14892 "00100010" // /* MW 8 */ + 14893 "10110010" // /* MW 7 */ + 14894 "11110000" // /* MW 6 */ + 14895 "00000001" // /* MW 5 */ + 14896 "00000000" // /* MW 4 */ + 14897 "11010000" // /* MW 3 */ + 14898 "11000110" // /* MW 2 */ + 14899 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 20 + 14900 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14901 "01010110" // /* MW 3 */ + 14902 "00000110" // /* MW 2 */ + 14903 "00000001" // /* MW 1 */ + 14904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14905 "00000000" // /* MW 1 */ + 14906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14907 "00000000" // /* MW 1 */ + 14908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14909 "00000000" // /* MW 1 */ + 14910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14911 "00000000" // /* MW 1 */ + 14912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14913 "00000000" // /* MW 1 */ + 14914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14915 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 17 + 14916 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14917 "00101000" // /* MW 3 */ + 14918 "01100011" // /* MW 2 */ + 14919 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 + 14920 "10000100" // JNZ r17, #15264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15264 delay_slots=5 */ + 14921 "00000001" // /* MW 5 */ + 14922 "01000000" // /* MW 4 */ + 14923 "11010000" // /* MW 3 */ + 14924 "00011101" // /* MW 2 */ + 14925 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14935 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 629 34 +.src_ref 1 "io_buffer_main.h" 218 49 first +.src_ref 1 "io_buffer_main.h" 395 8 + 14936 "10111010" // MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14937 "00001000" // /* MW 9 */ + 14938 "00000011" // /* MW 8 */ + 14939 "10110100" // /* MW 7 */ + 14940 "11101000" // /* MW 6 */ + 14941 "00010111" // /* MW 5 */ + 14942 "00111111" // /* MW 4 */ + 14943 "10000000" // /* MW 3 */ + 14944 "00000010" // /* MW 2 */ + 14945 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 52 +.src_ref 1 "io_buffer_main.h" 218 49 + 14946 "10111010" // LDA r27, [p1], #-4; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14947 "00010000" // /* MW 9 */ + 14948 "00101110" // /* MW 8 */ + 14949 "00110010" // /* MW 7 */ + 14950 "11110000" // /* MW 6 */ + 14951 "00000001" // /* MW 5 */ + 14952 "00000000" // /* MW 4 */ + 14953 "11010000" // /* MW 3 */ + 14954 "11101110" // /* MW 2 */ + 14955 "00111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 14956 "10011000" // LDA r18, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14957 "01010110" // /* MW 3 */ + 14958 "11111110" // /* MW 2 */ + 14959 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 14960 "10011000" // LDA r19, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14961 "01110110" // /* MW 3 */ + 14962 "11111110" // /* MW 2 */ + 14963 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 14964 "10011000" // LDA r20, [p1, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14965 "10010110" // /* MW 3 */ + 14966 "01010110" // /* MW 2 */ + 14967 "00000001" // /* MW 1 */ + 14968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14969 "00000000" // /* MW 1 */ + 14970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14971 "00000000" // /* MW 1 */ + 14972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14973 "00000000" // /* MW 1 */ + 14974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14975 "00000000" // /* MW 1 */ + 14976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14977 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 14978 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14979 "00100010" // /* MW 3 */ + 14980 "11100101" // /* MW 2 */ + 14981 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 +.src_ref 7 "superkernels.cpp" 630 3 +.src_ref 1 "io_buffer_main.h" 218 20 + 14982 "01011100" // ST r18, [p1]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14983 "00001010" // /* MW 5 */ + 14984 "01000000" // /* MW 4 */ + 14985 "00110000" // /* MW 3 */ + 14986 "11001010" // /* MW 2 */ + 14987 "00100000" // /* MW 1 */ + 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14989 "00000000" // /* MW 1 */ + 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14991 "00000000" // /* MW 1 */ + 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14993 "00000000" // /* MW 1 */ + 14994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14995 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14996 "00011000" // ACQ r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14997 "00011000" // /* MW 3 */ + 14998 "00010011" // /* MW 2 */ + 14999 "00010101" // /* MW 1 */ + 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15001 "00000000" // /* MW 1 */ + 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15003 "00000000" // /* MW 1 */ + 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15005 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 52 first + 15006 "10011000" // LDA r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15007 "01110110" // /* MW 3 */ + 15008 "00000110" // /* MW 2 */ + 15009 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 629 34 first + 15010 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15011 "01010110" // /* MW 3 */ + 15012 "00000010" // /* MW 2 */ + 15013 "00000111" // /* MW 1 */ + 15014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15015 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 15016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 15018 "10011000" // LDA p0, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15019 "00011110" // /* MW 3 */ + 15020 "01011100" // /* MW 2 */ + 15021 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15023 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15025 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 first +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 32 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15026 "10111010" // LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15027 "01111000" // /* MW 9 */ + 15028 "01100000" // /* MW 8 */ + 15029 "00110001" // /* MW 7 */ + 15030 "01101100" // /* MW 6 */ + 15031 "00111000" // /* MW 5 */ + 15032 "00100111" // /* MW 4 */ + 15033 "11010000" // /* MW 3 */ + 15034 "11000110" // /* MW 2 */ + 15035 "00101001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15036 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15037 "00000111" // /* MW 3 */ + 15038 "10100001" // /* MW 2 */ + 15039 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15040 "10000100" // JNZ r16, #15120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15120 delay_slots=5 */ + 15041 "00000001" // /* MW 5 */ + 15042 "01000000" // /* MW 4 */ + 15043 "10001000" // /* MW 3 */ + 15044 "00011101" // /* MW 2 */ + 15045 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15046 "00011000" // MOVS p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15047 "10001011" // /* MW 3 */ + 15048 "10000000" // /* MW 2 */ + 15049 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15055 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 first +.delay_slot + 15056 "00000010" // ST p1, [sp, #-16]; ADD.NC p1, r19, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15057 "10100000" // /* MW 7 */ + 15058 "11100010" // /* MW 6 */ + 15059 "10110100" // /* MW 5 */ + 15060 "00000000" // /* MW 4 */ + 15061 "10110000" // /* MW 3 */ + 15062 "00010011" // /* MW 2 */ + 15063 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 first + 15064 "10000100" // JNZ r18, #15152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15152 delay_slots=5 */ + 15065 "00000001" // /* MW 5 */ + 15066 "01000000" // /* MW 4 */ + 15067 "10011000" // /* MW 3 */ + 15068 "00011101" // /* MW 2 */ + 15069 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15079 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 first +.no_stack_arguments + 15080 "00000100" // JL #11248 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11248 delay_slots=5 */ + 15081 "00000001" // /* MW 5 */ + 15082 "00000000" // /* MW 4 */ + 15083 "11111000" // /* MW 3 */ + 15084 "00010101" // /* MW 2 */ + 15085 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 +.delay_slot + 15086 "01000100" // MOVXM p3, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15087 "10000000" // /* MW 5 */ + 15088 "11001010" // /* MW 4 */ + 15089 "11000110" // /* MW 3 */ + 15090 "00000111" // /* MW 2 */ + 15091 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15093 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15095 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15097 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 +.delay_slot + 15098 "11010100" // NOPA; MOV p2, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15099 "10000001" // /* MW 5 */ + 15100 "11000001" // /* MW 4 */ + 15101 "11110100" // /* MW 3 */ + 15102 "00101100" // /* MW 2 */ + 15103 "00000000" // /* MW 1 */ +.return_address + 15104 "10000100" // J #15152 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15152 delay_slots=5 */ + 15105 "00000000" // /* MW 5 */ + 15106 "00000000" // /* MW 4 */ + 15107 "10011000" // /* MW 3 */ + 15108 "00011101" // /* MW 2 */ + 15109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15111 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15119 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880 +.src_ref 7 "superkernels.cpp" 637 8 first +.no_stack_arguments + 15120 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */ + 15121 "00000001" // /* MW 5 */ + 15122 "00000000" // /* MW 4 */ + 15123 "01011000" // /* MW 3 */ + 15124 "00010110" // /* MW 2 */ + 15125 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 637 8 +.delay_slot + 15126 "01000100" // MOVXM p3, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15127 "00000000" // /* MW 5 */ + 15128 "11001011" // /* MW 4 */ + 15129 "11000110" // /* MW 3 */ + 15130 "00000111" // /* MW 2 */ + 15131 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 637 8 +.delay_slot + 15132 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15133 "11000000" // /* MW 3 */ + 15134 "01100000" // /* MW 2 */ + 15135 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15140 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 15141 "10000001" // /* MW 11 */ + 15142 "10101101" // /* MW 10 */ + 15143 "00000000" // /* MW 9 */ + 15144 "00000000" // /* MW 8 */ + 15145 "00000000" // /* MW 7 */ + 15146 "00000000" // /* MW 6 */ + 15147 "00100000" // /* MW 5 */ + 15148 "00000000" // /* MW 4 */ + 15149 "11110000" // /* MW 3 */ + 15150 "00101100" // /* MW 2 */ + 15151 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912 +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.return_address + 15152 "00011000" // LDA p1, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15153 "10011001" // /* MW 3 */ + 15154 "11110000" // /* MW 2 */ + 15155 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 + 15156 "00101100" // LDA p0, [sp, #-12]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15157 "00001010" // /* MW 5 */ + 15158 "01000100" // /* MW 4 */ + 15159 "00100000" // /* MW 3 */ + 15160 "10000011" // /* MW 2 */ + 15161 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 +.src_ref 1 "io_buffer_main.h" 324 32 first + 15162 "10111010" // LDA r16, [p7, #16]; MOVXM p7, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15163 "00010000" // /* MW 9 */ + 15164 "00100100" // /* MW 8 */ + 15165 "10110010" // /* MW 7 */ + 15166 "11110011" // /* MW 6 */ + 15167 "00000001" // /* MW 5 */ + 15168 "00000000" // /* MW 4 */ + 15169 "11010000" // /* MW 3 */ + 15170 "11000010" // /* MW 2 */ + 15171 "11101000" // /* MW 1 */ + 15172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15173 "00000000" // /* MW 1 */ + 15174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15175 "00000000" // /* MW 1 */ + 15176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15177 "00000000" // /* MW 1 */ + 15178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15179 "00000000" // /* MW 1 */ + 15180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15181 "00000000" // /* MW 1 */ + 15182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15183 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 15184 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15185 "00011000" // /* MW 3 */ + 15186 "00010001" // /* MW 2 */ + 15187 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 15188 "10011000" // LDA r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15189 "01010110" // /* MW 3 */ + 15190 "11110110" // /* MW 2 */ + 15191 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 15192 "10011000" // LDA r16, [p0, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15193 "00010110" // /* MW 3 */ + 15194 "01010110" // /* MW 2 */ + 15195 "00000000" // /* MW 1 */ + 15196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15197 "00000000" // /* MW 1 */ + 15198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15199 "00000000" // /* MW 1 */ + 15200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15201 "00000000" // /* MW 1 */ + 15202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15203 "00000000" // /* MW 1 */ + 15204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 15206 "10011000" // SUB r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15207 "00100001" // /* MW 3 */ + 15208 "01100101" // /* MW 2 */ + 15209 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 15210 "10011000" // ST r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15211 "01010001" // /* MW 3 */ + 15212 "11110110" // /* MW 2 */ + 15213 "00001001" // /* MW 1 */ + 15214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15215 "00000000" // /* MW 1 */ + 15216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15217 "00000000" // /* MW 1 */ + 15218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15219 "00000000" // /* MW 1 */ + 15220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15221 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 15222 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15223 "00011000" // /* MW 3 */ + 15224 "00010001" // /* MW 2 */ + 15225 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 15226 "10011000" // LDA r18, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15227 "01010110" // /* MW 3 */ + 15228 "11100110" // /* MW 2 */ + 15229 "00000110" // /* MW 1 */ + 15230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15231 "00000000" // /* MW 1 */ + 15232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15233 "00000000" // /* MW 1 */ + 15234 "10000100" // J #15280 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15280 delay_slots=5 */ + 15235 "00000000" // /* MW 5 */ + 15236 "00000000" // /* MW 4 */ + 15237 "11011000" // /* MW 3 */ + 15238 "00011101" // /* MW 2 */ + 15239 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15241 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15243 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 +.src_ref 7 "superkernels.cpp" 649 14 +.delay_slot + 15244 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15245 "00000001" // /* MW 3 */ + 15246 "00100000" // /* MW 2 */ + 15247 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 first +.src_ref 1 "io_buffer_main.h" 327 32 +.delay_slot + 15248 "01011100" // ST r16, [p7]; SUB r17, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15249 "01000011" // /* MW 5 */ + 15250 "11000110" // /* MW 4 */ + 15251 "00111000" // /* MW 3 */ + 15252 "11000010" // /* MW 2 */ + 15253 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 first +.delay_slot + 15254 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15255 "00000000" // /* MW 9 */ + 15256 "00000000" // /* MW 8 */ + 15257 "00000000" // /* MW 7 */ + 15258 "10000000" // /* MW 6 */ + 15259 "00110001" // /* MW 5 */ + 15260 "11100110" // /* MW 4 */ + 15261 "11110110" // /* MW 3 */ + 15262 "00101100" // /* MW 2 */ + 15263 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024 +.src_ref 7 "superkernels.cpp" 649 14 + 15264 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 15265 "00000000" // /* MW 15 */ + 15266 "00000000" // /* MW 14 */ + 15267 "01111000" // /* MW 13 */ + 15268 "10100101" // /* MW 12 */ + 15269 "00000001" // /* MW 11 */ + 15270 "00001000" // /* MW 10 */ + 15271 "00000000" // /* MW 9 */ + 15272 "00000001" // /* MW 8 */ + 15273 "01011011" // /* MW 7 */ + 15274 "00000001" // /* MW 6 */ + 15275 "00100000" // /* MW 5 */ + 15276 "00000000" // /* MW 4 */ + 15277 "11110000" // /* MW 3 */ + 15278 "00101100" // /* MW 2 */ + 15279 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040 +.src_ref 7 "superkernels.cpp" 648 19 +.src_ref 7 "superkernels.cpp" 651 + 15280 "10111010" // LDA lr, [sp, #-20]; MOVXM p7, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15281 "00010000" // /* MW 9 */ + 15282 "00110000" // /* MW 8 */ + 15283 "10110010" // /* MW 7 */ + 15284 "11110011" // /* MW 6 */ + 15285 "00000001" // /* MW 5 */ + 15286 "00000000" // /* MW 4 */ + 15287 "00100000" // /* MW 3 */ + 15288 "10000111" // /* MW 2 */ + 15289 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 +.src_ref 7 "superkernels.cpp" 648 19 first +.src_ref 7 "superkernels.cpp" 649 14 + 15290 "10111010" // LDA r18, [p7]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15291 "00010000" // /* MW 9 */ + 15292 "00100000" // /* MW 8 */ + 15293 "00110010" // /* MW 7 */ + 15294 "11110011" // /* MW 6 */ + 15295 "00000001" // /* MW 5 */ + 15296 "00000000" // /* MW 4 */ + 15297 "11010000" // /* MW 3 */ + 15298 "11001010" // /* MW 2 */ + 15299 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 + 15300 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15301 "00110110" // /* MW 3 */ + 15302 "00000110" // /* MW 2 */ + 15303 "00000110" // /* MW 1 */ + 15304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15305 "00000000" // /* MW 1 */ + 15306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15307 "00000000" // /* MW 1 */ + 15308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15309 "00000000" // /* MW 1 */ + 15310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15311 "00000000" // /* MW 1 */ + 15312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15313 "00000000" // /* MW 1 */ + 15314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15315 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 16 + 15316 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15317 "00101000" // /* MW 3 */ + 15318 "01100011" // /* MW 2 */ + 15319 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 + 15320 "10000100" // JNZ r17, #15344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15344 delay_slots=5 */ + 15321 "00000001" // /* MW 5 */ + 15322 "01000000" // /* MW 4 */ + 15323 "11111000" // /* MW 3 */ + 15324 "00011101" // /* MW 2 */ + 15325 "10001000" // /* MW 1 */ +.delay_slot + 15326 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15327 "10011001" // /* MW 3 */ + 15328 "11111011" // /* MW 2 */ + 15329 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15337 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 649 14 first + 15338 "00001100" // NOPA; ST r16, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15339 "00100011" // /* MW 5 */ + 15340 "00001100" // /* MW 4 */ + 15341 "11111100" // /* MW 3 */ + 15342 "00101100" // /* MW 2 */ + 15343 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104 + 15344 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15345 "00011001" // /* MW 3 */ + 15346 "11111111" // /* MW 2 */ + 15347 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 651 first + 15348 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15349 "00000000" // /* MW 3 */ + 15350 "00101000" // /* MW 2 */ + 15351 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 651 +.delay_slot + 15352 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15353 "00000001" // /* MW 5 */ + 15354 "00000000" // /* MW 4 */ + 15355 "00000000" // /* MW 3 */ + 15356 "11111000" // /* MW 2 */ + 15357 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0 + 15365 "00000000" // /* MW 1 */ +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function _b896_wrapper _Z13_b896_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 21 first +.src_ref 0 "0_0_reloadable5.cc" 23 79 +.function_start + 15376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15377 "11000000" // /* MW 3 */ + 15378 "01100000" // /* MW 2 */ + 15379 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 23 79 first + 15380 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15381 "00011110" // /* MW 3 */ + 15382 "00011100" // /* MW 2 */ + 15383 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 24 79 first + 15384 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15385 "10011110" // /* MW 3 */ + 15386 "00101100" // /* MW 2 */ + 15387 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 26 81 first + 15388 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15389 "10011110" // /* MW 3 */ + 15390 "11110101" // /* MW 2 */ + 15391 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 25 47 first + 15392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15393 "00011110" // /* MW 3 */ + 15394 "00000101" // /* MW 2 */ + 15395 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 22 4 first +.tail_call + 15396 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */ + 15397 "00000000" // /* MW 5 */ + 15398 "00000000" // /* MW 4 */ + 15399 "01110000" // /* MW 3 */ + 15400 "00001101" // /* MW 2 */ + 15401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + 15411 "00000000" // /* MW 1 */ +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function _b901_wrapper _Z13_b901_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 30 first +.src_ref 0 "0_0_reloadable5.cc" 32 79 +.function_start + 15424 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15425 "11000000" // /* MW 3 */ + 15426 "01100000" // /* MW 2 */ + 15427 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 32 79 first + 15428 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15429 "00011110" // /* MW 3 */ + 15430 "00101100" // /* MW 2 */ + 15431 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 34 81 first + 15432 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15433 "00011110" // /* MW 3 */ + 15434 "11110101" // /* MW 2 */ + 15435 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 33 47 first + 15436 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15437 "10011110" // /* MW 3 */ + 15438 "00000100" // /* MW 2 */ + 15439 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 31 4 first +.tail_call + 15440 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */ + 15441 "00000000" // /* MW 5 */ + 15442 "00000000" // /* MW 4 */ + 15443 "00011000" // /* MW 3 */ + 15444 "00010000" // /* MW 2 */ + 15445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 + 15455 "00000000" // /* MW 1 */ +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function _b906_wrapper _Z13_b906_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 38 first +.src_ref 0 "0_0_reloadable5.cc" 40 79 +.function_start + 15456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15457 "11000000" // /* MW 3 */ + 15458 "01100000" // /* MW 2 */ + 15459 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 40 79 first + 15460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15461 "00011110" // /* MW 3 */ + 15462 "00101100" // /* MW 2 */ + 15463 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 42 81 first + 15464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15465 "00011110" // /* MW 3 */ + 15466 "11110101" // /* MW 2 */ + 15467 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 41 47 first + 15468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15469 "10011110" // /* MW 3 */ + 15470 "00000100" // /* MW 2 */ + 15471 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 39 4 first +.tail_call + 15472 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */ + 15473 "00000000" // /* MW 5 */ + 15474 "00000000" // /* MW 4 */ + 15475 "11001000" // /* MW 3 */ + 15476 "00010001" // /* MW 2 */ + 15477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 + 15487 "00000000" // /* MW 1 */ +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function _b881_wrapper _Z13_b881_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 46 first +.src_ref 0 "0_0_reloadable5.cc" 48 79 +.function_start + 15488 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15489 "11000000" // /* MW 3 */ + 15490 "01100000" // /* MW 2 */ + 15491 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 48 79 first + 15492 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15493 "00011110" // /* MW 3 */ + 15494 "00101100" // /* MW 2 */ + 15495 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 50 81 first + 15496 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15497 "00011110" // /* MW 3 */ + 15498 "11110101" // /* MW 2 */ + 15499 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 49 47 first + 15500 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15501 "10011110" // /* MW 3 */ + 15502 "00000100" // /* MW 2 */ + 15503 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 47 4 first +.tail_call + 15504 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */ + 15505 "00000000" // /* MW 5 */ + 15506 "00000000" // /* MW 4 */ + 15507 "10001000" // /* MW 3 */ + 15508 "00010100" // /* MW 2 */ + 15509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 + 15519 "00000000" // /* MW 1 */ +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function _b891_wrapper _Z13_b891_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 54 first +.src_ref 0 "0_0_reloadable5.cc" 56 79 +.function_start + 15520 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15521 "11000000" // /* MW 3 */ + 15522 "01100000" // /* MW 2 */ + 15523 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 56 79 first + 15524 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15525 "00011110" // /* MW 3 */ + 15526 "00111100" // /* MW 2 */ + 15527 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 57 47 first + 15528 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15529 "10011110" // /* MW 3 */ + 15530 "11101100" // /* MW 2 */ + 15531 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 59 81 first + 15532 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15533 "10011110" // /* MW 3 */ + 15534 "00010101" // /* MW 2 */ + 15535 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 58 80 first + 15536 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15537 "00011110" // /* MW 3 */ + 15538 "00000101" // /* MW 2 */ + 15539 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 55 4 first +.tail_call + 15540 "10000100" // J #11744 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 15541 "00000000" // /* MW 5 */ + 15542 "00000000" // /* MW 4 */ + 15543 "11110000" // /* MW 3 */ + 15544 "00010110" // /* MW 2 */ + 15545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15551 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15553 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + 15555 "00000000" // /* MW 1 */ +.label __Z13_b924_wrapperPPv___func_begin0 +.label _Z13_b924_wrapperPPv +.function _b924_wrapper _Z13_b924_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 63 first +.src_ref 0 "0_0_reloadable5.cc" 65 79 +.function_start + 15568 "11111000" // MOV p3, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15569 "11000000" // /* MW 3 */ + 15570 "01100000" // /* MW 2 */ + 15571 "00011011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 65 79 first + 15572 "10011000" // LDA p0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15573 "00011110" // /* MW 3 */ + 15574 "00011100" // /* MW 2 */ + 15575 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 66 79 first + 15576 "10011000" // LDA p1, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15577 "10011110" // /* MW 3 */ + 15578 "00011100" // /* MW 2 */ + 15579 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 67 80 first + 15580 "10011000" // LDA p2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15581 "00011110" // /* MW 3 */ + 15582 "00101101" // /* MW 2 */ + 15583 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 69 81 first + 15584 "10011000" // LDA p4, [p3, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15585 "00011110" // /* MW 3 */ + 15586 "11110110" // /* MW 2 */ + 15587 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 68 47 first + 15588 "10011000" // LDA p3, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15589 "10011110" // /* MW 3 */ + 15590 "00000101" // /* MW 2 */ + 15591 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 64 4 first +.tail_call + 15592 "10000100" // J #14240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=14240 delay_slots=5 */ + 15593 "00000000" // /* MW 5 */ + 15594 "00000000" // /* MW 4 */ + 15595 "11010000" // /* MW 3 */ + 15596 "00011011" // /* MW 2 */ + 15597 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b924_wrapperPPv__end +.label __Z13_b924_wrapperPPv___func_end0 + 15607 "00000000" // /* MW 1 */ +.label __Z13_b919_wrapperPPv___func_begin0 +.label _Z13_b919_wrapperPPv +.function _b919_wrapper _Z13_b919_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 73 first +.src_ref 0 "0_0_reloadable5.cc" 75 79 +.function_start + 15616 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15617 "11000000" // /* MW 3 */ + 15618 "01100000" // /* MW 2 */ + 15619 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 75 79 first + 15620 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15621 "00011110" // /* MW 3 */ + 15622 "00011100" // /* MW 2 */ + 15623 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 76 79 first + 15624 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15625 "10011110" // /* MW 3 */ + 15626 "00101100" // /* MW 2 */ + 15627 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 78 81 first + 15628 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15629 "10011110" // /* MW 3 */ + 15630 "11110101" // /* MW 2 */ + 15631 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 77 47 first + 15632 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15633 "00011110" // /* MW 3 */ + 15634 "00000101" // /* MW 2 */ + 15635 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 74 4 first +.tail_call + 15636 "10000100" // J #13760 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13760 delay_slots=5 */ + 15637 "00000000" // /* MW 5 */ + 15638 "00000000" // /* MW 4 */ + 15639 "11100000" // /* MW 3 */ + 15640 "00011010" // /* MW 2 */ + 15641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b919_wrapperPPv__end +.label __Z13_b919_wrapperPPv___func_end0 + 15651 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 115 4 first +.function_start + 15664 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15665 "01000001" // /* MW 5 */ + 15666 "10100000" // /* MW 4 */ + 15667 "00101111" // /* MW 3 */ + 15668 "11000000" // /* MW 2 */ + 15669 "00000000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15670 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15671 "00011100" // /* MW 3 */ + 15672 "11000110" // /* MW 2 */ + 15673 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15674 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15675 "00011100" // /* MW 3 */ + 15676 "11000110" // /* MW 2 */ + 15677 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15678 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15679 "00011100" // /* MW 3 */ + 15680 "11000110" // /* MW 2 */ + 15681 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15682 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15683 "00011100" // /* MW 3 */ + 15684 "11000110" // /* MW 2 */ + 15685 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15686 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15687 "00011100" // /* MW 3 */ + 15688 "11000110" // /* MW 2 */ + 15689 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15690 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15691 "00011100" // /* MW 3 */ + 15692 "11000110" // /* MW 2 */ + 15693 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15694 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15695 "00011100" // /* MW 3 */ + 15696 "11000110" // /* MW 2 */ + 15697 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15698 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15699 "00011100" // /* MW 3 */ + 15700 "11000110" // /* MW 2 */ + 15701 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15702 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15703 "00011100" // /* MW 3 */ + 15704 "11000110" // /* MW 2 */ + 15705 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15706 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15707 "00011100" // /* MW 3 */ + 15708 "11000110" // /* MW 2 */ + 15709 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15710 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15711 "00011100" // /* MW 3 */ + 15712 "11000110" // /* MW 2 */ + 15713 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15714 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15715 "00011100" // /* MW 3 */ + 15716 "11000110" // /* MW 2 */ + 15717 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15718 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15719 "00011100" // /* MW 3 */ + 15720 "11000110" // /* MW 2 */ + 15721 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15722 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15723 "00011100" // /* MW 3 */ + 15724 "11000110" // /* MW 2 */ + 15725 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15726 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15727 "00011100" // /* MW 3 */ + 15728 "11000110" // /* MW 2 */ + 15729 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15730 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15731 "00011100" // /* MW 3 */ + 15732 "11000110" // /* MW 2 */ + 15733 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15735 "00011100" // /* MW 3 */ + 15736 "11000110" // /* MW 2 */ + 15737 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15739 "00011100" // /* MW 3 */ + 15740 "11000110" // /* MW 2 */ + 15741 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15743 "00011100" // /* MW 3 */ + 15744 "11000110" // /* MW 2 */ + 15745 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15747 "00011100" // /* MW 3 */ + 15748 "11000110" // /* MW 2 */ + 15749 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15751 "00011100" // /* MW 3 */ + 15752 "11000110" // /* MW 2 */ + 15753 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15755 "00011100" // /* MW 3 */ + 15756 "11000110" // /* MW 2 */ + 15757 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15759 "00011100" // /* MW 3 */ + 15760 "11000110" // /* MW 2 */ + 15761 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15763 "00011100" // /* MW 3 */ + 15764 "11000110" // /* MW 2 */ + 15765 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15767 "00011100" // /* MW 3 */ + 15768 "11000110" // /* MW 2 */ + 15769 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15771 "00011100" // /* MW 3 */ + 15772 "11000110" // /* MW 2 */ + 15773 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15775 "00011100" // /* MW 3 */ + 15776 "11000110" // /* MW 2 */ + 15777 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15779 "00011100" // /* MW 3 */ + 15780 "11000110" // /* MW 2 */ + 15781 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 119 first + 15782 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15783 "00000000" // /* MW 3 */ + 15784 "00101000" // /* MW 2 */ + 15785 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 first +.delay_slot + 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15787 "00011100" // /* MW 3 */ + 15788 "11000110" // /* MW 2 */ + 15789 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15791 "00011100" // /* MW 3 */ + 15792 "11000110" // /* MW 2 */ + 15793 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15795 "00011100" // /* MW 3 */ + 15796 "11000110" // /* MW 2 */ + 15797 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15799 "00011100" // /* MW 3 */ + 15800 "11000110" // /* MW 2 */ + 15801 "00010000" // /* MW 1 */ +.delay_slot + 15802 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15803 "10100000" // /* MW 3 */ + 15804 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 15805 "00011000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.txt new file mode 100644 index 0000000000000000000000000000000000000000..eaa1644fb33f11a55e17a2e7f02cedec89cc05c6 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/Release/0_2_reloadable5.txt @@ -0,0 +1,5263 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 205 0x2580 x +elementwise_binary_shared.h 211 0x2580 1 x +elementwise_binary_shared.h 216 0x2580 2 +elementwise_binary_shared.h 216 0x2580 3 +elementwise_binary_shared.h 216 0x258a +elementwise_binary_shared.h 211 0x2598 x +elementwise_binary_shared.h 212 0x259c x +elementwise_binary_shared.h 212 0x25ac +elementwise_binary_shared.h 213 0x25b0 x +elementwise_binary_shared.h 213 0x25c0 +elementwise_binary_shared.h 214 0x25c4 x +elementwise_binary_shared.h 214 0x25d4 +elementwise_binary_shared.h 216 0x25d8 x +elementwise_binary_shared.h 217 0x25dc x +elementwise_binary_shared.h 216 0x25e0 +elementwise_binary_shared.h 216 0x25e6 x +elementwise_binary_shared.h 216 0x25ea +elementwise_binary_shared.h 216 0x25ee +elementwise_binary_shared.h 107 0x2650 x +elementwise_binary_shared.h 119 0x2650 1 +elementwise_binary_shared.h 126 0x2650 2 +elementwise_binary_shared.h 131 0x2650 3 +elementwise_binary_shared.h 119 0x2654 x +elementwise_binary_shared.h 122 0x2658 x +elementwise_binary_shared.h 124 0x265c x +elementwise_binary_shared.h 124 0x2668 +elementwise_binary_shared.h 107 0x266c +elementwise_binary_shared.h 124 0x2672 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 65 0x2676 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 124 0x2676 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 65 0x2680 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 150 0x268c +elementwise_binary_shared.h 119 0x2692 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x2696 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 126 0x2696 1 +elementwise_binary_shared.h 126 0x2696 2 +elementwise_binary_shared.h 131 0x2696 3 +elementwise_binary_shared.h 131 0x2696 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26a0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 126 0x26a0 1 x +elementwise_binary_shared.h 131 0x26a0 2 x +elementwise_binary_shared.h 171 0x26a0 3 +elementwise_binary_shared.h 131 0x26b2 +elementwise_binary_shared.h 131 0x26b2 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26b8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x26b8 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x26b8 2 +elementwise_binary_shared.h 166 0x26bc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26c8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x26c8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x26da x +vector.hpp 1139 0x26e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x26e0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26e4 +vector.hpp 1159 0x26e4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 171 0x26e4 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26f6 +vector.hpp 1139 0x26f6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x26f6 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x26f6 3 +elementwise_binary_shared.h 173 0x26f6 4 +elementwise_binary_shared.h 150 0x2710 +elementwise_binary_shared.h 150 0x2714 x +elementwise_binary_shared.h 150 0x2718 +elementwise_binary_shared.h 150 0x271e +elementwise_binary_shared.h 150 0x2724 +elementwise_binary_shared.h 166 0x2724 1 +elementwise_binary_shared.h 150 0x2730 +elementwise_binary_shared.h 150 0x2740 +elementwise_binary_shared.h 150 0x2740 1 +elementwise_binary_shared.h 150 0x2740 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x274a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x274a 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x274a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x274e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x274e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2752 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 171 0x2752 1 +elementwise_binary_shared.h 150 0x2758 +elementwise_binary_shared.h 150 0x275c +elementwise_binary_shared.h 150 0x275c 1 +elementwise_binary_shared.h 150 0x2762 +elementwise_binary_shared.h 150 0x2766 +elementwise_binary_shared.h 150 0x276c +elementwise_binary_shared.h 150 0x2774 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x2784 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x278a x +vector.hpp 1139 0x2790 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x2790 1 x +elementwise_binary_shared.h 166 0x2790 2 x +elementwise_binary_shared.h 169 0x2790 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x279c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x279c 1 +elementwise_binary_shared.h 166 0x279c 2 +elementwise_binary_shared.h 171 0x279c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27a8 x +vector.hpp 1139 0x27a8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27a8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x27a8 3 x +elementwise_binary_shared.h 173 0x27a8 4 x +elementwise_binary_shared.h 177 0x27a8 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27b0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x27b0 1 x +elementwise_binary_shared.h 171 0x27b0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27b8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27b8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x27b8 2 x +elementwise_binary_shared.h 166 0x27be x +elementwise_binary_shared.h 166 0x27c2 +elementwise_binary_shared.h 177 0x27c2 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27ca x +vector.hpp 1139 0x27ca 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x27ca 2 x +elementwise_binary_shared.h 171 0x27ca 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27d0 +vector.hpp 1159 0x27d0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27d0 2 x +accum.hpp 1110 0x27d0 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x27d0 4 x +elementwise_binary_shared.h 185 0x27d0 5 +elementwise_binary_shared.h 177 0x27f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2800 x +vector.hpp 1139 0x2800 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x2800 2 x +elementwise_binary_shared.h 171 0x2800 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2810 +vector.hpp 1159 0x2810 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2810 2 x +accum.hpp 1110 0x2810 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x2810 4 x +elementwise_binary_shared.h 185 0x2810 5 x +elementwise_binary_shared.h 177 0x2830 x +elementwise_binary_shared.h 187 0x2840 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2846 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2846 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2846 2 x +elementwise_binary_shared.h 177 0x284c x +elementwise_binary_shared.h 187 0x2852 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2856 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2856 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2856 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2860 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2860 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2860 2 +elementwise_binary_shared.h 205 0x2b00 x +elementwise_binary_shared.h 211 0x2b00 1 x +elementwise_binary_shared.h 216 0x2b00 2 +elementwise_binary_shared.h 216 0x2b00 3 +elementwise_binary_shared.h 216 0x2b0a +elementwise_binary_shared.h 211 0x2b18 x +elementwise_binary_shared.h 212 0x2b1c x +elementwise_binary_shared.h 212 0x2b2c +elementwise_binary_shared.h 213 0x2b30 x +elementwise_binary_shared.h 213 0x2b40 +elementwise_binary_shared.h 214 0x2b44 x +elementwise_binary_shared.h 214 0x2b54 +elementwise_binary_shared.h 216 0x2b58 x +elementwise_binary_shared.h 217 0x2b5c x +elementwise_binary_shared.h 216 0x2b60 +elementwise_binary_shared.h 216 0x2b66 x +elementwise_binary_shared.h 216 0x2b6a +elementwise_binary_shared.h 216 0x2b6e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x32e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 199 0x32e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x32e4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x32e4 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x32ea +io_buffer_main.h 125 0x32ea 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x32f0 x +conv2d_dw_bf16.h 221 0x32f4 x +conv2d_dw_bf16.h 221 0x32f8 +conv2d_dw_bf16.h 221 0x32fc +conv2d_dw_bf16.h 221 0x3300 +conv2d_dw_bf16.h 221 0x3304 +conv2d_dw_bf16.h 222 0x3308 x +conv2d_dw_bf16.h 222 0x330c +conv2d_dw_bf16.h 222 0x3310 +conv2d_dw_bf16.h 222 0x3314 +conv2d_dw_bf16.h 222 0x3318 +conv2d_dw_bf16.h 223 0x331c x +conv2d_dw_bf16.h 223 0x3320 +conv2d_dw_bf16.h 223 0x3324 +conv2d_dw_bf16.h 223 0x3328 +conv2d_dw_bf16.h 223 0x332c +conv2d_dw_bf16.h 224 0x3330 x +conv2d_dw_bf16.h 224 0x3334 +conv2d_dw_bf16.h 224 0x3338 +conv2d_dw_bf16.h 244 0x3338 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3342 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3342 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x3342 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3342 3 x +conv2d_dw_bf16.h 225 0x3348 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x334c +aie_core.h 81 0x334c 1 +aie_core.h 100 0x334c 2 +aie_core.h 100 0x334c 3 +aie_core.h 100 0x334c 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x334c 5 +vector.hpp 1139 0x334c 6 +vector.hpp 1139 0x334c 7 x +vector.hpp 1139 0x334c 8 x +vector.hpp 1159 0x334c 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x334c 10 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x334c 11 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3358 +aie_core.h 81 0x3358 1 +aie_core.h 100 0x3358 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3358 3 +vector.hpp 1139 0x3358 4 +vector.hpp 1159 0x3358 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x3358 6 x +conv2d_dw_bf16.h 225 0x3358 7 x +conv2d_dw_bf16.h 244 0x3358 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3366 +aie_core.h 100 0x3366 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3366 2 +vector.hpp 1159 0x3366 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x3366 4 +conv2d_dw_bf16.h 225 0x3366 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3370 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3370 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x3370 2 +conv2d_dw_bf16.h 225 0x3370 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x337a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x337a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x337a 2 +conv2d_dw_bf16.h 244 0x337a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3384 +shuffle.hpp 142 0x3384 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3384 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x338a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x338a 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x338a 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x3396 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3396 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3396 2 x +conv2d_dw_bf16.h 250 0x3396 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x33a2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33a2 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33a2 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x33a8 +conv2d_dw_bf16.h 244 0x33ac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33b6 +shuffle.hpp 142 0x33b6 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 268 0x33b6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33c0 +shuffle.hpp 142 0x33c0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x33c0 2 +conv2d_dw_bf16.h 271 0x33c0 3 +conv2d_dw_bf16.h 272 0x33c0 4 +conv2d_dw_bf16.h 273 0x33c0 5 +conv2d_dw_bf16.h 274 0x33c0 6 +conv2d_dw_bf16.h 275 0x33c0 7 +conv2d_dw_bf16.h 276 0x33c0 8 +conv2d_dw_bf16.h 277 0x33c0 9 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x33d0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x33d0 1 +accum.hpp 1110 0x33d0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 265 0x33d0 3 x +conv2d_dw_bf16.h 270 0x33d0 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x33e0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x33e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x33e0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x33e0 3 x +conv2d_dw_bf16.h 274 0x33e0 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x33f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33f0 1 x +vector.hpp 1139 0x33f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33f0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33fa + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 266 0x33fa 1 x +conv2d_dw_bf16.h 271 0x33fa 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x3404 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3404 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3404 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3404 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x340e +shuffle.hpp 142 0x3412 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 272 0x3412 1 x +conv2d_dw_bf16.h 267 0x341a x +conv2d_dw_bf16.h 276 0x341a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3422 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 268 0x3426 x +conv2d_dw_bf16.h 273 0x3426 1 x +conv2d_dw_bf16.h 265 0x342e x +conv2d_dw_bf16.h 277 0x342e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3436 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x3440 x +conv2d_dw_bf16.h 274 0x3450 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3460 x +aie_core.h 100 0x3460 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3460 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 266 0x346a x +conv2d_dw_bf16.h 271 0x346a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3472 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3472 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x347a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 267 0x347e x +conv2d_dw_bf16.h 272 0x347e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3486 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 276 0x3486 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3490 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3490 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3490 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x3496 x +conv2d_dw_bf16.h 273 0x3496 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34a0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x34a0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x34a0 2 +conv2d_dw_bf16.h 277 0x34a0 3 x +conv2d_dw_bf16.h 250 0x34ac x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34b0 x +vector.hpp 1139 0x34b4 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x34b8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34b8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x34bc x +accum.hpp 1110 0x34c0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x34c4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 286 0x34c8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34cc x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x34cc 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 285 0x34cc 2 x +conv2d_dw_bf16.h 268 0x34d4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x34d8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34d8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x34d8 2 +conv2d_dw_bf16.h 265 0x34e0 x +conv2d_dw_bf16.h 270 0x34e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x34e8 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 274 0x34e8 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x34f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34f0 1 x +vector.hpp 1139 0x34f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x34f0 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34fa + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 266 0x34fa 1 x +conv2d_dw_bf16.h 271 0x34fa 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x3504 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3504 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3504 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3504 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x350e +shuffle.hpp 142 0x3512 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 272 0x3512 1 x +conv2d_dw_bf16.h 267 0x351a x +conv2d_dw_bf16.h 276 0x351a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3522 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 268 0x3526 x +conv2d_dw_bf16.h 273 0x3526 1 x +conv2d_dw_bf16.h 265 0x352e x +conv2d_dw_bf16.h 277 0x352e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3536 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x353c x +conv2d_dw_bf16.h 274 0x3540 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3550 x +aie_core.h 100 0x3550 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3550 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3550 3 x +conv2d_dw_bf16.h 266 0x355c x +conv2d_dw_bf16.h 271 0x355c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3564 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3564 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x356c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 267 0x3570 x +conv2d_dw_bf16.h 272 0x3570 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3578 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 276 0x3578 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3580 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 273 0x3584 x +conv2d_dw_bf16.h 277 0x3588 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3594 x +accum.hpp 1110 0x3598 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 290 0x3598 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x359e x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 286 0x35a2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x35a6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x35aa x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 285 0x35aa 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x35ae x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x35ae 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 432 0xac0 x +conv2d_bf16_params.h 438 0xac0 1 x +conv2d_bf16_params.h 452 0xac0 2 +conv2d_bf16_params.h 453 0xac0 3 +conv2d_bf16_params.h 458 0xac0 4 +conv2d_bf16_params.h 470 0xac0 5 +conv2d_bf16_params.h 438 0xaca +conv2d_bf16_params.h 438 0xaca 1 x +conv2d_bf16_params.h 452 0xaca 2 +conv2d_bf16_params.h 462 0xaca 3 +conv2d_bf16_params.h 432 0xad4 +conv2d_bf16_params.h 444 0xad4 1 +conv2d_bf16_params.h 453 0xade +conv2d_bf16_params.h 458 0xade 1 +conv2d_bf16_params.h 458 0xade 2 +conv2d_bf16_params.h 444 0xaea +conv2d_bf16_params.h 470 0xaea 1 +conv2d_bf16_params.h 477 0xaea 2 +conv2d_bf16_params.h 557 0xaea 3 +conv2d_bf16_params.h 452 0xaf6 +conv2d_bf16_params.h 458 0xaf6 1 +conv2d_bf16_params.h 462 0xaf6 2 +conv2d_bf16_params.h 438 0xafe +conv2d_bf16_params.h 438 0xb02 +conv2d_bf16_params.h 438 0xb06 +conv2d_bf16_params.h 438 0xb0a +conv2d_bf16_params.h 438 0xb18 +conv2d_bf16_params.h 438 0xb1c +conv2d_bf16_params.h 438 0xb20 +conv2d_bf16_params.h 438 0xb24 +conv2d_bf16_params.h 438 0xb32 +conv2d_bf16_params.h 438 0xb36 +conv2d_bf16_params.h 438 0xb3a +conv2d_bf16_params.h 438 0xb3e +conv2d_bf16_params.h 438 0xb4c +conv2d_bf16_params.h 438 0xb50 +conv2d_bf16_params.h 444 0xb54 x +conv2d_bf16_params.h 447 0xb58 x +conv2d_bf16_params.h 448 0xb5c x +conv2d_bf16_params.h 452 0xb60 x +conv2d_bf16_params.h 453 0xb64 x +conv2d_bf16_params.h 458 0xb68 x +conv2d_bf16_params.h 444 0xb6e x +conv2d_bf16_params.h 458 0xb72 x +conv2d_bf16_params.h 462 0xb72 1 x +conv2d_bf16_params.h 462 0xb78 +conv2d_bf16_params.h 452 0xb7c x +conv2d_bf16_params.h 452 0xb80 +conv2d_bf16_params.h 462 0xb80 1 x +conv2d_bf16_params.h 557 0xb80 2 +conv2d_bf16_params.h 462 0xb86 +conv2d_bf16_params.h 458 0xb8a x +conv2d_bf16_params.h 458 0xb8e +conv2d_bf16_params.h 458 0xb92 +conv2d_bf16_params.h 477 0xb92 1 +conv2d_bf16_params.h 557 0xb92 2 x +conv2d_bf16_params.h 458 0xb98 x +conv2d_bf16_params.h 458 0xb9e +conv2d_bf16_params.h 477 0xb9e 1 x +conv2d_bf16_params.h 458 0xba4 x +conv2d_bf16_params.h 444 0xba8 x +conv2d_bf16_params.h 462 0xbac x +conv2d_bf16_params.h 470 0xbb0 x +conv2d_bf16_params.h 470 0xbb4 +conv2d_bf16_params.h 477 0xbb4 1 x +conv2d_bf16_params.h 477 0xbb8 +conv2d_bf16_params.h 491 0xbc8 +conv2d_bf16_params.h 492 0xbc8 1 +conv2d_bf16_params.h 495 0xbc8 2 +conv2d_bf16_params.h 502 0xbc8 3 +conv2d_bf16_params.h 533 0xbc8 4 +conv2d_bf16_params.h 539 0xbc8 5 +conv2d_bf16_params.h 557 0xbc8 6 +conv2d_bf16_params.h 621 0xbc8 7 +conv2d_bf16_params.h 645 0xbc8 8 +conv2d_bf16_params.h 709 0xbc8 9 +conv2d_bf16_params.h 477 0xbd2 +conv2d_bf16_params.h 481 0xbd2 1 +conv2d_bf16_params.h 500 0xbd2 2 +conv2d_bf16_params.h 506 0xbd2 3 +conv2d_bf16_params.h 507 0xbd2 4 +conv2d_bf16_params.h 524 0xbd2 5 +conv2d_bf16_params.h 539 0xbd2 6 +conv2d_bf16_params.h 655 0xbd2 7 +conv2d_bf16_params.h 477 0xbdc +conv2d_bf16_params.h 504 0xbdc 1 +conv2d_bf16_params.h 510 0xbdc 2 +conv2d_bf16_params.h 520 0xbdc 3 +conv2d_bf16_params.h 700 0xbdc 4 +conv2d_bf16_params.h 477 0xbe2 +conv2d_bf16_params.h 539 0xbe2 1 +conv2d_bf16_params.h 578 0xbe2 2 +conv2d_bf16_params.h 642 0xbe2 3 +conv2d_bf16_params.h 529 0xbe6 +conv2d_bf16_params.h 642 0xbe6 1 +conv2d_bf16_params.h 642 0xbe6 2 +conv2d_bf16_params.h 655 0xbea +conv2d_bf16_params.h 453 0xbf0 +conv2d_bf16_params.h 453 0xbf0 1 +conv2d_bf16_params.h 477 0xbf0 2 +conv2d_bf16_params.h 504 0xbf0 3 +conv2d_bf16_params.h 655 0xbf0 4 +conv2d_bf16_params.h 453 0xbfc x +conv2d_bf16_params.h 477 0xbfc 1 +conv2d_bf16_params.h 481 0xbfc 2 +conv2d_bf16_params.h 500 0xbfc 3 +conv2d_bf16_params.h 506 0xbfc 4 +conv2d_bf16_params.h 507 0xbfc 5 +conv2d_bf16_params.h 524 0xbfc 6 +conv2d_bf16_params.h 539 0xbfc 7 +conv2d_bf16_params.h 491 0xc06 +conv2d_bf16_params.h 492 0xc06 1 +conv2d_bf16_params.h 495 0xc06 2 +conv2d_bf16_params.h 502 0xc06 3 +conv2d_bf16_params.h 510 0xc06 4 +conv2d_bf16_params.h 520 0xc06 5 +conv2d_bf16_params.h 533 0xc06 6 +conv2d_bf16_params.h 539 0xc06 7 +conv2d_bf16_params.h 557 0xc06 8 +conv2d_bf16_params.h 621 0xc06 9 +conv2d_bf16_params.h 645 0xc06 10 +conv2d_bf16_params.h 655 0xc06 11 +conv2d_bf16_params.h 700 0xc06 12 +conv2d_bf16_params.h 709 0xc06 13 +conv2d_bf16_params.h 477 0xc10 +conv2d_bf16_params.h 529 0xc10 1 +conv2d_bf16_params.h 539 0xc10 2 +conv2d_bf16_params.h 578 0xc10 3 +conv2d_bf16_params.h 642 0xc10 4 +conv2d_bf16_params.h 642 0xc10 5 +conv2d_bf16_params.h 642 0xc10 6 +conv2d_bf16_params.h 477 0xc20 x +conv2d_bf16_params.h 495 0xc20 1 x +conv2d_bf16_params.h 495 0xc20 2 +conv2d_bf16_params.h 682 0xc20 3 +conv2d_bf16_params.h 477 0xc2a +conv2d_bf16_params.h 481 0xc2a 1 x +conv2d_bf16_params.h 495 0xc2a 2 +conv2d_bf16_params.h 495 0xc2a 3 +conv2d_bf16_params.h 477 0xc34 x +conv2d_bf16_params.h 496 0xc34 1 +conv2d_bf16_params.h 504 0xc34 2 +conv2d_bf16_params.h 539 0xc34 3 +conv2d_bf16_params.h 578 0xc34 4 +conv2d_bf16_params.h 496 0xc3e +conv2d_bf16_params.h 499 0xc3e 1 +conv2d_bf16_params.h 504 0xc3e 2 x +conv2d_bf16_params.h 509 0xc3e 3 +conv2d_bf16_params.h 519 0xc3e 4 +conv2d_bf16_params.h 700 0xc3e 5 +conv2d_bf16_params.h 492 0xc48 x +conv2d_bf16_params.h 497 0xc48 1 +conv2d_bf16_params.h 509 0xc48 2 +conv2d_bf16_params.h 500 0xc52 +conv2d_bf16_params.h 520 0xc52 1 x +conv2d_bf16_params.h 502 0xc58 +conv2d_bf16_params.h 520 0xc58 1 +conv2d_bf16_params.h 502 0xc62 +conv2d_bf16_params.h 507 0xc62 1 x +conv2d_bf16_params.h 495 0xc68 x +conv2d_bf16_params.h 495 0xc6c +conv2d_bf16_params.h 495 0xc6c 1 +conv2d_bf16_params.h 610 0xc6c 2 +conv2d_bf16_params.h 709 0xc6c 3 +conv2d_bf16_params.h 507 0xc72 x +conv2d_bf16_params.h 495 0xc76 x +conv2d_bf16_params.h 495 0xc7a +conv2d_bf16_params.h 506 0xc7a 1 +conv2d_bf16_params.h 519 0xc7a 2 x +conv2d_bf16_params.h 496 0xc84 x +conv2d_bf16_params.h 504 0xc84 1 x +conv2d_bf16_params.h 522 0xc84 2 +conv2d_bf16_params.h 509 0xc8e x +conv2d_bf16_params.h 496 0xc94 x +conv2d_bf16_params.h 520 0xc94 1 x +conv2d_bf16_params.h 529 0xc94 2 +conv2d_bf16_params.h 497 0xc9e x +conv2d_bf16_params.h 509 0xc9e 1 x +conv2d_bf16_params.h 533 0xc9e 2 +conv2d_bf16_params.h 539 0xca8 x +conv2d_bf16_params.h 499 0xcac x +conv2d_bf16_params.h 499 0xcb0 +conv2d_bf16_params.h 529 0xcb4 x +conv2d_bf16_params.h 507 0xcb8 x +conv2d_bf16_params.h 511 0xcb8 1 +conv2d_bf16_params.h 491 0xcbe x +conv2d_bf16_params.h 507 0xcbe 1 +conv2d_bf16_params.h 500 0xcc8 x +conv2d_bf16_params.h 511 0xcc8 1 x +conv2d_bf16_params.h 500 0xcce +conv2d_bf16_params.h 534 0xcce 1 +conv2d_bf16_params.h 502 0xcd6 x +conv2d_bf16_params.h 509 0xcd6 1 x +conv2d_bf16_params.h 642 0xcd6 2 +conv2d_bf16_params.h 510 0xce2 x +conv2d_bf16_params.h 506 0xce6 x +conv2d_bf16_params.h 527 0xcea x +conv2d_bf16_params.h 502 0xcf4 x +conv2d_bf16_params.h 502 0xcf8 +conv2d_bf16_params.h 506 0xcfc x +conv2d_bf16_params.h 506 0xd0c +conv2d_bf16_params.h 506 0xd10 +conv2d_bf16_params.h 510 0xd14 x +conv2d_bf16_params.h 510 0xd18 +conv2d_bf16_params.h 510 0xd1e +conv2d_bf16_params.h 510 0xd22 +conv2d_bf16_params.h 510 0xd28 +conv2d_bf16_params.h 539 0xd28 1 +conv2d_bf16_params.h 642 0xd28 2 +conv2d_bf16_params.h 511 0xd2e x +conv2d_bf16_params.h 524 0xd2e 1 +conv2d_bf16_params.h 539 0xd2e 2 +conv2d_bf16_params.h 512 0xd34 x +conv2d_bf16_params.h 524 0xd34 1 x +conv2d_bf16_params.h 524 0xd3a +conv2d_bf16_params.h 524 0xd3e +conv2d_bf16_params.h 520 0xd42 x +conv2d_bf16_params.h 511 0xd46 x +conv2d_bf16_params.h 522 0xd46 1 x +conv2d_bf16_params.h 524 0xd4c x +conv2d_bf16_params.h 529 0xd4c 1 x +conv2d_bf16_params.h 539 0xd4c 2 x +conv2d_bf16_params.h 534 0xd56 +conv2d_bf16_params.h 539 0xd56 1 +conv2d_bf16_params.h 527 0xd5c x +conv2d_bf16_params.h 533 0xd5c 1 x +conv2d_bf16_params.h 529 0xd6a x +conv2d_bf16_params.h 533 0xd6a 1 +conv2d_bf16_params.h 539 0xd70 x +conv2d_bf16_params.h 529 0xd76 x +conv2d_bf16_params.h 529 0xd76 1 +conv2d_bf16_params.h 529 0xd7c +conv2d_bf16_params.h 534 0xd80 x +conv2d_bf16_params.h 534 0xd84 +conv2d_bf16_params.h 539 0xd84 1 x +conv2d_bf16_params.h 555 0xd84 2 +conv2d_bf16_params.h 559 0xd84 3 +conv2d_bf16_params.h 700 0xd84 4 +conv2d_bf16_params.h 669 0xd8e +conv2d_bf16_params.h 700 0xd8e 1 +conv2d_bf16_params.h 539 0xd92 +conv2d_bf16_params.h 539 0xda2 +conv2d_bf16_params.h 539 0xdb2 +conv2d_bf16_params.h 539 0xdb2 1 +conv2d_bf16_params.h 539 0xdb2 2 +conv2d_bf16_params.h 539 0xdb2 3 +conv2d_bf16_params.h 539 0xdbc +conv2d_bf16_params.h 539 0xdc0 +conv2d_bf16_params.h 539 0xdc4 +conv2d_bf16_params.h 539 0xdc4 1 +conv2d_bf16_params.h 539 0xdca +conv2d_bf16_params.h 539 0xdce +conv2d_bf16_params.h 539 0xdd2 +conv2d_bf16_params.h 669 0xdd2 1 +conv2d_bf16_params.h 539 0xdd8 +conv2d_bf16_params.h 539 0xddc +conv2d_bf16_params.h 539 0xde0 +conv2d_bf16_params.h 539 0xde4 +conv2d_bf16_params.h 555 0xde8 x +conv2d_bf16_params.h 642 0xdf0 +conv2d_bf16_params.h 669 0xdf0 1 +conv2d_bf16_params.h 669 0xdf0 2 +conv2d_bf16_params.h 669 0xdfa x +conv2d_bf16_params.h 497 0xdfe x +conv2d_bf16_params.h 641 0xdfe 1 x +conv2d_bf16_params.h 645 0xdfe 2 +conv2d_bf16_params.h 559 0xe08 x +conv2d_bf16_params.h 640 0xe08 1 +conv2d_bf16_params.h 642 0xe08 2 +conv2d_bf16_params.h 642 0xe08 3 +conv2d_bf16_params.h 642 0xe12 x +conv2d_bf16_params.h 578 0xe16 x +conv2d_bf16_params.h 640 0xe1a x +conv2d_bf16_params.h 557 0xe1e +conv2d_bf16_params.h 645 0xe1e 1 +conv2d_bf16_params.h 641 0xe28 x +conv2d_bf16_params.h 642 0xe28 1 x +conv2d_bf16_params.h 642 0xe2e +conv2d_bf16_params.h 642 0xe2e 1 +conv2d_bf16_params.h 558 0xe32 x +conv2d_bf16_params.h 645 0xe32 1 +conv2d_bf16_params.h 540 0xe38 +conv2d_bf16_params.h 645 0xe38 1 x +conv2d_bf16_params.h 540 0xe3e x +conv2d_bf16_params.h 557 0xe3e 1 +conv2d_bf16_params.h 642 0xe44 x +conv2d_bf16_params.h 557 0xe48 x +conv2d_bf16_params.h 655 0xe48 1 +conv2d_bf16_params.h 558 0xe4e +conv2d_bf16_params.h 655 0xe4e 1 x +conv2d_bf16_params.h 558 0xe54 x +conv2d_bf16_params.h 540 0xe58 x +conv2d_bf16_params.h 655 0xe58 1 +conv2d_bf16_params.h 655 0xe58 2 +conv2d_bf16_params.h 679 0xe58 3 +conv2d_bf16_params.h 655 0xe62 x +conv2d_bf16_params.h 558 0xe66 x +conv2d_bf16_params.h 655 0xe66 1 +conv2d_bf16_params.h 655 0xe66 2 +conv2d_bf16_params.h 679 0xe66 3 +conv2d_bf16_params.h 655 0xe70 x +conv2d_bf16_params.h 126 0xe74 x +conv2d_bf16_params.h 559 0xe74 1 x +conv2d_bf16_params.h 669 0xe7a x +conv2d_bf16_params.h 700 0xe7a 1 +conv2d_bf16_params.h 558 0xe80 x +conv2d_bf16_params.h 700 0xe86 x +conv2d_bf16_params.h 578 0xe8a x +conv2d_bf16_params.h 559 0xe8e x +conv2d_bf16_params.h 578 0xe92 x +conv2d_bf16_params.h 610 0xe96 x +conv2d_bf16_params.h 611 0xe96 1 +conv2d_bf16_params.h 621 0xe96 2 +conv2d_bf16_params.h 621 0xe96 3 +conv2d_bf16_params.h 629 0xe96 4 +conv2d_bf16_params.h 621 0xea2 +conv2d_bf16_params.h 621 0xea2 1 x +conv2d_bf16_params.h 645 0xea2 2 +conv2d_bf16_params.h 649 0xea2 3 +conv2d_bf16_params.h 645 0xea8 +conv2d_bf16_params.h 554 0xeae x +conv2d_bf16_params.h 645 0xeae 1 x +conv2d_bf16_params.h 554 0xeb8 +conv2d_bf16_params.h 555 0xeb8 1 +conv2d_bf16_params.h 555 0xeb8 2 x +conv2d_bf16_params.h 645 0xeb8 3 +conv2d_bf16_params.h 555 0xec4 +conv2d_bf16_params.h 621 0xec4 1 +conv2d_bf16_params.h 621 0xec4 2 x +conv2d_bf16_params.h 645 0xec4 3 +conv2d_bf16_params.h 558 0xece x +conv2d_bf16_params.h 559 0xece 1 +conv2d_bf16_params.h 621 0xece 2 +conv2d_bf16_params.h 621 0xece 3 +conv2d_bf16_params.h 645 0xece 4 +conv2d_bf16_params.h 559 0xeda x +conv2d_bf16_params.h 621 0xeda 1 x +conv2d_bf16_params.h 645 0xeda 2 x +conv2d_bf16_params.h 610 0xee0 x +conv2d_bf16_params.h 621 0xee0 1 +conv2d_bf16_params.h 655 0xee0 2 +conv2d_bf16_params.h 679 0xee0 3 +conv2d_bf16_params.h 621 0xeec +conv2d_bf16_params.h 649 0xeec 1 +conv2d_bf16_params.h 655 0xeec 2 x +conv2d_bf16_params.h 661 0xeec 3 +conv2d_bf16_params.h 127 0xef6 x +conv2d_bf16_params.h 127 0xef6 1 x +conv2d_bf16_params.h 621 0xef6 2 +conv2d_bf16_params.h 649 0xef6 3 +conv2d_bf16_params.h 655 0xef6 4 +conv2d_bf16_params.h 679 0xef6 5 +conv2d_bf16_params.h 710 0xef6 6 +conv2d_bf16_params.h 710 0xef6 7 +conv2d_bf16_params.h 655 0xf00 x +conv2d_bf16_params.h 679 0xf00 1 x +conv2d_bf16_params.h 621 0xf06 x +conv2d_bf16_params.h 649 0xf06 1 x +conv2d_bf16_params.h 655 0xf06 2 +conv2d_bf16_params.h 655 0xf06 3 +conv2d_bf16_params.h 700 0xf06 4 +conv2d_bf16_params.h 700 0xf06 5 +conv2d_bf16_params.h 655 0xf10 x +conv2d_bf16_params.h 700 0xf10 1 x +conv2d_bf16_params.h 629 0xf14 x +conv2d_bf16_params.h 611 0xf18 x +conv2d_bf16_params.h 643 0xf26 x +conv2d_bf16_params.h 664 0xf2a +conv2d_bf16_params.h 621 0xf30 x +conv2d_bf16_params.h 629 0xf30 1 +conv2d_bf16_params.h 684 0xf30 2 x +conv2d_bf16_params.h 629 0xf3a x +conv2d_bf16_params.h 127 0xf40 x +conv2d_bf16_params.h 644 0xf40 1 +conv2d_bf16_params.h 700 0xf40 2 x +conv2d_bf16_params.h 705 0xf40 3 +conv2d_bf16_params.h 705 0xf40 4 +conv2d_bf16_params.h 645 0xf4a x +conv2d_bf16_params.h 700 0xf4a 1 +conv2d_bf16_params.h 700 0xf4a 2 +conv2d_bf16_params.h 705 0xf4a 3 +conv2d_bf16_params.h 644 0xf54 +conv2d_bf16_params.h 649 0xf54 1 x +conv2d_bf16_params.h 674 0xf54 2 +conv2d_bf16_params.h 644 0xf5e x +conv2d_bf16_params.h 662 0xf5e 1 +conv2d_bf16_params.h 664 0xf5e 2 x +conv2d_bf16_params.h 127 0xf68 x +conv2d_bf16_params.h 663 0xf68 1 x +conv2d_bf16_params.h 664 0xf68 2 +conv2d_bf16_params.h 126 0xf6e x +conv2d_bf16_params.h 664 0xf6e 1 x +conv2d_bf16_params.h 126 0xf74 +conv2d_bf16_params.h 664 0xf74 1 +conv2d_bf16_params.h 127 0xf7a x +conv2d_bf16_params.h 127 0xf7a 1 x +conv2d_bf16_params.h 664 0xf7a 2 +conv2d_bf16_params.h 664 0xf7a 3 +conv2d_bf16_params.h 675 0xf7a 4 +conv2d_bf16_params.h 696 0xf7a 5 +conv2d_bf16_params.h 644 0xf84 x +conv2d_bf16_params.h 664 0xf84 1 x +conv2d_bf16_params.h 705 0xf84 2 +conv2d_bf16_params.h 664 0xf8e +conv2d_bf16_params.h 705 0xf8e 1 x +conv2d_bf16_params.h 705 0xf8e 2 x +conv2d_bf16_params.h 127 0xf94 +conv2d_bf16_params.h 674 0xf94 1 x +conv2d_bf16_params.h 675 0xf94 2 x +conv2d_bf16_params.h 682 0xf94 3 +conv2d_bf16_params.h 718 0xf94 4 +conv2d_bf16_params.h 720 0xf94 5 +conv2d_bf16_params.h 127 0xf9e x +conv2d_bf16_params.h 642 0xf9e 1 +conv2d_bf16_params.h 675 0xf9e 2 +conv2d_bf16_params.h 675 0xfa8 x +conv2d_bf16_params.h 707 0xfa8 1 x +conv2d_bf16_params.h 642 0xfae +conv2d_bf16_params.h 674 0xfae 1 x +conv2d_bf16_params.h 675 0xfae 2 +conv2d_bf16_params.h 642 0xfb8 x +conv2d_bf16_params.h 655 0xfb8 1 +conv2d_bf16_params.h 655 0xfb8 2 +conv2d_bf16_params.h 675 0xfb8 3 x +conv2d_bf16_params.h 679 0xfb8 4 +conv2d_bf16_params.h 679 0xfb8 5 +conv2d_bf16_params.h 655 0xfc4 x +conv2d_bf16_params.h 679 0xfc4 1 x +conv2d_bf16_params.h 713 0xfc4 2 +conv2d_bf16_params.h 691 0xfca x +conv2d_bf16_params.h 675 0xfce +conv2d_bf16_params.h 675 0xfce 1 x +conv2d_bf16_params.h 709 0xfce 2 x +conv2d_bf16_params.h 675 0xfd8 +conv2d_bf16_params.h 706 0xfd8 1 x +conv2d_bf16_params.h 706 0xfd8 2 +conv2d_bf16_params.h 709 0xfd8 3 +conv2d_bf16_params.h 682 0xfe4 x +conv2d_bf16_params.h 706 0xfe4 1 +conv2d_bf16_params.h 126 0xfea x +conv2d_bf16_params.h 696 0xfea 1 x +conv2d_bf16_params.h 127 0xff0 x +conv2d_bf16_params.h 127 0xff0 1 x +conv2d_bf16_params.h 696 0xff0 2 +conv2d_bf16_params.h 696 0xff6 x +conv2d_bf16_params.h 713 0xff6 1 x +conv2d_bf16_params.h 696 0xffc +conv2d_bf16_params.h 706 0xffc 1 +conv2d_bf16_params.h 706 0xffc 2 x +conv2d_bf16_params.h 706 0x1006 +conv2d_bf16_params.h 696 0x100a x +conv2d_bf16_params.h 707 0x100a 1 x +conv2d_bf16_params.h 696 0x1010 +conv2d_bf16_params.h 709 0x1010 1 x +conv2d_bf16_params.h 696 0x1016 x +conv2d_bf16_params.h 709 0x1016 1 +conv2d_bf16_params.h 707 0x1020 x +conv2d_bf16_params.h 708 0x1020 1 +conv2d_bf16_params.h 710 0x1020 2 x +conv2d_bf16_params.h 710 0x1020 3 x +conv2d_bf16_params.h 708 0x102c x +conv2d_bf16_params.h 713 0x102c 1 x +conv2d_bf16_params.h 709 0x1036 x +conv2d_bf16_params.h 800 0x1036 1 x +conv2d_bf16_params.h 710 0x103c x +conv2d_bf16_params.h 718 0x1044 x +conv2d_bf16_params.h 718 0x1048 +conv2d_bf16_params.h 720 0x104c x +conv2d_bf16_params.h 800 0x104c 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x1060 +utils.h 531 0x1060 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 689 0x1060 2 x +conv2d_bf16.h 698 0x1060 3 +conv2d_bf16.h 704 0x1060 4 +conv2d_bf16.h 707 0x1060 5 +conv2d_bf16.h 707 0x1060 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x106c +utils.h 526 0x106c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 698 0x106c 2 x +conv2d_bf16.h 704 0x106c 3 x +conv2d_bf16.h 707 0x106c 4 +conv2d_bf16.h 707 0x106c 5 +conv2d_bf16.h 698 0x107a +conv2d_bf16.h 702 0x107a 1 +conv2d_bf16.h 698 0x1084 +conv2d_bf16.h 702 0x1084 1 x +conv2d_bf16.h 699 0x108e x +conv2d_bf16.h 702 0x108e 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1098 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 702 0x1098 1 x +conv2d_bf16.h 702 0x109e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x10a6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x10a6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x10ac x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 704 0x10b0 x +conv2d_bf16.h 702 0x10b4 x +conv2d_bf16.h 705 0x10b4 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x10ba x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 707 0x10ba 1 +conv2d_bf16.h 707 0x10ba 2 +conv2d_bf16.h 704 0x10c0 x +conv2d_bf16.h 705 0x10c6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x10d0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x10d0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x10d0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x10e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x10e0 1 x +conv2d_bf16.h 704 0x10f0 x +conv2d_bf16.h 705 0x1100 x +conv2d_bf16.h 707 0x1100 1 x +conv2d_bf16.h 707 0x1100 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1110 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x1110 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1110 2 +conv2d_bf16.h 708 0x1110 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1120 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1120 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x1120 2 x +conv2d_bf16.h 707 0x1132 x +conv2d_bf16.h 707 0x1132 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1136 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1136 1 x +conv2d_bf16.h 708 0x1136 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x113e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x113e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1142 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1146 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1146 1 x +conv2d_bf16.h 707 0x1146 2 x +conv2d_bf16.h 707 0x1146 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x114e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x114e 1 x +conv2d_bf16.h 708 0x114e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1156 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 707 0x115a x +conv2d_bf16.h 707 0x115a 1 x +conv2d_bf16.h 723 0x115a 2 x +conv2d_bf16.h 708 0x1160 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1164 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1170 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1836 0x1170 1 +conv2d_bf16.h 1836 0x1170 2 x +conv2d_bf16.h 1836 0x1170 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 240 0x1170 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1836 0x117e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 241 0x117e 1 +conv2d_bf16_params.h 242 0x117e 2 +conv2d_bf16_params.h 250 0x117e 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 866 0x118a +conv2d_bf16.h 876 0x118a 1 +conv2d_bf16.h 876 0x118a 2 +conv2d_bf16.h 881 0x118a 3 +conv2d_bf16.h 1836 0x118a 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 242 0x118a 5 +conv2d_bf16_params.h 242 0x118a 6 +conv2d_bf16_params.h 242 0x118a 7 +conv2d_bf16_params.h 242 0x118a 8 +conv2d_bf16_params.h 242 0x118a 9 +conv2d_bf16_params.h 243 0x118a 10 +conv2d_bf16_params.h 245 0x118a 11 +conv2d_bf16_params.h 250 0x118a 12 +conv2d_bf16_params.h 250 0x118a 13 +conv2d_bf16_params.h 240 0x1196 +conv2d_bf16_params.h 240 0x1196 1 x +conv2d_bf16_params.h 242 0x11a2 +conv2d_bf16_params.h 245 0x11a2 1 +conv2d_bf16_params.h 242 0x11ae +conv2d_bf16_params.h 244 0x11ae 1 +conv2d_bf16_params.h 244 0x11ae 2 +conv2d_bf16_params.h 249 0x11ae 3 +conv2d_bf16_params.h 243 0x11ba +conv2d_bf16_params.h 244 0x11ba 1 +conv2d_bf16_params.h 250 0x11ba 2 +conv2d_bf16_params.h 244 0x11c6 +conv2d_bf16_params.h 240 0x11d4 +conv2d_bf16_params.h 240 0x11d8 +conv2d_bf16_params.h 241 0x11d8 1 x +conv2d_bf16_params.h 242 0x11de x +conv2d_bf16_params.h 242 0x11de 1 x +conv2d_bf16_params.h 245 0x11e4 x +conv2d_bf16_params.h 242 0x11f2 x +conv2d_bf16_params.h 242 0x11f6 +conv2d_bf16_params.h 242 0x11fa +conv2d_bf16_params.h 241 0x11fe x +conv2d_bf16_params.h 242 0x11fe 1 +conv2d_bf16_params.h 242 0x1204 x +conv2d_bf16_params.h 242 0x1208 +conv2d_bf16_params.h 242 0x120c +conv2d_bf16_params.h 242 0x1210 +conv2d_bf16_params.h 242 0x1210 1 +conv2d_bf16_params.h 242 0x1216 +conv2d_bf16_params.h 243 0x121a x +conv2d_bf16_params.h 242 0x121e x +conv2d_bf16_params.h 243 0x121e 1 +conv2d_bf16_params.h 244 0x1224 x +conv2d_bf16_params.h 245 0x1224 1 x +conv2d_bf16_params.h 244 0x1236 +conv2d_bf16_params.h 244 0x1236 1 +conv2d_bf16_params.h 245 0x123c +conv2d_bf16_params.h 244 0x1242 +conv2d_bf16_params.h 244 0x1246 +conv2d_bf16_params.h 244 0x124a +conv2d_bf16_params.h 244 0x124e +conv2d_bf16_params.h 244 0x1252 +conv2d_bf16_params.h 245 0x1256 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 876 0x1268 +conv2d_bf16.h 876 0x1268 1 +conv2d_bf16.h 1849 0x1276 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 250 0x1280 x +conv2d_bf16_params.h 250 0x1280 1 +conv2d_bf16_params.h 250 0x128c +conv2d_bf16_params.h 250 0x1290 +conv2d_bf16_params.h 250 0x1294 +conv2d_bf16_params.h 250 0x1298 +conv2d_bf16_params.h 250 0x1298 1 +conv2d_bf16_params.h 250 0x129e +conv2d_bf16_params.h 249 0x12a2 x +conv2d_bf16_params.h 249 0x12a6 +conv2d_bf16_params.h 250 0x12aa x +conv2d_bf16_params.h 258 0x12b0 x +conv2d_bf16_params.h 259 0x12c8 +conv2d_bf16_params.h 259 0x12ce x +conv2d_bf16_params.h 259 0x12d2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1841 0x12e0 x +conv2d_bf16.h 1849 0x12e0 1 +conv2d_bf16.h 1849 0x12e0 2 x +conv2d_bf16.h 876 0x12ea +conv2d_bf16.h 881 0x12ea 1 +conv2d_bf16.h 1841 0x12ea 2 +conv2d_bf16.h 1842 0x12ea 3 +conv2d_bf16.h 1842 0x12ea 4 +conv2d_bf16.h 1842 0x12ea 5 +conv2d_bf16.h 1845 0x12ea 6 +conv2d_bf16.h 1849 0x12ea 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x12ea 8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1842 0x12f4 x +conv2d_bf16.h 1842 0x12f4 1 +conv2d_bf16.h 1849 0x12f4 2 +conv2d_bf16.h 862 0x1300 +conv2d_bf16.h 1842 0x1300 1 +conv2d_bf16.h 1845 0x1300 2 +conv2d_bf16.h 1845 0x130c x +conv2d_bf16.h 862 0x1310 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1314 x +io_buffer_main.h 125 0x1318 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1841 0x1318 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x131e x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1842 0x1322 x +conv2d_bf16.h 1845 0x1328 x +conv2d_bf16.h 866 0x132c x +conv2d_bf16.h 866 0x1330 +conv2d_bf16.h 1842 0x1336 x +conv2d_bf16.h 1842 0x1336 1 x +conv2d_bf16.h 1842 0x133c +conv2d_bf16.h 1845 0x133c 1 x +conv2d_bf16.h 1841 0x1342 x +conv2d_bf16.h 881 0x134a +conv2d_bf16.h 885 0x134a 1 +conv2d_bf16.h 1845 0x134e x +conv2d_bf16.h 867 0x1352 +conv2d_bf16.h 867 0x1358 +conv2d_bf16.h 867 0x1358 1 x +conv2d_bf16.h 867 0x1360 +conv2d_bf16.h 867 0x1366 +conv2d_bf16.h 867 0x1372 +conv2d_bf16.h 867 0x1372 1 +conv2d_bf16.h 867 0x1378 +conv2d_bf16.h 867 0x137c +conv2d_bf16.h 867 0x1382 +conv2d_bf16.h 867 0x138a +conv2d_bf16.h 881 0x13a0 +conv2d_bf16.h 883 0x13a0 1 +conv2d_bf16.h 884 0x13a0 2 +conv2d_bf16.h 876 0x13ac x +conv2d_bf16.h 876 0x13ac 1 x +conv2d_bf16.h 881 0x13ac 2 x +conv2d_bf16.h 883 0x13ac 3 +conv2d_bf16.h 884 0x13ac 4 +conv2d_bf16.h 885 0x13b8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x13b8 1 +conv2d_bf16_params.h 243 0x13b8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 883 0x13c2 x +conv2d_bf16.h 884 0x13c8 x +conv2d_bf16.h 876 0x13ce x +conv2d_bf16.h 876 0x13d2 +conv2d_bf16.h 881 0x13d6 x +conv2d_bf16.h 881 0x13da + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x13da 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 881 0x13e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x13e0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 885 0x13f0 +conv2d_bf16.h 885 0x13f4 x +conv2d_bf16.h 885 0x13fe +conv2d_bf16.h 885 0x1402 +conv2d_bf16.h 885 0x1406 +conv2d_bf16.h 896 0x1410 +conv2d_bf16.h 1115 0x1410 1 +conv2d_bf16.h 1115 0x1410 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x141a +vector.hpp 1152 0x141a 1 +vector.hpp 1152 0x141a 2 +vector.hpp 1152 0x141a 3 +vector.hpp 1152 0x141a 4 +vector.hpp 1152 0x141a 5 +vector.hpp 1152 0x141a 6 +vector.hpp 1152 0x141a 7 +vector.hpp 1152 0x141a 8 +vector.hpp 1152 0x141a 9 +vector.hpp 1152 0x141a 10 +vector.hpp 1152 0x141a 11 +vector.hpp 1152 0x141a 12 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x141a 13 +accum.hpp 149 0x141a 14 +accum.hpp 149 0x141a 15 +accum.hpp 149 0x141a 16 +accum.hpp 149 0x141a 17 +accum.hpp 149 0x141a 18 +accum.hpp 149 0x141a 19 +accum.hpp 149 0x141a 20 +accum.hpp 149 0x141a 21 +accum.hpp 149 0x141a 22 +accum.hpp 149 0x141a 23 +accum.hpp 149 0x141a 24 +accum.hpp 149 0x141a 25 +accum.hpp 149 0x141a 26 +accum.hpp 149 0x141a 27 +accum.hpp 149 0x141a 28 +accum.hpp 1110 0x141a 29 +accum.hpp 1110 0x141a 30 +accum.hpp 1110 0x141a 31 +accum.hpp 1110 0x141a 32 +accum.hpp 1110 0x141a 33 +accum.hpp 1110 0x141a 34 +accum.hpp 1110 0x141a 35 +accum.hpp 1110 0x141a 36 +accum.hpp 1110 0x141a 37 +accum.hpp 1110 0x141a 38 +accum.hpp 1110 0x141a 39 +accum.hpp 1110 0x141a 40 +accum.hpp 1110 0x141a 41 +accum.hpp 1110 0x141a 42 +accum.hpp 1110 0x141a 43 +accum.hpp 1110 0x141a 44 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 886 0x141a 45 +conv2d_bf16.h 896 0x141a 46 x +conv2d_bf16.h 1123 0x141a 47 +conv2d_bf16.h 896 0x1420 +conv2d_bf16.h 896 0x1424 +conv2d_bf16.h 896 0x1428 +conv2d_bf16.h 896 0x142c +conv2d_bf16.h 896 0x1430 +conv2d_bf16.h 896 0x1434 +conv2d_bf16.h 897 0x1438 x +conv2d_bf16.h 897 0x143c +conv2d_bf16.h 897 0x1440 +conv2d_bf16.h 897 0x1444 +conv2d_bf16.h 897 0x1448 +conv2d_bf16.h 897 0x144c +conv2d_bf16.h 897 0x1450 +conv2d_bf16.h 898 0x1454 x +conv2d_bf16.h 898 0x1458 +conv2d_bf16.h 898 0x145c +conv2d_bf16.h 898 0x1460 +conv2d_bf16.h 898 0x1464 +conv2d_bf16.h 898 0x1468 +conv2d_bf16.h 1115 0x146c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1470 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 898 0x1474 x +conv2d_bf16.h 1115 0x1480 x +conv2d_bf16.h 1115 0x1484 +conv2d_bf16.h 886 0x148a +conv2d_bf16.h 886 0x1490 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1494 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x149c +conv2d_bf16.h 1123 0x149c 1 +conv2d_bf16.h 1123 0x149c 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14a6 +aie_core.h 100 0x14a6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14a6 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x14a6 3 +accum.hpp 946 0x14a6 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x14a6 5 +conv2d_bf16.h 1125 0x14a6 6 +conv2d_bf16.h 1154 0x14a6 7 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14b0 +aie_core.h 100 0x14b0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14b0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x14b0 3 +accum.hpp 946 0x14b0 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x14b0 5 +conv2d_bf16.h 1125 0x14b0 6 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14ba +aie_core.h 100 0x14ba 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14ba 2 +vector.hpp 1152 0x14ba 3 +vector.hpp 1152 0x14ba 4 +vector.hpp 1152 0x14ba 5 +vector.hpp 1152 0x14ba 6 +vector.hpp 1152 0x14ba 7 +vector.hpp 1152 0x14ba 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x14ba 9 +accum.hpp 149 0x14ba 10 +accum.hpp 149 0x14ba 11 +accum.hpp 149 0x14ba 12 +accum.hpp 149 0x14ba 13 +accum.hpp 149 0x14ba 14 +accum.hpp 149 0x14ba 15 +accum.hpp 149 0x14ba 16 +accum.hpp 578 0x14ba 17 +accum.hpp 946 0x14ba 18 +accum.hpp 1110 0x14ba 19 +accum.hpp 1110 0x14ba 20 +accum.hpp 1110 0x14ba 21 +accum.hpp 1110 0x14ba 22 +accum.hpp 1110 0x14ba 23 +accum.hpp 1110 0x14ba 24 +accum.hpp 1110 0x14ba 25 +accum.hpp 1110 0x14ba 26 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 746 0x14ba 27 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14c6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x14c6 1 +conv2d_bf16.h 1187 0x14c6 2 +conv2d_bf16.h 1199 0x14c6 3 +conv2d_bf16.h 1200 0x14c6 4 +conv2d_bf16.h 1201 0x14c6 5 +conv2d_bf16.h 1202 0x14c6 6 +conv2d_bf16.h 1143 0x14d2 +conv2d_bf16.h 1218 0x14d2 1 +conv2d_bf16.h 749 0x14dc +conv2d_bf16.h 750 0x14dc 1 +conv2d_bf16.h 751 0x14dc 2 +conv2d_bf16.h 752 0x14dc 3 +conv2d_bf16.h 1123 0x14dc 4 +conv2d_bf16.h 736 0x14e6 +conv2d_bf16.h 738 0x14e6 1 +conv2d_bf16.h 1123 0x14e6 2 +conv2d_bf16.h 1873 0x14e6 3 +conv2d_bf16.h 1125 0x14f2 x +conv2d_bf16.h 1125 0x14f6 +conv2d_bf16.h 1125 0x14fa +conv2d_bf16.h 1149 0x14fe x +conv2d_bf16.h 1154 0x1502 x +conv2d_bf16.h 743 0x1506 x +conv2d_bf16.h 745 0x150a x +conv2d_bf16.h 746 0x150e x +conv2d_bf16.h 1125 0x150e 1 x +conv2d_bf16.h 1143 0x1514 x +conv2d_bf16.h 1206 0x1518 x +conv2d_bf16.h 1149 0x151c +conv2d_bf16.h 1154 0x1524 +conv2d_bf16.h 1125 0x1528 x +conv2d_bf16.h 1149 0x152c x +conv2d_bf16.h 1154 0x1530 x +conv2d_bf16.h 1287 0x1536 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1540 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1540 1 x +accum.hpp 946 0x1540 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x1540 3 +conv2d_bf16.h 738 0x1540 4 +conv2d_bf16.h 1147 0x1540 5 x +conv2d_bf16.h 1187 0x1540 6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x154c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x154c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x154c 2 +accum.hpp 946 0x154c 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x154c 4 x +conv2d_bf16.h 738 0x154c 5 x +conv2d_bf16.h 1188 0x154c 6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1558 +aie_core.h 100 0x1558 1 +aie_core.h 100 0x1558 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1558 3 +vector.hpp 1139 0x1558 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1558 5 +accum.hpp 578 0x1558 6 +accum.hpp 946 0x1558 7 +accum.hpp 946 0x1558 8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 737 0x1558 9 x +conv2d_bf16.h 742 0x1558 10 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1564 +aie_core.h 100 0x1564 1 +aie_core.h 100 0x1564 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1564 3 +vector.hpp 1139 0x1564 4 +vector.hpp 1139 0x1564 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1564 6 +accum.hpp 578 0x1564 7 +accum.hpp 578 0x1564 8 x +accum.hpp 946 0x1564 9 +accum.hpp 946 0x1564 10 +accum.hpp 946 0x1564 11 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x1564 12 x +conv2d_bf16.h 1149 0x1564 13 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1570 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1570 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1570 2 +accum.hpp 946 0x1570 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1570 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x1570 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x157a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x157a 1 x +accum.hpp 946 0x157a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x157a 3 x +conv2d_bf16.h 1152 0x157a 4 x +conv2d_bf16.h 1206 0x157a 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1586 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1586 1 +accum.hpp 946 0x1586 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 737 0x1586 3 x +conv2d_bf16.h 1154 0x1586 4 x +conv2d_bf16.h 1206 0x1586 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1592 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1592 1 x +accum.hpp 946 0x1592 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x1592 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1598 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1598 1 +accum.hpp 946 0x1598 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1598 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1157 0x1598 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x159e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x159e 1 x +accum.hpp 946 0x159e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x159e 3 x +conv2d_bf16.h 1159 0x159e 4 x +conv2d_bf16.h 737 0x15a4 x +conv2d_bf16.h 738 0x15a4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15aa x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15aa 1 x +accum.hpp 946 0x15aa 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x15aa 3 x +conv2d_bf16.h 1192 0x15aa 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15b0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15b0 1 +accum.hpp 946 0x15b0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x15b0 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x15b0 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15ba +vector.hpp 1139 0x15ba 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15ba 2 +accum.hpp 578 0x15ba 3 x +accum.hpp 946 0x15ba 4 +accum.hpp 946 0x15ba 5 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x15ba 6 x +conv2d_bf16.h 746 0x15ba 7 x +conv2d_bf16.h 1162 0x15ba 8 +conv2d_bf16.h 737 0x15c6 x +conv2d_bf16.h 742 0x15c6 1 x +conv2d_bf16.h 749 0x15c6 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15d0 x +aie_core.h 143 0x15d0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15d0 2 x +vector.hpp 1152 0x15d0 3 +vector.hpp 1152 0x15d0 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15d0 5 x +accum.hpp 946 0x15d0 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 750 0x15d0 7 x +conv2d_bf16.h 1286 0x15d0 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15de + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15de 1 +vector.hpp 1139 0x15de 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15de 3 +accum.hpp 578 0x15de 4 +accum.hpp 946 0x15de 5 +accum.hpp 946 0x15de 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x15de 7 x +conv2d_bf16.h 751 0x15de 8 x +conv2d_bf16.h 1162 0x15de 9 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15ec + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15ec 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15ec 2 +accum.hpp 946 0x15ec 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x15ec 4 x +conv2d_bf16.h 746 0x15ec 5 x +conv2d_bf16.h 1199 0x15ec 6 x +conv2d_bf16.h 738 0x15fa x +conv2d_bf16.h 1200 0x15fa 1 x +conv2d_bf16.h 742 0x1602 x +conv2d_bf16.h 1201 0x1602 1 x +conv2d_bf16.h 743 0x160a x +conv2d_bf16.h 752 0x160a 1 x +conv2d_bf16.h 738 0x1612 x +conv2d_bf16.h 740 0x1612 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1618 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x161c x +conv2d_bf16.h 742 0x161c 1 x +conv2d_bf16.h 1202 0x161c 2 x +conv2d_bf16.h 1206 0x161c 3 x +conv2d_bf16.h 737 0x1628 x +conv2d_bf16.h 743 0x1628 1 x +conv2d_bf16.h 749 0x1628 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1632 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x1632 1 x +conv2d_bf16.h 740 0x1632 2 x +conv2d_bf16.h 751 0x1632 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1640 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x1640 1 x +conv2d_bf16.h 750 0x1640 2 x +conv2d_bf16.h 736 0x1650 x +conv2d_bf16.h 742 0x1650 1 x +conv2d_bf16.h 746 0x1650 2 x +conv2d_bf16.h 752 0x1650 3 x +conv2d_bf16.h 737 0x1660 x +conv2d_bf16.h 743 0x1660 1 x +conv2d_bf16.h 749 0x1660 2 x +conv2d_bf16.h 738 0x1670 x +conv2d_bf16.h 740 0x1670 1 x +conv2d_bf16.h 751 0x1670 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1680 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x1680 1 x +conv2d_bf16.h 750 0x1680 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1690 +aie_core.h 100 0x1690 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1690 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1690 3 +accum.hpp 946 0x1690 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 742 0x1690 5 x +conv2d_bf16.h 746 0x1690 6 x +conv2d_bf16.h 752 0x1690 7 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x169e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x169e 1 +vector.hpp 1152 0x169e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x169e 3 x +conv2d_bf16.h 749 0x169e 4 x +conv2d_bf16.h 1286 0x169e 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16ac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x16ac 1 +vector.hpp 1152 0x16ac 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x16ac 3 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16b6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x16b6 1 x +conv2d_bf16.h 750 0x16b6 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16c0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 746 0x16c0 1 x +conv2d_bf16.h 752 0x16c0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x16ca +vector.hpp 1152 0x16ca 1 +vector.hpp 1152 0x16ca 2 +vector.hpp 1152 0x16ca 3 +vector.hpp 1152 0x16ca 4 +vector.hpp 1152 0x16ca 5 +vector.hpp 1152 0x16ca 6 +vector.hpp 1152 0x16ca 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 749 0x16ca 8 x +conv2d_bf16.h 1285 0x16ca 9 x +conv2d_bf16.h 1286 0x16ca 10 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x16d6 +aie_core.h 100 0x16d6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x16d6 2 +vector.hpp 1152 0x16d6 3 +vector.hpp 1152 0x16d6 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x16d6 5 +accum.hpp 946 0x16d6 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x16d6 7 x +conv2d_bf16.h 746 0x16e0 x +conv2d_bf16.h 750 0x16e0 1 x +conv2d_bf16.h 745 0x16e8 x +conv2d_bf16.h 752 0x16e8 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16f0 +aie_core.h 143 0x16f4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 750 0x16f4 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16fc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 749 0x16fc 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1704 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 752 0x1704 1 x +conv2d_bf16.h 1286 0x1704 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x170e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x170e 1 +vector.hpp 1152 0x170e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x170e 3 x +conv2d_bf16.h 1286 0x170e 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x171a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x171a 1 +vector.hpp 1152 0x171a 2 +vector.hpp 1152 0x171a 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x171a 4 +accum.hpp 946 0x171a 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1722 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1287 0x1722 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x172a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x172a 1 x +accum.hpp 1110 0x172a 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1732 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1732 1 +accum.hpp 1110 0x1732 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x173a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x173a 1 +conv2d_bf16.h 1287 0x173a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1744 x +accum.hpp 1110 0x1744 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1287 0x1744 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x174c +accum.hpp 1110 0x174c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1218 0x174c 2 x +conv2d_bf16.h 1287 0x174c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1758 x +accum.hpp 1110 0x1758 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x1758 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1760 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1760 1 +accum.hpp 1110 0x1760 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1768 +vector.hpp 1152 0x1768 1 +vector.hpp 1152 0x1768 2 +vector.hpp 1152 0x1768 3 +vector.hpp 1152 0x1768 4 +vector.hpp 1152 0x1768 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1768 6 +accum.hpp 1110 0x1768 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1772 +vector.hpp 1152 0x1772 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1772 2 x +accum.hpp 1110 0x1772 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1286 0x1772 4 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x177a +aie_core.h 143 0x177a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x177a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x177a 3 +accum.hpp 946 0x177a 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1187 0x177a 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1782 x +max_min.hpp 20 0x1786 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x178a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x178a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1792 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1792 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x179a x +vector.hpp 1152 0x17a4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x17a4 1 x +max_min.hpp 20 0x17ac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17b0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x17b0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17b8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x17b8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17c0 x +vector.hpp 1152 0x17d0 +vector.hpp 1152 0x17d4 +vector.hpp 1152 0x17d8 +vector.hpp 1152 0x17dc +vector.hpp 1152 0x17e0 +vector.hpp 1152 0x17e4 +vector.hpp 1152 0x17e8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x17f0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17f0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1143 0x17f0 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x17fc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x17fc 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x17fc 2 +accum.hpp 946 0x17fc 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1800 +aie_core.h 100 0x1804 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1804 1 +vector.hpp 1152 0x1804 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x180a +aie_core.h 143 0x1820 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x1820 1 +conv2d_bf16.h 1364 0x1820 2 +conv2d_bf16.h 1364 0x1820 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x182c +aie_core.h 143 0x182c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x182c 2 +vector.hpp 1152 0x182c 3 +vector.hpp 1152 0x182c 4 +vector.hpp 1152 0x182c 5 +vector.hpp 1152 0x182c 6 +vector.hpp 1152 0x182c 7 +vector.hpp 1152 0x182c 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x182c 9 +accum.hpp 149 0x182c 10 +accum.hpp 149 0x182c 11 +accum.hpp 149 0x182c 12 +accum.hpp 149 0x182c 13 +accum.hpp 149 0x182c 14 +accum.hpp 149 0x182c 15 +accum.hpp 149 0x182c 16 +accum.hpp 1110 0x182c 17 +accum.hpp 1110 0x182c 18 +accum.hpp 1110 0x182c 19 +accum.hpp 1110 0x182c 20 +accum.hpp 1110 0x182c 21 +accum.hpp 1110 0x182c 22 +accum.hpp 1110 0x182c 23 +accum.hpp 1110 0x182c 24 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x182c 25 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1838 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 807 0x1838 1 +conv2d_bf16.h 808 0x1838 2 +conv2d_bf16.h 809 0x1838 3 +conv2d_bf16.h 810 0x1838 4 +conv2d_bf16.h 1436 0x1838 5 +conv2d_bf16.h 1437 0x1838 6 +conv2d_bf16.h 1438 0x1838 7 +conv2d_bf16.h 1439 0x1838 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1842 +aie_core.h 143 0x1842 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 802 0x1842 2 +conv2d_bf16.h 1428 0x1842 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x184e +aie_core.h 143 0x184e 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x184e 2 +conv2d_bf16.h 794 0x184e 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x185a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 794 0x185a 1 +conv2d_bf16.h 1455 0x185a 2 +conv2d_bf16.h 1337 0x1864 +conv2d_bf16.h 1364 0x186e x +conv2d_bf16.h 1873 0x186e 1 +conv2d_bf16.h 1364 0x1874 +conv2d_bf16.h 1369 0x1878 x +conv2d_bf16.h 799 0x187c x +conv2d_bf16.h 801 0x1880 x +conv2d_bf16.h 802 0x1884 x +conv2d_bf16.h 1337 0x1888 x +conv2d_bf16.h 1443 0x188c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1892 +vector.hpp 1152 0x1892 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x1892 2 +conv2d_bf16.h 1364 0x1896 +conv2d_bf16.h 1518 0x1896 1 +conv2d_bf16.h 1364 0x189a +conv2d_bf16.h 1364 0x189e x +conv2d_bf16.h 1369 0x18a2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x18a8 +vector.hpp 1152 0x18a8 1 +vector.hpp 1139 0x18b0 +vector.hpp 1139 0x18b0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18b0 2 +accum.hpp 578 0x18b0 3 +accum.hpp 578 0x18b0 4 x +accum.hpp 946 0x18b0 5 +accum.hpp 946 0x18b0 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x18b0 7 x +conv2d_bf16.h 1362 0x18b0 8 x +conv2d_bf16.h 1429 0x18b0 9 +conv2d_bf16.h 1443 0x18b0 10 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18be + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18be 1 +accum.hpp 946 0x18be 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 793 0x18be 3 x +conv2d_bf16.h 1364 0x18be 4 x +conv2d_bf16.h 1443 0x18be 5 +conv2d_bf16.h 794 0x18ca x +conv2d_bf16.h 795 0x18ca 1 x +conv2d_bf16.h 1428 0x18ca 2 x +conv2d_bf16.h 1443 0x18ca 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18d6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18d6 1 +accum.hpp 578 0x18d6 2 +accum.hpp 946 0x18d6 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 796 0x18d6 4 x +conv2d_bf16.h 799 0x18d6 5 x +conv2d_bf16.h 1429 0x18d6 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18e0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18e0 1 x +accum.hpp 946 0x18e0 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 797 0x18e0 3 x +conv2d_bf16.h 1367 0x18e0 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18e6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18e6 1 +accum.hpp 946 0x18e6 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x18e6 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x18e6 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18ec x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18ec 1 x +accum.hpp 946 0x18ec 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x18ec 3 x +conv2d_bf16.h 1372 0x18ec 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18f2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18f2 1 +accum.hpp 946 0x18f2 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 793 0x18f2 3 x +conv2d_bf16.h 1374 0x18f2 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18f8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18f8 1 x +accum.hpp 946 0x18f8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 795 0x18f8 3 x +conv2d_bf16.h 1377 0x18f8 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18fe + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18fe 1 +accum.hpp 946 0x18fe 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 796 0x18fe 3 x +conv2d_bf16.h 1379 0x18fe 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1904 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1904 1 x +accum.hpp 946 0x1904 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 797 0x1904 3 x +conv2d_bf16.h 1429 0x1904 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x190a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x190a 1 +accum.hpp 946 0x190a 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x190a 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 801 0x190a 4 x +conv2d_bf16.h 1429 0x190a 5 +conv2d_bf16.h 792 0x1914 x +conv2d_bf16.h 794 0x1914 1 x +conv2d_bf16.h 802 0x1914 2 x +conv2d_bf16.h 793 0x191e x +conv2d_bf16.h 799 0x191e 1 x +conv2d_bf16.h 803 0x191e 2 x +conv2d_bf16.h 807 0x191e 3 x +conv2d_bf16.h 794 0x192a x +conv2d_bf16.h 804 0x192a 1 x +conv2d_bf16.h 808 0x192a 2 x +conv2d_bf16.h 809 0x1934 x +conv2d_bf16.h 810 0x1938 x +conv2d_bf16.h 795 0x193c x +conv2d_bf16.h 802 0x193c 1 x +conv2d_bf16.h 1437 0x193c 2 x +conv2d_bf16.h 796 0x1946 x +conv2d_bf16.h 1436 0x1946 1 x +conv2d_bf16.h 797 0x194e x +conv2d_bf16.h 1438 0x194e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1956 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 799 0x1956 1 x +conv2d_bf16.h 1439 0x1956 2 x +conv2d_bf16.h 792 0x1960 x +conv2d_bf16.h 801 0x1960 1 x +conv2d_bf16.h 793 0x1966 x +conv2d_bf16.h 804 0x1966 1 x +conv2d_bf16.h 808 0x1966 2 x +conv2d_bf16.h 795 0x1970 x +conv2d_bf16.h 803 0x1970 1 x +conv2d_bf16.h 807 0x1970 2 x +conv2d_bf16.h 796 0x197a x +conv2d_bf16.h 810 0x197a 1 x +conv2d_bf16.h 794 0x1982 x +conv2d_bf16.h 797 0x1982 1 x +conv2d_bf16.h 809 0x1982 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1990 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 799 0x1990 1 x +conv2d_bf16.h 802 0x1990 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x19a0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x19a0 1 +vector.hpp 1152 0x19a0 2 +vector.hpp 1152 0x19a0 3 +vector.hpp 1152 0x19a0 4 +vector.hpp 1152 0x19a0 5 +vector.hpp 1152 0x19a0 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 801 0x19a0 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x19ac +vector.hpp 1152 0x19ac 1 +vector.hpp 1152 0x19ac 2 +vector.hpp 1152 0x19ac 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 804 0x19ac 4 x +conv2d_bf16.h 808 0x19ac 5 x +conv2d_bf16.h 1517 0x19ac 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x19b8 +vector.hpp 1152 0x19b8 1 +vector.hpp 1152 0x19b8 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 803 0x19b8 3 x +conv2d_bf16.h 807 0x19b8 4 x +conv2d_bf16.h 1518 0x19b8 5 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x19c4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 810 0x19c4 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x19cc x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 809 0x19cc 1 x +conv2d_bf16.h 1428 0x19cc 2 +conv2d_bf16.h 801 0x19d6 x +conv2d_bf16.h 802 0x19da x +conv2d_bf16.h 803 0x19de x +conv2d_bf16.h 807 0x19de 1 x +conv2d_bf16.h 804 0x19e6 x +conv2d_bf16.h 808 0x19e6 1 x +conv2d_bf16.h 809 0x19ee x +conv2d_bf16.h 810 0x19f2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x19fa x +accum.hpp 1110 0x19fa 1 x +accum.hpp 149 0x19fe +accum.hpp 1110 0x19fe 1 +accum.hpp 149 0x1a02 +accum.hpp 1110 0x1a02 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1455 0x1a02 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1a0c x +accum.hpp 1110 0x1a0c 1 x +accum.hpp 149 0x1a10 +accum.hpp 1110 0x1a10 1 +accum.hpp 149 0x1a14 +accum.hpp 1110 0x1a14 1 +accum.hpp 149 0x1a18 +accum.hpp 1110 0x1a18 1 +accum.hpp 149 0x1a1c +accum.hpp 1110 0x1a1c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a20 x +max_min.hpp 20 0x1a24 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a28 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a28 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a30 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a30 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a38 x +vector.hpp 1152 0x1a42 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a42 1 x +max_min.hpp 20 0x1a4a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a4e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a4e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a56 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a56 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a60 x +vector.hpp 1152 0x1a70 +vector.hpp 1152 0x1a74 +vector.hpp 1152 0x1a78 +vector.hpp 1152 0x1a7c +vector.hpp 1152 0x1a80 +vector.hpp 1152 0x1a84 +vector.hpp 1152 0x1a88 +vector.hpp 1152 0x1a90 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1337 0x1a90 1 x +conv2d_bf16.h 1873 0x1ac8 x +conv2d_bf16.h 1873 0x1acc + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 74 0x1ae0 x +superkernels.cpp 79 0x1ae0 1 +superkernels.cpp 81 0x1ae0 2 +superkernels.cpp 79 0x1aea x +superkernels.cpp 81 0x1aea 1 +superkernels.cpp 74 0x1af4 +superkernels.cpp 79 0x1b06 +superkernels.cpp 79 0x1b06 1 +superkernels.cpp 81 0x1b1c +superkernels.cpp 113 0x1b22 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1b22 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1b2c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1b2c 1 +tile.hpp 86 0x1b2c 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1b3c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1b44 +tile.hpp 74 0x1b48 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 113 0x1b4c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1b4c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1b54 +superkernels.cpp 81 0x1b60 +superkernels.cpp 87 0x1b64 +superkernels.cpp 87 0x1b64 1 x +superkernels.cpp 88 0x1b6e x +superkernels.cpp 89 0x1b6e 1 +superkernels.cpp 88 0x1b78 +superkernels.cpp 88 0x1b7e +superkernels.cpp 87 0x1b86 x +superkernels.cpp 113 0x1b86 1 +superkernels.cpp 88 0x1b8e x +superkernels.cpp 88 0x1b94 +superkernels.cpp 89 0x1b9a x +superkernels.cpp 89 0x1ba0 +superkernels.cpp 113 0x1ba0 1 +superkernels.cpp 106 0x1bb0 +superkernels.cpp 113 0x1bb0 1 +superkernels.cpp 117 0x1bb0 2 +superkernels.cpp 136 0x1bb0 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x1bb0 4 +io_buffer_main.h 324 0x1bb0 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 106 0x1bba x +superkernels.cpp 108 0x1bba 1 +superkernels.cpp 107 0x1bc4 +superkernels.cpp 108 0x1bc4 1 x +superkernels.cpp 139 0x1bc4 2 +superkernels.cpp 140 0x1bc4 3 +superkernels.cpp 107 0x1bce x +superkernels.cpp 110 0x1bda x +superkernels.cpp 110 0x1bda 1 x +superkernels.cpp 108 0x1be0 x +superkernels.cpp 107 0x1be4 x +superkernels.cpp 108 0x1be4 1 +superkernels.cpp 106 0x1bea x +superkernels.cpp 106 0x1bee +superkernels.cpp 107 0x1bf2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x1bf6 x +io_buffer_main.h 218 0x1bfa +io_buffer_main.h 218 0x1bfe +io_buffer_main.h 218 0x1c02 +io_buffer_main.h 235 0x1c08 x +io_buffer_main.h 218 0x1c14 x +io_buffer_main.h 218 0x1c14 1 x +io_buffer_main.h 218 0x1c18 +io_buffer_main.h 395 0x1c1c +io_buffer_main.h 395 0x1c26 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 113 0x1c30 x +superkernels.cpp 113 0x1c36 +superkernels.cpp 113 0x1c42 +superkernels.cpp 117 0x1c50 x +superkernels.cpp 117 0x1c50 1 +superkernels.cpp 117 0x1c5a +superkernels.cpp 117 0x1c6c +superkernels.cpp 117 0x1c70 +superkernels.cpp 136 0x1c76 +superkernels.cpp 140 0x1c76 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x1c82 x +io_buffer_main.h 327 0x1c82 1 +io_buffer_main.h 425 0x1c82 2 +io_buffer_main.h 324 0x1c88 +io_buffer_main.h 425 0x1c98 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 136 0x1c9c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x1c9c 1 x +io_buffer_main.h 327 0x1cae +io_buffer_main.h 327 0x1cb2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 139 0x1cc0 x +superkernels.cpp 139 0x1cc0 1 +superkernels.cpp 139 0x1cca +superkernels.cpp 142 0x1cd2 +superkernels.cpp 139 0x1cde +superkernels.cpp 139 0x1ce2 +superkernels.cpp 140 0x1cf4 x +superkernels.cpp 142 0x1d04 x +superkernels.cpp 142 0x1d08 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 141 0x1d20 x +elementwise_binary.h 142 0x1d20 1 +elementwise_binary.h 144 0x1d20 2 x +elementwise_binary.h 141 0x1d26 +elementwise_binary.h 141 0x1d2a +elementwise_binary.h 142 0x1d2e x +elementwise_binary.h 142 0x1d32 +elementwise_binary.h 130 0x1d40 x +elementwise_binary.h 133 0x1d40 1 x +elementwise_binary.h 130 0x1d44 +elementwise_binary.h 133 0x1d58 x +elementwise_binary.h 134 0x1d5c x +elementwise_binary.h 134 0x1d6c +elementwise_binary.h 135 0x1d70 x +elementwise_binary.h 135 0x1d80 +elementwise_binary.h 136 0x1d84 x +elementwise_binary.h 137 0x1d8c x +elementwise_binary.h 136 0x1d98 x +elementwise_binary.h 137 0x1d9c +elementwise_binary.h 137 0x1da0 +elementwise_binary.h 139 0x1da0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 146 0x1da0 2 +add_impl.h 146 0x1daa +add_impl.h 147 0x1daa 1 +add_impl.h 147 0x1daa 2 +add_impl.h 146 0x1db4 x +add_impl.h 147 0x1db4 1 +add_impl.h 147 0x1dbe x +add_impl.h 147 0x1dc6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x1dca x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x1dce + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x1dd2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x1dd8 x +add_impl.h 147 0x1ddc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 81 0x1df0 +elementwise_binary_broadcasting.h 81 0x1df0 1 x +elementwise_binary_broadcasting.h 82 0x1df0 2 +elementwise_binary_broadcasting.h 82 0x1df0 3 +elementwise_binary_broadcasting.h 83 0x1df0 4 +elementwise_binary_broadcasting.h 81 0x1dfa +elementwise_binary_broadcasting.h 82 0x1dfa 1 +elementwise_binary_broadcasting.h 82 0x1e00 x +elementwise_binary_broadcasting.h 84 0x1e0e x +elementwise_binary_broadcasting.h 82 0x1e12 x +elementwise_binary_broadcasting.h 83 0x1e16 x +elementwise_binary_broadcasting.h 82 0x1e1a x +elementwise_binary_broadcasting.h 83 0x1e1a 1 +elementwise_binary_broadcasting.h 82 0x1e20 +elementwise_binary_broadcasting.h 82 0x1e24 +elementwise_binary_broadcasting.h 76 0x1e30 +elementwise_binary_broadcasting.h 76 0x1e30 1 x +elementwise_binary_broadcasting.h 77 0x1e3a x +elementwise_binary_broadcasting.h 78 0x1e44 +elementwise_binary_broadcasting.h 78 0x1e54 +elementwise_binary_broadcasting.h 78 0x1e58 x +elementwise_binary_broadcasting.h 78 0x1e5e +elementwise_binary_broadcasting.h 79 0x1e62 x +elementwise_binary_broadcasting.h 89 0x1e70 x +elementwise_binary_broadcasting.h 96 0x1e70 1 x +elementwise_binary_broadcasting.h 102 0x1e70 2 +elementwise_binary_broadcasting.h 102 0x1e76 x +elementwise_binary_broadcasting.h 117 0x1e76 1 +elementwise_binary_broadcasting.h 102 0x1e88 +elementwise_binary_broadcasting.h 102 0x1e88 1 +elementwise_binary_broadcasting.h 96 0x1e8e +elementwise_binary_broadcasting.h 96 0x1e92 x +elementwise_binary_broadcasting.h 103 0x1e9c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1eb0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1eb6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 106 0x1ec0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1ed0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1ed6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1ee0 +add_accum.hpp 19 0x1ee0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 117 0x1ee0 2 x +elementwise_binary_broadcasting.h 117 0x1ee0 3 x +elementwise_binary_broadcasting.h 117 0x1eea +elementwise_binary_broadcasting.h 117 0x1eea 1 +elementwise_binary_broadcasting.h 117 0x1ef4 +elementwise_binary_broadcasting.h 117 0x1efa +elementwise_binary_broadcasting.h 117 0x1f00 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f08 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f08 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f08 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f0c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f0c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f0c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f10 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f10 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f10 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f14 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f14 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f14 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f18 x +vector.hpp 1159 0x1f18 1 +vector.hpp 1159 0x1f18 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f18 3 x +accum.hpp 1110 0x1f18 4 +accum.hpp 1110 0x1f18 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f18 6 x +elementwise_binary.h 195 0x1f18 7 +elementwise_binary.h 218 0x1f18 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f1e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f1e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f1e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f1e 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f26 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f26 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f26 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f2a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f2a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f2a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f2a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f32 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f32 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f32 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f36 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f36 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f36 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f36 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f3e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f3e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f3e 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f42 +vector.hpp 1159 0x1f42 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f42 2 +accum.hpp 1110 0x1f42 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f42 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f42 5 x +elementwise_binary.h 218 0x1f42 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f50 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f50 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f50 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f54 +vector.hpp 1159 0x1f54 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f54 2 +accum.hpp 1110 0x1f54 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f54 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f54 5 x +elementwise_binary.h 195 0x1f54 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f60 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f60 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f60 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f70 +vector.hpp 1159 0x1f70 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f70 2 +accum.hpp 1110 0x1f70 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f70 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f70 5 x +elementwise_binary.h 218 0x1f70 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f82 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f82 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f82 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x1f82 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f8c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f8c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f8c 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x1f8c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f96 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f96 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x1f96 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 121 0x1f96 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f9e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f9e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x1f9e 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1fa4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1fa4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x1fa4 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 82 0x1fb0 +ise_binary_attribute_broadcasting.h 82 0x1fb0 1 x +ise_binary_attribute_broadcasting.h 90 0x1fb6 +ise_binary_attribute_broadcasting.h 90 0x1fbe x +ise_binary_attribute_broadcasting.h 117 0x1fbe 1 +ise_binary_attribute_broadcasting.h 92 0x1fc6 x +ise_binary_attribute_broadcasting.h 92 0x1fc6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x1fd6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0x1fd6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 117 0x1fe2 x +ise_binary_attribute_broadcasting.h 92 0x1fe8 +ise_binary_attribute_broadcasting.h 92 0x1fee x +ise_binary_attribute_broadcasting.h 92 0x1ff2 +ise_binary_attribute_broadcasting.h 117 0x1ff2 1 +ise_binary_attribute_broadcasting.h 117 0x1ff8 +ise_binary_attribute_broadcasting.h 118 0x2000 +ise_binary_attribute_broadcasting.h 118 0x2010 x +ise_binary_attribute_broadcasting.h 118 0x2014 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 147 0x2030 x +superkernels.cpp 152 0x2030 1 +superkernels.cpp 152 0x2036 x +superkernels.cpp 147 0x203c +superkernels.cpp 149 0x204a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2054 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 152 0x205c +superkernels.cpp 152 0x205c 1 +superkernels.cpp 149 0x2062 x +superkernels.cpp 149 0x2066 +superkernels.cpp 149 0x206e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x206e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 155 0x2076 +superkernels.cpp 166 0x2076 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x207c +tile.hpp 74 0x2082 +tile.hpp 86 0x2082 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 155 0x208e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2098 +tile.hpp 74 0x209c +tile.hpp 74 0x20a0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 159 0x20b0 +superkernels.cpp 159 0x20b6 x +superkernels.cpp 159 0x20b6 1 +superkernels.cpp 157 0x20c0 +superkernels.cpp 159 0x20c0 1 +superkernels.cpp 166 0x20c0 2 +superkernels.cpp 157 0x20ca x +superkernels.cpp 159 0x20ca 1 +superkernels.cpp 164 0x20ca 2 +superkernels.cpp 157 0x20de +superkernels.cpp 159 0x20e6 x +superkernels.cpp 157 0x20ea x +superkernels.cpp 159 0x20f0 x +superkernels.cpp 164 0x2100 +superkernels.cpp 166 0x2100 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2110 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 163 0x2118 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2118 1 +io_buffer_main.h 218 0x2122 +io_buffer_main.h 218 0x2126 +io_buffer_main.h 235 0x212a x +io_buffer_main.h 218 0x2138 x +io_buffer_main.h 218 0x2138 1 x +io_buffer_main.h 218 0x213c +io_buffer_main.h 395 0x2140 +io_buffer_main.h 395 0x214a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0x214e +superkernels.cpp 163 0x2158 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x215c x +io_buffer_main.h 324 0x215c 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0x2162 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2166 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 166 0x216c x +superkernels.cpp 163 0x2174 x +superkernels.cpp 163 0x2178 +superkernels.cpp 164 0x217c x +superkernels.cpp 164 0x2180 +superkernels.cpp 168 0x2190 +superkernels.cpp 169 0x2190 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2190 2 x +io_buffer_main.h 327 0x219a +io_buffer_main.h 425 0x219a 1 +io_buffer_main.h 425 0x21a8 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x21ac + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x21ac 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x21b6 x +superkernels.cpp 168 0x21ba + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x21c6 x +io_buffer_main.h 327 0x21ca + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x21ce x +superkernels.cpp 168 0x21d2 +superkernels.cpp 169 0x21e2 +superkernels.cpp 169 0x21e6 x +superkernels.cpp 171 0x21f0 +superkernels.cpp 171 0x2204 x +superkernels.cpp 171 0x220c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 124 0x2220 x +elementwise_unary.h 126 0x2220 1 x +elementwise_unary.h 126 0x2230 x +elementwise_unary.h 127 0x2234 x +elementwise_unary.h 127 0x2244 +elementwise_unary.h 128 0x2248 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 113 0x224c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 128 0x225a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 113 0x225e x +clip_impl.h 114 0x226e x +clip_impl.h 114 0x2272 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 130 0x2276 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2290 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 136 0x2290 1 x +elementwise_unary.h 142 0x2290 2 +elementwise_unary.h 154 0x2290 3 x +elementwise_unary.h 171 0x2290 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x229c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x229c 1 +elementwise_unary.h 154 0x229c 2 x +elementwise_unary.h 190 0x229c 3 x +elementwise_unary.h 136 0x22a8 +elementwise_unary.h 136 0x22ac x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 103 0x22b0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x22b4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22b8 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 171 0x22b8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 104 0x22b8 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22c4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x22c4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22cc x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x22cc 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x22cc 2 x +elementwise_unary.h 171 0x22cc 3 x +elementwise_unary.h 154 0x22d6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x22de x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22e2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x22e2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x22e2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x22f0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x22f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2300 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2310 x +vector.hpp 1159 0x2310 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2310 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 171 0x2310 3 x +elementwise_unary.h 176 0x2310 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2320 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2320 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x2320 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2330 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2330 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x2330 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2340 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2350 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2350 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x2350 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2358 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x235c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x235c 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x235c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2364 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 158 0x2364 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x236a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x236a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x236a 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2372 x +max_min.hpp 21 0x2376 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x237a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x237a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x237e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x237e 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 176 0x2390 x +superkernels.cpp 181 0x2390 1 +superkernels.cpp 181 0x2396 x +superkernels.cpp 176 0x239c +superkernels.cpp 178 0x23aa + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x23b4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 181 0x23bc +superkernels.cpp 181 0x23bc 1 +superkernels.cpp 178 0x23c2 x +superkernels.cpp 178 0x23c6 +superkernels.cpp 178 0x23ce + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x23ce 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 184 0x23d6 +superkernels.cpp 195 0x23d6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x23dc +tile.hpp 74 0x23e2 +tile.hpp 86 0x23e2 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 184 0x23ee x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x23f8 +tile.hpp 74 0x23fc +tile.hpp 74 0x2400 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 188 0x2410 +superkernels.cpp 188 0x2416 x +superkernels.cpp 188 0x2416 1 +superkernels.cpp 186 0x2420 +superkernels.cpp 188 0x2420 1 +superkernels.cpp 195 0x2420 2 +superkernels.cpp 186 0x242a x +superkernels.cpp 188 0x242a 1 +superkernels.cpp 193 0x242a 2 +superkernels.cpp 186 0x243e +superkernels.cpp 188 0x2446 x +superkernels.cpp 186 0x244a x +superkernels.cpp 188 0x2450 x +superkernels.cpp 193 0x2460 +superkernels.cpp 195 0x2460 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2470 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 192 0x2478 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2478 1 +io_buffer_main.h 218 0x2482 +io_buffer_main.h 218 0x2486 +io_buffer_main.h 235 0x248a x +io_buffer_main.h 218 0x2498 x +io_buffer_main.h 218 0x2498 1 x +io_buffer_main.h 218 0x249c +io_buffer_main.h 395 0x24a0 +io_buffer_main.h 395 0x24aa x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 193 0x24ae +superkernels.cpp 192 0x24b8 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x24bc x +io_buffer_main.h 324 0x24bc 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 193 0x24c2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x24c6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 195 0x24cc x +superkernels.cpp 192 0x24d4 x +superkernels.cpp 192 0x24d8 +superkernels.cpp 193 0x24dc x +superkernels.cpp 193 0x24e0 +superkernels.cpp 197 0x24f0 +superkernels.cpp 198 0x24f0 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x24f0 2 x +io_buffer_main.h 327 0x24fa +io_buffer_main.h 425 0x24fa 1 +io_buffer_main.h 425 0x2508 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x250c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x250c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x2516 x +superkernels.cpp 197 0x251a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2526 x +io_buffer_main.h 327 0x252a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x252e x +superkernels.cpp 197 0x2532 +superkernels.cpp 198 0x2542 +superkernels.cpp 198 0x2546 x +superkernels.cpp 200 0x2550 +superkernels.cpp 200 0x2564 x +superkernels.cpp 200 0x256c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 219 0x2600 +elementwise_binary_shared.h 219 0x2600 1 x +elementwise_binary_shared.h 220 0x260a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x2614 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2620 +elementwise_binary_shared.h 222 0x2632 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x263c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2640 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x2640 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x2870 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 237 0x2870 1 x +elementwise_binary_shared.h 244 0x2870 2 +elementwise_binary_shared.h 245 0x2870 3 +elementwise_binary_shared.h 247 0x2870 4 +elementwise_binary_shared.h 250 0x2870 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x287a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 244 0x287a 1 x +elementwise_binary_shared.h 245 0x287a 2 +elementwise_binary_shared.h 247 0x287a 3 +elementwise_binary_shared.h 244 0x288c +elementwise_binary_shared.h 244 0x288c 1 +elementwise_binary_shared.h 237 0x2892 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x28a0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0x28a0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 247 0x28a6 x +elementwise_binary_shared.h 245 0x28d0 x +elementwise_binary_shared.h 245 0x28d6 +elementwise_binary_shared.h 245 0x28d6 1 +elementwise_binary_shared.h 250 0x28f0 +elementwise_binary_shared.h 250 0x28f4 x +elementwise_binary_shared.h 250 0x28f8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 205 0x2910 x +superkernels.cpp 210 0x2910 1 +superkernels.cpp 210 0x2916 x +superkernels.cpp 205 0x291c +superkernels.cpp 207 0x292a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2934 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 210 0x293c +superkernels.cpp 210 0x293c 1 +superkernels.cpp 207 0x2942 x +superkernels.cpp 207 0x2946 +superkernels.cpp 207 0x294e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x294e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 213 0x2956 +superkernels.cpp 224 0x2956 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x295c +tile.hpp 74 0x2962 +tile.hpp 86 0x2962 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 213 0x296e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2978 +tile.hpp 74 0x297c +tile.hpp 74 0x2980 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 217 0x2990 +superkernels.cpp 217 0x2996 x +superkernels.cpp 217 0x2996 1 +superkernels.cpp 215 0x29a0 +superkernels.cpp 217 0x29a0 1 +superkernels.cpp 224 0x29a0 2 +superkernels.cpp 215 0x29aa x +superkernels.cpp 217 0x29aa 1 +superkernels.cpp 222 0x29aa 2 +superkernels.cpp 215 0x29be +superkernels.cpp 217 0x29c6 x +superkernels.cpp 215 0x29ca x +superkernels.cpp 217 0x29d0 x +superkernels.cpp 222 0x29e0 +superkernels.cpp 224 0x29e0 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x29f0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 221 0x29f8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x29f8 1 +io_buffer_main.h 218 0x2a02 +io_buffer_main.h 218 0x2a06 +io_buffer_main.h 235 0x2a0a x +io_buffer_main.h 218 0x2a18 x +io_buffer_main.h 218 0x2a18 1 x +io_buffer_main.h 218 0x2a1c +io_buffer_main.h 395 0x2a20 +io_buffer_main.h 395 0x2a2a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x2a2e +superkernels.cpp 221 0x2a38 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2a3c x +io_buffer_main.h 324 0x2a3c 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x2a42 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2a46 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 224 0x2a4c x +superkernels.cpp 221 0x2a54 x +superkernels.cpp 221 0x2a58 +superkernels.cpp 222 0x2a5c x +superkernels.cpp 222 0x2a60 +superkernels.cpp 226 0x2a70 +superkernels.cpp 227 0x2a70 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2a70 2 x +io_buffer_main.h 327 0x2a7a +io_buffer_main.h 425 0x2a7a 1 +io_buffer_main.h 425 0x2a88 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2a8c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2a8c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2a96 x +superkernels.cpp 226 0x2a9a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2aa6 x +io_buffer_main.h 327 0x2aaa + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2aae x +superkernels.cpp 226 0x2ab2 +superkernels.cpp 227 0x2ac2 +superkernels.cpp 227 0x2ac6 x +superkernels.cpp 229 0x2ad0 +superkernels.cpp 229 0x2ae4 x +superkernels.cpp 229 0x2aec + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 219 0x2b80 +elementwise_binary_shared.h 219 0x2b80 1 x +elementwise_binary_shared.h 220 0x2b8a x +elementwise_binary_shared.h 220 0x2b98 +elementwise_binary_shared.h 220 0x2ba0 +elementwise_binary_shared.h 222 0x2ba0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 146 0x2ba0 2 +add_impl.h 146 0x2baa +add_impl.h 147 0x2baa 1 +add_impl.h 147 0x2baa 2 +add_impl.h 146 0x2bb4 x +add_impl.h 147 0x2bb4 1 +add_impl.h 147 0x2bbe x +add_impl.h 147 0x2bc6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2bca x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x2bce + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2bd2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x2bd8 x +add_impl.h 147 0x2bdc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 227 0x2bf0 x +elementwise_binary_shared.h 232 0x2bf0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 141 0x2c00 x +elementwise_binary.h 142 0x2c00 1 +elementwise_binary.h 144 0x2c00 2 x +elementwise_binary.h 141 0x2c06 +elementwise_binary.h 141 0x2c0a +elementwise_binary.h 142 0x2c0e x +elementwise_binary.h 142 0x2c12 +elementwise_binary.h 130 0x2c20 x +elementwise_binary.h 133 0x2c20 1 x +elementwise_binary.h 130 0x2c24 +elementwise_binary.h 133 0x2c36 x +elementwise_binary.h 134 0x2c3a x +elementwise_binary.h 134 0x2c4a +elementwise_binary.h 135 0x2c4e x +elementwise_binary.h 135 0x2c5e +elementwise_binary.h 136 0x2c62 x +elementwise_binary.h 137 0x2c6a x +elementwise_binary.h 136 0x2c78 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2c7c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x2c80 +elementwise_binary.h 139 0x2c92 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2c9c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x2ca0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2ca0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 149 0x2cb0 x +elementwise_binary.h 156 0x2cb0 1 +elementwise_binary.h 168 0x2cb0 2 x +elementwise_binary.h 156 0x2cba x +elementwise_binary.h 168 0x2cba 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2cc4 +mul_acc32_fp.hpp 36 0x2cc4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 156 0x2cc4 2 +elementwise_binary.h 156 0x2cc4 3 +elementwise_binary.h 156 0x2cce +elementwise_binary.h 156 0x2cce 1 +elementwise_binary.h 156 0x2cd8 +elementwise_binary.h 156 0x2ce2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ce6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 168 0x2ce6 1 +elementwise_binary.h 187 0x2ce6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2cec +vector.hpp 1139 0x2cec 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2cec 2 x +elementwise_binary.h 211 0x2cec 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2cf2 x +vector.hpp 1139 0x2cf2 1 x +vector.hpp 1159 0x2cf2 2 +vector.hpp 1159 0x2cf2 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2cf2 4 +accum.hpp 1110 0x2cf2 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2cf2 6 x +elementwise_binary.h 195 0x2cf2 7 +elementwise_binary.h 213 0x2cf2 8 x +elementwise_binary.h 218 0x2cf2 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2cfa +vector.hpp 1139 0x2cfa 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2cfa 2 x +elementwise_binary.h 211 0x2cfa 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d00 x +vector.hpp 1139 0x2d00 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d00 2 x +elementwise_binary.h 213 0x2d00 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d06 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2d06 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d0a x +vector.hpp 1139 0x2d0a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d0a 2 x +elementwise_binary.h 213 0x2d0a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d10 +vector.hpp 1139 0x2d10 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d10 2 x +elementwise_binary.h 189 0x2d10 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d16 x +vector.hpp 1139 0x2d16 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d16 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d16 3 x +elementwise_binary.h 213 0x2d16 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d20 +vector.hpp 1139 0x2d20 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d20 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d20 3 x +elementwise_binary.h 189 0x2d20 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d2a x +vector.hpp 1139 0x2d2a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d2a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d2a 3 x +elementwise_binary.h 213 0x2d2a 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d34 +vector.hpp 1139 0x2d34 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d34 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d34 3 x +elementwise_binary.h 189 0x2d34 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d3e x +vector.hpp 1139 0x2d3e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d3e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d3e 3 x +elementwise_binary.h 213 0x2d3e 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d48 +vector.hpp 1139 0x2d48 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d48 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d48 3 x +elementwise_binary.h 189 0x2d48 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d52 x +vector.hpp 1139 0x2d52 1 x +vector.hpp 1159 0x2d52 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d52 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d52 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d52 5 x +elementwise_binary.h 213 0x2d52 6 x +elementwise_binary.h 218 0x2d52 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d60 +vector.hpp 1139 0x2d60 1 +vector.hpp 1159 0x2d60 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d60 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d60 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d60 5 x +elementwise_binary.h 189 0x2d60 6 x +elementwise_binary.h 195 0x2d60 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d70 x +vector.hpp 1139 0x2d70 1 x +vector.hpp 1159 0x2d70 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d70 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d70 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d70 5 x +elementwise_binary.h 213 0x2d70 6 x +elementwise_binary.h 218 0x2d70 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2d80 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d80 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d80 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2d80 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2d88 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d88 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d88 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2d88 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2d90 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d90 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d90 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2d90 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2d98 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d98 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d98 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2d98 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2da0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2da0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2da0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2da0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2da8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2da8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2da8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2da8 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2db0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2db0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2db0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2db0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2db8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2db8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2db8 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dbc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dbc 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0x2dbc 2 x +elementwise_binary.h 195 0x2dbc 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dc2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dc2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2dc2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dc6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dc6 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2dc6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dca x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dca 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2dca 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dce + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dce 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2dce 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 369 0x2de0 x +superkernels.cpp 374 0x2de0 1 +superkernels.cpp 374 0x2de6 x +superkernels.cpp 369 0x2dec +superkernels.cpp 371 0x2df2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2df2 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 374 0x2e0e x +superkernels.cpp 374 0x2e0e 1 x +superkernels.cpp 371 0x2e14 x +superkernels.cpp 371 0x2e18 +superkernels.cpp 371 0x2e1e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2e26 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 377 0x2e2a +superkernels.cpp 379 0x2e2a 1 +superkernels.cpp 381 0x2e2a 2 +superkernels.cpp 393 0x2e2a 3 +superkernels.cpp 377 0x2e34 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2e34 1 +tile.hpp 74 0x2e3e +tile.hpp 86 0x2e3e 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 377 0x2e4a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2e54 +tile.hpp 74 0x2e58 +tile.hpp 74 0x2e5c x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 381 0x2e60 +superkernels.cpp 381 0x2e60 1 x +superkernels.cpp 381 0x2e6a +superkernels.cpp 381 0x2e6a 1 +superkernels.cpp 390 0x2e6a 2 +superkernels.cpp 379 0x2e74 x +superkernels.cpp 382 0x2e74 1 +superkernels.cpp 391 0x2e74 2 +superkernels.cpp 379 0x2e8a +superkernels.cpp 381 0x2e90 x +superkernels.cpp 379 0x2e94 x +superkernels.cpp 381 0x2e98 x +superkernels.cpp 382 0x2e9c x +superkernels.cpp 390 0x2ea0 +superkernels.cpp 391 0x2ea6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2eb0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2eb4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2eb4 1 +io_buffer_main.h 218 0x2ebe +io_buffer_main.h 218 0x2ec2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2ec6 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 235 0x2eca x +io_buffer_main.h 218 0x2ed6 x +io_buffer_main.h 218 0x2ed6 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2eda x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2eda 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2ee0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 395 0x2ee4 +io_buffer_main.h 395 0x2ee4 1 +io_buffer_main.h 395 0x2eee x +io_buffer_main.h 218 0x2ef2 x +io_buffer_main.h 218 0x2efa +io_buffer_main.h 218 0x2efe +io_buffer_main.h 218 0x2f02 +io_buffer_main.h 235 0x2f06 x +io_buffer_main.h 218 0x2f14 x +io_buffer_main.h 218 0x2f14 1 x +io_buffer_main.h 218 0x2f18 +io_buffer_main.h 395 0x2f24 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x2f28 +superkernels.cpp 391 0x2f28 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2f28 2 +io_buffer_main.h 125 0x2f36 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x2f3a x +superkernels.cpp 391 0x2f40 x +superkernels.cpp 393 0x2f40 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2f46 x +io_buffer_main.h 125 0x2f4a +io_buffer_main.h 327 0x2f4e +io_buffer_main.h 327 0x2f4e 1 +io_buffer_main.h 125 0x2f54 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x2f5a x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2f60 +io_buffer_main.h 327 0x2f60 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x2f64 x +superkernels.cpp 391 0x2f68 x +superkernels.cpp 391 0x2f6c +superkernels.cpp 390 0x2f70 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2f80 x +io_buffer_main.h 327 0x2f80 1 +io_buffer_main.h 327 0x2f80 2 +io_buffer_main.h 327 0x2f80 3 +io_buffer_main.h 327 0x2f80 4 +io_buffer_main.h 425 0x2f80 5 +io_buffer_main.h 425 0x2f80 6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2f8a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 425 0x2f9a x +io_buffer_main.h 327 0x2f9e x +io_buffer_main.h 324 0x2fa2 +io_buffer_main.h 327 0x2fb0 +io_buffer_main.h 324 0x2fb4 x +io_buffer_main.h 327 0x2fb4 1 +io_buffer_main.h 425 0x2fc6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2fca +superkernels.cpp 398 0x2fca 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2fca 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2fd4 x +superkernels.cpp 397 0x2fd8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2fe4 x +io_buffer_main.h 327 0x2fe8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2fec x +superkernels.cpp 397 0x2ff0 +superkernels.cpp 398 0x3000 +superkernels.cpp 398 0x3004 x +superkernels.cpp 400 0x3010 +superkernels.cpp 400 0x3026 x +superkernels.cpp 400 0x302e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h: +conv2d_dw_bf16_params.h 211 0x3040 x +conv2d_dw_bf16_params.h 215 0x3040 1 +conv2d_dw_bf16_params.h 215 0x3040 2 x +conv2d_dw_bf16_params.h 215 0x304a x +conv2d_dw_bf16_params.h 218 0x304a 1 +conv2d_dw_bf16_params.h 218 0x304a 2 +conv2d_dw_bf16_params.h 211 0x3054 +conv2d_dw_bf16_params.h 218 0x305a +conv2d_dw_bf16_params.h 215 0x306e +conv2d_dw_bf16_params.h 215 0x3072 +conv2d_dw_bf16_params.h 215 0x3076 +conv2d_dw_bf16_params.h 215 0x307a +conv2d_dw_bf16_params.h 215 0x3088 +conv2d_dw_bf16_params.h 215 0x308c +conv2d_dw_bf16_params.h 218 0x3090 x +conv2d_dw_bf16_params.h 218 0x3094 +conv2d_dw_bf16_params.h 218 0x3098 +conv2d_dw_bf16_params.h 218 0x30a4 +conv2d_dw_bf16_params.h 218 0x30aa +conv2d_dw_bf16_params.h 218 0x30b0 +conv2d_dw_bf16_params.h 218 0x30b6 +conv2d_dw_bf16_params.h 218 0x30bc +conv2d_dw_bf16_params.h 218 0x30c0 +conv2d_dw_bf16_params.h 218 0x30d0 +conv2d_dw_bf16_params.h 218 0x30d0 1 +conv2d_dw_bf16_params.h 219 0x30d0 2 +conv2d_dw_bf16_params.h 218 0x30d6 +conv2d_dw_bf16_params.h 219 0x30d6 1 x +conv2d_dw_bf16_params.h 219 0x30dc +conv2d_dw_bf16_params.h 219 0x30e0 +conv2d_dw_bf16_params.h 218 0x30ea x +conv2d_dw_bf16_params.h 218 0x30ee +conv2d_dw_bf16_params.h 219 0x30f2 x +conv2d_dw_bf16_params.h 219 0x30f8 +conv2d_dw_bf16_params.h 218 0x3102 x +conv2d_dw_bf16_params.h 219 0x3106 x +conv2d_dw_bf16_params.h 219 0x310a +conv2d_dw_bf16_params.h 218 0x310e x +conv2d_dw_bf16_params.h 218 0x3112 +conv2d_dw_bf16_params.h 219 0x3112 1 x +conv2d_dw_bf16_params.h 219 0x3120 +conv2d_dw_bf16_params.h 226 0x3120 1 +conv2d_dw_bf16_params.h 231 0x3120 2 +conv2d_dw_bf16_params.h 219 0x312a +conv2d_dw_bf16_params.h 219 0x312a 1 +conv2d_dw_bf16_params.h 220 0x312a 2 +conv2d_dw_bf16_params.h 220 0x312a 3 +conv2d_dw_bf16_params.h 232 0x312a 4 +conv2d_dw_bf16_params.h 234 0x312a 5 +conv2d_dw_bf16_params.h 234 0x312a 6 +conv2d_dw_bf16_params.h 243 0x312a 7 +conv2d_dw_bf16_params.h 250 0x312a 8 +conv2d_dw_bf16_params.h 253 0x312a 9 +conv2d_dw_bf16_params.h 260 0x312a 10 +conv2d_dw_bf16_params.h 264 0x312a 11 +conv2d_dw_bf16_params.h 220 0x3134 +conv2d_dw_bf16_params.h 234 0x3134 1 +conv2d_dw_bf16_params.h 246 0x3134 2 +conv2d_dw_bf16_params.h 253 0x3134 3 +conv2d_dw_bf16_params.h 226 0x313e x +conv2d_dw_bf16_params.h 234 0x313e 1 +conv2d_dw_bf16_params.h 234 0x313e 2 +conv2d_dw_bf16_params.h 231 0x3148 +conv2d_dw_bf16_params.h 232 0x3148 1 +conv2d_dw_bf16_params.h 232 0x3148 2 +conv2d_dw_bf16_params.h 235 0x3152 +conv2d_dw_bf16_params.h 235 0x3152 1 +conv2d_dw_bf16_params.h 242 0x3152 2 +conv2d_dw_bf16_params.h 242 0x3152 3 +conv2d_dw_bf16_params.h 243 0x3152 4 +conv2d_dw_bf16_params.h 250 0x3152 5 +conv2d_dw_bf16_params.h 255 0x3152 6 +conv2d_dw_bf16_params.h 260 0x3152 7 +conv2d_dw_bf16_params.h 264 0x3152 8 +conv2d_dw_bf16_params.h 234 0x315c +conv2d_dw_bf16_params.h 239 0x315c 1 +conv2d_dw_bf16_params.h 242 0x315c 2 +conv2d_dw_bf16_params.h 248 0x315c 3 +conv2d_dw_bf16_params.h 253 0x315c 4 +conv2d_dw_bf16_params.h 264 0x315c 5 +conv2d_dw_bf16_params.h 219 0x3166 x +conv2d_dw_bf16_params.h 219 0x316a +conv2d_dw_bf16_params.h 219 0x316e +conv2d_dw_bf16_params.h 220 0x316e 1 +conv2d_dw_bf16_params.h 219 0x3174 +conv2d_dw_bf16_params.h 243 0x3174 1 +conv2d_dw_bf16_params.h 247 0x3174 2 +conv2d_dw_bf16_params.h 220 0x317a x +conv2d_dw_bf16_params.h 250 0x317a 1 +conv2d_dw_bf16_params.h 219 0x3180 x +conv2d_dw_bf16_params.h 220 0x3184 x +conv2d_dw_bf16_params.h 231 0x3184 1 +conv2d_dw_bf16_params.h 219 0x318a x +conv2d_dw_bf16_params.h 231 0x318a 1 x +conv2d_dw_bf16_params.h 220 0x3190 x +conv2d_dw_bf16_params.h 253 0x3190 1 x +conv2d_dw_bf16_params.h 240 0x3196 +conv2d_dw_bf16_params.h 246 0x3196 1 x +conv2d_dw_bf16_params.h 232 0x319c x +conv2d_dw_bf16_params.h 226 0x31a0 x +conv2d_dw_bf16_params.h 231 0x31a4 x +conv2d_dw_bf16_params.h 238 0x31a4 1 +conv2d_dw_bf16_params.h 234 0x31aa x +conv2d_dw_bf16_params.h 231 0x31ae x +conv2d_dw_bf16_params.h 232 0x31ae 1 x +conv2d_dw_bf16_params.h 234 0x31b4 x +conv2d_dw_bf16_params.h 232 0x31b8 x +conv2d_dw_bf16_params.h 227 0x31bc x +conv2d_dw_bf16_params.h 232 0x31bc 1 +conv2d_dw_bf16_params.h 234 0x31c2 x +conv2d_dw_bf16_params.h 235 0x31c2 1 x +conv2d_dw_bf16_params.h 235 0x31c8 +conv2d_dw_bf16_params.h 243 0x31c8 1 x +conv2d_dw_bf16_params.h 238 0x31ce x +conv2d_dw_bf16_params.h 242 0x31ce 1 x +conv2d_dw_bf16_params.h 242 0x31d4 +conv2d_dw_bf16_params.h 243 0x31d4 1 x +conv2d_dw_bf16_params.h 239 0x31da x +conv2d_dw_bf16_params.h 242 0x31da 1 x +conv2d_dw_bf16_params.h 243 0x31e0 x +conv2d_dw_bf16_params.h 250 0x31e0 1 x +conv2d_dw_bf16_params.h 234 0x31e6 x +conv2d_dw_bf16_params.h 240 0x31e6 1 x +conv2d_dw_bf16_params.h 253 0x31e6 2 x +conv2d_dw_bf16_params.h 247 0x31ec x +conv2d_dw_bf16_params.h 242 0x31f0 x +conv2d_dw_bf16_params.h 247 0x31f0 1 +conv2d_dw_bf16_params.h 241 0x31f6 x +conv2d_dw_bf16_params.h 243 0x31f6 1 x +conv2d_dw_bf16_params.h 243 0x31fc +conv2d_dw_bf16_params.h 245 0x31fc 1 x +conv2d_dw_bf16_params.h 243 0x3202 x +conv2d_dw_bf16_params.h 248 0x3202 1 x +conv2d_dw_bf16_params.h 245 0x3208 x +conv2d_dw_bf16_params.h 250 0x3208 1 x +conv2d_dw_bf16_params.h 246 0x320e x +conv2d_dw_bf16_params.h 250 0x320e 1 +conv2d_dw_bf16_params.h 247 0x3214 x +conv2d_dw_bf16_params.h 248 0x3214 1 x +conv2d_dw_bf16_params.h 250 0x321a x +conv2d_dw_bf16_params.h 250 0x321a 1 x +conv2d_dw_bf16_params.h 248 0x3220 x +conv2d_dw_bf16_params.h 250 0x3220 1 +conv2d_dw_bf16_params.h 249 0x3226 x +conv2d_dw_bf16_params.h 255 0x3226 1 x +conv2d_dw_bf16_params.h 258 0x3226 2 +conv2d_dw_bf16_params.h 258 0x3226 3 +conv2d_dw_bf16_params.h 252 0x3230 x +conv2d_dw_bf16_params.h 253 0x3230 1 x +conv2d_dw_bf16_params.h 253 0x3236 +conv2d_dw_bf16_params.h 255 0x3236 1 x +conv2d_dw_bf16_params.h 254 0x323c x +conv2d_dw_bf16_params.h 255 0x323c 1 +conv2d_dw_bf16_params.h 256 0x323c 2 +conv2d_dw_bf16_params.h 258 0x323c 3 x +conv2d_dw_bf16_params.h 258 0x323c 4 x +conv2d_dw_bf16_params.h 259 0x323c 5 +conv2d_dw_bf16_params.h 263 0x323c 6 +conv2d_dw_bf16_params.h 255 0x3248 x +conv2d_dw_bf16_params.h 256 0x324c x +conv2d_dw_bf16_params.h 260 0x324c 1 x +conv2d_dw_bf16_params.h 258 0x3252 x +conv2d_dw_bf16_params.h 260 0x3252 1 +conv2d_dw_bf16_params.h 259 0x3258 x +conv2d_dw_bf16_params.h 264 0x3258 1 x +conv2d_dw_bf16_params.h 260 0x325e x +conv2d_dw_bf16_params.h 264 0x325e 1 +conv2d_dw_bf16_params.h 262 0x3264 x +conv2d_dw_bf16_params.h 263 0x3268 x +conv2d_dw_bf16_params.h 264 0x326c x +conv2d_dw_bf16_params.h 266 0x3270 x +conv2d_dw_bf16_params.h 266 0x3280 +conv2d_dw_bf16_params.h 266 0x3280 1 +conv2d_dw_bf16_params.h 266 0x3286 +conv2d_dw_bf16_params.h 266 0x328a +conv2d_dw_bf16_params.h 266 0x3296 +conv2d_dw_bf16_params.h 266 0x32a0 +conv2d_dw_bf16_params.h 267 0x32a0 1 +conv2d_dw_bf16_params.h 266 0x32aa +conv2d_dw_bf16_params.h 266 0x32aa 1 +conv2d_dw_bf16_params.h 266 0x32b0 +conv2d_dw_bf16_params.h 266 0x32b6 +conv2d_dw_bf16_params.h 267 0x32bc x +conv2d_dw_bf16_params.h 266 0x32c6 x +conv2d_dw_bf16_params.h 266 0x32ca +conv2d_dw_bf16_params.h 267 0x32ca 1 x +conv2d_dw_bf16_params.h 266 0x32d0 x +conv2d_dw_bf16_params.h 266 0x32d8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 444 0x35c0 x +superkernels.cpp 449 0x35c0 1 +superkernels.cpp 449 0x35c6 x +superkernels.cpp 444 0x35cc +superkernels.cpp 467 0x35da +superkernels.cpp 452 0x35ea +superkernels.cpp 449 0x35f2 +superkernels.cpp 449 0x35f2 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x35f8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 446 0x35fc x +superkernels.cpp 446 0x3600 +superkernels.cpp 446 0x3604 +superkernels.cpp 446 0x360a +superkernels.cpp 461 0x360e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x360e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 451 0x3618 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3618 1 +tile.hpp 86 0x3618 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 451 0x3626 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x3630 +tile.hpp 74 0x3634 +tile.hpp 74 0x3638 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 452 0x3640 +superkernels.cpp 461 0x3640 1 +superkernels.cpp 452 0x3648 x +superkernels.cpp 453 0x364c +superkernels.cpp 453 0x364c 1 x +superkernels.cpp 452 0x365e +superkernels.cpp 457 0x365e 1 +superkernels.cpp 452 0x3668 x +superkernels.cpp 453 0x366c x +superkernels.cpp 457 0x3670 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3680 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 457 0x3684 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3684 1 +io_buffer_main.h 218 0x368e +io_buffer_main.h 218 0x3692 +io_buffer_main.h 235 0x3696 x +io_buffer_main.h 218 0x36a4 x +io_buffer_main.h 218 0x36a4 1 x +io_buffer_main.h 218 0x36a8 +io_buffer_main.h 395 0x36ac +io_buffer_main.h 395 0x36b6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 456 0x36ba +superkernels.cpp 459 0x36ba 1 +superkernels.cpp 464 0x36ba 2 +superkernels.cpp 465 0x36ba 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x36ba 4 +io_buffer_main.h 425 0x36ba 5 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 52 0x36c4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x36ce +io_buffer_main.h 324 0x36ce 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 457 0x36d4 x +superkernels.cpp 457 0x36d8 +superkernels.cpp 461 0x36d8 1 +superkernels.cpp 456 0x36e2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x36ec x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 457 0x36f2 x +superkernels.cpp 456 0x36f6 x +superkernels.cpp 459 0x36fa x +superkernels.cpp 461 0x36fe x +superkernels.cpp 456 0x3704 x +superkernels.cpp 459 0x3708 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 201 0x370c x +io_buffer_impl.h 52 0x3710 x +io_buffer_impl.h 52 0x3714 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x3720 +io_buffer_main.h 324 0x3724 x +io_buffer_main.h 425 0x3734 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 464 0x3738 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3738 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 464 0x3742 x +superkernels.cpp 464 0x3746 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3756 x +io_buffer_main.h 327 0x375a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 464 0x375e x +superkernels.cpp 464 0x3762 +superkernels.cpp 465 0x3768 +superkernels.cpp 465 0x3774 x +superkernels.cpp 467 0x3780 +superkernels.cpp 467 0x378a x +superkernels.cpp 467 0x378e +superkernels.cpp 578 0x37a0 +superkernels.cpp 578 0x37a0 1 x +superkernels.cpp 583 0x37a6 +superkernels.cpp 583 0x37b0 x +superkernels.cpp 587 0x37c2 +superkernels.cpp 590 0x37c2 1 +superkernels.cpp 599 0x37c2 2 +superkernels.cpp 629 0x37c2 3 +superkernels.cpp 583 0x37d0 +superkernels.cpp 583 0x37d0 1 +superkernels.cpp 580 0x37da x +superkernels.cpp 580 0x37de +superkernels.cpp 580 0x37e2 +superkernels.cpp 580 0x37e8 +superkernels.cpp 587 0x37ec + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x37ec 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 621 0x37f6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x37f6 1 +tile.hpp 86 0x37f6 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 587 0x3802 +superkernels.cpp 587 0x3802 1 +superkernels.cpp 587 0x380c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x3816 +tile.hpp 74 0x381a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 587 0x3820 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3820 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 590 0x3830 +superkernels.cpp 591 0x3830 1 +superkernels.cpp 590 0x3836 x +superkernels.cpp 591 0x3846 x +superkernels.cpp 591 0x384a +superkernels.cpp 599 0x3850 +superkernels.cpp 599 0x3854 x +superkernels.cpp 591 0x385e x +superkernels.cpp 611 0x386a +superkernels.cpp 594 0x3874 x +superkernels.cpp 595 0x387e +superkernels.cpp 594 0x3884 +superkernels.cpp 594 0x388a +superkernels.cpp 595 0x38a0 x +superkernels.cpp 621 0x38aa +superkernels.cpp 621 0x38c0 +superkernels.cpp 599 0x38d0 x +superkernels.cpp 600 0x38da +superkernels.cpp 599 0x38e0 +superkernels.cpp 599 0x38e6 +superkernels.cpp 600 0x38f0 x +superkernels.cpp 621 0x38fa +superkernels.cpp 606 0x3904 x +superkernels.cpp 611 0x3904 1 +superkernels.cpp 611 0x390e x +superkernels.cpp 607 0x3912 x +superkernels.cpp 607 0x3916 +superkernels.cpp 607 0x391c +superkernels.cpp 606 0x3924 +superkernels.cpp 607 0x392a +superkernels.cpp 606 0x392e x +superkernels.cpp 611 0x392e 1 +superkernels.cpp 607 0x3938 x +superkernels.cpp 611 0x393c x +superkernels.cpp 608 0x3940 x +superkernels.cpp 608 0x3944 +superkernels.cpp 611 0x3944 1 x +superkernels.cpp 608 0x3950 x +superkernels.cpp 614 0x3960 +superkernels.cpp 614 0x3966 x +superkernels.cpp 616 0x3966 1 +superkernels.cpp 615 0x3970 +superkernels.cpp 616 0x3970 1 x +superkernels.cpp 615 0x397a x +superkernels.cpp 618 0x3986 x +superkernels.cpp 618 0x3986 1 x +superkernels.cpp 614 0x398c x +superkernels.cpp 616 0x398c 1 x +superkernels.cpp 615 0x3992 x +superkernels.cpp 616 0x3996 x +superkernels.cpp 615 0x399a x +superkernels.cpp 614 0x399e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x39a2 +io_buffer_main.h 218 0x39b2 x +io_buffer_main.h 218 0x39b6 +io_buffer_main.h 218 0x39ba +io_buffer_main.h 218 0x39be +io_buffer_main.h 235 0x39c4 x +io_buffer_main.h 218 0x39d0 x +io_buffer_main.h 218 0x39d0 1 x +io_buffer_main.h 218 0x39d4 +io_buffer_main.h 395 0x39d4 1 +io_buffer_main.h 395 0x39e2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 621 0x39f6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x39f6 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 621 0x39fa +superkernels.cpp 621 0x39fe x +superkernels.cpp 621 0x3a04 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x3a10 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 623 0x3a20 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3a20 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 623 0x3a2a x +superkernels.cpp 623 0x3a2a 1 +superkernels.cpp 623 0x3a34 +superkernels.cpp 623 0x3a44 +superkernels.cpp 623 0x3a48 +superkernels.cpp 629 0x3a58 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3a58 1 x +io_buffer_main.h 395 0x3a58 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3a62 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3a62 1 +io_buffer_main.h 218 0x3a6c +io_buffer_main.h 218 0x3a70 +io_buffer_main.h 235 0x3a74 x +io_buffer_main.h 218 0x3a82 x +io_buffer_main.h 218 0x3a82 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3a86 +superkernels.cpp 630 0x3a86 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3a86 2 +io_buffer_main.h 395 0x3a94 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3a9e x +superkernels.cpp 629 0x3aa2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x3aaa x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3ab2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x3ab2 1 +io_buffer_main.h 324 0x3ab2 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 630 0x3abc x +superkernels.cpp 630 0x3ac0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x3ac6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3ad0 x +superkernels.cpp 630 0x3ad8 x +superkernels.cpp 633 0x3ae8 x +superkernels.cpp 633 0x3aee +superkernels.cpp 633 0x3afa +superkernels.cpp 637 0x3b10 x +superkernels.cpp 637 0x3b16 +superkernels.cpp 637 0x3b1c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3b30 +io_buffer_main.h 327 0x3b30 1 +io_buffer_main.h 324 0x3b34 +io_buffer_main.h 327 0x3b34 1 +io_buffer_main.h 327 0x3b34 2 +io_buffer_main.h 425 0x3b34 3 +io_buffer_main.h 425 0x3b34 4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 645 0x3b3a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x3b3a 1 x +io_buffer_main.h 425 0x3b50 x +io_buffer_main.h 327 0x3b54 x +io_buffer_main.h 324 0x3b58 x +io_buffer_main.h 327 0x3b66 x +io_buffer_main.h 327 0x3b6a +io_buffer_main.h 425 0x3b76 x +io_buffer_main.h 327 0x3b7a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 645 0x3b8c +superkernels.cpp 649 0x3b8c 1 +superkernels.cpp 645 0x3b90 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3b90 1 +io_buffer_main.h 327 0x3b96 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 649 0x3ba0 +superkernels.cpp 648 0x3bb0 +superkernels.cpp 651 0x3bb0 1 +superkernels.cpp 648 0x3bba +superkernels.cpp 648 0x3bba 1 x +superkernels.cpp 649 0x3bba 2 +superkernels.cpp 648 0x3bc4 +superkernels.cpp 648 0x3bd4 +superkernels.cpp 648 0x3bd8 +superkernels.cpp 649 0x3bea x +superkernels.cpp 651 0x3bf4 x +superkernels.cpp 651 0x3bf8 +superkernels.cpp - 0x3bf9 + + +superkernels.cpp: +File name Line number Starting address View Stmt + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 21 0x3c10 x +0_0_reloadable5.cc 23 0x3c10 1 +0_0_reloadable5.cc 23 0x3c14 x +0_0_reloadable5.cc 24 0x3c18 x +0_0_reloadable5.cc 26 0x3c1c x +0_0_reloadable5.cc 25 0x3c20 x +0_0_reloadable5.cc 22 0x3c24 x +0_0_reloadable5.cc 30 0x3c40 x +0_0_reloadable5.cc 32 0x3c40 1 +0_0_reloadable5.cc 32 0x3c44 x +0_0_reloadable5.cc 34 0x3c48 x +0_0_reloadable5.cc 33 0x3c4c x +0_0_reloadable5.cc 31 0x3c50 x +0_0_reloadable5.cc 38 0x3c60 x +0_0_reloadable5.cc 40 0x3c60 1 +0_0_reloadable5.cc 40 0x3c64 x +0_0_reloadable5.cc 42 0x3c68 x +0_0_reloadable5.cc 41 0x3c6c x +0_0_reloadable5.cc 39 0x3c70 x +0_0_reloadable5.cc 46 0x3c80 x +0_0_reloadable5.cc 48 0x3c80 1 +0_0_reloadable5.cc 48 0x3c84 x +0_0_reloadable5.cc 50 0x3c88 x +0_0_reloadable5.cc 49 0x3c8c x +0_0_reloadable5.cc 47 0x3c90 x +0_0_reloadable5.cc 54 0x3ca0 x +0_0_reloadable5.cc 56 0x3ca0 1 +0_0_reloadable5.cc 56 0x3ca4 x +0_0_reloadable5.cc 57 0x3ca8 x +0_0_reloadable5.cc 59 0x3cac x +0_0_reloadable5.cc 58 0x3cb0 x +0_0_reloadable5.cc 55 0x3cb4 x +0_0_reloadable5.cc 63 0x3cd0 x +0_0_reloadable5.cc 65 0x3cd0 1 +0_0_reloadable5.cc 65 0x3cd4 x +0_0_reloadable5.cc 66 0x3cd8 x +0_0_reloadable5.cc 67 0x3cdc x +0_0_reloadable5.cc 69 0x3ce0 x +0_0_reloadable5.cc 68 0x3ce4 x +0_0_reloadable5.cc 64 0x3ce8 x +0_0_reloadable5.cc 73 0x3d00 x +0_0_reloadable5.cc 75 0x3d00 1 +0_0_reloadable5.cc 75 0x3d04 x +0_0_reloadable5.cc 76 0x3d08 x +0_0_reloadable5.cc 78 0x3d0c x +0_0_reloadable5.cc 77 0x3d10 x +0_0_reloadable5.cc 74 0x3d14 x +0_0_reloadable5.cc 94 0x930 x +0_0_reloadable5.cc 96 0x930 1 x +0_0_reloadable5.cc 96 0x930 2 +0_0_reloadable5.cc 98 0x930 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x930 4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 94 0x936 +0_0_reloadable5.cc 96 0x944 +0_0_reloadable5.cc 98 0x944 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x944 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 96 0x94c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x952 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0x958 x +io_buffer_compiler.h 590 0x95c +io_buffer_compiler.h 590 0x960 +io_buffer_compiler.h 590 0x964 +io_buffer_compiler.h 590 0x968 +io_buffer_compiler.h 195 0x978 x +io_buffer_compiler.h 195 0x978 1 x +io_buffer_compiler.h 194 0x97c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x980 +io_buffer_main.h 410 0x980 1 +io_buffer_main.h 410 0x98a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 98 0x98e +0_0_reloadable5.cc 102 0x98e 1 +0_0_reloadable5.cc 98 0x992 x +0_0_reloadable5.cc 98 0x996 +0_0_reloadable5.cc 98 0x99a +0_0_reloadable5.cc 98 0x9a8 +0_0_reloadable5.cc 98 0x9ac + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0x9b0 x +io_buffer_compiler.h 590 0x9b8 +io_buffer_compiler.h 590 0x9bc +io_buffer_compiler.h 590 0x9c0 +io_buffer_compiler.h 590 0x9c4 +io_buffer_compiler.h 195 0x9d4 x +io_buffer_compiler.h 195 0x9d4 1 x +io_buffer_compiler.h 194 0x9d8 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x9e4 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 102 0x9e8 x +0_0_reloadable5.cc 102 0x9ec +0_0_reloadable5.cc 102 0x9f0 +0_0_reloadable5.cc 102 0x9f6 +0_0_reloadable5.cc 102 0xa08 +0_0_reloadable5.cc 105 0xa0c +0_0_reloadable5.cc 107 0xa0c 1 +0_0_reloadable5.cc 105 0xa20 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa20 1 +io_buffer_compiler.h 606 0xa20 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa20 3 +io_buffer_main.h 440 0xa20 4 +io_buffer_main.h 440 0xa26 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 107 0xa2a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa2e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa2e 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 605 0xa38 x +io_buffer_compiler.h 605 0xa3c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa4a +io_buffer_main.h 440 0xa4e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa52 +io_buffer_compiler.h 606 0xa52 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 107 0xa58 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa66 x +io_buffer_compiler.h 605 0xa6a x +io_buffer_compiler.h 606 0xa6a 1 +io_buffer_compiler.h 605 0xa70 +io_buffer_compiler.h 606 0xa70 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa82 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 110 0xa86 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa8a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 110 0xa96 x +0_0_reloadable5.cc 110 0xaa0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xaa4 +io_buffer_compiler.h 606 0xaa8 x +io_buffer_compiler.h 606 0xaac +io_buffer_compiler.h 606 0xab0 +io_buffer_compiler.h - 0xab1 + + +CU: me_div.c: +File name Line number Starting address View Stmt + +./me_div.c:[++] +me_div.c 108 0x3d30 +me_div.c 108 0x3d30 1 +me_div.c 115 0x3d30 2 x +me_div.c 108 0x3d36 +me_div.c 108 0x3d3a +me_div.c 108 0x3d3e +me_div.c 108 0x3d42 +me_div.c 108 0x3d46 +me_div.c 108 0x3d4a +me_div.c 108 0x3d4e +me_div.c 108 0x3d52 +me_div.c 108 0x3d56 +me_div.c 108 0x3d5a +me_div.c 108 0x3d5e +me_div.c 108 0x3d62 +me_div.c 108 0x3d66 +me_div.c 108 0x3d6a +me_div.c 108 0x3d6e +me_div.c 108 0x3d72 +me_div.c 108 0x3d76 +me_div.c 108 0x3d7a +me_div.c 108 0x3d7e +me_div.c 108 0x3d82 +me_div.c 108 0x3d86 +me_div.c 108 0x3d8a +me_div.c 108 0x3d8e +me_div.c 108 0x3d92 +me_div.c 108 0x3d96 +me_div.c 108 0x3d9a +me_div.c 108 0x3d9e +me_div.c 108 0x3da2 +me_div.c 119 0x3da6 x +me_div.c 108 0x3daa x +me_div.c 108 0x3dae +me_div.c 108 0x3db2 +me_div.c 108 0x3db6 +me_div.c - 0x3db7 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/scripts/0_2_reloadable5.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/scripts/0_2_reloadable5.bcf new file mode 100644 index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/scripts/0_2_reloadable5.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x930 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x930 + +_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7ba80 0x40 //reserved for sync buffer +_stack DM_stack 0x7bac0 0x940 //stack for core +_reserved DMb 0x7c400 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c440 0x880//heap +_reserved DMb 0x40000 0x3b280 + +_reserved DMb 0x7ccc0 0x3340 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/scripts/0_2_reloadable5.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/scripts/0_2_reloadable5.prx new file mode 100644 index 0000000000000000000000000000000000000000..2fe5c989bcf90f47053cb541fb6b2da4f69aaa2d --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/scripts/0_2_reloadable5.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/src/0_2_reloadable5.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/src/0_2_reloadable5.cc new file mode 100644 index 0000000000000000000000000000000000000000..ed266062f542d5fd9e7d7b554216254c298b9574 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable5/src/0_2_reloadable5.cc @@ -0,0 +1,110 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void conv2d_maxpool(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_add1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_clip1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_conv_eltbinary(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict,const unsigned int (&)[17],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_conv2d_dwc(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); + +// Declare Kernel objects and external arrays + + +void _b896_wrapper(void* args[]) +{ + conv2d_maxpool( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +void _b901_wrapper(void* args[]) +{ + superkernel_add1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b906_wrapper(void* args[]) +{ + superkernel_clip1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b881_wrapper(void* args[]) +{ + superkernel_mul1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b891_wrapper(void* args[]) +{ + superkernel_mul1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b924_wrapper(void* args[]) +{ + superkernel_conv_eltbinary( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2]), + *reinterpret_cast(args[4]), + *reinterpret_cast*>(args[3])); +} + +void _b919_wrapper(void* args[]) +{ + superkernel_conv2d_dwc( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[7] = { + _b896_wrapper, + _b901_wrapper, + _b906_wrapper, + _b881_wrapper, + _b891_wrapper, + _b924_wrapper, + _b919_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->release(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; +} diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.calltree new file mode 100644 index 0000000000000000000000000000000000000000..0d87486df8d685214c85a56d2c420e80fd5d49bc --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.calltree @@ -0,0 +1,54 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:39 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t + _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _ZN12me_primitive10udiv_dstepEjjRjS0_ (*) + int32_to_float32 + _ZL28normalizeRoundAndPackFloat32iij + _ZL19roundAndPackFloat32iij + float32_add + _ZL14addFloat32Sigsjji + _ZL19propagateFloat32NaNjj + _ZL19roundAndPackFloat32iij (*) + _ZL14subFloat32Sigsjji + _ZL19propagateFloat32NaNjj (*) + _ZL28normalizeRoundAndPackFloat32iij (*) + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 448 0 0 220 10058 _Z13kernelWrapperPPvjjjj + 128 384 1 1 2676 9838 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 2 1588 1588 _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 0 0 2 2 670 670 _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t + 256 256 2 2 2680 2822 _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 0 0 3 3 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 0 2 2 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 0 2 2 114 530 int32_to_float32 + 0 0 2 3 24 416 _ZL28normalizeRoundAndPackFloat32iij + 0 0 2 4 392 392 _ZL19roundAndPackFloat32iij + 0 0 2 2 64 1968 float32_add + 0 0 3 3 624 1128 _ZL14addFloat32Sigsjji + 0 0 4 4 112 112 _ZL19propagateFloat32NaNjj + 0 0 3 4 392 392 _ZL19roundAndPackFloat32iij + 0 0 2 3 752 1280 _ZL14subFloat32Sigsjji + 0 0 3 4 112 112 _ZL19propagateFloat32NaNjj + 0 0 2 4 24 416 _ZL28normalizeRoundAndPackFloat32iij (*) + + +Maximum call level : 4 +Maximum stack level: 4 +Maximum stack size : 448 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..d037f49ea23915d17f1d140dbcf225735acc1af1 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.cmic2 @@ -0,0 +1,14427 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:41 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable2.cc" 29 first +.src_ref 0 "0_0_reloadable2.cc" 31 60 first +.function_start + 2352 "11010100" // LDA r16, [p0]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00101111" // /* MW 4 */ + 2355 "11010000" // /* MW 3 */ + 2356 "11000010" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 29 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 31 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2364 "00000010" // ST p7, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "01010000" // /* MW 6 */ + 2367 "11101000" // /* MW 5 */ + 2368 "00000001" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "01110011" // /* MW 2 */ + 2371 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 17 79 +.src_ref 0 "0_0_reloadable2.cc" 31 110 first + 2372 "00111010" // ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2373 "01111001" // /* MW 9 */ + 2374 "01100000" // /* MW 8 */ + 2375 "10110000" // /* MW 7 */ + 2376 "10000011" // /* MW 6 */ + 2377 "10100111" // /* MW 5 */ + 2378 "00011111" // /* MW 4 */ + 2379 "10110000" // /* MW 3 */ + 2380 "10000010" // /* MW 2 */ + 2381 "11111111" // /* MW 1 */ + 2382 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2383 "00111101" // /* MW 3 */ + 2384 "11110100" // /* MW 2 */ + 2385 "00001111" // /* MW 1 */ + 2386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2387 "00000000" // /* MW 1 */ + 2388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2389 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2390 "00011000" // ADD.NC p0, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2391 "00000010" // /* MW 3 */ + 2392 "01101000" // /* MW 2 */ + 2393 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2394 "10011000" // LDA r16, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2395 "00010110" // /* MW 3 */ + 2396 "00011110" // /* MW 2 */ + 2397 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2398 "10011000" // LDA r18, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2399 "01010110" // /* MW 3 */ + 2400 "00111110" // /* MW 2 */ + 2401 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2402 "10011000" // LDA r17, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2403 "00110110" // /* MW 3 */ + 2404 "11101110" // /* MW 2 */ + 2405 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2406 "10011000" // LDA r27, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2407 "01110110" // /* MW 3 */ + 2408 "00000111" // /* MW 2 */ + 2409 "00000000" // /* MW 1 */ + 2410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2411 "00000000" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2422 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2423 "00100010" // /* MW 3 */ + 2424 "00100001" // /* MW 2 */ + 2425 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2426 "10011000" // ST r16, [p0, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2427 "00010001" // /* MW 3 */ + 2428 "11010110" // /* MW 2 */ + 2429 "00001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2430 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2431 "11111101" // /* MW 3 */ + 2432 "11100000" // /* MW 2 */ + 2433 "00010111" // /* MW 1 */ + 2434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2435 "00000000" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2440 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2441 "00001000" // /* MW 3 */ + 2442 "01010111" // /* MW 2 */ + 2443 "00010100" // /* MW 1 */ + 2444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2445 "00000000" // /* MW 1 */ + 2446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2447 "00000000" // /* MW 1 */ + 2448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2449 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 17 79 first + 2450 "10011000" // LDA p0, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00011110" // /* MW 3 */ + 2452 "00101100" // /* MW 2 */ + 2453 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 18 47 first + 2454 "10011000" // LDA p1, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "10011110" // /* MW 3 */ + 2456 "11111100" // /* MW 2 */ + 2457 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 19 81 first + 2458 "10011000" // LDA p2, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2459 "00011110" // /* MW 3 */ + 2460 "00000101" // /* MW 2 */ + 2461 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 16 4 first +.no_stack_arguments + 2462 "00000100" // JL #7536 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7536 delay_slots=5 */ + 2463 "00000001" // /* MW 5 */ + 2464 "00000000" // /* MW 4 */ + 2465 "10111000" // /* MW 3 */ + 2466 "00001110" // /* MW 2 */ + 2467 "00000000" // /* MW 1 */ +.delay_slot + 2468 "10011000" // ST r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2469 "01010101" // /* MW 3 */ + 2470 "11110011" // /* MW 2 */ + 2471 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2479 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 38 60 first +.return_address + 2480 "10011000" // LDA r16, [p7, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2481 "00010110" // /* MW 3 */ + 2482 "11110110" // /* MW 2 */ + 2483 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2484 "00011000" // LDA r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2485 "01010001" // /* MW 3 */ + 2486 "11110011" // /* MW 2 */ + 2487 "00000111" // /* MW 1 */ + 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2489 "00000000" // /* MW 1 */ + 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2491 "00000000" // /* MW 1 */ + 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2493 "00000000" // /* MW 1 */ + 2494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2495 "00000000" // /* MW 1 */ + 2496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2497 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2498 "00011000" // ADD.NC p0, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2499 "00001000" // /* MW 3 */ + 2500 "01101000" // /* MW 2 */ + 2501 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2502 "10011000" // LDA r16, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2503 "00010110" // /* MW 3 */ + 2504 "00000110" // /* MW 2 */ + 2505 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2506 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2507 "00000101" // /* MW 3 */ + 2508 "00100010" // /* MW 2 */ + 2509 "00010000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ + 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2517 "00000000" // /* MW 1 */ + 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2519 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2520 "00011000" // REL.COND r16, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "00011000" // /* MW 3 */ + 2522 "00010101" // /* MW 2 */ + 2523 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2524 "11010100" // LDA lr, [sp, #-12]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2525 "01000001" // /* MW 5 */ + 2526 "10101111" // /* MW 4 */ + 2527 "00101101" // /* MW 3 */ + 2528 "10000111" // /* MW 2 */ + 2529 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2530 "10011000" // LDA r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2531 "00010110" // /* MW 3 */ + 2532 "11110110" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ + 2534 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2535 "10011001" // /* MW 3 */ + 2536 "11111011" // /* MW 2 */ + 2537 "00000111" // /* MW 1 */ + 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2539 "00000000" // /* MW 1 */ + 2540 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "11110001" // /* MW 3 */ + 2542 "11111101" // /* MW 2 */ + 2543 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 first + 2544 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000001" // /* MW 5 */ + 2546 "00000000" // /* MW 4 */ + 2547 "00000000" // /* MW 3 */ + 2548 "11111000" // /* MW 2 */ + 2549 "11111111" // /* MW 1 */ + 2550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2551 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 + 2552 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2553 "00000000" // /* MW 3 */ + 2554 "00101000" // /* MW 2 */ + 2555 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2556 "10011000" // SUB r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2557 "00000001" // /* MW 3 */ + 2558 "01100011" // /* MW 2 */ + 2559 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2560 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "00010010" // /* MW 3 */ + 2562 "00100001" // /* MW 2 */ + 2563 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2564 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00010001" // /* MW 3 */ + 2566 "11110110" // /* MW 2 */ + 2567 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2571 "00000000" // /* MW 1 */ +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.function setup _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.src_ref 2 "reduce_base_c8.h" 218 first +.src_ref 2 "reduce_base_c8.h" 220 27 first +.src_ref 2 "reduce_base_c8.h" 290 63 +.src_ref 2 "reduce_base_c8.h" 348 46 +.function_start + 2576 "01110110" // LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2577 "01111000" // /* MW 11 */ + 2578 "01100000" // /* MW 10 */ + 2579 "00001001" // /* MW 9 */ + 2580 "01101000" // /* MW 8 */ + 2581 "01100111" // /* MW 7 */ + 2582 "00111110" // /* MW 6 */ + 2583 "10001011" // /* MW 5 */ + 2584 "10000000" // /* MW 4 */ + 2585 "11010011" // /* MW 3 */ + 2586 "10001110" // /* MW 2 */ + 2587 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 287 40 +.src_ref 2 "reduce_base_c8.h" 287 40 +.src_ref 2 "reduce_base_c8.h" 348 46 first + 2588 "10111010" // MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2589 "00001000" // /* MW 9 */ + 2590 "00000111" // /* MW 8 */ + 2591 "00110000" // /* MW 7 */ + 2592 "00001001" // /* MW 6 */ + 2593 "00100101" // /* MW 5 */ + 2594 "00111110" // /* MW 4 */ + 2595 "00000000" // /* MW 3 */ + 2596 "00000111" // /* MW 2 */ + 2597 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 293 77 +.src_ref 2 "reduce_base_c8.h" 298 44 +.src_ref 2 "reduce_base_c8.h" 299 40 +.src_ref 2 "reduce_base_c8.h" 300 59 +.src_ref 2 "reduce_base_c8.h" 326 79 + 2598 "10111010" // MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2599 "01111000" // /* MW 9 */ + 2600 "01100000" // /* MW 8 */ + 2601 "00001000" // /* MW 7 */ + 2602 "10101000" // /* MW 6 */ + 2603 "00010111" // /* MW 5 */ + 2604 "00111110" // /* MW 4 */ + 2605 "00000000" // /* MW 3 */ + 2606 "01111110" // /* MW 2 */ + 2607 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 57 +.src_ref 2 "reduce_base_c8.h" 301 81 +.src_ref 2 "reduce_base_c8.h" 305 77 + 2608 "10111010" // MOVA r5, #-1; MOVXM r4, #65528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2609 "00010000" // /* MW 9 */ + 2610 "11111100" // /* MW 8 */ + 2611 "10001111" // /* MW 7 */ + 2612 "00111100" // /* MW 6 */ + 2613 "00000000" // /* MW 5 */ + 2614 "00000000" // /* MW 4 */ + 2615 "00000000" // /* MW 3 */ + 2616 "11100101" // /* MW 2 */ + 2617 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 218 +.src_ref 2 "reduce_base_c8.h" 280 76 +.src_ref 2 "reduce_base_c8.h" 312 98 + 2618 "10111010" // MOVA r16, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2619 "01110000" // /* MW 9 */ + 2620 "00000000" // /* MW 8 */ + 2621 "00000000" // /* MW 7 */ + 2622 "00000000" // /* MW 6 */ + 2623 "00000010" // /* MW 5 */ + 2624 "00000000" // /* MW 4 */ + 2625 "00000000" // /* MW 3 */ + 2626 "10010000" // /* MW 2 */ + 2627 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 298 44 first + 2628 "00011000" // ADD.NC p4, r0, #46 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2629 "00010111" // /* MW 3 */ + 2630 "01100000" // /* MW 2 */ + 2631 "00011100" // /* MW 1 */ + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 220 25 first + 2634 "10011000" // ST r3, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "01110001" // /* MW 3 */ + 2636 "00011100" // /* MW 2 */ + 2637 "00001000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 221 28 first + 2638 "10011000" // LDA r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "01010110" // /* MW 3 */ + 2640 "00011111" // /* MW 2 */ + 2641 "00000001" // /* MW 1 */ + 2642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2643 "00000000" // /* MW 1 */ + 2644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2645 "00000000" // /* MW 1 */ + 2646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2647 "00000000" // /* MW 1 */ + 2648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2649 "00000000" // /* MW 1 */ + 2650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2651 "00000000" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 221 26 +.src_ref 2 "reduce_base_c8.h" 301 81 first + 2654 "01011100" // ST r26, [p0], #4; AND r17, r26, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2655 "10001001" // /* MW 5 */ + 2656 "01000100" // /* MW 4 */ + 2657 "00111101" // /* MW 3 */ + 2658 "11101010" // /* MW 2 */ + 2659 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 222 26 first +.src_ref 2 "reduce_base_c8.h" 293 58 first +.src_ref 2 "reduce_base_c8.h" 301 81 + 2660 "10111010" // LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2661 "10101000" // /* MW 9 */ + 2662 "01001000" // /* MW 8 */ + 2663 "11001100" // /* MW 7 */ + 2664 "01111110" // /* MW 6 */ + 2665 "01001101" // /* MW 5 */ + 2666 "00000110" // /* MW 4 */ + 2667 "11010000" // /* MW 3 */ + 2668 "11110110" // /* MW 2 */ + 2669 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 63 first + 2670 "10011000" // LSHL r18, r26, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "01101101" // /* MW 3 */ + 2672 "10100100" // /* MW 2 */ + 2673 "00010110" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 293 77 first + 2674 "10011000" // LSHL r6, r4, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "00011101" // /* MW 3 */ + 2676 "00001100" // /* MW 2 */ + 2677 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 41 first +.src_ref 2 "reduce_base_c8.h" 300 59 first + 2678 "00100100" // LSHL r17, r26, r1; ADD.NC r1, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2679 "11111111" // /* MW 5 */ + 2680 "10110010" // /* MW 4 */ + 2681 "10110000" // /* MW 3 */ + 2682 "01000011" // /* MW 2 */ + 2683 "11010100" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 222 24 first +.src_ref 2 "reduce_base_c8.h" 287 40 first + 2690 "01011100" // ST r29, [p0], #4; MAC r7, r7, r29, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2691 "01001100" // /* MW 5 */ + 2692 "10011100" // /* MW 4 */ + 2693 "00111110" // /* MW 3 */ + 2694 "11110110" // /* MW 2 */ + 2695 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 223 29 first +.src_ref 2 "reduce_base_c8.h" 312 60 first + 2696 "11111010" // LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2697 "10101111" // /* MW 9 */ + 2698 "01001001" // /* MW 8 */ + 2699 "00000111" // /* MW 7 */ + 2700 "10000000" // /* MW 6 */ + 2701 "10110101" // /* MW 5 */ + 2702 "11111111" // /* MW 4 */ + 2703 "11010111" // /* MW 3 */ + 2704 "10001010" // /* MW 2 */ + 2705 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 305 57 first + 2706 "10011000" // MUL r20, r3, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11011111" // /* MW 3 */ + 2708 "11101001" // /* MW 2 */ + 2709 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 78 first + 2710 "10011000" // MUL r28, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2711 "01001111" // /* MW 3 */ + 2712 "11111000" // /* MW 2 */ + 2713 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 299 40 first + 2714 "10011000" // LSHL r21, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2715 "11101101" // /* MW 3 */ + 2716 "01101011" // /* MW 2 */ + 2717 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 57 first +.src_ref 2 "reduce_base_c8.h" 299 40 + 2718 "00100100" // LSHL r18, r29, r5; ADD.NC r27, r21, #-48 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2719 "11010000" // /* MW 5 */ + 2720 "10110101" // /* MW 4 */ + 2721 "10111101" // /* MW 3 */ + 2722 "10001011" // /* MW 2 */ + 2723 "11101100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 41 + 2724 "00011000" // ADD r23, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "11111111" // /* MW 3 */ + 2726 "10101111" // /* MW 2 */ + 2727 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 301 85 first + 2728 "10011000" // MUL r29, r29, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "01101111" // /* MW 3 */ + 2730 "01111011" // /* MW 2 */ + 2731 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 223 27 first +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2732 "01011100" // ST r2, [p0], #4; LT r24, r30, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "01010101" // /* MW 5 */ + 2734 "01100000" // /* MW 4 */ + 2735 "00111111" // /* MW 3 */ + 2736 "10001010" // /* MW 2 */ + 2737 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 224 33 first + 2738 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2739 "00101110" // /* MW 3 */ + 2740 "00011100" // /* MW 2 */ + 2741 "00000001" // /* MW 1 */ + 2742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2743 "00000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 224 31 +.src_ref 2 "reduce_base_c8.h" 318 64 + 2754 "00000010" // ST el0, [p0], #4; MOV r31, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2755 "01110000" // /* MW 7 */ + 2756 "00001110" // /* MW 6 */ + 2757 "11110000" // /* MW 5 */ + 2758 "00000011" // /* MW 4 */ + 2759 "00110000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 225 34 first + 2762 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2763 "00001110" // /* MW 3 */ + 2764 "00000100" // /* MW 2 */ + 2765 "00000001" // /* MW 1 */ + 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2767 "00000000" // /* MW 1 */ + 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2769 "00000000" // /* MW 1 */ + 2770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2771 "00000000" // /* MW 1 */ + 2772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2773 "00000000" // /* MW 1 */ + 2774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2775 "00000000" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 225 32 +.src_ref 2 "reduce_base_c8.h" 318 64 + 2778 "00000010" // ST eh0, [p0]; MOV r25, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2779 "01110000" // /* MW 7 */ + 2780 "10001110" // /* MW 6 */ + 2781 "00110000" // /* MW 5 */ + 2782 "00000011" // /* MW 4 */ + 2783 "00110000" // /* MW 3 */ + 2784 "10000001" // /* MW 2 */ + 2785 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 226 32 first + 2786 "10011000" // LDA r30, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2787 "11010110" // /* MW 3 */ + 2788 "00010111" // /* MW 2 */ + 2789 "00000001" // /* MW 1 */ + 2790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2791 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2792 "10000100" // JNZ r24, #2912 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=2912 delay_slots=5 */ + 2793 "00000001" // /* MW 5 */ + 2794 "01000000" // /* MW 4 */ + 2795 "10110000" // /* MW 3 */ + 2796 "00000101" // /* MW 2 */ + 2797 "11000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 98 first +.delay_slot + 2798 "10011000" // LSHL r19, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2799 "00001101" // /* MW 3 */ + 2800 "00100111" // /* MW 2 */ + 2801 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 318 64 first +.delay_slot + 2802 "10011000" // MUL r25, r31, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2803 "10011111" // /* MW 3 */ + 2804 "11110011" // /* MW 2 */ + 2805 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 300 41 first +.src_ref 2 "reduce_base_c8.h" 305 77 first +.delay_slot + 2806 "00100100" // LSHL r20, r20, r5; ADD.NC r5, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "11111111" // /* MW 5 */ + 2808 "10110001" // /* MW 4 */ + 2809 "10110010" // /* MW 3 */ + 2810 "00001011" // /* MW 2 */ + 2811 "10100101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 280 76 first +.delay_slot + 2812 "10011000" // LSHL r16, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00001101" // /* MW 3 */ + 2814 "00100001" // /* MW 2 */ + 2815 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 226 30 first +.src_ref 2 "reduce_base_c8.h" 318 88 first +.delay_slot + 2816 "01011100" // ST r30, [p0, #4]; MUL r31, r25, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "11011111" // /* MW 5 */ + 2818 "11111111" // /* MW 4 */ + 2819 "00111100" // /* MW 3 */ + 2820 "11111010" // /* MW 2 */ + 2821 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2822 "00011000" // MOVX r28, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00000101" // /* MW 3 */ + 2824 "00111000" // /* MW 2 */ + 2825 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2826 "10011000" // EQ r28, r2, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "11000111" // /* MW 3 */ + 2828 "10111001" // /* MW 2 */ + 2829 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2830 "10000100" // JNZ r28, #4032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4032 delay_slots=5 */ + 2831 "00000001" // /* MW 5 */ + 2832 "01000000" // /* MW 4 */ + 2833 "11100000" // /* MW 3 */ + 2834 "00000111" // /* MW 2 */ + 2835 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2841 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2843 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 242 41 first +.delay_slot + 2844 "00011000" // ADD r22, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11111111" // /* MW 3 */ + 2846 "11101101" // /* MW 2 */ + 2847 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2848 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00001001" // /* MW 3 */ + 2850 "00100010" // /* MW 2 */ + 2851 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2852 "10011000" // EQ r17, r17, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00100111" // /* MW 3 */ + 2854 "01100010" // /* MW 2 */ + 2855 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2856 "10000100" // JNZ r17, #3904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3904 delay_slots=5 */ + 2857 "00000001" // /* MW 5 */ + 2858 "01000000" // /* MW 4 */ + 2859 "10100000" // /* MW 3 */ + 2860 "00000111" // /* MW 2 */ + 2861 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2869 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.delay_slot + 2870 "00011000" // MOVX r7, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001101" // /* MW 3 */ + 2872 "00001110" // /* MW 2 */ + 2873 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2874 "10011000" // EQ r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00100111" // /* MW 3 */ + 2876 "11000100" // /* MW 2 */ + 2877 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2878 "10000100" // JNZ r2, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */ + 2879 "00000001" // /* MW 5 */ + 2880 "01000000" // /* MW 4 */ + 2881 "01010000" // /* MW 3 */ + 2882 "00000111" // /* MW 2 */ + 2883 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2893 "00000000" // /* MW 1 */ + 2894 "10000100" // J #3552 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3552 delay_slots=5 */ + 2895 "00000000" // /* MW 5 */ + 2896 "00000000" // /* MW 4 */ + 2897 "11110000" // /* MW 3 */ + 2898 "00000110" // /* MW 2 */ + 2899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 2900 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "00010001" // /* MW 3 */ + 2902 "00110100" // /* MW 2 */ + 2903 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2911 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336 +.src_ref 2 "reduce_base_c8.h" 236 8 + 2912 "00011000" // MOVX r29, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "00010101" // /* MW 3 */ + 2914 "00111010" // /* MW 2 */ + 2915 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2916 "10011000" // LT r24, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "00101010" // /* MW 3 */ + 2918 "01110000" // /* MW 2 */ + 2919 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2920 "10000100" // JNZ r24, #3232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3232 delay_slots=5 */ + 2921 "00000001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "01010000" // /* MW 3 */ + 2924 "00000110" // /* MW 2 */ + 2925 "11000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 316 38 +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 2934 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2935 "00010001" // /* MW 3 */ + 2936 "00110100" // /* MW 2 */ + 2937 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2938 "10011000" // EQ r17, r26, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2939 "00100111" // /* MW 3 */ + 2940 "10100010" // /* MW 2 */ + 2941 "00010110" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2942 "10000100" // JNZ r17, #3104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3104 delay_slots=5 */ + 2943 "00000001" // /* MW 5 */ + 2944 "01000000" // /* MW 4 */ + 2945 "00010000" // /* MW 3 */ + 2946 "00000110" // /* MW 2 */ + 2947 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2949 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2958 "10011000" // NE r2, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00101000" // /* MW 3 */ + 2960 "01000100" // /* MW 2 */ + 2961 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2962 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */ + 2963 "00000001" // /* MW 5 */ + 2964 "01000000" // /* MW 4 */ + 2965 "11110000" // /* MW 3 */ + 2966 "00000110" // /* MW 2 */ + 2967 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2969 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2977 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 286 44 first +.src_ref 2 "reduce_base_c8.h" 289 38 +.src_ref 2 "reduce_base_c8.h" 291 40 +.src_ref 2 "reduce_base_c8.h" 291 40 + 2978 "10111010" // ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2979 "01011000" // /* MW 9 */ + 2980 "11101100" // /* MW 8 */ + 2981 "00000111" // /* MW 7 */ + 2982 "00001000" // /* MW 6 */ + 2983 "00100010" // /* MW 5 */ + 2984 "00000000" // /* MW 4 */ + 2985 "11100000" // /* MW 3 */ + 2986 "11010110" // /* MW 2 */ + 2987 "10000011" // /* MW 1 */ + 2988 "11111000" // MOV r30, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "10100000" // /* MW 3 */ + 2990 "10011100" // /* MW 2 */ + 2991 "00011111" // /* MW 1 */ + 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2993 "00000000" // /* MW 1 */ + 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2995 "00000000" // /* MW 1 */ + 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2997 "00000000" // /* MW 1 */ + 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2999 "00000000" // /* MW 1 */ + 3000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3001 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 287 38 first + 3002 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3003 "11110111" // /* MW 3 */ + 3004 "00011100" // /* MW 2 */ + 3005 "00000100" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 288 39 first + 3018 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "11110111" // /* MW 3 */ + 3020 "00011110" // /* MW 2 */ + 3021 "00000100" // /* MW 1 */ + 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3023 "00000000" // /* MW 1 */ + 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3025 "00000000" // /* MW 1 */ + 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3027 "00000000" // /* MW 1 */ + 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3031 "00000000" // /* MW 1 */ + 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3033 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 289 38 first + 3034 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "01010111" // /* MW 3 */ + 3036 "00011100" // /* MW 2 */ + 3037 "00000100" // /* MW 1 */ + 3038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3039 "00000000" // /* MW 1 */ + 3040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3041 "00000000" // /* MW 1 */ + 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3043 "00000000" // /* MW 1 */ + 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3045 "00000000" // /* MW 1 */ + 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3047 "00000000" // /* MW 1 */ + 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3049 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 39 first + 3050 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3051 "00110111" // /* MW 3 */ + 3052 "00011100" // /* MW 2 */ + 3053 "00000100" // /* MW 1 */ + 3054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3055 "00000000" // /* MW 1 */ + 3056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3057 "00000000" // /* MW 1 */ + 3058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3059 "00000000" // /* MW 1 */ + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ + 3062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3063 "00000000" // /* MW 1 */ + 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3065 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 291 40 first + 3066 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "01010111" // /* MW 3 */ + 3068 "00001000" // /* MW 2 */ + 3069 "00000100" // /* MW 1 */ + 3070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3071 "00000000" // /* MW 1 */ + 3072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3073 "00000000" // /* MW 1 */ + 3074 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3075 "00000000" // /* MW 5 */ + 3076 "00000000" // /* MW 4 */ + 3077 "11101000" // /* MW 3 */ + 3078 "00000110" // /* MW 2 */ + 3079 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3081 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3085 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 292 38 first +.delay_slot + 3086 "10011000" // ST r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3087 "01010001" // /* MW 3 */ + 3088 "00000110" // /* MW 2 */ + 3089 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 293 38 first +.delay_slot + 3090 "00101110" // NOPA; ST r6, [p4, #4]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3091 "00011100" // /* MW 13 */ + 3092 "00000000" // /* MW 12 */ + 3093 "00000000" // /* MW 11 */ + 3094 "01010111" // /* MW 10 */ + 3095 "00011010" // /* MW 9 */ + 3096 "01000000" // /* MW 8 */ + 3097 "00000000" // /* MW 7 */ + 3098 "00000000" // /* MW 6 */ + 3099 "10100011" // /* MW 5 */ + 3100 "00101001" // /* MW 4 */ + 3101 "11111000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528 +.src_ref 2 "reduce_base_c8.h" 274 44 first +.src_ref 2 "reduce_base_c8.h" 275 40 +.src_ref 2 "reduce_base_c8.h" 275 40 + 3104 "10111010" // ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00010000" // /* MW 8 */ + 3107 "01001000" // /* MW 7 */ + 3108 "10101000" // /* MW 6 */ + 3109 "01100111" // /* MW 5 */ + 3110 "00111110" // /* MW 4 */ + 3111 "11100000" // /* MW 3 */ + 3112 "10010010" // /* MW 2 */ + 3113 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 40 first +.src_ref 2 "reduce_base_c8.h" 279 40 + 3114 "10111010" // MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01111000" // /* MW 9 */ + 3116 "00001110" // /* MW 8 */ + 3117 "11010000" // /* MW 7 */ + 3118 "00110011" // /* MW 6 */ + 3119 "00100010" // /* MW 5 */ + 3120 "00001100" // /* MW 4 */ + 3121 "10000000" // /* MW 3 */ + 3122 "10000000" // /* MW 2 */ + 3123 "11111101" // /* MW 1 */ + 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3125 "00000000" // /* MW 1 */ + 3126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3127 "00000000" // /* MW 1 */ + 3128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3129 "00000000" // /* MW 1 */ + 3130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3131 "00000000" // /* MW 1 */ + 3132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 38 + 3134 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3135 "01010111" // /* MW 3 */ + 3136 "00011100" // /* MW 2 */ + 3137 "00000100" // /* MW 1 */ + 3138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3139 "00000000" // /* MW 1 */ + 3140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3141 "00000000" // /* MW 1 */ + 3142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3143 "00000000" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 39 first + 3150 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "11110111" // /* MW 3 */ + 3152 "00011110" // /* MW 2 */ + 3153 "00000100" // /* MW 1 */ + 3154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3155 "00000000" // /* MW 1 */ + 3156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3157 "00000000" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 38 first +.src_ref 2 "reduce_base_c8.h" 277 38 first + 3166 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3167 "01010111" // /* MW 3 */ + 3168 "00011100" // /* MW 2 */ + 3169 "00000100" // /* MW 1 */ + 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3171 "00000000" // /* MW 1 */ + 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3173 "00000000" // /* MW 1 */ + 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3175 "00000000" // /* MW 1 */ + 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3177 "00000000" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 278 39 first + 3182 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3183 "10110111" // /* MW 3 */ + 3184 "00011100" // /* MW 2 */ + 3185 "00000100" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ + 3190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3191 "00000000" // /* MW 1 */ + 3192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3193 "00000000" // /* MW 1 */ + 3194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3195 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3197 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 279 40 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3198 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3199 "00110111" // /* MW 3 */ + 3200 "00001000" // /* MW 2 */ + 3201 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3203 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3205 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3206 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3207 "00000000" // /* MW 5 */ + 3208 "00000000" // /* MW 4 */ + 3209 "11101000" // /* MW 3 */ + 3210 "00000110" // /* MW 2 */ + 3211 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 279 40 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3212 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3213 "01000001" // /* MW 3 */ + 3214 "00000010" // /* MW 2 */ + 3215 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3217 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3219 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 281 38 first +.delay_slot + 3220 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3221 "01110001" // /* MW 3 */ + 3222 "00010100" // /* MW 2 */ + 3223 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 280 38 first +.delay_slot + 3224 "00000010" // ST r16, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3225 "01110000" // /* MW 7 */ + 3226 "10100101" // /* MW 6 */ + 3227 "00000001" // /* MW 5 */ + 3228 "00000000" // /* MW 4 */ + 3229 "00110000" // /* MW 3 */ + 3230 "11000010" // /* MW 2 */ + 3231 "10000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656 +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 301 40 +.src_ref 2 "reduce_base_c8.h" 302 76 + 3232 "00101100" // LDA r3, [sp, #-4]; MOVX r4, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3233 "00110010" // /* MW 5 */ + 3234 "00010000" // /* MW 4 */ + 3235 "00100000" // /* MW 3 */ + 3236 "10001110" // /* MW 2 */ + 3237 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 3238 "10011000" // EQ r4, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3239 "01000111" // /* MW 3 */ + 3240 "10001000" // /* MW 2 */ + 3241 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3242 "10000100" // JNZ r4, #3408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3408 delay_slots=5 */ + 3243 "00000001" // /* MW 5 */ + 3244 "01000000" // /* MW 4 */ + 3245 "10101000" // /* MW 3 */ + 3246 "00000110" // /* MW 2 */ + 3247 "00100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 298 44 +.src_ref 2 "reduce_base_c8.h" 303 40 +.src_ref 2 "reduce_base_c8.h" 310 44 +.src_ref 2 "reduce_base_c8.h" 311 38 +.delay_slot + 3248 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "01000001" // /* MW 3 */ + 3250 "00000010" // /* MW 2 */ + 3251 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3259 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3260 "00011000" // MOVX r3, #7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3261 "00011101" // /* MW 3 */ + 3262 "00000110" // /* MW 2 */ + 3263 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3264 "10011000" // NE r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3265 "00101000" // /* MW 3 */ + 3266 "11000100" // /* MW 2 */ + 3267 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3268 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */ + 3269 "00000001" // /* MW 5 */ + 3270 "01000000" // /* MW 4 */ + 3271 "11110000" // /* MW 3 */ + 3272 "00000110" // /* MW 2 */ + 3273 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3275 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3283 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 310 44 first +.src_ref 2 "reduce_base_c8.h" 312 41 first +.src_ref 2 "reduce_base_c8.h" 315 40 + 3284 "10111010" // ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3285 "01011000" // /* MW 9 */ + 3286 "11101100" // /* MW 8 */ + 3287 "00000111" // /* MW 7 */ + 3288 "11111000" // /* MW 6 */ + 3289 "00101111" // /* MW 5 */ + 3290 "00100110" // /* MW 4 */ + 3291 "11100000" // /* MW 3 */ + 3292 "10000110" // /* MW 2 */ + 3293 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 313 38 +.src_ref 2 "reduce_base_c8.h" 317 97 + 3294 "10111010" // MOVA r3, #-6; MOVXM dj0, #65536 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3295 "00010000" // /* MW 9 */ + 3296 "00000000" // /* MW 8 */ + 3297 "01000000" // /* MW 7 */ + 3298 "01000000" // /* MW 6 */ + 3299 "00000000" // /* MW 5 */ + 3300 "00000000" // /* MW 4 */ + 3301 "00000000" // /* MW 3 */ + 3302 "01000011" // /* MW 2 */ + 3303 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 315 40 +.src_ref 2 "reduce_base_c8.h" 317 97 first + 3304 "01100100" // LSHL r3, r28, r3; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3305 "00000001" // /* MW 5 */ + 3306 "00100000" // /* MW 4 */ + 3307 "10111100" // /* MW 3 */ + 3308 "11000111" // /* MW 2 */ + 3309 "11100000" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ + 3316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3317 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 311 38 first + 3318 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3319 "00110111" // /* MW 3 */ + 3320 "00011100" // /* MW 2 */ + 3321 "00000100" // /* MW 1 */ + 3322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3323 "00000000" // /* MW 1 */ + 3324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3325 "00000000" // /* MW 1 */ + 3326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3327 "00000000" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 39 first + 3334 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3335 "01010111" // /* MW 3 */ + 3336 "00011100" // /* MW 2 */ + 3337 "00000100" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ + 3342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3343 "00000000" // /* MW 1 */ + 3344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3345 "00000000" // /* MW 1 */ + 3346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3347 "00000000" // /* MW 1 */ + 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3349 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 313 38 first + 3350 "10011000" // ST dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "01000001" // /* MW 3 */ + 3352 "00011100" // /* MW 2 */ + 3353 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 315 40 first + 3354 "00011000" // ST.s16 r24, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00010111" // /* MW 3 */ + 3356 "00001011" // /* MW 2 */ + 3357 "00000100" // /* MW 1 */ + 3358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3359 "00000000" // /* MW 1 */ + 3360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3361 "00000000" // /* MW 1 */ + 3362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3363 "00000000" // /* MW 1 */ + 3364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3365 "00000000" // /* MW 1 */ + 3366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3367 "00000000" // /* MW 1 */ + 3368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3369 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 316 38 first + 3370 "10011000" // ST r26, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3371 "01010001" // /* MW 3 */ + 3372 "00000111" // /* MW 2 */ + 3373 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 317 38 first + 3374 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3375 "01110001" // /* MW 3 */ + 3376 "00010100" // /* MW 2 */ + 3377 "00001100" // /* MW 1 */ + 3378 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3379 "00000000" // /* MW 5 */ + 3380 "00000000" // /* MW 4 */ + 3381 "11101000" // /* MW 3 */ + 3382 "00000110" // /* MW 2 */ + 3383 "00000000" // /* MW 1 */ +.delay_slot + 3384 "11111000" // MOV r30, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3385 "10100000" // /* MW 3 */ + 3386 "10011111" // /* MW 2 */ + 3387 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3394 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3395 "00011100" // /* MW 13 */ + 3396 "00000000" // /* MW 12 */ + 3397 "00000000" // /* MW 11 */ + 3398 "01010111" // /* MW 10 */ + 3399 "00011010" // /* MW 9 */ + 3400 "01000000" // /* MW 8 */ + 3401 "00000000" // /* MW 7 */ + 3402 "00000000" // /* MW 6 */ + 3403 "10110110" // /* MW 5 */ + 3404 "00000010" // /* MW 4 */ + 3405 "11110000" // /* MW 3 */ + 3406 "00101100" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832 +.src_ref 2 "reduce_base_c8.h" 298 44 first +.src_ref 2 "reduce_base_c8.h" 301 40 +.src_ref 2 "reduce_base_c8.h" 301 40 first + 3408 "10111010" // ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3409 "01011000" // /* MW 9 */ + 3410 "00010000" // /* MW 8 */ + 3411 "01001000" // /* MW 7 */ + 3412 "01110000" // /* MW 6 */ + 3413 "00101011" // /* MW 5 */ + 3414 "00000110" // /* MW 4 */ + 3415 "11100000" // /* MW 3 */ + 3416 "10000110" // /* MW 2 */ + 3417 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 76 +.src_ref 2 "reduce_base_c8.h" 303 40 +.src_ref 2 "reduce_base_c8.h" 306 62 + 3418 "10111010" // MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3419 "01111000" // /* MW 9 */ + 3420 "00001110" // /* MW 8 */ + 3421 "11010000" // /* MW 7 */ + 3422 "10101000" // /* MW 6 */ + 3423 "01000111" // /* MW 5 */ + 3424 "00111110" // /* MW 4 */ + 3425 "10000000" // /* MW 3 */ + 3426 "10000000" // /* MW 2 */ + 3427 "11111101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 76 first + 3428 "10011000" // LSHL r4, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3429 "01001101" // /* MW 3 */ + 3430 "11001000" // /* MW 2 */ + 3431 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 41 +.src_ref 2 "reduce_base_c8.h" 306 62 first + 3432 "00100100" // MUL r30, r30, r6; ADD.NC r3, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3433 "11111111" // /* MW 5 */ + 3434 "10100100" // /* MW 4 */ + 3435 "11110001" // /* MW 3 */ + 3436 "10001101" // /* MW 2 */ + 3437 "11110111" // /* MW 1 */ + 3438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3439 "00000000" // /* MW 1 */ + 3440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3441 "00000000" // /* MW 1 */ + 3442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3443 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 299 38 first + 3444 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3445 "01110111" // /* MW 3 */ + 3446 "00011111" // /* MW 2 */ + 3447 "00000100" // /* MW 1 */ + 3448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3449 "00000000" // /* MW 1 */ + 3450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3451 "00000000" // /* MW 1 */ + 3452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3453 "00000000" // /* MW 1 */ + 3454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3455 "00000000" // /* MW 1 */ + 3456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3457 "00000000" // /* MW 1 */ + 3458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3459 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 300 39 first + 3460 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3461 "10110111" // /* MW 3 */ + 3462 "00011100" // /* MW 2 */ + 3463 "00000100" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ + 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3473 "00000000" // /* MW 1 */ + 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3475 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 301 38 first + 3476 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "01010111" // /* MW 3 */ + 3478 "00011100" // /* MW 2 */ + 3479 "00000100" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 39 first + 3492 "00011000" // ST.s16 r3, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "01110111" // /* MW 3 */ + 3494 "00011100" // /* MW 2 */ + 3495 "00000100" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ + 3506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3507 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 303 40 first + 3508 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3509 "00110111" // /* MW 3 */ + 3510 "00001000" // /* MW 2 */ + 3511 "00000100" // /* MW 1 */ + 3512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3513 "00000000" // /* MW 1 */ + 3514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3515 "00000000" // /* MW 1 */ + 3516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3517 "00000000" // /* MW 1 */ + 3518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3519 "00000000" // /* MW 1 */ + 3520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3521 "00000000" // /* MW 1 */ + 3522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3523 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 304 38 first + 3524 "10011000" // ST r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3525 "00110001" // /* MW 3 */ + 3526 "00000110" // /* MW 2 */ + 3527 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 305 38 first + 3528 "00000010" // ST r20, [p4, #4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3529 "01110000" // /* MW 7 */ + 3530 "10100101" // /* MW 6 */ + 3531 "00000001" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "00110000" // /* MW 3 */ + 3534 "11010010" // /* MW 2 */ + 3535 "10000010" // /* MW 1 */ +.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 3536 "10111000" // MOV dj0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "01000000" // /* MW 3 */ + 3538 "10000000" // /* MW 2 */ + 3539 "00011000" // /* MW 1 */ + 3540 "00110110" // ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3541 "10000001" // /* MW 11 */ + 3542 "10101101" // /* MW 10 */ + 3543 "00000000" // /* MW 9 */ + 3544 "00000000" // /* MW 8 */ + 3545 "00000000" // /* MW 7 */ + 3546 "00000000" // /* MW 6 */ + 3547 "00100000" // /* MW 5 */ + 3548 "00000000" // /* MW 4 */ + 3549 "11100000" // /* MW 3 */ + 3550 "01111010" // /* MW 2 */ + 3551 "01100000" // /* MW 1 */ +.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.src_ref 2 "reduce_base_c8.h" 326 79 first +.src_ref 2 "reduce_base_c8.h" 329 51 + 3552 "00010100" // MOVA m2, #24; ADD.NC p0, r0, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3553 "00011110" // /* MW 5 */ + 3554 "11000000" // /* MW 4 */ + 3555 "10000000" // /* MW 3 */ + 3556 "00001000" // /* MW 2 */ + 3557 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.src_ref 2 "reduce_base_c8.h" 330 26 +.src_ref 3 "reduce_mean_c8_impl.h" 139 51 first + 3558 "10111010" // LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "01011000" // /* MW 9 */ + 3560 "11100010" // /* MW 8 */ + 3561 "00000111" // /* MW 7 */ + 3562 "00001000" // /* MW 6 */ + 3563 "00000010" // /* MW 5 */ + 3564 "00000000" // /* MW 4 */ + 3565 "11010000" // /* MW 3 */ + 3566 "10001010" // /* MW 2 */ + 3567 "01000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 331 24 +.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first + 3568 "01010100" // LDA.s16 r3, [p2]; MOV m1, #38 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3569 "10011001" // /* MW 5 */ + 3570 "00000000" // /* MW 4 */ + 3571 "01010010" // /* MW 3 */ + 3572 "10001110" // /* MW 2 */ + 3573 "01000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 141 49 first + 3574 "10011000" // LDA r1, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3575 "00110110" // /* MW 3 */ + 3576 "00010100" // /* MW 2 */ + 3577 "00000010" // /* MW 1 */ + 3578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3579 "00000000" // /* MW 1 */ + 3580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3581 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 326 28 first + 3582 "00011000" // ST.s16 r31, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3583 "11110111" // /* MW 3 */ + 3584 "00101111" // /* MW 2 */ + 3585 "00000000" // /* MW 1 */ + 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3587 "00000000" // /* MW 1 */ + 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3589 "00000000" // /* MW 1 */ + 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3591 "00000000" // /* MW 1 */ + 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3593 "00000000" // /* MW 1 */ + 3594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3595 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3597 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 327 31 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3598 "00011000" // ST.s16 r24, [p0], #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3599 "00010111" // /* MW 3 */ + 3600 "01011111" // /* MW 2 */ + 3601 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3603 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3609 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 327 31 +.src_ref 2 "reduce_base_c8.h" 328 23 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3610 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00000001" // /* MW 3 */ + 3612 "00110000" // /* MW 2 */ + 3613 "00010000" // /* MW 1 */ + 3614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3615 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 328 23 first + 3616 "00011000" // ST.s16 r24, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00010111" // /* MW 3 */ + 3618 "11001111" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 51 first + 3620 "10011000" // LDA.u16 r4, [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "10011010" // /* MW 3 */ + 3622 "01001000" // /* MW 2 */ + 3623 "00000000" // /* MW 1 */ + 3624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3625 "00000000" // /* MW 1 */ + 3626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3627 "00000000" // /* MW 1 */ + 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3629 "00000000" // /* MW 1 */ + 3630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3633 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 28 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3634 "00011000" // ST.s16 r0, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3635 "00010111" // /* MW 3 */ + 3636 "11111100" // /* MW 2 */ + 3637 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.src_ref 2 "reduce_base_c8.h" 330 28 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3638 "00100100" // LSHL r4, r4, r26; ADD.NC r5, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3639 "11111111" // /* MW 5 */ + 3640 "10100100" // /* MW 4 */ + 3641 "10110010" // /* MW 3 */ + 3642 "00110101" // /* MW 2 */ + 3643 "00100001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3644 "10011000" // SUB r0, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3645 "01000001" // /* MW 3 */ + 3646 "00000000" // /* MW 2 */ + 3647 "00010000" // /* MW 1 */ + 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3649 "00000000" // /* MW 1 */ + 3650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3651 "00000000" // /* MW 1 */ + 3652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3653 "00000000" // /* MW 1 */ + 3654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3655 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 330 26 first + 3656 "00011000" // ST.s16 r5, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3657 "10110111" // /* MW 3 */ + 3658 "00001000" // /* MW 2 */ + 3659 "00000000" // /* MW 1 */ + 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3661 "00000000" // /* MW 1 */ + 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3663 "00000000" // /* MW 1 */ + 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3665 "00000000" // /* MW 1 */ + 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3667 "00000000" // /* MW 1 */ + 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3669 "00000000" // /* MW 1 */ + 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3671 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 331 24 first + 3672 "00011000" // ST.s16 r19, [p0], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3673 "01110111" // /* MW 3 */ + 3674 "00101010" // /* MW 2 */ + 3675 "00000000" // /* MW 1 */ + 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3677 "00000000" // /* MW 1 */ + 3678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3679 "00000000" // /* MW 1 */ + 3680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3681 "00000000" // /* MW 1 */ + 3682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3683 "00000000" // /* MW 1 */ + 3684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3685 "00000000" // /* MW 1 */ + 3686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3687 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 139 40 first + 3688 "00011000" // ST.s8 r2, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3689 "01000111" // /* MW 3 */ + 3690 "11101100" // /* MW 2 */ + 3691 "00000000" // /* MW 1 */ + 3692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3693 "00000000" // /* MW 1 */ + 3694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3695 "00000000" // /* MW 1 */ + 3696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3697 "00000000" // /* MW 1 */ + 3698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3699 "00000000" // /* MW 1 */ + 3700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3701 "00000000" // /* MW 1 */ + 3702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3703 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first + 3704 "00011000" // ST.s16 r3, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3705 "01110111" // /* MW 3 */ + 3706 "00000100" // /* MW 2 */ + 3707 "00000000" // /* MW 1 */ + 3708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3709 "00000000" // /* MW 1 */ + 3710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3711 "00000000" // /* MW 1 */ + 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3713 "00000000" // /* MW 1 */ + 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3715 "00000000" // /* MW 1 */ + 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3717 "00000000" // /* MW 1 */ + 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3719 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 141 38 first + 3720 "00011000" // ST.s8 r1, [p0, #-2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3721 "00100111" // /* MW 3 */ + 3722 "11100100" // /* MW 2 */ + 3723 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 349 4 first + 3724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3725 "00000000" // /* MW 3 */ + 3726 "00101000" // /* MW 2 */ + 3727 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 349 4 +.delay_slot + 3728 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3729 "00000001" // /* MW 5 */ + 3730 "00000000" // /* MW 4 */ + 3731 "00000000" // /* MW 3 */ + 3732 "11111000" // /* MW 2 */ + 3733 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3735 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3737 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3740 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3741 "01100111" // /* MW 3 */ + 3742 "00000001" // /* MW 2 */ + 3743 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168 +.src_ref 2 "reduce_base_c8.h" 262 44 first +.src_ref 2 "reduce_base_c8.h" 263 77 + 3744 "10111010" // ST.s16 r21, [p4], #2; MOVXM r5, #65512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3745 "00010000" // /* MW 9 */ + 3746 "11110100" // /* MW 8 */ + 3747 "10101111" // /* MW 7 */ + 3748 "00111100" // /* MW 6 */ + 3749 "00000000" // /* MW 5 */ + 3750 "00000000" // /* MW 4 */ + 3751 "11100000" // /* MW 3 */ + 3752 "11010110" // /* MW 2 */ + 3753 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 56 +.src_ref 2 "reduce_base_c8.h" 263 77 first +.src_ref 2 "reduce_base_c8.h" 267 40 + 3754 "10111010" // LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3755 "01011000" // /* MW 9 */ + 3756 "11101100" // /* MW 8 */ + 3757 "00000111" // /* MW 7 */ + 3758 "00000100" // /* MW 6 */ + 3759 "01111101" // /* MW 5 */ + 3760 "00001010" // /* MW 4 */ + 3761 "00100000" // /* MW 3 */ + 3762 "10001010" // /* MW 2 */ + 3763 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 118 +.src_ref 2 "reduce_base_c8.h" 329 30 + 3764 "10111010" // MOVA r26, #4; MOVXM r6, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3765 "10010000" // /* MW 9 */ + 3766 "11111111" // /* MW 8 */ + 3767 "11001111" // /* MW 7 */ + 3768 "00111100" // /* MW 6 */ + 3769 "00000000" // /* MW 5 */ + 3770 "00000000" // /* MW 4 */ + 3771 "00000000" // /* MW 3 */ + 3772 "10011010" // /* MW 2 */ + 3773 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 118 first + 3774 "10011000" // ADD r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3775 "01100000" // /* MW 3 */ + 3776 "11100010" // /* MW 2 */ + 3777 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 98 +.src_ref 2 "reduce_base_c8.h" 267 116 first + 3778 "00011000" // MAC r29, r29, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3779 "01000110" // /* MW 3 */ + 3780 "01111010" // /* MW 2 */ + 3781 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 60 first +.src_ref 2 "reduce_base_c8.h" 265 98 first + 3782 "00011000" // MSC r21, r21, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3783 "01001110" // /* MW 3 */ + 3784 "01101010" // /* MW 2 */ + 3785 "00010100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3787 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 38 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3788 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3789 "01010111" // /* MW 3 */ + 3790 "00011100" // /* MW 2 */ + 3791 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 56 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3792 "10011000" // MUL r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3793 "00101111" // /* MW 3 */ + 3794 "11000100" // /* MW 2 */ + 3795 "00010001" // /* MW 1 */ + 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3797 "00000000" // /* MW 1 */ + 3798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3799 "00000000" // /* MW 1 */ + 3800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3801 "00000000" // /* MW 1 */ + 3802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3803 "00000000" // /* MW 1 */ + 3804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3805 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 264 39 first + 3806 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3807 "11010111" // /* MW 3 */ + 3808 "00011110" // /* MW 2 */ + 3809 "00000100" // /* MW 1 */ + 3810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3811 "00000000" // /* MW 1 */ + 3812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3813 "00000000" // /* MW 1 */ + 3814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3815 "00000000" // /* MW 1 */ + 3816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3817 "00000000" // /* MW 1 */ + 3818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 38 first + 3822 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3823 "10110111" // /* MW 3 */ + 3824 "00011110" // /* MW 2 */ + 3825 "00000100" // /* MW 1 */ + 3826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3827 "00000000" // /* MW 1 */ + 3828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3829 "00000000" // /* MW 1 */ + 3830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3831 "00000000" // /* MW 1 */ + 3832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3833 "00000000" // /* MW 1 */ + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ + 3836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3837 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 266 39 first + 3838 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3839 "00110111" // /* MW 3 */ + 3840 "00011100" // /* MW 2 */ + 3841 "00000100" // /* MW 1 */ + 3842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3843 "00000000" // /* MW 1 */ + 3844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3845 "00000000" // /* MW 1 */ + 3846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3847 "00000000" // /* MW 1 */ + 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3849 "00000000" // /* MW 1 */ + 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3851 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3853 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 40 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3854 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3855 "01010111" // /* MW 3 */ + 3856 "00001000" // /* MW 2 */ + 3857 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3859 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3862 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3863 "00000000" // /* MW 5 */ + 3864 "00000000" // /* MW 4 */ + 3865 "11101000" // /* MW 3 */ + 3866 "00000110" // /* MW 2 */ + 3867 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 42 +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3868 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3869 "01000001" // /* MW 3 */ + 3870 "00001010" // /* MW 2 */ + 3871 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 42 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3872 "10011000" // SUB r2, r5, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3873 "11010001" // /* MW 3 */ + 3874 "01000101" // /* MW 2 */ + 3875 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 270 64 +.delay_slot + 3876 "11111000" // MOV r6, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3877 "00011100" // /* MW 3 */ + 3878 "10100001" // /* MW 2 */ + 3879 "00011001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 268 38 first +.delay_slot + 3880 "00000010" // ST r3, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3881 "01110000" // /* MW 7 */ + 3882 "10100101" // /* MW 6 */ + 3883 "00000001" // /* MW 5 */ + 3884 "00000000" // /* MW 4 */ + 3885 "00110000" // /* MW 3 */ + 3886 "10001110" // /* MW 2 */ + 3887 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 269 38 first +.src_ref 2 "reduce_base_c8.h" 270 64 first +.delay_slot + 3888 "11100001" // NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3889 "00000000" // /* MW 15 */ + 3890 "00000000" // /* MW 14 */ + 3891 "01111000" // /* MW 13 */ + 3892 "10100101" // /* MW 12 */ + 3893 "00000001" // /* MW 11 */ + 3894 "01111100" // /* MW 10 */ + 3895 "11100011" // /* MW 9 */ + 3896 "10111101" // /* MW 8 */ + 3897 "00010001" // /* MW 7 */ + 3898 "00010110" // /* MW 6 */ + 3899 "00100100" // /* MW 5 */ + 3900 "00000000" // /* MW 4 */ + 3901 "11110000" // /* MW 3 */ + 3902 "00101100" // /* MW 2 */ + 3903 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328 +.src_ref 2 "reduce_base_c8.h" 250 44 +.src_ref 2 "reduce_base_c8.h" 250 44 first +.src_ref 2 "reduce_base_c8.h" 255 40 + 3904 "10111010" // ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "01011000" // /* MW 9 */ + 3906 "11101100" // /* MW 8 */ + 3907 "00000111" // /* MW 7 */ + 3908 "00001000" // /* MW 6 */ + 3909 "01000010" // /* MW 5 */ + 3910 "00000000" // /* MW 4 */ + 3911 "11100000" // /* MW 3 */ + 3912 "10010010" // /* MW 2 */ + 3913 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 255 113 first + 3914 "10111010" // LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "01011000" // /* MW 9 */ + 3916 "00001000" // /* MW 8 */ + 3917 "01001000" // /* MW 7 */ + 3918 "01110000" // /* MW 6 */ + 3919 "00101101" // /* MW 5 */ + 3920 "00000110" // /* MW 4 */ + 3921 "00100000" // /* MW 3 */ + 3922 "10000110" // /* MW 2 */ + 3923 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 329 30 + 3924 "01100100" // MOVX r3, #16; MOV r26, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3925 "00010001" // /* MW 5 */ + 3926 "00100000" // /* MW 4 */ + 3927 "00101101" // /* MW 3 */ + 3928 "11001000" // /* MW 2 */ + 3929 "00000000" // /* MW 1 */ + 3930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3931 "00000000" // /* MW 1 */ + 3932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3933 "00000000" // /* MW 1 */ + 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3935 "00000000" // /* MW 1 */ + 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3937 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 251 38 first + 3938 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3939 "01110111" // /* MW 3 */ + 3940 "00011111" // /* MW 2 */ + 3941 "00000100" // /* MW 1 */ + 3942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3943 "00000000" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ + 3948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3949 "00000000" // /* MW 1 */ + 3950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3951 "00000000" // /* MW 1 */ + 3952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 252 39 first + 3954 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10110111" // /* MW 3 */ + 3956 "00011100" // /* MW 2 */ + 3957 "00000100" // /* MW 1 */ + 3958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3959 "00000000" // /* MW 1 */ + 3960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3961 "00000000" // /* MW 1 */ + 3962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3963 "00000000" // /* MW 1 */ + 3964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3965 "00000000" // /* MW 1 */ + 3966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3967 "00000000" // /* MW 1 */ + 3968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3969 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 253 38 first + 3970 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3971 "01110111" // /* MW 3 */ + 3972 "00011111" // /* MW 2 */ + 3973 "00000100" // /* MW 1 */ + 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3975 "00000000" // /* MW 1 */ + 3976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3977 "00000000" // /* MW 1 */ + 3978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3979 "00000000" // /* MW 1 */ + 3980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3981 "00000000" // /* MW 1 */ + 3982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3983 "00000000" // /* MW 1 */ + 3984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 254 39 first + 3986 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3987 "11010111" // /* MW 3 */ + 3988 "00011110" // /* MW 2 */ + 3989 "00000100" // /* MW 1 */ + 3990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3991 "00000000" // /* MW 1 */ + 3992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3993 "00000000" // /* MW 1 */ + 3994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3995 "00000000" // /* MW 1 */ + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ + 3998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3999 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 4000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4001 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 40 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 4002 "00011000" // ST.s16 r3, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4003 "01110111" // /* MW 3 */ + 4004 "00001000" // /* MW 2 */ + 4005 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4009 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4010 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 4011 "00000000" // /* MW 5 */ + 4012 "00000000" // /* MW 4 */ + 4013 "11101000" // /* MW 3 */ + 4014 "00000110" // /* MW 2 */ + 4015 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.delay_slot +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4016 "00011000" // MAC r3, r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4017 "00100110" // /* MW 3 */ + 4018 "01000110" // /* MW 2 */ + 4019 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4023 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 256 38 first +.delay_slot + 4024 "10011000" // ST r6, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4025 "11010001" // /* MW 3 */ + 4026 "00000100" // /* MW 2 */ + 4027 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 257 38 first +.delay_slot + 4028 "10011000" // ST r18, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4029 "01010001" // /* MW 3 */ + 4030 "00010110" // /* MW 2 */ + 4031 "00001100" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456 +.src_ref 2 "reduce_base_c8.h" 238 44 first + 4032 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4033 "10110111" // /* MW 3 */ + 4034 "00011110" // /* MW 2 */ + 4035 "00000100" // /* MW 1 */ + 4036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4037 "00000000" // /* MW 1 */ + 4038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4039 "00000000" // /* MW 1 */ + 4040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4041 "00000000" // /* MW 1 */ + 4042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4043 "00000000" // /* MW 1 */ + 4044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4045 "00000000" // /* MW 1 */ + 4046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4047 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 239 38 first + 4048 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4049 "11110111" // /* MW 3 */ + 4050 "00011100" // /* MW 2 */ + 4051 "00000100" // /* MW 1 */ + 4052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4053 "00000000" // /* MW 1 */ + 4054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4055 "00000000" // /* MW 1 */ + 4056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4057 "00000000" // /* MW 1 */ + 4058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4059 "00000000" // /* MW 1 */ + 4060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4061 "00000000" // /* MW 1 */ + 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4063 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 240 39 first + 4064 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4065 "11110111" // /* MW 3 */ + 4066 "00011110" // /* MW 2 */ + 4067 "00000100" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4073 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 94 + 4074 "00011000" // LDA r3, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4075 "01110001" // /* MW 3 */ + 4076 "11111100" // /* MW 2 */ + 4077 "00000111" // /* MW 1 */ + 4078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4079 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 4080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4081 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 38 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 4082 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4083 "00110111" // /* MW 3 */ + 4084 "00011100" // /* MW 2 */ + 4085 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4087 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4088 "01000100" // MOVXM r1, #65504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4089 "11000000" // /* MW 5 */ + 4090 "10111111" // /* MW 4 */ + 4091 "11110000" // /* MW 3 */ + 4092 "00000000" // /* MW 2 */ + 4093 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4094 "10011000" // ADD r2, r1, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4095 "10100000" // /* MW 3 */ + 4096 "01000101" // /* MW 2 */ + 4097 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 94 +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4098 "01100100" // MAC r1, r1, r3, r2; MOV r1, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4099 "01000001" // /* MW 5 */ + 4100 "10100000" // /* MW 4 */ + 4101 "11000000" // /* MW 3 */ + 4102 "01000100" // /* MW 2 */ + 4103 "00011000" // /* MW 1 */ + 4104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4105 "00000000" // /* MW 1 */ + 4106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4107 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 242 39 first + 4108 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4109 "11010111" // /* MW 3 */ + 4110 "00011110" // /* MW 2 */ + 4111 "00000100" // /* MW 1 */ + 4112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4113 "00000000" // /* MW 1 */ + 4114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4115 "00000000" // /* MW 1 */ + 4116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4117 "00000000" // /* MW 1 */ + 4118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4119 "00000000" // /* MW 1 */ + 4120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4121 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 40 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 4122 "10111000" // MOV m0, #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4123 "11011000" // /* MW 3 */ + 4124 "00001111" // /* MW 2 */ + 4125 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 40 first +.aggressive_scheduled_block_id 8 +.noswbrkpt + 4126 "00011000" // ST.s16 r5, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4127 "10110111" // /* MW 3 */ + 4128 "00001000" // /* MW 2 */ + 4129 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4131 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4133 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4134 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 4135 "00000000" // /* MW 5 */ + 4136 "00000000" // /* MW 4 */ + 4137 "11101000" // /* MW 3 */ + 4138 "00000110" // /* MW 2 */ + 4139 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 42 +.src_ref 2 "reduce_base_c8.h" 243 42 +.src_ref 2 "reduce_base_c8.h" 243 91 +.src_ref 2 "reduce_base_c8.h" 243 91 +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4140 "01100100" // MSC r5, r5, r22, r4; MOV r5, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4141 "01000001" // /* MW 5 */ + 4142 "10100000" // /* MW 4 */ + 4143 "11000010" // /* MW 3 */ + 4144 "01001001" // /* MW 2 */ + 4145 "10110001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4149 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 244 38 first +.delay_slot + 4150 "10011000" // ST r20, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "10010001" // /* MW 3 */ + 4152 "00000110" // /* MW 2 */ + 4153 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 245 38 first +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 4154 "00111010" // ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4155 "01111001" // /* MW 9 */ + 4156 "10001110" // /* MW 8 */ + 4157 "11010000" // /* MW 7 */ + 4158 "10001011" // /* MW 6 */ + 4159 "10100000" // /* MW 5 */ + 4160 "00000001" // /* MW 4 */ + 4161 "00110000" // /* MW 3 */ + 4162 "11000110" // /* MW 2 */ +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0 + 4163 "10000010" // /* MW 1 */ +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0 +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.function pad_3d<(pad_3d_mode)0, bfloat16, 1> _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.src_ref 3 "pad_3d.h" 266 first +.src_ref 3 "pad_3d.h" 465 37 first +.src_ref 3 "pad_3d.h" 468 21 first +.src_ref 3 "pad_3d.h" 471 29 +.src_ref 3 "pad_3d.h" 479 21 +.function_start + 4176 "10111010" // LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4177 "01011000" // /* MW 9 */ + 4178 "11101000" // /* MW 8 */ + 4179 "10000111" // /* MW 7 */ + 4180 "11001000" // /* MW 6 */ + 4181 "01000111" // /* MW 5 */ + 4182 "00111110" // /* MW 4 */ + 4183 "11010000" // /* MW 3 */ + 4184 "10000010" // /* MW 2 */ + 4185 "01000010" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 469 21 first +.src_ref 3 "pad_3d.h" 478 21 +.src_ref 3 "pad_3d.h" 499 52 +.src_ref 3 "pad_3d.h" 511 25 + 4186 "10111010" // LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4187 "01011000" // /* MW 9 */ + 4188 "00000110" // /* MW 8 */ + 4189 "00001000" // /* MW 7 */ + 4190 "10101010" // /* MW 6 */ + 4191 "00100111" // /* MW 5 */ + 4192 "00111110" // /* MW 4 */ + 4193 "11010000" // /* MW 3 */ + 4194 "10000110" // /* MW 2 */ + 4195 "01000101" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 470 21 first +.src_ref 3 "pad_3d.h" 486 26 +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 26 +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 517 22 + 4196 "10111010" // LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4197 "01111000" // /* MW 9 */ + 4198 "01100000" // /* MW 8 */ + 4199 "01101000" // /* MW 7 */ + 4200 "00001000" // /* MW 6 */ + 4201 "10000000" // /* MW 5 */ + 4202 "00000001" // /* MW 4 */ + 4203 "11010000" // /* MW 3 */ + 4204 "10010110" // /* MW 2 */ + 4205 "01001111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 471 29 first + 4206 "10011000" // LDA.s16 r18, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4207 "01010010" // /* MW 3 */ + 4208 "00101010" // /* MW 2 */ + 4209 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 472 25 first + 4210 "10011000" // LDA r6, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "11010110" // /* MW 3 */ + 4212 "00011100" // /* MW 2 */ + 4213 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 473 26 first + 4214 "10011000" // LDA r7, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "11110110" // /* MW 3 */ + 4216 "00101100" // /* MW 2 */ + 4217 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 475 24 first + 4218 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4219 "00110110" // /* MW 3 */ + 4220 "00000110" // /* MW 2 */ + 4221 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 479 21 first + 4222 "10011000" // ASHL r19, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4223 "01001110" // /* MW 3 */ + 4224 "00100110" // /* MW 2 */ + 4225 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 477 23 first + 4226 "10011000" // LDA r4, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4227 "10010110" // /* MW 3 */ + 4228 "00100100" // /* MW 2 */ + 4229 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 478 21 first + 4230 "10011000" // ASHL r20, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4231 "00101110" // /* MW 3 */ + 4232 "01101000" // /* MW 2 */ + 4233 "00010001" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 56 25 first + 4234 "11111000" // VBCST.16 x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4235 "01110010" // /* MW 3 */ + 4236 "01001001" // /* MW 2 */ + 4237 "00011000" // /* MW 1 */ + 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4239 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 485 45 first + 4240 "10011000" // MUL r18, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4241 "01001111" // /* MW 3 */ + 4242 "11100101" // /* MW 2 */ + 4243 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 485 34 + 4244 "10011000" // SUB r19, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4245 "00010001" // /* MW 3 */ + 4246 "01100111" // /* MW 2 */ + 4247 "00010000" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 998 25 first + 4248 "10011000" // MUL r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4249 "00101111" // /* MW 3 */ + 4250 "11100111" // /* MW 2 */ + 4251 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 43 first + 4252 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4253 "00101111" // /* MW 3 */ + 4254 "01100011" // /* MW 2 */ + 4255 "00010100" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 first + 4256 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4257 "00001101" // /* MW 3 */ + 4258 "11100001" // /* MW 2 */ + 4259 "00010100" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 486 26 first + 4260 "10100100" // GE r16, r24, r17; ADD.NC p2, r3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4261 "10000010" // /* MW 5 */ + 4262 "11000011" // /* MW 4 */ + 4263 "00110100" // /* MW 3 */ + 4264 "00100011" // /* MW 2 */ + 4265 "11000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4266 "10000100" // JNZ r16, #4416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4416 delay_slots=5 */ + 4267 "00000001" // /* MW 5 */ + 4268 "01000000" // /* MW 4 */ + 4269 "10100000" // /* MW 3 */ + 4270 "00001000" // /* MW 2 */ + 4271 "10000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 487 22 +.delay_slot + 4272 "11111000" // VMOV bmll0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "10010010" // /* MW 3 */ + 4274 "00000000" // /* MW 2 */ + 4275 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4283 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4284 "01000100" // MOVXM ls, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4285 "01100000" // /* MW 5 */ + 4286 "11100010" // /* MW 4 */ + 4287 "00010001" // /* MW 3 */ + 4288 "00000000" // /* MW 2 */ + 4289 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4290 "01000100" // MOVXM le, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4291 "01100000" // /* MW 5 */ + 4292 "11100010" // /* MW 4 */ + 4293 "00010110" // /* MW 3 */ + 4294 "00000000" // /* MW 2 */ + 4295 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4296 "00000010" // NOPS; MOV lc, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4297 "01110000" // /* MW 7 */ + 4298 "01010000" // /* MW 6 */ + 4299 "10111100" // /* MW 5 */ + 4300 "00000010" // /* MW 4 */ + 4301 "01100000" // /* MW 3 */ + 4302 "00101011" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ + 4304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "10100101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00100000" // /* MW 5 */ + 4316 "00000000" // /* MW 4 */ + 4317 "11110000" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "10100101" // /* MW 12 */ + 4325 "00000001" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ + 4336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00100000" // /* MW 5 */ + 4348 "00000000" // /* MW 4 */ + 4349 "11110000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ + 4352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "01011011" // /* MW 7 */ + 4362 "00000001" // /* MW 6 */ + 4363 "00100000" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ + 4368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "10100101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "01011011" // /* MW 7 */ + 4378 "00000001" // /* MW 6 */ + 4379 "00100000" // /* MW 5 */ + 4380 "00000000" // /* MW 4 */ + 4381 "11110000" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ + 4384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "10100101" // /* MW 12 */ + 4389 "00000001" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "01011011" // /* MW 7 */ + 4394 "00000001" // /* MW 6 */ + 4395 "00100000" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224 +.src_ref 3 "pad_3d.h" 487 22 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4400 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4401 "00000000" // /* MW 15 */ + 4402 "00000000" // /* MW 14 */ + 4403 "01111000" // /* MW 13 */ + 4404 "10100101" // /* MW 12 */ + 4405 "00000001" // /* MW 11 */ + 4406 "00000000" // /* MW 10 */ + 4407 "00000000" // /* MW 9 */ + 4408 "10000000" // /* MW 8 */ + 4409 "00000110" // /* MW 7 */ + 4410 "00011100" // /* MW 6 */ + 4411 "00100010" // /* MW 5 */ + 4412 "00000000" // /* MW 4 */ + 4413 "11110000" // /* MW 3 */ + 4414 "00101100" // /* MW 2 */ + 4415 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240 +.src_ref 3 "pad_3d.h" 495 21 +.src_ref 3 "pad_3d.h" 495 40 first +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 38 first +.loop_nesting 0 + 4416 "10111010" // MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4417 "10101000" // /* MW 9 */ + 4418 "11001100" // /* MW 8 */ + 4419 "00101001" // /* MW 7 */ + 4420 "11111110" // /* MW 6 */ + 4421 "00000000" // /* MW 5 */ + 4422 "00001011" // /* MW 4 */ + 4423 "00000000" // /* MW 3 */ + 4424 "10000110" // /* MW 2 */ + 4425 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 40 +.src_ref 3 "pad_3d.h" 496 29 first + 4426 "00100100" // SUB r17, r0, r17; ADD.NC dn1, r7, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4427 "11111111" // /* MW 5 */ + 4428 "10000111" // /* MW 4 */ + 4429 "00110010" // /* MW 3 */ + 4430 "01100010" // /* MW 2 */ + 4431 "00000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 21 first + 4432 "10011000" // LSHL r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4433 "01101101" // /* MW 3 */ + 4434 "01100010" // /* MW 2 */ + 4435 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 58 +.src_ref 3 "pad_3d.h" 498 23 first + 4436 "00100100" // SUB r17, r0, r7; ADD.NC m1, r17, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4437 "00010000" // /* MW 5 */ + 4438 "00010001" // /* MW 4 */ + 4439 "00110010" // /* MW 3 */ + 4440 "01001110" // /* MW 2 */ + 4441 "00000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 45 first + 4442 "10011000" // MUL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4443 "00001111" // /* MW 3 */ + 4444 "11100001" // /* MW 2 */ + 4445 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 498 10 first + 4446 "10011000" // LSHL r6, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4447 "01101101" // /* MW 3 */ + 4448 "01001100" // /* MW 2 */ + 4449 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 52 first + 4450 "10100100" // ASHL r6, r16, r2; ADD.NC p2, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4451 "00110010" // /* MW 5 */ + 4452 "11000011" // /* MW 4 */ + 4453 "11010100" // /* MW 3 */ + 4454 "10000101" // /* MW 2 */ + 4455 "10000001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 26 + 4456 "10011000" // GE r7, r24, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4457 "01101001" // /* MW 3 */ + 4458 "00001110" // /* MW 2 */ + 4459 "00010110" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 4 + 4460 "10000100" // JNZ r7, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */ + 4461 "00000001" // /* MW 5 */ + 4462 "01000000" // /* MW 4 */ + 4463 "00001000" // /* MW 3 */ + 4464 "00001001" // /* MW 2 */ + 4465 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4475 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 499 4 + 4476 "10111010" // MOVA dc1, #0; MOVXM ls, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4477 "00010000" // /* MW 9 */ + 4478 "00000000" // /* MW 8 */ + 4479 "01111001" // /* MW 7 */ + 4480 "00000100" // /* MW 6 */ + 4481 "00000000" // /* MW 5 */ + 4482 "00000000" // /* MW 4 */ + 4483 "10000000" // /* MW 3 */ + 4484 "00000111" // /* MW 2 */ + 4485 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 499 4 + 4486 "10111010" // MOVA dj1, #16; MOVXM le, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4487 "00010000" // /* MW 9 */ + 4488 "00000000" // /* MW 8 */ + 4489 "10111001" // /* MW 7 */ + 4490 "00000101" // /* MW 6 */ + 4491 "00000000" // /* MW 5 */ + 4492 "00000000" // /* MW 4 */ + 4493 "10000000" // /* MW 3 */ + 4494 "00000110" // /* MW 2 */ + 4495 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 4 + 4496 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4497 "00000000" // /* MW 15 */ + 4498 "00000000" // /* MW 14 */ + 4499 "01111000" // /* MW 13 */ + 4500 "10010000" // /* MW 12 */ + 4501 "10111001" // /* MW 11 */ + 4502 "00000010" // /* MW 10 */ + 4503 "00000000" // /* MW 9 */ + 4504 "00000000" // /* MW 8 */ + 4505 "01011011" // /* MW 7 */ + 4506 "00000001" // /* MW 6 */ + 4507 "00100000" // /* MW 5 */ + 4508 "00000000" // /* MW 4 */ + 4509 "11110000" // /* MW 3 */ + 4510 "00101100" // /* MW 2 */ + 4511 "00000000" // /* MW 1 */ + 4512 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4513 "00000000" // /* MW 15 */ + 4514 "00000000" // /* MW 14 */ + 4515 "01111000" // /* MW 13 */ + 4516 "10100101" // /* MW 12 */ + 4517 "00000001" // /* MW 11 */ + 4518 "00000000" // /* MW 10 */ + 4519 "00000000" // /* MW 9 */ + 4520 "00000000" // /* MW 8 */ + 4521 "01011011" // /* MW 7 */ + 4522 "00000001" // /* MW 6 */ + 4523 "00100000" // /* MW 5 */ + 4524 "00000000" // /* MW 4 */ + 4525 "11110000" // /* MW 3 */ + 4526 "00101100" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ + 4528 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4529 "00000000" // /* MW 15 */ + 4530 "00000000" // /* MW 14 */ + 4531 "01111000" // /* MW 13 */ + 4532 "10100101" // /* MW 12 */ + 4533 "00000001" // /* MW 11 */ + 4534 "00000000" // /* MW 10 */ + 4535 "00000000" // /* MW 9 */ + 4536 "00000000" // /* MW 8 */ + 4537 "01011011" // /* MW 7 */ + 4538 "00000001" // /* MW 6 */ + 4539 "00100000" // /* MW 5 */ + 4540 "00000000" // /* MW 4 */ + 4541 "11110000" // /* MW 3 */ + 4542 "00101100" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ + 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4545 "00000000" // /* MW 15 */ + 4546 "00000000" // /* MW 14 */ + 4547 "01111000" // /* MW 13 */ + 4548 "10100101" // /* MW 12 */ + 4549 "00000001" // /* MW 11 */ + 4550 "00000000" // /* MW 10 */ + 4551 "00000000" // /* MW 9 */ + 4552 "00000000" // /* MW 8 */ + 4553 "01011011" // /* MW 7 */ + 4554 "00000001" // /* MW 6 */ + 4555 "00100000" // /* MW 5 */ + 4556 "00000000" // /* MW 4 */ + 4557 "11110000" // /* MW 3 */ + 4558 "00101100" // /* MW 2 */ + 4559 "00000000" // /* MW 1 */ + 4560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4561 "00000000" // /* MW 15 */ + 4562 "00000000" // /* MW 14 */ + 4563 "01111000" // /* MW 13 */ + 4564 "10100101" // /* MW 12 */ + 4565 "00000001" // /* MW 11 */ + 4566 "00000000" // /* MW 10 */ + 4567 "00000000" // /* MW 9 */ + 4568 "00000000" // /* MW 8 */ + 4569 "01011011" // /* MW 7 */ + 4570 "00000001" // /* MW 6 */ + 4571 "00100000" // /* MW 5 */ + 4572 "00000000" // /* MW 4 */ + 4573 "11110000" // /* MW 3 */ + 4574 "00101100" // /* MW 2 */ + 4575 "00000000" // /* MW 1 */ + 4576 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4577 "00000000" // /* MW 15 */ + 4578 "00000000" // /* MW 14 */ + 4579 "01111000" // /* MW 13 */ + 4580 "10100101" // /* MW 12 */ + 4581 "00000001" // /* MW 11 */ + 4582 "00000000" // /* MW 10 */ + 4583 "00000000" // /* MW 9 */ + 4584 "00000000" // /* MW 8 */ + 4585 "01011011" // /* MW 7 */ + 4586 "00000001" // /* MW 6 */ + 4587 "00100000" // /* MW 5 */ + 4588 "00000000" // /* MW 4 */ + 4589 "11110000" // /* MW 3 */ + 4590 "00101100" // /* MW 2 */ + 4591 "00000000" // /* MW 1 */ + 4592 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4593 "00000000" // /* MW 15 */ + 4594 "00000000" // /* MW 14 */ + 4595 "01111000" // /* MW 13 */ + 4596 "10100101" // /* MW 12 */ + 4597 "00000001" // /* MW 11 */ + 4598 "00000000" // /* MW 10 */ + 4599 "00000000" // /* MW 9 */ + 4600 "00000000" // /* MW 8 */ + 4601 "01011011" // /* MW 7 */ + 4602 "00000001" // /* MW 6 */ + 4603 "00100000" // /* MW 5 */ + 4604 "00000000" // /* MW 4 */ + 4605 "11110000" // /* MW 3 */ + 4606 "00101100" // /* MW 2 */ + 4607 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4608 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4609 "00000000" // /* MW 15 */ + 4610 "00000000" // /* MW 14 */ + 4611 "01111000" // /* MW 13 */ + 4612 "10100101" // /* MW 12 */ + 4613 "00000001" // /* MW 11 */ + 4614 "00000000" // /* MW 10 */ + 4615 "00000000" // /* MW 9 */ + 4616 "00000000" // /* MW 8 */ + 4617 "00101110" // /* MW 7 */ + 4618 "00110000" // /* MW 6 */ + 4619 "00100010" // /* MW 5 */ + 4620 "00000000" // /* MW 4 */ + 4621 "11110000" // /* MW 3 */ + 4622 "00101100" // /* MW 2 */ + 4623 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448 +.src_ref 3 "pad_3d.h" 514 39 +.loop_nesting 0 + 4624 "01000100" // MOVXM r7, #2147483640 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "11110000" // /* MW 5 */ + 4626 "10111111" // /* MW 4 */ + 4627 "11110011" // /* MW 3 */ + 4628 "11111111" // /* MW 2 */ + 4629 "01111111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 39 first + 4630 "10011000" // AND r7, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "01000100" // /* MW 3 */ + 4632 "11001110" // /* MW 2 */ + 4633 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 35 + 4634 "10011000" // SUB r7, r5, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "01110001" // /* MW 3 */ + 4636 "01001110" // /* MW 2 */ + 4637 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 + 4638 "10011000" // MUL r7, r7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4639 "00001111" // /* MW 3 */ + 4640 "11001110" // /* MW 2 */ + 4641 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 511 25 first + 4642 "10011000" // ASHL r2, r4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4643 "00101110" // /* MW 3 */ + 4644 "00000100" // /* MW 2 */ + 4645 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 36 first + 4646 "10011000" // SUB r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4647 "01000001" // /* MW 3 */ + 4648 "01001000" // /* MW 2 */ + 4649 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 515 30 first + 4650 "10011000" // MUL r2, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4651 "00001111" // /* MW 3 */ + 4652 "10000100" // /* MW 2 */ + 4653 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 28 first + 4654 "10011000" // MUL r0, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4655 "00001111" // /* MW 3 */ + 4656 "00000000" // /* MW 2 */ + 4657 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 514 55 +.src_ref 3 "pad_3d.h" 517 39 first + 4658 "01100100" // MUL r1, r1, r2; MOV r6, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4659 "00000101" // /* MW 5 */ + 4660 "00100000" // /* MW 4 */ + 4661 "11110011" // /* MW 3 */ + 4662 "01000101" // /* MW 2 */ + 4663 "00001000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 first + 4664 "10011000" // LSHL r0, r0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4665 "01101101" // /* MW 3 */ + 4666 "00000000" // /* MW 2 */ + 4667 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 517 22 first + 4668 "10100100" // GE r0, r24, r1; ADD.NC p2, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4669 "00000010" // /* MW 5 */ + 4670 "11000011" // /* MW 4 */ + 4671 "00110100" // /* MW 3 */ + 4672 "00000011" // /* MW 2 */ + 4673 "11000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 517 4 + 4674 "10000100" // JNZ r0, #4832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4675 "00000001" // /* MW 5 */ + 4676 "01000000" // /* MW 4 */ + 4677 "01110000" // /* MW 3 */ + 4678 "00001001" // /* MW 2 */ + 4679 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4683 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 first +.delay_slot + 4684 "10011000" // LSHL r4, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4685 "01101101" // /* MW 3 */ + 4686 "11001000" // /* MW 2 */ + 4687 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 +.delay_slot + 4688 "00011000" // ADD.NC m0, r4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4689 "00001000" // /* MW 3 */ + 4690 "00000010" // /* MW 2 */ + 4691 "00011000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 515 37 first +.delay_slot + 4692 "10011000" // ADD.NC dn0, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "01111111" // /* MW 3 */ + 4694 "01000001" // /* MW 2 */ + 4695 "00011000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 517 4 first + 4696 "10111010" // MOVA dc0, #0; MOVXM ls, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4697 "00010000" // /* MW 9 */ + 4698 "01101000" // /* MW 8 */ + 4699 "01111001" // /* MW 7 */ + 4700 "00000100" // /* MW 6 */ + 4701 "00000000" // /* MW 5 */ + 4702 "00000000" // /* MW 4 */ + 4703 "10000000" // /* MW 3 */ + 4704 "00000011" // /* MW 2 */ + 4705 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 517 4 + 4706 "10111010" // MOVA dj0, #16; MOVXM le, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4707 "00010000" // /* MW 9 */ + 4708 "01101000" // /* MW 8 */ + 4709 "10111001" // /* MW 7 */ + 4710 "00000101" // /* MW 6 */ + 4711 "00000000" // /* MW 5 */ + 4712 "00000000" // /* MW 4 */ + 4713 "10000000" // /* MW 3 */ + 4714 "00000010" // /* MW 2 */ + 4715 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 517 4 + 4716 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4717 "10100000" // /* MW 3 */ + 4718 "01110000" // /* MW 2 */ + 4719 "00011101" // /* MW 1 */ + 4720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4721 "00000000" // /* MW 15 */ + 4722 "00000000" // /* MW 14 */ + 4723 "01111000" // /* MW 13 */ + 4724 "10100101" // /* MW 12 */ + 4725 "00000001" // /* MW 11 */ + 4726 "00000000" // /* MW 10 */ + 4727 "00000000" // /* MW 9 */ + 4728 "00000000" // /* MW 8 */ + 4729 "01011011" // /* MW 7 */ + 4730 "00000001" // /* MW 6 */ + 4731 "00100000" // /* MW 5 */ + 4732 "00000000" // /* MW 4 */ + 4733 "11110000" // /* MW 3 */ + 4734 "00101100" // /* MW 2 */ + 4735 "00000000" // /* MW 1 */ + 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4737 "00000000" // /* MW 15 */ + 4738 "00000000" // /* MW 14 */ + 4739 "01111000" // /* MW 13 */ + 4740 "10100101" // /* MW 12 */ + 4741 "00000001" // /* MW 11 */ + 4742 "00000000" // /* MW 10 */ + 4743 "00000000" // /* MW 9 */ + 4744 "00000000" // /* MW 8 */ + 4745 "01011011" // /* MW 7 */ + 4746 "00000001" // /* MW 6 */ + 4747 "00100000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "11110000" // /* MW 3 */ + 4750 "00101100" // /* MW 2 */ + 4751 "00000000" // /* MW 1 */ + 4752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "00000000" // /* MW 15 */ + 4754 "00000000" // /* MW 14 */ + 4755 "01111000" // /* MW 13 */ + 4756 "10100101" // /* MW 12 */ + 4757 "00000001" // /* MW 11 */ + 4758 "00000000" // /* MW 10 */ + 4759 "00000000" // /* MW 9 */ + 4760 "00000000" // /* MW 8 */ + 4761 "01011011" // /* MW 7 */ + 4762 "00000001" // /* MW 6 */ + 4763 "00100000" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ + 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00000000" // /* MW 15 */ + 4770 "00000000" // /* MW 14 */ + 4771 "01111000" // /* MW 13 */ + 4772 "10100101" // /* MW 12 */ + 4773 "00000001" // /* MW 11 */ + 4774 "00000000" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ + 4784 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4785 "00000000" // /* MW 15 */ + 4786 "00000000" // /* MW 14 */ + 4787 "01111000" // /* MW 13 */ + 4788 "10100101" // /* MW 12 */ + 4789 "00000001" // /* MW 11 */ + 4790 "00000000" // /* MW 10 */ + 4791 "00000000" // /* MW 9 */ + 4792 "00000000" // /* MW 8 */ + 4793 "01011011" // /* MW 7 */ + 4794 "00000001" // /* MW 6 */ + 4795 "00100000" // /* MW 5 */ + 4796 "00000000" // /* MW 4 */ + 4797 "11110000" // /* MW 3 */ + 4798 "00101100" // /* MW 2 */ + 4799 "00000000" // /* MW 1 */ + 4800 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4801 "00000000" // /* MW 15 */ + 4802 "00000000" // /* MW 14 */ + 4803 "01111000" // /* MW 13 */ + 4804 "10100101" // /* MW 12 */ + 4805 "00000001" // /* MW 11 */ + 4806 "00000000" // /* MW 10 */ + 4807 "00000000" // /* MW 9 */ + 4808 "00000000" // /* MW 8 */ + 4809 "01011011" // /* MW 7 */ + 4810 "00000001" // /* MW 6 */ + 4811 "00100000" // /* MW 5 */ + 4812 "00000000" // /* MW 4 */ + 4813 "11110000" // /* MW 3 */ + 4814 "00101100" // /* MW 2 */ + 4815 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4816 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4817 "00000000" // /* MW 15 */ + 4818 "00000000" // /* MW 14 */ + 4819 "01111000" // /* MW 13 */ + 4820 "10100101" // /* MW 12 */ + 4821 "00000001" // /* MW 11 */ + 4822 "00000000" // /* MW 10 */ + 4823 "00000000" // /* MW 9 */ + 4824 "00000000" // /* MW 8 */ + 4825 "00101110" // /* MW 7 */ + 4826 "00010000" // /* MW 6 */ + 4827 "00100010" // /* MW 5 */ + 4828 "00000000" // /* MW 4 */ + 4829 "11110000" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656 +.src_ref 3 "pad_3d.h" 282 first +.loop_nesting 0 + 4832 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4833 "00000000" // /* MW 3 */ + 4834 "00101000" // /* MW 2 */ + 4835 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4837 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4839 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4841 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4843 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0 + 4845 "00000000" // /* MW 1 */ +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.function run _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 2 "reduce_base_c8.h" 352 30 +.src_ref 2 "reduce_base_c8.h" 362 first +.src_ref 2 "reduce_base_c8.h" 365 18 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 +.function_start + 4848 "11111000" // MOV r3, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "11000000" // /* MW 3 */ + 4850 "11010100" // /* MW 2 */ + 4851 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 365 18 first + 4852 "00000010" // MOVS dn3, p7; ADD.NC p7, r3, #44 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4853 "00000000" // /* MW 7 */ + 4854 "11001011" // /* MW 6 */ + 4855 "10110000" // /* MW 5 */ + 4856 "00000011" // /* MW 4 */ + 4857 "01100000" // /* MW 3 */ + 4858 "10010001" // /* MW 2 */ + 4859 "01101011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 367 19 first + 4860 "10011000" // LDA.u16 r0, [p7], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4861 "00011010" // /* MW 3 */ + 4862 "10001100" // /* MW 2 */ + 4863 "00000111" // /* MW 1 */ + 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4865 "00000000" // /* MW 1 */ + 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4867 "00000000" // /* MW 1 */ + 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4869 "00000000" // /* MW 1 */ + 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4871 "00000000" // /* MW 1 */ + 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4873 "00000000" // /* MW 1 */ + 4874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4875 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 367 12 +.src_ref 2 "reduce_base_c8.h" 367 19 + 4876 "10000100" // JNZ r0, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */ + 4877 "00000001" // /* MW 5 */ + 4878 "01000000" // /* MW 4 */ + 4879 "11110000" // /* MW 3 */ + 4880 "00001001" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 101 18 +.src_ref 5 "broadcast.hpp" 80 25 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 2 "reduce_base_c8.h" 372 34 +.delay_slot + 4882 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4883 "00000001" // /* MW 3 */ + 4884 "00100000" // /* MW 2 */ + 4885 "00010000" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first +.delay_slot + 4886 "11111000" // VBCST.32 x1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4887 "01110010" // /* MW 3 */ + 4888 "11000010" // /* MW 2 */ + 4889 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 362 +.delay_slot + 4894 "11000100" // PADDXM [sp], #256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4895 "00000001" // /* MW 5 */ + 4896 "00000000" // /* MW 4 */ + 4897 "00000000" // /* MW 3 */ + 4898 "00100000" // /* MW 2 */ + 4899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 43 + 4900 "10111000" // MOV dj2, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "01001000" // /* MW 3 */ + 4902 "10000000" // /* MW 2 */ + 4903 "00011010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 43 first + 4904 "10011000" // LDA r1, [p2, dj2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00110110" // /* MW 3 */ + 4906 "01000000" // /* MW 2 */ + 4907 "00000010" // /* MW 1 */ + 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4909 "00000000" // /* MW 1 */ + 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4911 "00000000" // /* MW 1 */ + 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4913 "00000000" // /* MW 1 */ + 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4915 "00000000" // /* MW 1 */ + 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4917 "00000000" // /* MW 1 */ + 4918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4919 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 34 + 4920 "10011000" // GE r2, r16, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4921 "00011001" // /* MW 3 */ + 4922 "00000100" // /* MW 2 */ + 4923 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4924 "10000100" // JNZ r2, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */ + 4925 "00000001" // /* MW 5 */ + 4926 "01000000" // /* MW 4 */ + 4927 "11110000" // /* MW 3 */ + 4928 "00001001" // /* MW 2 */ + 4929 "00010000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base_c8.h" 374 29 +.delay_slot + 4930 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4931 "10010010" // /* MW 3 */ + 4932 "00000010" // /* MW 2 */ + 4933 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4941 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base_c8.h" 372 12 +.src_ref 2 "reduce_base_c8.h" 374 29 + 4942 "01110110" // NOPA; MOVS p3, p1; MOVXM ls, #5072 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4943 "00010000" // /* MW 11 */ + 4944 "11101000" // /* MW 10 */ + 4945 "01111001" // /* MW 9 */ + 4946 "00000100" // /* MW 8 */ + 4947 "00000000" // /* MW 7 */ + 4948 "00000000" // /* MW 6 */ + 4949 "10001011" // /* MW 5 */ + 4950 "10000100" // /* MW 4 */ + 4951 "11110011" // /* MW 3 */ + 4952 "00101100" // /* MW 2 */ + 4953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4954 "01000100" // MOVXM le, #5072 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4955 "10100000" // /* MW 5 */ + 4956 "11100111" // /* MW 4 */ + 4957 "00010110" // /* MW 3 */ + 4958 "00000000" // /* MW 2 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4960 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4961 "00000000" // /* MW 15 */ + 4962 "00000000" // /* MW 14 */ + 4963 "01111000" // /* MW 13 */ + 4964 "01010000" // /* MW 12 */ + 4965 "10111000" // /* MW 11 */ + 4966 "00000010" // /* MW 10 */ + 4967 "00000000" // /* MW 9 */ + 4968 "00000000" // /* MW 8 */ + 4969 "01011011" // /* MW 7 */ + 4970 "00000001" // /* MW 6 */ + 4971 "00100000" // /* MW 5 */ + 4972 "00000000" // /* MW 4 */ + 4973 "11110000" // /* MW 3 */ + 4974 "00101100" // /* MW 2 */ + 4975 "00000000" // /* MW 1 */ + 4976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4977 "00000000" // /* MW 15 */ + 4978 "00000000" // /* MW 14 */ + 4979 "01111000" // /* MW 13 */ + 4980 "10100101" // /* MW 12 */ + 4981 "00000001" // /* MW 11 */ + 4982 "00000000" // /* MW 10 */ + 4983 "00000000" // /* MW 9 */ + 4984 "00000000" // /* MW 8 */ + 4985 "01011011" // /* MW 7 */ + 4986 "00000001" // /* MW 6 */ + 4987 "00100000" // /* MW 5 */ + 4988 "00000000" // /* MW 4 */ + 4989 "11110000" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ + 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4993 "00000000" // /* MW 15 */ + 4994 "00000000" // /* MW 14 */ + 4995 "01111000" // /* MW 13 */ + 4996 "10100101" // /* MW 12 */ + 4997 "00000001" // /* MW 11 */ + 4998 "00000000" // /* MW 10 */ + 4999 "00000000" // /* MW 9 */ + 5000 "00000000" // /* MW 8 */ + 5001 "01011011" // /* MW 7 */ + 5002 "00000001" // /* MW 6 */ + 5003 "00100000" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11110000" // /* MW 3 */ + 5006 "00101100" // /* MW 2 */ + 5007 "00000000" // /* MW 1 */ + 5008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5009 "00000000" // /* MW 15 */ + 5010 "00000000" // /* MW 14 */ + 5011 "01111000" // /* MW 13 */ + 5012 "10100101" // /* MW 12 */ + 5013 "00000001" // /* MW 11 */ + 5014 "00000000" // /* MW 10 */ + 5015 "00000000" // /* MW 9 */ + 5016 "00000000" // /* MW 8 */ + 5017 "01011011" // /* MW 7 */ + 5018 "00000001" // /* MW 6 */ + 5019 "00100000" // /* MW 5 */ + 5020 "00000000" // /* MW 4 */ + 5021 "11110000" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ + 5024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5025 "00000000" // /* MW 15 */ + 5026 "00000000" // /* MW 14 */ + 5027 "01111000" // /* MW 13 */ + 5028 "10100101" // /* MW 12 */ + 5029 "00000001" // /* MW 11 */ + 5030 "00000000" // /* MW 10 */ + 5031 "00000000" // /* MW 9 */ + 5032 "00000000" // /* MW 8 */ + 5033 "01011011" // /* MW 7 */ + 5034 "00000001" // /* MW 6 */ + 5035 "00100000" // /* MW 5 */ + 5036 "00000000" // /* MW 4 */ + 5037 "11110000" // /* MW 3 */ + 5038 "00101100" // /* MW 2 */ + 5039 "00000000" // /* MW 1 */ + 5040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5041 "00000000" // /* MW 15 */ + 5042 "00000000" // /* MW 14 */ + 5043 "01111000" // /* MW 13 */ + 5044 "10100101" // /* MW 12 */ + 5045 "00000001" // /* MW 11 */ + 5046 "00000000" // /* MW 10 */ + 5047 "00000000" // /* MW 9 */ + 5048 "00000000" // /* MW 8 */ + 5049 "01011011" // /* MW 7 */ + 5050 "00000001" // /* MW 6 */ + 5051 "00100000" // /* MW 5 */ + 5052 "00000000" // /* MW 4 */ + 5053 "11110000" // /* MW 3 */ + 5054 "00101100" // /* MW 2 */ + 5055 "00000000" // /* MW 1 */ + 5056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5057 "00000000" // /* MW 15 */ + 5058 "00000000" // /* MW 14 */ + 5059 "01111000" // /* MW 13 */ + 5060 "10100101" // /* MW 12 */ + 5061 "00000001" // /* MW 11 */ + 5062 "00000000" // /* MW 10 */ + 5063 "00000000" // /* MW 9 */ + 5064 "00000000" // /* MW 8 */ + 5065 "01011011" // /* MW 7 */ + 5066 "00000001" // /* MW 6 */ + 5067 "00100000" // /* MW 5 */ + 5068 "00000000" // /* MW 4 */ + 5069 "11110000" // /* MW 3 */ + 5070 "00101100" // /* MW 2 */ + 5071 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224 +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base_c8.h" 374 29 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 5072 "11100001" // NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5073 "00000000" // /* MW 15 */ + 5074 "00000000" // /* MW 14 */ + 5075 "01111000" // /* MW 13 */ + 5076 "10100101" // /* MW 12 */ + 5077 "00000001" // /* MW 11 */ + 5078 "00000000" // /* MW 10 */ + 5079 "00000000" // /* MW 9 */ + 5080 "10000000" // /* MW 8 */ + 5081 "00000110" // /* MW 7 */ + 5082 "00011101" // /* MW 6 */ + 5083 "00100011" // /* MW 5 */ + 5084 "00000000" // /* MW 4 */ + 5085 "11110000" // /* MW 3 */ + 5086 "00101100" // /* MW 2 */ + 5087 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 412 41 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.loop_nesting 0 + 5088 "10111000" // MOV m4, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5089 "01000000" // /* MW 3 */ + 5090 "00000000" // /* MW 2 */ + 5091 "00011100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 first + 5092 "10011000" // LDA.u16 r17, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5093 "00111010" // /* MW 3 */ + 5094 "10001010" // /* MW 2 */ + 5095 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 388 28 + 5096 "01010100" // LDA.s16 r22, [p7], #-2; MOV m5, #-58 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5097 "00011001" // /* MW 5 */ + 5098 "00011111" // /* MW 4 */ + 5099 "01011010" // /* MW 3 */ + 5100 "11011010" // /* MW 2 */ + 5101 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 570 33 + 5102 "01010100" // LDA.u16 r26, [p7], m5; MOV dj0, #46 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5103 "10111001" // /* MW 5 */ + 5104 "00000000" // /* MW 4 */ + 5105 "01010001" // /* MW 3 */ + 5106 "01101011" // /* MW 2 */ + 5107 "11110101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 570 33 first +.src_ref 2 "reduce_base_c8.h" 594 43 + 5108 "11010100" // LDA.s16 r20, [p7, dj0]; MOV r19, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5109 "10000001" // /* MW 5 */ + 5110 "10111101" // /* MW 4 */ + 5111 "01011001" // /* MW 3 */ + 5112 "01010010" // /* MW 2 */ + 5113 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 594 43 first + 5114 "00010100" // LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5115 "00111000" // /* MW 5 */ + 5116 "11010011" // /* MW 4 */ + 5117 "01010110" // /* MW 3 */ + 5118 "01001110" // /* MW 2 */ + 5119 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 594 43 + 5120 "10011000" // LDA.s16 r21, [p3], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5121 "10110010" // /* MW 3 */ + 5122 "11011110" // /* MW 2 */ + 5123 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 594 64 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5124 "10011000" // LDA.u16 r28, [p3], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5125 "10011010" // /* MW 3 */ + 5126 "11111111" // /* MW 2 */ + 5127 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 595 56 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5128 "00101100" // LDA.s16 r17, [p3], #6; MOVX r7, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5129 "00010010" // /* MW 5 */ + 5130 "00011100" // /* MW 4 */ + 5131 "01010000" // /* MW 3 */ + 5132 "11000110" // /* MW 2 */ + 5133 "01100111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 596 56 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5134 "10111010" // LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5135 "01011000" // /* MW 9 */ + 5136 "00000000" // /* MW 8 */ + 5137 "01100000" // /* MW 7 */ + 5138 "11001010" // /* MW 6 */ + 5139 "00100111" // /* MW 5 */ + 5140 "00111111" // /* MW 4 */ + 5141 "01010000" // /* MW 3 */ + 5142 "11001010" // /* MW 2 */ + 5143 "01111110" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 2 "reduce_base_c8.h" 388 28 first +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 33 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5144 "01110110" // LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5145 "01011000" // /* MW 11 */ + 5146 "00000001" // /* MW 10 */ + 5147 "11001000" // /* MW 9 */ + 5148 "01101100" // /* MW 8 */ + 5149 "00101001" // /* MW 7 */ + 5150 "00100011" // /* MW 6 */ + 5151 "01001011" // /* MW 5 */ + 5152 "00010000" // /* MW 4 */ + 5153 "01010010" // /* MW 3 */ + 5154 "00011110" // /* MW 2 */ + 5155 "11100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 595 75 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5156 "01110110" // LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5157 "01011000" // /* MW 11 */ + 5158 "00111100" // /* MW 10 */ + 5159 "01001000" // /* MW 9 */ + 5160 "11101100" // /* MW 8 */ + 5161 "01110011" // /* MW 7 */ + 5162 "00101100" // /* MW 6 */ + 5163 "00001011" // /* MW 5 */ + 5164 "01011010" // /* MW 4 */ + 5165 "01010010" // /* MW 3 */ + 5166 "11101111" // /* MW 2 */ + 5167 "01100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5168 "01110110" // MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5169 "01111000" // /* MW 11 */ + 5170 "11010000" // /* MW 10 */ + 5171 "00000001" // /* MW 9 */ + 5172 "01101101" // /* MW 8 */ + 5173 "01000011" // /* MW 7 */ + 5174 "00101001" // /* MW 6 */ + 5175 "10001011" // /* MW 5 */ + 5176 "10000100" // /* MW 4 */ + 5177 "10000011" // /* MW 3 */ + 5178 "00001010" // /* MW 2 */ + 5179 "00001000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5180 "10111010" // VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5181 "01111000" // /* MW 9 */ + 5182 "00010000" // /* MW 8 */ + 5183 "10000101" // /* MW 7 */ + 5184 "01101110" // /* MW 6 */ + 5185 "00110011" // /* MW 5 */ + 5186 "00100111" // /* MW 4 */ + 5187 "10110000" // /* MW 3 */ + 5188 "00010010" // /* MW 2 */ + 5189 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5190 "10111010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5191 "01111000" // /* MW 9 */ + 5192 "11010000" // /* MW 8 */ + 5193 "00000100" // /* MW 7 */ + 5194 "01101111" // /* MW 6 */ + 5195 "00110011" // /* MW 5 */ + 5196 "00101011" // /* MW 4 */ + 5197 "00110000" // /* MW 3 */ + 5198 "01000001" // /* MW 2 */ + 5199 "00010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 2 "reduce_base_c8.h" 391 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5200 "00100100" // LSHL r17, r17, r6; ADD.NC lc, r18, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5201 "11111110" // /* MW 5 */ + 5202 "11110010" // /* MW 4 */ + 5203 "10111010" // /* MW 3 */ + 5204 "01001101" // /* MW 2 */ + 5205 "10001100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 + 5206 "11100100" // LSHL r17, r18, r6; MOV dj0, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5207 "01000001" // /* MW 5 */ + 5208 "00010001" // /* MW 4 */ + 5209 "10110001" // /* MW 3 */ + 5210 "01001101" // /* MW 2 */ + 5211 "10010100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.src_ref 2 "reduce_base_c8.h" 570 24 first + 5212 "01110110" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5213 "01111000" // /* MW 11 */ + 5214 "11010000" // /* MW 10 */ + 5215 "00000100" // /* MW 9 */ + 5216 "01101100" // /* MW 8 */ + 5217 "01100011" // /* MW 7 */ + 5218 "00001110" // /* MW 6 */ + 5219 "01001011" // /* MW 5 */ + 5220 "00010000" // /* MW 4 */ + 5221 "00110000" // /* MW 3 */ + 5222 "00000001" // /* MW 2 */ + 5223 "00011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 570 24 + 5224 "01001010" // MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5225 "00111101" // /* MW 9 */ + 5226 "00110000" // /* MW 8 */ + 5227 "00010100" // /* MW 7 */ + 5228 "11100100" // /* MW 6 */ + 5229 "00100000" // /* MW 5 */ + 5230 "00000011" // /* MW 4 */ + 5231 "01100111" // /* MW 3 */ + 5232 "10000001" // /* MW 2 */ + 5233 "00001011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first + 5234 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5235 "01110010" // /* MW 9 */ + 5236 "01010000" // /* MW 8 */ + 5237 "01000100" // /* MW 7 */ + 5238 "00000010" // /* MW 6 */ + 5239 "00001011" // /* MW 5 */ + 5240 "01011011" // /* MW 4 */ + 5241 "00110100" // /* MW 3 */ + 5242 "00100001" // /* MW 2 */ + 5243 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 + 5244 "11010100" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5245 "00000001" // /* MW 5 */ + 5246 "10010011" // /* MW 4 */ + 5247 "00110011" // /* MW 3 */ + 5248 "00110001" // /* MW 2 */ + 5249 "00000011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5250 "01100010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5251 "00111101" // /* MW 7 */ + 5252 "10000000" // /* MW 6 */ + 5253 "00010001" // /* MW 5 */ + 5254 "00000100" // /* MW 4 */ + 5255 "00110000" // /* MW 3 */ + 5256 "01000001" // /* MW 2 */ + 5257 "00010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5258 "10011000" // VLDA.2D bmll1, [p3], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5259 "10010101" // /* MW 3 */ + 5260 "01010000" // /* MW 2 */ + 5261 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 391 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5264 "01011010" // MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5265 "00111101" // /* MW 9 */ + 5266 "00101000" // /* MW 8 */ + 5267 "00010000" // /* MW 7 */ + 5268 "00000010" // /* MW 6 */ + 5269 "01001100" // /* MW 5 */ + 5270 "10001111" // /* MW 4 */ + 5271 "00000000" // /* MW 3 */ + 5272 "00000000" // /* MW 2 */ + 5273 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 412 41 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5274 "11010100" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5275 "00000001" // /* MW 5 */ + 5276 "00010000" // /* MW 4 */ + 5277 "00110111" // /* MW 3 */ + 5278 "00000001" // /* MW 2 */ + 5279 "00011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5280 "11101011" // MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5281 "10000001" // /* MW 15 */ + 5282 "10100001" // /* MW 14 */ + 5283 "01111000" // /* MW 13 */ + 5284 "00000000" // /* MW 12 */ + 5285 "10000010" // /* MW 11 */ + 5286 "00001000" // /* MW 10 */ + 5287 "01000100" // /* MW 9 */ + 5288 "00000000" // /* MW 8 */ + 5289 "10001011" // /* MW 7 */ + 5290 "10000100" // /* MW 6 */ + 5291 "00100100" // /* MW 5 */ + 5292 "00000000" // /* MW 4 */ + 5293 "10000000" // /* MW 3 */ + 5294 "00000110" // /* MW 2 */ + 5295 "00001000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 391 8 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5296 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5297 "01100001" // /* MW 15 */ + 5298 "10010000" // /* MW 14 */ + 5299 "00010000" // /* MW 13 */ + 5300 "10010000" // /* MW 12 */ + 5301 "10111010" // /* MW 11 */ + 5302 "00000101" // /* MW 10 */ + 5303 "00000000" // /* MW 9 */ + 5304 "00000000" // /* MW 8 */ + 5305 "00001011" // /* MW 7 */ + 5306 "01011010" // /* MW 6 */ + 5307 "00100001" // /* MW 5 */ + 5308 "00000000" // /* MW 4 */ + 5309 "00110000" // /* MW 3 */ + 5310 "00100001" // /* MW 2 */ + 5311 "00011101" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5312 "10011000" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "10001001" // /* MW 3 */ + 5314 "00011001" // /* MW 2 */ + 5315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5316 "01100110" // VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5317 "00111101" // /* MW 11 */ + 5318 "10000000" // /* MW 10 */ + 5319 "00010001" // /* MW 9 */ + 5320 "10001110" // /* MW 8 */ + 5321 "10101101" // /* MW 7 */ + 5322 "00000000" // /* MW 6 */ + 5323 "00100000" // /* MW 5 */ + 5324 "00000000" // /* MW 4 */ + 5325 "10110000" // /* MW 3 */ + 5326 "00010010" // /* MW 2 */ + 5327 "01101010" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5328 "11100001" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5329 "00000000" // /* MW 15 */ + 5330 "00000000" // /* MW 14 */ + 5331 "01111000" // /* MW 13 */ + 5332 "10100101" // /* MW 12 */ + 5333 "00000001" // /* MW 11 */ + 5334 "00000000" // /* MW 10 */ + 5335 "00000000" // /* MW 9 */ + 5336 "00000000" // /* MW 8 */ + 5337 "01011011" // /* MW 7 */ + 5338 "00000001" // /* MW 6 */ + 5339 "00100000" // /* MW 5 */ + 5340 "00000000" // /* MW 4 */ + 5341 "00110000" // /* MW 3 */ + 5342 "01000001" // /* MW 2 */ + 5343 "00010101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5345 "00000000" // /* MW 15 */ + 5346 "00000000" // /* MW 14 */ + 5347 "01111000" // /* MW 13 */ + 5348 "10100101" // /* MW 12 */ + 5349 "00000001" // /* MW 11 */ + 5350 "00000000" // /* MW 10 */ + 5351 "00000000" // /* MW 9 */ + 5352 "00000000" // /* MW 8 */ + 5353 "01011011" // /* MW 7 */ + 5354 "00000001" // /* MW 6 */ + 5355 "00100000" // /* MW 5 */ + 5356 "00000000" // /* MW 4 */ + 5357 "11110000" // /* MW 3 */ + 5358 "00101100" // /* MW 2 */ + 5359 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5360 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5361 "01000001" // /* MW 15 */ + 5362 "10000001" // /* MW 14 */ + 5363 "01111000" // /* MW 13 */ + 5364 "10100101" // /* MW 12 */ + 5365 "00000001" // /* MW 11 */ + 5366 "00000000" // /* MW 10 */ + 5367 "00000000" // /* MW 9 */ + 5368 "00000000" // /* MW 8 */ + 5369 "01011011" // /* MW 7 */ + 5370 "00000001" // /* MW 6 */ + 5371 "00100000" // /* MW 5 */ + 5372 "00000000" // /* MW 4 */ + 5373 "11110000" // /* MW 3 */ + 5374 "00101100" // /* MW 2 */ + 5375 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5376 "11100001" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5377 "00000000" // /* MW 15 */ + 5378 "00000000" // /* MW 14 */ + 5379 "01111000" // /* MW 13 */ + 5380 "10100101" // /* MW 12 */ + 5381 "00000001" // /* MW 11 */ + 5382 "00000000" // /* MW 10 */ + 5383 "00000000" // /* MW 9 */ + 5384 "10000000" // /* MW 8 */ + 5385 "00000110" // /* MW 7 */ + 5386 "00110001" // /* MW 6 */ + 5387 "00100100" // /* MW 5 */ + 5388 "00000000" // /* MW 4 */ + 5389 "00110000" // /* MW 3 */ + 5390 "00000001" // /* MW 2 */ + 5391 "00011001" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5392 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5393 "10000001" // /* MW 15 */ + 5394 "10100001" // /* MW 14 */ + 5395 "01111000" // /* MW 13 */ + 5396 "10100101" // /* MW 12 */ + 5397 "00000001" // /* MW 11 */ + 5398 "00000000" // /* MW 10 */ + 5399 "00000000" // /* MW 9 */ + 5400 "00000000" // /* MW 8 */ + 5401 "01011011" // /* MW 7 */ + 5402 "00000001" // /* MW 6 */ + 5403 "00100000" // /* MW 5 */ + 5404 "00000000" // /* MW 4 */ + 5405 "11110000" // /* MW 3 */ + 5406 "00101100" // /* MW 2 */ + 5407 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5408 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5409 "01100001" // /* MW 15 */ + 5410 "10010000" // /* MW 14 */ + 5411 "01111000" // /* MW 13 */ + 5412 "10100101" // /* MW 12 */ + 5413 "00000001" // /* MW 11 */ + 5414 "00000000" // /* MW 10 */ + 5415 "00000000" // /* MW 9 */ + 5416 "00000000" // /* MW 8 */ + 5417 "01011011" // /* MW 7 */ + 5418 "00000001" // /* MW 6 */ + 5419 "00100000" // /* MW 5 */ + 5420 "00000000" // /* MW 4 */ + 5421 "00110000" // /* MW 3 */ + 5422 "00100001" // /* MW 2 */ + 5423 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 107 23 +.src_ref 2 "reduce_base_c8.h" 412 41 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5424 "10111010" // LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5425 "00010000" // /* MW 9 */ + 5426 "11000000" // /* MW 8 */ + 5427 "10101111" // /* MW 7 */ + 5428 "00001100" // /* MW 6 */ + 5429 "00000000" // /* MW 5 */ + 5430 "00000000" // /* MW 4 */ + 5431 "01010000" // /* MW 3 */ + 5432 "00000111" // /* MW 2 */ + 5433 "11101100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 6 "me_vmult_float_emulated.h" 107 23 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5434 "01001010" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5435 "00111101" // /* MW 9 */ + 5436 "10000000" // /* MW 8 */ + 5437 "00010001" // /* MW 7 */ + 5438 "11100010" // /* MW 6 */ + 5439 "01110010" // /* MW 5 */ + 5440 "00010101" // /* MW 4 */ + 5441 "00110010" // /* MW 3 */ + 5442 "00110001" // /* MW 2 */ + 5443 "00000011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 101 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5444 "11111000" // VBCST.16 x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5445 "01110010" // /* MW 3 */ + 5446 "01000001" // /* MW 2 */ + 5447 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5449 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5450 "01001000" // VADD.f dm0, dm1, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5451 "00111101" // /* MW 3 */ + 5452 "00101000" // /* MW 2 */ + 5453 "00010000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5454 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5455 "00000110" // /* MW 3 */ + 5456 "00110001" // /* MW 2 */ + 5457 "00001100" // /* MW 1 */ + 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5459 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 412 52 first + 5460 "01100010" // ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5461 "00111101" // /* MW 7 */ + 5462 "00001100" // /* MW 6 */ + 5463 "00010010" // /* MW 5 */ + 5464 "11111001" // /* MW 4 */ + 5465 "01011111" // /* MW 3 */ + 5466 "00000010" // /* MW 2 */ + 5467 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 412 31 + 5468 "10011000" // NE r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5469 "00001000" // /* MW 3 */ + 5470 "01000000" // /* MW 2 */ + 5471 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 412 16 + 5472 "10000100" // JNZ r0, #6368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6368 delay_slots=5 */ + 5473 "00000001" // /* MW 5 */ + 5474 "01000000" // /* MW 4 */ + 5475 "01110000" // /* MW 3 */ + 5476 "00001100" // /* MW 2 */ + 5477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5483 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.delay_slot + 5484 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5485 "00000110" // /* MW 3 */ + 5486 "00110001" // /* MW 2 */ + 5487 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5489 "00000000" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first +.src_ref 3 "reduce_mean_c8_impl.h" 184 15 first +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5490 "00101100" // LDA r6, [p2, #12]; MOVX r5, #3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5491 "00011010" // /* MW 5 */ + 5492 "00010100" // /* MW 4 */ + 5493 "11010000" // /* MW 3 */ + 5494 "10011010" // /* MW 2 */ + 5495 "01000110" // /* MW 1 */ + 5496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5497 "00000000" // /* MW 1 */ + 5498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5499 "00000000" // /* MW 1 */ + 5500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5501 "00000000" // /* MW 1 */ + 5502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5503 "00000000" // /* MW 1 */ + 5504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5505 "00000000" // /* MW 1 */ + 5506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5507 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5508 "10011000" // GE r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5509 "01101001" // /* MW 3 */ + 5510 "01001110" // /* MW 2 */ + 5511 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5512 "10000100" // JNZ r7, #7296 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7296 delay_slots=5 */ + 5513 "00000001" // /* MW 5 */ + 5514 "01000000" // /* MW 4 */ + 5515 "01000000" // /* MW 3 */ + 5516 "00001110" // /* MW 2 */ + 5517 "00111000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 +.delay_slot + 5518 "00011000" // MOVX r0, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5519 "00010001" // /* MW 3 */ + 5520 "00000000" // /* MW 2 */ + 5521 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5529 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5530 "10011000" // NE r5, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5531 "00001000" // /* MW 3 */ + 5532 "10001010" // /* MW 2 */ + 5533 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5534 "10000100" // JNZ r5, #6512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6512 delay_slots=5 */ + 5535 "00000001" // /* MW 5 */ + 5536 "01000000" // /* MW 4 */ + 5537 "10111000" // /* MW 3 */ + 5538 "00001100" // /* MW 2 */ + 5539 "00101000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5549 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 5550 "11100100" // MOVX r17, #257; MOV dc4, lr /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5551 "11000001" // /* MW 5 */ + 5552 "10000011" // /* MW 4 */ + 5553 "10101001" // /* MW 3 */ + 5554 "01000000" // /* MW 2 */ + 5555 "00100100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 5556 "01000100" // MOVXM r21, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5557 "11111110" // /* MW 5 */ + 5558 "10111111" // /* MW 4 */ + 5559 "11111010" // /* MW 3 */ + 5560 "00000000" // /* MW 2 */ + 5561 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 + 5562 "00101100" // NOPA; MOVX r20, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5563 "00000010" // /* MW 5 */ + 5564 "01010000" // /* MW 4 */ + 5565 "11110000" // /* MW 3 */ + 5566 "00101100" // /* MW 2 */ + 5567 "00000000" // /* MW 1 */ +.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 + 5568 "01110110" // MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5569 "01011000" // /* MW 11 */ + 5570 "00111100" // /* MW 10 */ + 5571 "01001000" // /* MW 9 */ + 5572 "00001000" // /* MW 8 */ + 5573 "01010010" // /* MW 7 */ + 5574 "00000000" // /* MW 6 */ + 5575 "00001011" // /* MW 5 */ + 5576 "10000011" // /* MW 4 */ + 5577 "10000010" // /* MW 3 */ + 5578 "00001010" // /* MW 2 */ + 5579 "00001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first + 5580 "00101100" // LDA.s16 r6, [p2, dj2]; MOVX r4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5581 "00000010" // /* MW 5 */ + 5582 "00010001" // /* MW 4 */ + 5583 "01010000" // /* MW 3 */ + 5584 "00011010" // /* MW 2 */ + 5585 "01001000" // /* MW 1 */ + 5586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5587 "00000000" // /* MW 1 */ + 5588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5589 "00000000" // /* MW 1 */ + 5590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5591 "00000000" // /* MW 1 */ + 5592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5593 "00000000" // /* MW 1 */ + 5594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5595 "00000000" // /* MW 1 */ + 5596 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5597 "01100111" // /* MW 3 */ + 5598 "00000001" // /* MW 2 */ + 5599 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 + 5600 "11100001" // NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5601 "00000000" // /* MW 15 */ + 5602 "00000000" // /* MW 14 */ + 5603 "01111000" // /* MW 13 */ + 5604 "10100101" // /* MW 12 */ + 5605 "00000001" // /* MW 11 */ + 5606 "11110100" // /* MW 10 */ + 5607 "01010010" // /* MW 9 */ + 5608 "00001100" // /* MW 8 */ + 5609 "01011011" // /* MW 7 */ + 5610 "00000001" // /* MW 6 */ + 5611 "00100000" // /* MW 5 */ + 5612 "00000000" // /* MW 4 */ + 5613 "11110000" // /* MW 3 */ + 5614 "00101100" // /* MW 2 */ + 5615 "00000000" // /* MW 1 */ +.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 + 5616 "01110110" // MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5617 "00010000" // /* MW 11 */ + 5618 "01111000" // /* MW 10 */ + 5619 "10110010" // /* MW 9 */ + 5620 "11110011" // /* MW 8 */ + 5621 "00000001" // /* MW 7 */ + 5622 "10000000" // /* MW 6 */ + 5623 "10100101" // /* MW 5 */ + 5624 "11111101" // /* MW 4 */ + 5625 "10000111" // /* MW 3 */ + 5626 "10001010" // /* MW 2 */ + 5627 "00000100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1289 16 + 5628 "01110110" // LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5629 "01111000" // /* MW 11 */ + 5630 "00111001" // /* MW 10 */ + 5631 "10001011" // /* MW 9 */ + 5632 "00001000" // /* MW 8 */ + 5633 "01010000" // /* MW 7 */ + 5634 "10000000" // /* MW 6 */ + 5635 "01100101" // /* MW 5 */ + 5636 "11111010" // /* MW 4 */ + 5637 "01010111" // /* MW 3 */ + 5638 "11011100" // /* MW 2 */ + 5639 "11100000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 1280 49 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1287 41 +.src_ref 5 "vector.hpp" 1288 16 +.src_ref 5 "vector.hpp" 1289 16 +.src_ref 5 "vector.hpp" 1292 26 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 first +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 + 5640 "01110110" // LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5641 "01111000" // /* MW 11 */ + 5642 "01001001" // /* MW 10 */ + 5643 "00000010" // /* MW 9 */ + 5644 "11101000" // /* MW 8 */ + 5645 "01100111" // /* MW 7 */ + 5646 "00111111" // /* MW 6 */ + 5647 "10001011" // /* MW 5 */ + 5648 "10000100" // /* MW 4 */ + 5649 "11010111" // /* MW 3 */ + 5650 "00011010" // /* MW 2 */ + 5651 "01001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "vector.hpp" 1280 49 + 5652 "10111010" // MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5653 "01111000" // /* MW 9 */ + 5654 "01001001" // /* MW 8 */ + 5655 "00000010" // /* MW 7 */ + 5656 "00000001" // /* MW 6 */ + 5657 "11010010" // /* MW 5 */ + 5658 "00000010" // /* MW 4 */ + 5659 "00000000" // /* MW 3 */ + 5660 "11111000" // /* MW 2 */ + 5661 "00000011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 first + 5662 "10111010" // MOVA r25, #16; MOVXM ls, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5663 "00010000" // /* MW 9 */ + 5664 "01000000" // /* MW 8 */ + 5665 "01111011" // /* MW 7 */ + 5666 "00000100" // /* MW 6 */ + 5667 "00000000" // /* MW 5 */ + 5668 "00000000" // /* MW 4 */ + 5669 "00000000" // /* MW 3 */ + 5670 "00011001" // /* MW 2 */ + 5671 "00000010" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 + 5672 "10111010" // VLDA wl2, [sp, #-32]; MOVXM le, #6336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00010000" // /* MW 9 */ + 5674 "01100000" // /* MW 8 */ + 5675 "10111100" // /* MW 7 */ + 5676 "00000101" // /* MW 6 */ + 5677 "00000000" // /* MW 5 */ + 5678 "00000000" // /* MW 4 */ + 5679 "10110000" // /* MW 3 */ + 5680 "10010100" // /* MW 2 */ + 5681 "11111111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 98 + 5682 "00011000" // MOVX r26, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5683 "00000001" // /* MW 3 */ + 5684 "01110100" // /* MW 2 */ + 5685 "00010000" // /* MW 1 */ + 5686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5687 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "vector.hpp" 1286 72 +.src_ref 7 "accum.hpp" 1108 103 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5688 "00011000" // MOVX crRnd, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5689 "10000000" // /* MW 3 */ + 5690 "11111010" // /* MW 2 */ + 5691 "00010101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 first +.src_ref 6 "me_vmult_float_emulated.h" 112 19 first +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5692 "00000010" // VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5693 "00000000" // /* MW 7 */ + 5694 "10000000" // /* MW 6 */ + 5695 "10111001" // /* MW 5 */ + 5696 "00000010" // /* MW 4 */ + 5697 "11000000" // /* MW 3 */ + 5698 "00000010" // /* MW 2 */ + 5699 "00001000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5700 "11111000" // VMOV x3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5701 "10010010" // /* MW 3 */ + 5702 "10100000" // /* MW 2 */ + 5703 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 5704 "01100010" // VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5705 "10000011" // /* MW 7 */ + 5706 "01000000" // /* MW 6 */ + 5707 "00010000" // /* MW 5 */ + 5708 "11100110" // /* MW 4 */ + 5709 "10010010" // /* MW 3 */ + 5710 "10100110" // /* MW 2 */ + 5711 "00000010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 5712 "11111000" // VMOV x6, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5713 "10010010" // /* MW 3 */ + 5714 "00101010" // /* MW 2 */ + 5715 "00011011" // /* MW 1 */ + 5716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5717 "00000000" // /* MW 1 */ + 5718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5719 "00000000" // /* MW 1 */ + 5720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5721 "00000000" // /* MW 1 */ + 5722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5723 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 first +.src_ref 6 "me_vmult_float_emulated.h" 114 19 first + 5724 "00011000" // VCONV.bf16.fp32 wl3, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5725 "00010110" // /* MW 3 */ + 5726 "11000000" // /* MW 2 */ + 5727 "00001001" // /* MW 1 */ + 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5729 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 5730 "01001000" // VMSC.f dm0, dm0, x3, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5731 "10000011" // /* MW 3 */ + 5732 "00000110" // /* MW 2 */ + 5733 "00010000" // /* MW 1 */ + 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5735 "00000000" // /* MW 1 */ + 5736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5737 "00000000" // /* MW 1 */ + 5738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5739 "00000000" // /* MW 1 */ + 5740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5741 "00000000" // /* MW 1 */ + 5742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5743 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 + 5744 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00000000" // /* MW 15 */ + 5746 "00000000" // /* MW 14 */ + 5747 "01111000" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00001000" // /* MW 10 */ + 5751 "01110001" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "00010110" // /* MW 7 */ + 5754 "11000000" // /* MW 6 */ + 5755 "00100010" // /* MW 5 */ + 5756 "00000000" // /* MW 4 */ + 5757 "11110000" // /* MW 3 */ + 5758 "00101100" // /* MW 2 */ + 5759 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 3 "reduce_mean_c8_impl.h" 226 22 first +.begin_of_loop +.loop_nesting 1 + 5760 "11110100" // VLDB x7, [p1], #64; VMOV bmhh4, x9 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5761 "00100101" // /* MW 5 */ + 5762 "10100101" // /* MW 4 */ + 5763 "10001001" // /* MW 3 */ + 5764 "10111110" // /* MW 2 */ + 5765 "00100011" // /* MW 1 */ + 5766 "11111000" // VMOV bmhh3, x11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5767 "10010010" // /* MW 3 */ + 5768 "11010110" // /* MW 2 */ + 5769 "00011011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1280 49 + 5770 "11111000" // MOV r28, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5771 "11000000" // /* MW 3 */ + 5772 "00011110" // /* MW 2 */ + 5773 "00011111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1280 49 first + 5774 "10011000" // AND r29, r28, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5775 "10000100" // /* MW 3 */ + 5776 "00111011" // /* MW 2 */ + 5777 "00010111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1285 72 first + 5778 "00100100" // LT r27, r29, r4; ADD.NC r28, r29, #-32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5779 "11100000" // /* MW 5 */ + 5780 "00111101" // /* MW 4 */ + 5781 "01011110" // /* MW 3 */ + 5782 "11001001" // /* MW 2 */ + 5783 "11101110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 + 5784 "10011000" // LSHL r30, r22, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5785 "11011101" // /* MW 3 */ + 5786 "10111101" // /* MW 2 */ + 5787 "00010101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 98 first + 5788 "10011000" // SUB r31, r26, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5789 "11010001" // /* MW 3 */ + 5790 "10111111" // /* MW 2 */ + 5791 "00010110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "add_reduce.hpp" 322 47 first + 5792 "10100100" // SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5793 "11001101" // /* MW 5 */ + 5794 "01110000" // /* MW 4 */ + 5795 "01001000" // /* MW 3 */ + 5796 "10111100" // /* MW 2 */ + 5797 "00101111" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first + 5798 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5799 "10010010" // /* MW 3 */ + 5800 "00010000" // /* MW 2 */ + 5801 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 7 "accum.hpp" 198 120 + 5802 "11111000" // VMOV wl8, wh7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5803 "00100010" // /* MW 3 */ + 5804 "01001110" // /* MW 2 */ + 5805 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 7 "accum.hpp" 198 120 first + 5806 "11111000" // VMOV wl10, wl7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5807 "00100010" // /* MW 3 */ + 5808 "01001111" // /* MW 2 */ + 5809 "00011101" // /* MW 1 */ + 5810 "11111000" // VMOV bmhl4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5811 "10010010" // /* MW 3 */ + 5812 "10010000" // /* MW 2 */ + 5813 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5814 "11111000" // VMOV bmhl3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5815 "10010010" // /* MW 3 */ + 5816 "10010100" // /* MW 2 */ + 5817 "00011011" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 5 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5818 "01100010" // VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5819 "00111101" // /* MW 7 */ + 5820 "00101000" // /* MW 6 */ + 5821 "00010011" // /* MW 5 */ + 5822 "11100110" // /* MW 4 */ + 5823 "10001010" // /* MW 3 */ + 5824 "00010010" // /* MW 2 */ + 5825 "00000010" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5826 "11111000" // VMOV cml1, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5827 "10001010" // /* MW 3 */ + 5828 "00001110" // /* MW 2 */ + 5829 "00011001" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 5 "vector.hpp" 243 115 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5830 "01100010" // VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5831 "00111101" // /* MW 7 */ + 5832 "01010000" // /* MW 6 */ + 5833 "00010010" // /* MW 5 */ + 5834 "11100110" // /* MW 4 */ + 5835 "00100010" // /* MW 3 */ + 5836 "01001110" // /* MW 2 */ + 5837 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5838 "11111000" // VMOV bmll2, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5839 "10010010" // /* MW 3 */ + 5840 "00001110" // /* MW 2 */ + 5841 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5842 "11011000" // VSHIFT x9, x8, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5843 "01100110" // /* MW 3 */ + 5844 "11000000" // /* MW 2 */ + 5845 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5846 "01100010" // VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5847 "00111101" // /* MW 7 */ + 5848 "00110000" // /* MW 6 */ + 5849 "00010100" // /* MW 5 */ + 5850 "11100110" // /* MW 4 */ + 5851 "10010010" // /* MW 3 */ + 5852 "00010000" // /* MW 2 */ + 5853 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5854 "11111000" // VMOV bmll4, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5855 "10010010" // /* MW 3 */ + 5856 "00010010" // /* MW 2 */ + 5857 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 151 136 first + 5858 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5859 "00010010" // /* MW 3 */ + 5860 "00101100" // /* MW 2 */ + 5861 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 243 115 first +.src_ref 7 "accum.hpp" 151 115 + 5862 "11111000" // VMOV wl9, wl8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5863 "00100010" // /* MW 3 */ + 5864 "11010001" // /* MW 2 */ + 5865 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5866 "11011000" // VSHIFT x8, x9, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5867 "01100110" // /* MW 3 */ + 5868 "01001000" // /* MW 2 */ + 5869 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5870 "01100010" // VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5871 "00111101" // /* MW 7 */ + 5872 "01100100" // /* MW 6 */ + 5873 "00010001" // /* MW 5 */ + 5874 "11100110" // /* MW 4 */ + 5875 "10010010" // /* MW 3 */ + 5876 "00010000" // /* MW 2 */ + 5877 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5878 "11111000" // VMOV bmll3, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5879 "10010010" // /* MW 3 */ + 5880 "00010010" // /* MW 2 */ + 5881 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 + 5882 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5883 "00010010" // /* MW 3 */ + 5884 "00101000" // /* MW 2 */ + 5885 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 5886 "11011000" // VSHIFT x10, x8, x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5887 "00011110" // /* MW 3 */ + 5888 "01000000" // /* MW 2 */ + 5889 "00011101" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5890 "01100010" // VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5891 "00111101" // /* MW 7 */ + 5892 "01001100" // /* MW 6 */ + 5893 "00010010" // /* MW 5 */ + 5894 "11100110" // /* MW 4 */ + 5895 "00010010" // /* MW 3 */ + 5896 "00110000" // /* MW 2 */ + 5897 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 5898 "11111000" // VMOV bmll3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5899 "10010010" // /* MW 3 */ + 5900 "00010100" // /* MW 2 */ + 5901 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5902 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5903 "00111101" // /* MW 7 */ + 5904 "10001100" // /* MW 6 */ + 5905 "00010011" // /* MW 5 */ + 5906 "11000110" // /* MW 4 */ + 5907 "00011110" // /* MW 3 */ + 5908 "01000000" // /* MW 2 */ + 5909 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5910 "11111000" // VMOV bmll3, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5911 "10010010" // /* MW 3 */ + 5912 "00010000" // /* MW 2 */ + 5913 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 5914 "11111000" // VMOV x8, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5915 "00010010" // /* MW 3 */ + 5916 "00100100" // /* MW 2 */ + 5917 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 5918 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5919 "00111101" // /* MW 7 */ + 5920 "00110000" // /* MW 6 */ + 5921 "00010001" // /* MW 5 */ + 5922 "11000110" // /* MW 4 */ + 5923 "00011110" // /* MW 3 */ + 5924 "01000000" // /* MW 2 */ + 5925 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5926 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5927 "10010010" // /* MW 3 */ + 5928 "00010000" // /* MW 2 */ + 5929 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 5930 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5931 "00010010" // /* MW 3 */ + 5932 "00101000" // /* MW 2 */ + 5933 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 5934 "01100010" // VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5935 "00111101" // /* MW 7 */ + 5936 "01010000" // /* MW 6 */ + 5937 "00010010" // /* MW 5 */ + 5938 "11000110" // /* MW 4 */ + 5939 "00000010" // /* MW 3 */ + 5940 "01000000" // /* MW 2 */ + 5941 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5942 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "10010010" // /* MW 3 */ + 5944 "00010000" // /* MW 2 */ + 5945 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 5946 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5947 "00010010" // /* MW 3 */ + 5948 "00101100" // /* MW 2 */ + 5949 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 5950 "11011000" // VSHIFT x8, x8, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5951 "00000010" // /* MW 3 */ + 5952 "01000000" // /* MW 2 */ + 5953 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5954 "01100010" // VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5955 "00111101" // /* MW 7 */ + 5956 "01110000" // /* MW 6 */ + 5957 "00010011" // /* MW 5 */ + 5958 "11100110" // /* MW 4 */ + 5959 "00010010" // /* MW 3 */ + 5960 "00100100" // /* MW 2 */ + 5961 "00000101" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.nohwbrkpt +.noswbrkpt + 5962 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5963 "10010010" // /* MW 3 */ + 5964 "00010000" // /* MW 2 */ + 5965 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5966 "01100010" // VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5967 "00111101" // /* MW 7 */ + 5968 "00110000" // /* MW 6 */ + 5969 "00010000" // /* MW 5 */ + 5970 "11000110" // /* MW 4 */ + 5971 "00000010" // /* MW 3 */ + 5972 "01010000" // /* MW 2 */ + 5973 "00000101" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5974 "11111000" // VMOV bmll4, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5975 "10010010" // /* MW 3 */ + 5976 "00010100" // /* MW 2 */ + 5977 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 5978 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5979 "00010010" // /* MW 3 */ + 5980 "00101000" // /* MW 2 */ + 5981 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first + 5982 "10111000" // VEXTRACT.32 r23, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5983 "00000001" // /* MW 3 */ + 5984 "11100010" // /* MW 2 */ + 5985 "00011101" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 5986 "11111000" // VMOV x10, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5987 "00010010" // /* MW 3 */ + 5988 "00101100" // /* MW 2 */ + 5989 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first +.src_ref 5 "vector.hpp" 1288 16 first +.aggressive_scheduled_block_id 11 +.noswbrkpt + 5990 "01110100" // VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5991 "00000011" // /* MW 5 */ + 5992 "01010100" // /* MW 4 */ + 5993 "10000011" // /* MW 3 */ + 5994 "11010000" // /* MW 2 */ + 5995 "11100010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 5996 "11111000" // VMOV x11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "00010010" // /* MW 3 */ + 5998 "10100000" // /* MW 2 */ + 5999 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first +.src_ref 5 "vector.hpp" 1287 41 first +.src_ref 5 "broadcast.hpp" 80 25 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6000 "10110100" // VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6001 "00000110" // /* MW 5 */ + 6002 "10110100" // /* MW 4 */ + 6003 "10001010" // /* MW 3 */ + 6004 "11010100" // /* MW 2 */ + 6005 "11100000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6006 "00111000" // VSEL.32 x9, x10, x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6007 "10100000" // /* MW 3 */ + 6008 "11010100" // /* MW 2 */ + 6009 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 853 46 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6010 "01111000" // VINSERT.32 x10, x2, #0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6011 "11010001" // /* MW 3 */ + 6012 "00010000" // /* MW 2 */ + 6013 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 853 46 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6014 "01111000" // VINSERT.32 x8, x2, #0, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6015 "11110001" // /* MW 3 */ + 6016 "00010010" // /* MW 2 */ + 6017 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 5 "vector.hpp" 1413 19 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6018 "11111000" // VMOV wl11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "00100010" // /* MW 3 */ + 6020 "11010011" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 5 "vector.hpp" 1413 19 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6022 "11111000" // VMOV wh11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "00100010" // /* MW 3 */ + 6024 "10010011" // /* MW 2 */ + 6025 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 5 "vector.hpp" 1413 19 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6026 "11111000" // VMOV wh8, wl10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6027 "00100010" // /* MW 3 */ + 6028 "00010101" // /* MW 2 */ + 6029 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 first + 6030 "00111000" // VSEL.32 x8, x11, x8, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6031 "00000000" // /* MW 3 */ + 6032 "01011100" // /* MW 2 */ + 6033 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 6034 "00111000" // VSEL.32 x8, x1, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6035 "00001000" // /* MW 3 */ + 6036 "00001100" // /* MW 2 */ + 6037 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 6038 "00111000" // VSEL.32 x7, x8, x7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6039 "10101000" // /* MW 3 */ + 6040 "11000011" // /* MW 2 */ + 6041 "00011011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 + 6042 "11111000" // VMOV bmll0, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6043 "10010010" // /* MW 3 */ + 6044 "00001110" // /* MW 2 */ + 6045 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6046 "11111000" // VMOV x9, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6047 "10010010" // /* MW 3 */ + 6048 "10101100" // /* MW 2 */ + 6049 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 first +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first + 6050 "00000010" // VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6051 "01110000" // /* MW 7 */ + 6052 "01001001" // /* MW 6 */ + 6053 "00000111" // /* MW 5 */ + 6054 "00000001" // /* MW 4 */ + 6055 "11000000" // /* MW 3 */ + 6056 "00000010" // /* MW 2 */ + 6057 "01101000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6058 "11111000" // VMOV x8, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6059 "10010010" // /* MW 3 */ + 6060 "00110010" // /* MW 2 */ + 6061 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1289 16 + 6062 "01011010" // LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6063 "10000011" // /* MW 9 */ + 6064 "01001100" // /* MW 8 */ + 6065 "00010010" // /* MW 7 */ + 6066 "00001111" // /* MW 6 */ + 6067 "11101010" // /* MW 5 */ + 6068 "11101101" // /* MW 4 */ + 6069 "11001101" // /* MW 3 */ + 6070 "10111011" // /* MW 2 */ + 6071 "00000101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 9 first +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1289 16 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 6072 "01100010" // SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "10100001" // /* MW 7 */ + 6074 "11101100" // /* MW 6 */ + 6075 "00010001" // /* MW 5 */ + 6076 "10010001" // /* MW 4 */ + 6077 "00111110" // /* MW 3 */ + 6078 "00001011" // /* MW 2 */ + 6079 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 first +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1289 16 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 6080 "01011010" // SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6081 "01100001" // /* MW 9 */ + 6082 "11101100" // /* MW 8 */ + 6083 "00010000" // /* MW 7 */ + 6084 "00101111" // /* MW 6 */ + 6085 "00001001" // /* MW 5 */ + 6086 "00110011" // /* MW 4 */ + 6087 "11100010" // /* MW 3 */ + 6088 "10100101" // /* MW 2 */ + 6089 "00000101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 9 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6090 "01001000" // VMUL.f dm3, x6, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6091 "00000001" // /* MW 3 */ + 6092 "11101100" // /* MW 2 */ + 6093 "00010011" // /* MW 1 */ + 6094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6095 "00000000" // /* MW 1 */ + 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6097 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first +.src_ref 6 "me_vmult_float_emulated.h" 110 6 first +.src_ref 6 "me_vmult_float_emulated.h" 110 19 first + 6098 "00011000" // VCONV.bf16.fp32 wl9, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6099 "00010110" // /* MW 3 */ + 6100 "11000001" // /* MW 2 */ + 6101 "00001100" // /* MW 1 */ + 6102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6103 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 6104 "01001000" // VMSC.f dm2, dm2, x9, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "10000011" // /* MW 3 */ + 6106 "01010010" // /* MW 2 */ + 6107 "00010010" // /* MW 1 */ + 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6109 "00000000" // /* MW 1 */ + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ + 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6113 "00000000" // /* MW 1 */ + 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6115 "00000000" // /* MW 1 */ + 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6118 "00011000" // VCONV.bf16.fp32 wl8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6119 "00010110" // /* MW 3 */ + 6120 "01000001" // /* MW 2 */ + 6121 "00001100" // /* MW 1 */ + 6122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6123 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first + 6124 "01001000" // VMUL.f dm4, x8, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6125 "10100001" // /* MW 3 */ + 6126 "11110000" // /* MW 2 */ + 6127 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 9 first + 6128 "01001000" // VMUL.f dm2, x8, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6129 "01100001" // /* MW 3 */ + 6130 "11110000" // /* MW 2 */ + 6131 "00010010" // /* MW 1 */ + 6132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6133 "00000000" // /* MW 1 */ + 6134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6135 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6137 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 9 first +.aggressive_scheduled_block_id 13 +.noswbrkpt + 6138 "01001000" // VMUL.f dm2, x9, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6139 "10100001" // /* MW 3 */ + 6140 "11110010" // /* MW 2 */ + 6141 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6142 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6143 "00010010" // /* MW 3 */ + 6144 "01110000" // /* MW 2 */ + 6145 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6146 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6147 "00111101" // /* MW 3 */ + 6148 "10001000" // /* MW 2 */ + 6149 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6150 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6151 "10010010" // /* MW 3 */ + 6152 "00000101" // /* MW 2 */ + 6153 "00011100" // /* MW 1 */ + 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6155 "00000000" // /* MW 1 */ + 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6157 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id first + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 9 first +.aggressive_scheduled_block_id 14 +.noswbrkpt + 6160 "01001000" // VMUL.f dm2, x0, x8, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6161 "00000001" // /* MW 3 */ + 6162 "11100001" // /* MW 2 */ + 6163 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6164 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6165 "00010010" // /* MW 3 */ + 6166 "01110000" // /* MW 2 */ + 6167 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6168 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6169 "00111101" // /* MW 3 */ + 6170 "10001000" // /* MW 2 */ + 6171 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6172 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6173 "10010010" // /* MW 3 */ + 6174 "00000001" // /* MW 2 */ + 6175 "00011100" // /* MW 1 */ + 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6177 "00000000" // /* MW 1 */ + 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6179 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id first + 6180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6181 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 9 first +.aggressive_scheduled_block_id 15 +.noswbrkpt + 6182 "01001000" // VMUL.f dm1, x9, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6183 "01100001" // /* MW 3 */ + 6184 "11110010" // /* MW 2 */ + 6185 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6186 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6187 "00010010" // /* MW 3 */ + 6188 "01110000" // /* MW 2 */ + 6189 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6190 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6191 "00111101" // /* MW 3 */ + 6192 "10000100" // /* MW 2 */ + 6193 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6194 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6195 "10010010" // /* MW 3 */ + 6196 "00000101" // /* MW 2 */ + 6197 "00011100" // /* MW 1 */ + 6198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6199 "00000000" // /* MW 1 */ + 6200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6201 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id first + 6202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6203 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 9 first +.aggressive_scheduled_block_id 16 +.noswbrkpt + 6204 "01001000" // VMUL.f dm1, x9, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6205 "00000001" // /* MW 3 */ + 6206 "11110010" // /* MW 2 */ + 6207 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6208 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6209 "00010010" // /* MW 3 */ + 6210 "01110000" // /* MW 2 */ + 6211 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6212 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6213 "00111101" // /* MW 3 */ + 6214 "10000100" // /* MW 2 */ + 6215 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6216 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6217 "10010010" // /* MW 3 */ + 6218 "00000001" // /* MW 2 */ + 6219 "00011100" // /* MW 1 */ + 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6221 "00000000" // /* MW 1 */ + 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6223 "00000000" // /* MW 1 */ + 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6225 "00000000" // /* MW 1 */ + 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6227 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id first + 6228 "11111000" // VMOV lfl1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6229 "00010010" // /* MW 3 */ + 6230 "01110000" // /* MW 2 */ + 6231 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 17 +.noswbrkpt + 6232 "01001000" // VADD.f dm2, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6233 "00111101" // /* MW 3 */ + 6234 "10001000" // /* MW 2 */ + 6235 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6236 "11111000" // VMOV bmll4, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6237 "10010010" // /* MW 3 */ + 6238 "00010101" // /* MW 2 */ + 6239 "00011100" // /* MW 1 */ + 6240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6241 "00000000" // /* MW 1 */ + 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6243 "00000000" // /* MW 1 */ + 6244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6245 "00000000" // /* MW 1 */ + 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6247 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id first + 6248 "11111000" // VMOV lfh1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6249 "00010010" // /* MW 3 */ + 6250 "01101000" // /* MW 2 */ + 6251 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 first +.aggressive_scheduled_block_id 18 +.noswbrkpt + 6252 "01001000" // VADD.f dm2, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6253 "00111101" // /* MW 3 */ + 6254 "01000100" // /* MW 2 */ + 6255 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 "11111000" // VMOV bmll2, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "10010010" // /* MW 3 */ + 6258 "00010001" // /* MW 2 */ + 6259 "00011010" // /* MW 1 */ + 6260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6261 "00000000" // /* MW 1 */ + 6262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6263 "00000000" // /* MW 1 */ + 6264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6265 "00000000" // /* MW 1 */ + 6266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6267 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id first + 6268 "11111000" // VMOV lfl1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "00010010" // /* MW 3 */ + 6270 "01101000" // /* MW 2 */ + 6271 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 19 +.noswbrkpt + 6272 "01001000" // VADD.f dm0, dm1, dm0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "00111101" // /* MW 3 */ + 6274 "00100000" // /* MW 2 */ + 6275 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6276 "11111000" // VMOV bmll1, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "10010010" // /* MW 3 */ + 6278 "00010101" // /* MW 2 */ + 6279 "00011001" // /* MW 1 */ + 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6281 "00000000" // /* MW 1 */ + 6282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6283 "00000000" // /* MW 1 */ + 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6285 "00000000" // /* MW 1 */ + 6286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6287 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id first + 6288 "11111000" // VMOV lfh1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6289 "00010010" // /* MW 3 */ + 6290 "01100000" // /* MW 2 */ + 6291 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 first +.aggressive_scheduled_block_id 20 +.noswbrkpt + 6292 "01001000" // VADD.f dm0, dm0, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6293 "00111101" // /* MW 3 */ + 6294 "00001100" // /* MW 2 */ + 6295 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6296 "11111000" // VMOV bmll0, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6297 "10010010" // /* MW 3 */ + 6298 "00010001" // /* MW 2 */ + 6299 "00011000" // /* MW 1 */ + 6300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6301 "00000000" // /* MW 1 */ + 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 72 first +.src_ref 7 "accum.hpp" 1108 103 first + 6308 "00011000" // VCONV.bf16.fp32 wl11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6309 "00010110" // /* MW 3 */ + 6310 "11000000" // /* MW 2 */ + 6311 "00001101" // /* MW 1 */ + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 41 + 6314 "11011000" // VSHIFT x11, x0, x11, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "11111110" // /* MW 3 */ + 6316 "10000101" // /* MW 2 */ + 6317 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1289 16 first + 6318 "00111000" // VSEL.8 x11, x10, x11, r19:r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6319 "11001100" // /* MW 3 */ + 6320 "11010101" // /* MW 2 */ + 6321 "00011101" // /* MW 1 */ + 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6323 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 first +.src_ref 5 "vector.hpp" 1292 26 first + 6324 "00110110" // NOPA; NOPB; VST wh11, [p7, #32]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6325 "01000001" // /* MW 11 */ + 6326 "01100101" // /* MW 10 */ + 6327 "10001011" // /* MW 9 */ + 6328 "00000011" // /* MW 8 */ + 6329 "00000000" // /* MW 7 */ + 6330 "00000000" // /* MW 6 */ + 6331 "00100000" // /* MW 5 */ + 6332 "00000000" // /* MW 4 */ + 6333 "11110000" // /* MW 3 */ + 6334 "00101100" // /* MW 2 */ + 6335 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 first +.end_of_loop + 6336 "11100001" // NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6337 "00000000" // /* MW 15 */ + 6338 "00000000" // /* MW 14 */ + 6339 "01111000" // /* MW 13 */ + 6340 "10100101" // /* MW 12 */ + 6341 "00000001" // /* MW 11 */ + 6342 "00000000" // /* MW 10 */ + 6343 "00000000" // /* MW 9 */ + 6344 "10000000" // /* MW 8 */ + 6345 "11101010" // /* MW 7 */ + 6346 "10001010" // /* MW 6 */ + 6347 "00100111" // /* MW 5 */ + 6348 "00000000" // /* MW 4 */ + 6349 "11110000" // /* MW 3 */ + 6350 "00101100" // /* MW 2 */ + 6351 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6352 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */ + 6353 "00000000" // /* MW 5 */ + 6354 "00000000" // /* MW 4 */ + 6355 "01111000" // /* MW 3 */ + 6356 "00001100" // /* MW 2 */ + 6357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6367 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520 + 6368 "01011100" // ST dn3, [sp, #-4]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "10000000" // /* MW 5 */ + 6370 "10110100" // /* MW 4 */ + 6371 "10110000" // /* MW 3 */ + 6372 "10110100" // /* MW 2 */ + 6373 "11111111" // /* MW 1 */ + 6374 "01111010" // NOPA; ST lr, [sp, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6375 "00000000" // /* MW 9 */ + 6376 "00000000" // /* MW 8 */ + 6377 "00000000" // /* MW 7 */ + 6378 "10000000" // /* MW 6 */ + 6379 "00111101" // /* MW 5 */ + 6380 "11111000" // /* MW 4 */ + 6381 "11110111" // /* MW 3 */ + 6382 "00101100" // /* MW 2 */ + 6383 "00000000" // /* MW 1 */ +.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 2 "reduce_base_c8.h" 352 30 first + 6384 "00011000" // ADD.NC p7, r3, #34 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6385 "10010001" // /* MW 3 */ + 6386 "01100001" // /* MW 2 */ + 6387 "00011111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 + 6388 "11010100" // LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6389 "11000001" // /* MW 5 */ + 6390 "01100100" // /* MW 4 */ + 6391 "01011011" // /* MW 3 */ + 6392 "10001111" // /* MW 2 */ + 6393 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id first + 6394 "11111000" // MOV crSCDEn, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6395 "01100000" // /* MW 3 */ + 6396 "01111011" // /* MW 2 */ + 6397 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 +.aggressive_scheduled_block_id 21 +.noswbrkpt + 6398 "00011000" // ST.s16 r3, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6399 "01110111" // /* MW 3 */ + 6400 "00000100" // /* MW 2 */ + 6401 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 353 57 first +.aggressive_scheduled_block_id 21 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6402 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 6403 "00000001" // /* MW 5 */ + 6404 "00000000" // /* MW 4 */ + 6405 "11111000" // /* MW 3 */ + 6406 "00010011" // /* MW 2 */ + 6407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 first +.delay_slot +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6414 "00011000" // ADD r3, r3, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6415 "00000111" // /* MW 3 */ + 6416 "11000110" // /* MW 2 */ + 6417 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 +.delay_slot + 6418 "01111110" // NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6419 "01100000" // /* MW 13 */ + 6420 "00101011" // /* MW 12 */ + 6421 "00000000" // /* MW 11 */ + 6422 "10101111" // /* MW 10 */ + 6423 "00110100" // /* MW 9 */ + 6424 "00000000" // /* MW 8 */ + 6425 "10110000" // /* MW 7 */ + 6426 "11000000" // /* MW 6 */ + 6427 "00100000" // /* MW 5 */ + 6428 "00000000" // /* MW 4 */ + 6429 "11110000" // /* MW 3 */ + 6430 "00101100" // /* MW 2 */ + 6431 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 +.return_address + 6432 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00111001" // /* MW 3 */ + 6434 "11111000" // /* MW 2 */ + 6435 "00000111" // /* MW 1 */ + 6436 "00011000" // LDA p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "10011001" // /* MW 3 */ + 6438 "11111100" // /* MW 2 */ + 6439 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 353 23 first + 6440 "00011000" // ST.s16 r3, [p7, #10] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "01110111" // /* MW 3 */ + 6442 "01010100" // /* MW 2 */ + 6443 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 first + 6444 "11000100" // PADDXM [sp], #-256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6445 "00000001" // /* MW 5 */ + 6446 "00000000" // /* MW 4 */ + 6447 "00000000" // /* MW 3 */ + 6448 "11100000" // /* MW 2 */ + 6449 "11111111" // /* MW 1 */ + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ + 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6453 "00000000" // /* MW 1 */ + 6454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6455 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 + 6456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6457 "00000000" // /* MW 3 */ + 6458 "00101000" // /* MW 2 */ + 6459 "00010000" // /* MW 1 */ +.delay_slot + 6460 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6461 "11000000" // /* MW 3 */ + 6462 "01100010" // /* MW 2 */ + 6463 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6471 "01111110" // /* MW 9 */ + 6472 "10100101" // /* MW 8 */ + 6473 "00000001" // /* MW 7 */ + 6474 "00000000" // /* MW 6 */ + 6475 "00010000" // /* MW 5 */ + 6476 "00000000" // /* MW 4 */ + 6477 "11110000" // /* MW 3 */ + 6478 "00101100" // /* MW 2 */ + 6479 "00000000" // /* MW 1 */ +.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 5 "blend.hpp" 163 48 + 6480 "10111010" // MOVA r20, #255; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 6481 "00100000" // /* MW 9 */ + 6482 "00000000" // /* MW 8 */ + 6483 "00000000" // /* MW 7 */ + 6484 "10111000" // /* MW 6 */ + 6485 "00000010" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "00000000" // /* MW 3 */ + 6488 "11110100" // /* MW 2 */ + 6489 "00011111" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 6490 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6491 "00000001" // /* MW 3 */ + 6492 "00101010" // /* MW 2 */ + 6493 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6500 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6501 "10000001" // /* MW 11 */ + 6502 "10101101" // /* MW 10 */ + 6503 "00000000" // /* MW 9 */ + 6504 "00000000" // /* MW 8 */ + 6505 "00000000" // /* MW 7 */ + 6506 "00000000" // /* MW 6 */ + 6507 "00100000" // /* MW 5 */ + 6508 "00000000" // /* MW 4 */ + 6509 "11110000" // /* MW 3 */ + 6510 "00101100" // /* MW 2 */ + 6511 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6512 "00011000" // MOVX r5, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6513 "00010101" // /* MW 3 */ + 6514 "00001010" // /* MW 2 */ + 6515 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first + 6516 "10011000" // EQ r5, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "01100111" // /* MW 3 */ + 6518 "01001010" // /* MW 2 */ + 6519 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6520 "10000100" // JNZ r5, #7264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7264 delay_slots=5 */ + 6521 "00000001" // /* MW 5 */ + 6522 "01000000" // /* MW 4 */ + 6523 "00110000" // /* MW 3 */ + 6524 "00001110" // /* MW 2 */ + 6525 "00101000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6535 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6536 "00011000" // MOVX r7, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00011001" // /* MW 3 */ + 6538 "00001110" // /* MW 2 */ + 6539 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6540 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "01100111" // /* MW 3 */ + 6542 "11001110" // /* MW 2 */ + 6543 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6544 "10000100" // JNZ r7, #7504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7504 delay_slots=5 */ + 6545 "00000001" // /* MW 5 */ + 6546 "01000000" // /* MW 4 */ + 6547 "10101000" // /* MW 3 */ + 6548 "00001110" // /* MW 2 */ + 6549 "00111000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.delay_slot + 6550 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6551 "01000001" // /* MW 3 */ + 6552 "00001010" // /* MW 2 */ + 6553 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6555 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6557 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6559 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6561 "00000000" // /* MW 15 */ + 6562 "00000000" // /* MW 14 */ + 6563 "01111000" // /* MW 13 */ + 6564 "10100101" // /* MW 12 */ + 6565 "00000001" // /* MW 11 */ + 6566 "00000000" // /* MW 10 */ + 6567 "00000000" // /* MW 9 */ + 6568 "00000000" // /* MW 8 */ + 6569 "01011011" // /* MW 7 */ + 6570 "00000001" // /* MW 6 */ + 6571 "00100000" // /* MW 5 */ + 6572 "00000000" // /* MW 4 */ + 6573 "11110000" // /* MW 3 */ + 6574 "00101100" // /* MW 2 */ + 6575 "00000000" // /* MW 1 */ +.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first +.src_ref 3 "reduce_mean_c8_impl.h" 202 30 + 6576 "10111010" // LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6577 "01111000" // /* MW 9 */ + 6578 "11110000" // /* MW 8 */ + 6579 "01100000" // /* MW 7 */ + 6580 "11101010" // /* MW 6 */ + 6581 "00010000" // /* MW 5 */ + 6582 "00000001" // /* MW 4 */ + 6583 "01010000" // /* MW 3 */ + 6584 "00011110" // /* MW 2 */ + 6585 "01001000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 202 30 first + 6586 "01100100" // NE r6, r17, r6; MOV r17, #257 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6587 "00000101" // /* MW 5 */ + 6588 "10100100" // /* MW 4 */ + 6589 "00011000" // /* MW 3 */ + 6590 "10001101" // /* MW 2 */ + 6591 "10001001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 202 12 + 6592 "10000100" // JNZ r6, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */ + 6593 "00000001" // /* MW 5 */ + 6594 "01000000" // /* MW 4 */ + 6595 "00100000" // /* MW 3 */ + 6596 "00001110" // /* MW 2 */ + 6597 "00110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 first +.delay_slot + 6606 "10011000" // ASHL r5, r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6607 "01011110" // /* MW 3 */ + 6608 "11001010" // /* MW 2 */ + 6609 "00010001" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 199 120 +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first +.src_ref 3 "reduce_mean_c8_impl.h" 206 35 +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 + 6610 "01110110" // MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6611 "00010000" // /* MW 11 */ + 6612 "00001000" // /* MW 10 */ + 6613 "01111101" // /* MW 9 */ + 6614 "00000100" // /* MW 8 */ + 6615 "00000000" // /* MW 7 */ + 6616 "00000000" // /* MW 6 */ + 6617 "10001011" // /* MW 5 */ + 6618 "10000100" // /* MW 4 */ + 6619 "10000000" // /* MW 3 */ + 6620 "10001010" // /* MW 2 */ + 6621 "00000100" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 +.src_ref 3 "reduce_mean_c8_impl.h" 206 35 + 6622 "01110110" // LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6623 "00010000" // /* MW 11 */ + 6624 "00111000" // /* MW 10 */ + 6625 "10111101" // /* MW 9 */ + 6626 "00000101" // /* MW 8 */ + 6627 "00000000" // /* MW 7 */ + 6628 "10000000" // /* MW 6 */ + 6629 "10100101" // /* MW 5 */ + 6630 "11111101" // /* MW 4 */ + 6631 "11010111" // /* MW 3 */ + 6632 "00011110" // /* MW 2 */ + 6633 "01001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first + 6634 "10011000" // VLDA bmll2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6635 "00010101" // /* MW 3 */ + 6636 "00011101" // /* MW 2 */ + 6637 "00000000" // /* MW 1 */ + 6638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6639 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 + 6640 "11111000" // VMOV bmhh4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6641 "10010010" // /* MW 3 */ + 6642 "11000010" // /* MW 2 */ + 6643 "00011100" // /* MW 1 */ + 6644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6645 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 +.src_ref 5 "add.hpp" 28 49 first + 6646 "01100010" // VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6647 "00111101" // /* MW 7 */ + 6648 "01101000" // /* MW 6 */ + 6649 "00010001" // /* MW 5 */ + 6650 "11100110" // /* MW 4 */ + 6651 "00010010" // /* MW 3 */ + 6652 "00010011" // /* MW 2 */ + 6653 "00000011" // /* MW 1 */ + 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6655 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first + 6656 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6657 "00000000" // /* MW 15 */ + 6658 "00000000" // /* MW 14 */ + 6659 "11001000" // /* MW 13 */ + 6660 "11111111" // /* MW 12 */ + 6661 "10111001" // /* MW 11 */ + 6662 "00000010" // /* MW 10 */ + 6663 "00000000" // /* MW 9 */ + 6664 "00000000" // /* MW 8 */ + 6665 "01011011" // /* MW 7 */ + 6666 "00000001" // /* MW 6 */ + 6667 "00100000" // /* MW 5 */ + 6668 "00000000" // /* MW 4 */ + 6669 "11110000" // /* MW 3 */ + 6670 "00101100" // /* MW 2 */ + 6671 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first +.begin_of_loop +.loop_nesting 1 + 6672 "11100001" // VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6673 "00000000" // /* MW 15 */ + 6674 "00000000" // /* MW 14 */ + 6675 "01111000" // /* MW 13 */ + 6676 "10100101" // /* MW 12 */ + 6677 "00000001" // /* MW 11 */ + 6678 "00000000" // /* MW 10 */ + 6679 "00000000" // /* MW 9 */ + 6680 "00000000" // /* MW 8 */ + 6681 "01011011" // /* MW 7 */ + 6682 "00000001" // /* MW 6 */ + 6683 "00100000" // /* MW 5 */ + 6684 "00000000" // /* MW 4 */ + 6685 "10110000" // /* MW 3 */ + 6686 "10100010" // /* MW 2 */ + 6687 "00000011" // /* MW 1 */ + 6688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6689 "00000000" // /* MW 15 */ + 6690 "00000000" // /* MW 14 */ + 6691 "01111000" // /* MW 13 */ + 6692 "10100101" // /* MW 12 */ + 6693 "00000001" // /* MW 11 */ + 6694 "00000000" // /* MW 10 */ + 6695 "00000000" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "01011011" // /* MW 7 */ + 6698 "00000001" // /* MW 6 */ + 6699 "00100000" // /* MW 5 */ + 6700 "00000000" // /* MW 4 */ + 6701 "11110000" // /* MW 3 */ + 6702 "00101100" // /* MW 2 */ + 6703 "00000000" // /* MW 1 */ + 6704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6705 "00000000" // /* MW 15 */ + 6706 "00000000" // /* MW 14 */ + 6707 "01111000" // /* MW 13 */ + 6708 "10100101" // /* MW 12 */ + 6709 "00000001" // /* MW 11 */ + 6710 "00000000" // /* MW 10 */ + 6711 "00000000" // /* MW 9 */ + 6712 "00000000" // /* MW 8 */ + 6713 "01011011" // /* MW 7 */ + 6714 "00000001" // /* MW 6 */ + 6715 "00100000" // /* MW 5 */ + 6716 "00000000" // /* MW 4 */ + 6717 "11110000" // /* MW 3 */ + 6718 "00101100" // /* MW 2 */ + 6719 "00000000" // /* MW 1 */ + 6720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6721 "00000000" // /* MW 15 */ + 6722 "00000000" // /* MW 14 */ + 6723 "01111000" // /* MW 13 */ + 6724 "10100101" // /* MW 12 */ + 6725 "00000001" // /* MW 11 */ + 6726 "00000000" // /* MW 10 */ + 6727 "00000000" // /* MW 9 */ + 6728 "00000000" // /* MW 8 */ + 6729 "01011011" // /* MW 7 */ + 6730 "00000001" // /* MW 6 */ + 6731 "00100000" // /* MW 5 */ + 6732 "00000000" // /* MW 4 */ + 6733 "11110000" // /* MW 3 */ + 6734 "00101100" // /* MW 2 */ + 6735 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 150 115 first +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id first + 6736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6737 "00000000" // /* MW 15 */ + 6738 "00000000" // /* MW 14 */ + 6739 "01111000" // /* MW 13 */ + 6740 "00001001" // /* MW 12 */ + 6741 "01100010" // /* MW 11 */ + 6742 "00000010" // /* MW 10 */ + 6743 "00000000" // /* MW 9 */ + 6744 "00000000" // /* MW 8 */ + 6745 "01011011" // /* MW 7 */ + 6746 "00000001" // /* MW 6 */ + 6747 "00100000" // /* MW 5 */ + 6748 "00000000" // /* MW 4 */ + 6749 "11110000" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 5 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 22 +.noswbrkpt + 6752 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "01000001" // /* MW 15 */ + 6754 "10001011" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "01011011" // /* MW 7 */ + 6762 "00000001" // /* MW 6 */ + 6763 "00100000" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920 +.src_ref 7 "accum.hpp" 199 120 first +.end_of_loop +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6768 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6769 "00000000" // /* MW 15 */ + 6770 "00000000" // /* MW 14 */ + 6771 "01111000" // /* MW 13 */ + 6772 "10001001" // /* MW 12 */ + 6773 "10001001" // /* MW 11 */ + 6774 "00000001" // /* MW 10 */ + 6775 "00000000" // /* MW 9 */ + 6776 "00000000" // /* MW 8 */ + 6777 "01011011" // /* MW 7 */ + 6778 "00000001" // /* MW 6 */ + 6779 "00100000" // /* MW 5 */ + 6780 "00000000" // /* MW 4 */ + 6781 "11110000" // /* MW 3 */ + 6782 "00101100" // /* MW 2 */ + 6783 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id first +.loop_nesting 0 + 6784 "10111010" // MOVA r16, #16; MOVXM p7, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6785 "00010000" // /* MW 9 */ + 6786 "01111000" // /* MW 8 */ + 6787 "10110010" // /* MW 7 */ + 6788 "11110011" // /* MW 6 */ + 6789 "00000001" // /* MW 5 */ + 6790 "00000000" // /* MW 4 */ + 6791 "00000000" // /* MW 3 */ + 6792 "00010000" // /* MW 2 */ + 6793 "00000010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6794 "10111010" // LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6795 "01011000" // /* MW 9 */ + 6796 "00000001" // /* MW 8 */ + 6797 "10011000" // /* MW 7 */ + 6798 "00001000" // /* MW 6 */ + 6799 "01100001" // /* MW 5 */ + 6800 "00000000" // /* MW 4 */ + 6801 "01010000" // /* MW 3 */ + 6802 "10010000" // /* MW 2 */ + 6803 "11100000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6804 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6805 "00000101" // /* MW 3 */ + 6806 "00100010" // /* MW 2 */ + 6807 "00010000" // /* MW 1 */ +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6809 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 150 115 first +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6810 "11111000" // VMOV bmhh4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6811 "00010010" // /* MW 3 */ + 6812 "11000100" // /* MW 2 */ + 6813 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6816 "11111000" // VMOV x2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6817 "00010010" // /* MW 3 */ + 6818 "00110011" // /* MW 2 */ + 6819 "00011001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6820 "11011000" // VSHIFT x2, x2, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6821 "00010010" // /* MW 3 */ + 6822 "00010000" // /* MW 2 */ + 6823 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 1108 103 +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6824 "01011010" // MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6825 "00111101" // /* MW 9 */ + 6826 "01000000" // /* MW 8 */ + 6827 "00010000" // /* MW 7 */ + 6828 "00101111" // /* MW 6 */ + 6829 "01001001" // /* MW 5 */ + 6830 "00000000" // /* MW 4 */ + 6831 "10000000" // /* MW 3 */ + 6832 "00111010" // /* MW 2 */ + 6833 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6834 "11111000" // VMOV bmll2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6835 "00010010" // /* MW 3 */ + 6836 "00010011" // /* MW 2 */ + 6837 "00011010" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first + 6838 "11111000" // VBCST.32 x2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6839 "01110010" // /* MW 3 */ + 6840 "00010110" // /* MW 2 */ + 6841 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 + 6842 "11111000" // VMOV bmll1, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6843 "10010010" // /* MW 3 */ + 6844 "00000100" // /* MW 2 */ + 6845 "00011001" // /* MW 1 */ + 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6847 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first + 6848 "11111000" // VMOV bmll2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "00010010" // /* MW 3 */ + 6850 "00000100" // /* MW 2 */ + 6851 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id first + 6852 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "00010010" // /* MW 3 */ + 6854 "00100000" // /* MW 2 */ + 6855 "00011001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 24 +.noswbrkpt + 6856 "01100010" // VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6857 "00111101" // /* MW 7 */ + 6858 "00001100" // /* MW 6 */ + 6859 "00010000" // /* MW 5 */ + 6860 "11000110" // /* MW 4 */ + 6861 "01000010" // /* MW 3 */ + 6862 "00010000" // /* MW 2 */ + 6863 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6864 "11111000" // VMOV bmll3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6865 "10010010" // /* MW 3 */ + 6866 "00000100" // /* MW 2 */ + 6867 "00011011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 6868 "11111000" // VMOV x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6869 "10010010" // /* MW 3 */ + 6870 "00100000" // /* MW 2 */ + 6871 "00011001" // /* MW 1 */ + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6873 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 first +.src_ref 6 "me_vmult_float_emulated.h" 112 19 first + 6874 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6875 "10010110" // /* MW 3 */ + 6876 "01000000" // /* MW 2 */ + 6877 "00001000" // /* MW 1 */ + 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6879 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id first + 6880 "01100010" // VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6881 "10000011" // /* MW 7 */ + 6882 "01000000" // /* MW 6 */ + 6883 "00010100" // /* MW 5 */ + 6884 "11100110" // /* MW 4 */ + 6885 "00010010" // /* MW 3 */ + 6886 "10100000" // /* MW 2 */ + 6887 "00000001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 25 +.noswbrkpt + 6888 "01100010" // VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6889 "00111101" // /* MW 7 */ + 6890 "00001000" // /* MW 6 */ + 6891 "00010000" // /* MW 5 */ + 6892 "11000110" // /* MW 4 */ + 6893 "00011010" // /* MW 3 */ + 6894 "10011000" // /* MW 2 */ + 6895 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6896 "11111000" // VMOV bmll2, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6897 "10010010" // /* MW 3 */ + 6898 "00000110" // /* MW 2 */ + 6899 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 6900 "11111000" // VMOV x3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6901 "10010010" // /* MW 3 */ + 6902 "10100100" // /* MW 2 */ + 6903 "00011001" // /* MW 1 */ + 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6905 "00000000" // /* MW 1 */ + 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6907 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 6 "me_vmult_float_emulated.h" 114 6 first +.src_ref 6 "me_vmult_float_emulated.h" 114 19 first + 6908 "00011000" // VCONV.bf16.fp32 wl2, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6909 "00010110" // /* MW 3 */ + 6910 "01000010" // /* MW 2 */ + 6911 "00001001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id first + 6912 "11111000" // VMOV x5, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6913 "00010010" // /* MW 3 */ + 6914 "10100000" // /* MW 2 */ + 6915 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 26 +.noswbrkpt + 6916 "01100010" // VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6917 "00111101" // /* MW 7 */ + 6918 "00001000" // /* MW 6 */ + 6919 "00010000" // /* MW 5 */ + 6920 "11000110" // /* MW 4 */ + 6921 "00000010" // /* MW 3 */ + 6922 "00101000" // /* MW 2 */ + 6923 "00000011" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6924 "11111000" // VMOV bmll2, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10010010" // /* MW 3 */ + 6926 "00001100" // /* MW 2 */ + 6927 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6928 "11111000" // VMOV x5, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "10010010" // /* MW 3 */ + 6930 "10100110" // /* MW 2 */ + 6931 "00011010" // /* MW 1 */ + 6932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6933 "00000000" // /* MW 1 */ + 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6935 "00000000" // /* MW 1 */ + 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6937 "00000000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 6938 "11111000" // VMOV x6, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6939 "00010010" // /* MW 3 */ + 6940 "00100000" // /* MW 2 */ + 6941 "00011011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first + 6942 "10111000" // VEXTRACT.32 r0, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6943 "00000001" // /* MW 3 */ + 6944 "00011010" // /* MW 2 */ + 6945 "00011000" // /* MW 1 */ + 6946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6947 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 856 23 first + 6948 "01111000" // VINSERT.32 x6, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6949 "00010001" // /* MW 3 */ + 6950 "00000000" // /* MW 2 */ + 6951 "00011011" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 first + 6952 "00111000" // VSEL.32 x1, x1, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6953 "00001000" // /* MW 3 */ + 6954 "10001011" // /* MW 2 */ + 6955 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 + 6956 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6957 "10010010" // /* MW 3 */ + 6958 "00000010" // /* MW 2 */ + 6959 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6960 "11111000" // VMOV x1, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6961 "10010010" // /* MW 3 */ + 6962 "10101010" // /* MW 2 */ + 6963 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 first +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6964 "00000010" // VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6965 "01110000" // /* MW 7 */ + 6966 "01001001" // /* MW 6 */ + 6967 "10010001" // /* MW 5 */ + 6968 "00000001" // /* MW 4 */ + 6969 "11000000" // /* MW 3 */ + 6970 "00100010" // /* MW 2 */ + 6971 "01011000" // /* MW 1 */ + 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6973 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first + 6974 "01001000" // VMSC.f dm1, dm2, x5, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6975 "10000011" // /* MW 3 */ + 6976 "01001010" // /* MW 2 */ + 6977 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 9 first + 6978 "01001000" // VMUL.f dm0, x5, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6979 "01000001" // /* MW 3 */ + 6980 "11101010" // /* MW 2 */ + 6981 "00010000" // /* MW 1 */ + 6982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6983 "00000000" // /* MW 1 */ + 6984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6985 "00000000" // /* MW 1 */ + 6986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6987 "00000000" // /* MW 1 */ + 6988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6989 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first +.src_ref 6 "me_vmult_float_emulated.h" 110 6 first +.src_ref 6 "me_vmult_float_emulated.h" 110 19 first + 6990 "00011000" // VCONV.bf16.fp32 wl1, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6991 "10010110" // /* MW 3 */ + 6992 "11000000" // /* MW 2 */ + 6993 "00001000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 6994 "01001000" // VMSC.f dm4, dm4, x2, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6995 "10000011" // /* MW 3 */ + 6996 "10000100" // /* MW 2 */ + 6997 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 6998 "01001000" // VMSC.f dm3, dm1, x1, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6999 "10000011" // /* MW 3 */ + 7000 "00100010" // /* MW 2 */ + 7001 "00010011" // /* MW 1 */ + 7002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7003 "00000000" // /* MW 1 */ + 7004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7005 "00000000" // /* MW 1 */ + 7006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7007 "00000000" // /* MW 1 */ + 7008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7009 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 6 first +.src_ref 6 "me_vmult_float_emulated.h" 115 19 first +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 7010 "00011000" // VCONV.bf16.fp32 wl3, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7011 "00010110" // /* MW 3 */ + 7012 "11000010" // /* MW 2 */ + 7013 "00001001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 6 first +.src_ref 6 "me_vmult_float_emulated.h" 111 19 first +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 7014 "00011000" // VCONV.bf16.fp32 wl6, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7015 "10010110" // /* MW 3 */ + 7016 "01000001" // /* MW 2 */ + 7017 "00001011" // /* MW 1 */ + 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7019 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first + 7020 "01001000" // VMUL.f dm2, x6, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7021 "01100001" // /* MW 3 */ + 7022 "11101100" // /* MW 2 */ + 7023 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 9 first + 7024 "01001000" // VMUL.f dm3, x6, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7025 "01000001" // /* MW 3 */ + 7026 "11101100" // /* MW 2 */ + 7027 "00010011" // /* MW 1 */ + 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7029 "00000000" // /* MW 1 */ + 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7031 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id first + 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7033 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 9 first +.aggressive_scheduled_block_id 27 +.noswbrkpt + 7034 "01001000" // VMUL.f dm3, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7035 "01100001" // /* MW 3 */ + 7036 "11100010" // /* MW 2 */ + 7037 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7038 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "00010010" // /* MW 3 */ + 7040 "01101000" // /* MW 2 */ + 7041 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7042 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7043 "00111101" // /* MW 3 */ + 7044 "01001100" // /* MW 2 */ + 7045 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7046 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7047 "10010010" // /* MW 3 */ + 7048 "00000101" // /* MW 2 */ + 7049 "00011010" // /* MW 1 */ + 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7051 "00000000" // /* MW 1 */ + 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7053 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id first + 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7055 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 9 first +.aggressive_scheduled_block_id 28 +.noswbrkpt + 7056 "01001000" // VMUL.f dm3, x5, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7057 "01100001" // /* MW 3 */ + 7058 "11101010" // /* MW 2 */ + 7059 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7060 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00010010" // /* MW 3 */ + 7062 "01101000" // /* MW 2 */ + 7063 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7064 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7065 "00111101" // /* MW 3 */ + 7066 "01001100" // /* MW 2 */ + 7067 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7068 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7069 "10010010" // /* MW 3 */ + 7070 "00000001" // /* MW 2 */ + 7071 "00011010" // /* MW 1 */ + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ + 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7075 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id first + 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7077 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 9 first +.aggressive_scheduled_block_id 29 +.noswbrkpt + 7078 "01001000" // VMUL.f dm3, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7079 "01000001" // /* MW 3 */ + 7080 "11100010" // /* MW 2 */ + 7081 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7082 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7083 "00010010" // /* MW 3 */ + 7084 "01101000" // /* MW 2 */ + 7085 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7086 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7087 "00111101" // /* MW 3 */ + 7088 "01001100" // /* MW 2 */ + 7089 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7090 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7091 "10010010" // /* MW 3 */ + 7092 "00000101" // /* MW 2 */ + 7093 "00011010" // /* MW 1 */ + 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7095 "00000000" // /* MW 1 */ + 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7097 "00000000" // /* MW 1 */ + 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7099 "00000000" // /* MW 1 */ + 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7101 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id first + 7102 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7103 "00010010" // /* MW 3 */ + 7104 "01101000" // /* MW 2 */ + 7105 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 30 +.noswbrkpt + 7106 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7107 "00111101" // /* MW 3 */ + 7108 "01001100" // /* MW 2 */ + 7109 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7110 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7111 "10010010" // /* MW 3 */ + 7112 "00000001" // /* MW 2 */ + 7113 "00011010" // /* MW 1 */ + 7114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7115 "00000000" // /* MW 1 */ + 7116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7117 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 9 first + 7118 "01001000" // VMUL.f dm3, x0, x6, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "11000001" // /* MW 3 */ + 7120 "11100000" // /* MW 2 */ + 7121 "00010011" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id first + 7124 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7125 "00010010" // /* MW 3 */ + 7126 "01101000" // /* MW 2 */ + 7127 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 31 +.noswbrkpt + 7128 "01001000" // VADD.f dm3, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7129 "00111101" // /* MW 3 */ + 7130 "01001100" // /* MW 2 */ + 7131 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7132 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7133 "10010010" // /* MW 3 */ + 7134 "00000101" // /* MW 2 */ + 7135 "00011010" // /* MW 1 */ + 7136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7137 "00000000" // /* MW 1 */ + 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7139 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 9 first + 7140 "01001000" // VMUL.f dm1, x1, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00000001" // /* MW 3 */ + 7142 "11100010" // /* MW 2 */ + 7143 "00010001" // /* MW 1 */ + 7144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7145 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id first + 7146 "11111000" // VMOV lfh0, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00010010" // /* MW 3 */ + 7148 "01101100" // /* MW 2 */ + 7149 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 first +.aggressive_scheduled_block_id 32 +.noswbrkpt + 7150 "01001000" // VADD.f dm1, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "00111101" // /* MW 3 */ + 7152 "01000100" // /* MW 2 */ + 7153 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7154 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "10010010" // /* MW 3 */ + 7156 "00000001" // /* MW 2 */ + 7157 "00011010" // /* MW 1 */ + 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7159 "00000000" // /* MW 1 */ + 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7161 "00000000" // /* MW 1 */ + 7162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7163 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id first + 7164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7165 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 33 +.noswbrkpt + 7166 "01100010" // VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7167 "00111101" // /* MW 7 */ + 7168 "01000000" // /* MW 6 */ + 7169 "00010000" // /* MW 5 */ + 7170 "11100110" // /* MW 4 */ + 7171 "00010010" // /* MW 3 */ + 7172 "00100100" // /* MW 2 */ + 7173 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 first +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7174 "01100010" // VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7175 "00000001" // /* MW 7 */ + 7176 "11101010" // /* MW 6 */ + 7177 "00010100" // /* MW 5 */ + 7178 "11100110" // /* MW 4 */ + 7179 "10010010" // /* MW 3 */ + 7180 "00000000" // /* MW 2 */ + 7181 "00000010" // /* MW 1 */ + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id first + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.aggressive_scheduled_block_id 34 +.noswbrkpt + 7188 "01001000" // VADD.f dm0, dm2, dm4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00111101" // /* MW 3 */ + 7190 "01010000" // /* MW 2 */ + 7191 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010010" // /* MW 3 */ + 7194 "00000000" // /* MW 2 */ + 7195 "00011010" // /* MW 1 */ + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ + 7198 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */ + 7199 "00000000" // /* MW 5 */ + 7200 "00000000" // /* MW 4 */ + 7201 "01111000" // /* MW 3 */ + 7202 "00001100" // /* MW 2 */ + 7203 "00000000" // /* MW 1 */ +.delay_slot + 7204 "10011000" // ST dc4, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7205 "01100101" // /* MW 3 */ + 7206 "11111010" // /* MW 2 */ + 7207 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 first +.delay_slot + 7210 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7211 "00010010" // /* MW 3 */ + 7212 "00000000" // /* MW 2 */ + 7213 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7215 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 1108 103 first +.delay_slot + 7216 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7217 "00000000" // /* MW 15 */ + 7218 "00000000" // /* MW 14 */ + 7219 "01111000" // /* MW 13 */ + 7220 "10100101" // /* MW 12 */ + 7221 "00000001" // /* MW 11 */ + 7222 "00000000" // /* MW 10 */ + 7223 "00000000" // /* MW 9 */ + 7224 "10000000" // /* MW 8 */ + 7225 "00010010" // /* MW 7 */ + 7226 "00000101" // /* MW 6 */ + 7227 "00100001" // /* MW 5 */ + 7228 "00000000" // /* MW 4 */ + 7229 "11110000" // /* MW 3 */ + 7230 "00101100" // /* MW 2 */ + 7231 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384 +.src_ref 5 "blend.hpp" 163 48 + 7232 "10111010" // MOVA r20, #0; J #5616 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5616 delay_slots=5 */ + 7233 "00100000" // /* MW 9 */ + 7234 "00000000" // /* MW 8 */ + 7235 "00000000" // /* MW 7 */ + 7236 "10111110" // /* MW 6 */ + 7237 "00000010" // /* MW 5 */ + 7238 "00000000" // /* MW 4 */ + 7239 "00000000" // /* MW 3 */ + 7240 "00010100" // /* MW 2 */ + 7241 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7242 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7243 "00000001" // /* MW 3 */ + 7244 "00101010" // /* MW 2 */ + 7245 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7247 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7252 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7253 "10000001" // /* MW 11 */ + 7254 "10101101" // /* MW 10 */ + 7255 "00000000" // /* MW 9 */ + 7256 "00000000" // /* MW 8 */ + 7257 "00000000" // /* MW 7 */ + 7258 "00000000" // /* MW 6 */ + 7259 "00100000" // /* MW 5 */ + 7260 "00000000" // /* MW 4 */ + 7261 "11110000" // /* MW 3 */ + 7262 "00101100" // /* MW 2 */ + 7263 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416 + 7264 "10000100" // J #7456 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7265 "00000000" // /* MW 5 */ + 7266 "00000000" // /* MW 4 */ + 7267 "10010000" // /* MW 3 */ + 7268 "00001110" // /* MW 2 */ + 7269 "00000000" // /* MW 1 */ +.delay_slot + 7270 "00000010" // ST p1, [sp, #-4]; MOV dc4, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7271 "01110000" // /* MW 7 */ + 7272 "11110000" // /* MW 6 */ + 7273 "01100000" // /* MW 5 */ + 7274 "00000010" // /* MW 4 */ + 7275 "10110000" // /* MW 3 */ + 7276 "10010011" // /* MW 2 */ + 7277 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7279 "00000000" // /* MW 1 */ +.delay_slot + 7280 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7281 "00110011" // /* MW 3 */ + 7282 "11110000" // /* MW 2 */ + 7283 "00001111" // /* MW 1 */ +.delay_slot + 7284 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7285 "00110011" // /* MW 3 */ + 7286 "11110101" // /* MW 2 */ + 7287 "00001111" // /* MW 1 */ +.delay_slot + 7288 "00000010" // VST x1, [sp, #-128]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7289 "01110000" // /* MW 7 */ + 7290 "10100101" // /* MW 6 */ + 7291 "00000001" // /* MW 5 */ + 7292 "00000000" // /* MW 4 */ + 7293 "01100000" // /* MW 3 */ + 7294 "00001110" // /* MW 2 */ + 7295 "11111111" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7296 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7297 "00000101" // /* MW 3 */ + 7298 "00100010" // /* MW 2 */ + 7299 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first + 7300 "10011000" // EQ r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7301 "01100111" // /* MW 3 */ + 7302 "01100010" // /* MW 2 */ + 7303 "00010100" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7304 "10000100" // JNZ r17, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */ + 7305 "00000001" // /* MW 5 */ + 7306 "01000000" // /* MW 4 */ + 7307 "10010000" // /* MW 3 */ + 7308 "00001110" // /* MW 2 */ + 7309 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ +.delay_slot + 7312 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7313 "00110011" // /* MW 3 */ + 7314 "11110000" // /* MW 2 */ + 7315 "00001111" // /* MW 1 */ +.delay_slot + 7316 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7317 "00110011" // /* MW 3 */ + 7318 "11110101" // /* MW 2 */ + 7319 "00001111" // /* MW 1 */ +.delay_slot + 7320 "00011000" // VST x1, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "01110011" // /* MW 3 */ + 7322 "11111000" // /* MW 2 */ + 7323 "00001111" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 +.delay_slot + 7324 "00111010" // ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7325 "01111001" // /* MW 9 */ + 7326 "11110000" // /* MW 8 */ + 7327 "01100000" // /* MW 7 */ + 7328 "01001010" // /* MW 6 */ + 7329 "01110000" // /* MW 5 */ + 7330 "00000000" // /* MW 4 */ + 7331 "10110000" // /* MW 3 */ + 7332 "10010011" // /* MW 2 */ + 7333 "11111111" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7334 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7335 "01100111" // /* MW 3 */ + 7336 "11001110" // /* MW 2 */ + 7337 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7338 "10000100" // JNZ r7, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7339 "00000001" // /* MW 5 */ + 7340 "01000000" // /* MW 4 */ + 7341 "10000000" // /* MW 3 */ + 7342 "00001110" // /* MW 2 */ + 7343 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7345 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7353 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7354 "10011000" // EQ r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7355 "01100111" // /* MW 3 */ + 7356 "01001110" // /* MW 2 */ + 7357 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7358 "10000100" // JNZ r7, #7392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7392 delay_slots=5 */ + 7359 "00000001" // /* MW 5 */ + 7360 "01000000" // /* MW 4 */ + 7361 "01110000" // /* MW 3 */ + 7362 "00001110" // /* MW 2 */ + 7363 "00111000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.delay_slot + 7364 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7365 "01000001" // /* MW 3 */ + 7366 "00001010" // /* MW 2 */ + 7367 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7375 "00000000" // /* MW 1 */ + 7376 "10000100" // J #6576 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6576 delay_slots=5 */ + 7377 "00000000" // /* MW 5 */ + 7378 "00000000" // /* MW 4 */ + 7379 "11011000" // /* MW 3 */ + 7380 "00001100" // /* MW 2 */ + 7381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7391 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544 +.src_ref 5 "blend.hpp" 170 36 + 7392 "10111010" // MOVA r17, #257; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 7393 "00100000" // /* MW 9 */ + 7394 "00000000" // /* MW 8 */ + 7395 "00000000" // /* MW 7 */ + 7396 "10111000" // /* MW 6 */ + 7397 "00000010" // /* MW 5 */ + 7398 "00000000" // /* MW 4 */ + 7399 "00000000" // /* MW 3 */ + 7400 "00110001" // /* MW 2 */ + 7401 "00100000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7402 "01100100" // MOVX r21, #0; MOV m4, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7403 "01000001" // /* MW 5 */ + 7404 "00000000" // /* MW 4 */ + 7405 "00101000" // /* MW 3 */ + 7406 "01000000" // /* MW 2 */ + 7407 "00000101" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 +.delay_slot + 7408 "00011000" // MOVX r20, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00000001" // /* MW 3 */ + 7410 "00101000" // /* MW 2 */ + 7411 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7416 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7417 "00011100" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00000000" // /* MW 5 */ + 7420 "00000100" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576 + 7424 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 7425 "00000000" // /* MW 5 */ + 7426 "00000000" // /* MW 4 */ + 7427 "10101000" // /* MW 3 */ + 7428 "00001100" // /* MW 2 */ + 7429 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7430 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7431 "11111110" // /* MW 5 */ + 7432 "10111111" // /* MW 4 */ + 7433 "11111000" // /* MW 3 */ + 7434 "00000000" // /* MW 2 */ + 7435 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7436 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7437 "00100000" // /* MW 3 */ + 7438 "00000000" // /* MW 2 */ + 7439 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7444 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7445 "10000001" // /* MW 11 */ + 7446 "10101101" // /* MW 10 */ + 7447 "00000000" // /* MW 9 */ + 7448 "00000000" // /* MW 8 */ + 7449 "00000000" // /* MW 7 */ + 7450 "00000000" // /* MW 6 */ + 7451 "00100000" // /* MW 5 */ + 7452 "00000000" // /* MW 4 */ + 7453 "11110000" // /* MW 3 */ + 7454 "00101100" // /* MW 2 */ + 7455 "00000000" // /* MW 1 */ +.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 7456 "10111010" // VLDA x0, [sp, #-256]; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 7457 "00100000" // /* MW 9 */ + 7458 "00000000" // /* MW 8 */ + 7459 "00000000" // /* MW 7 */ + 7460 "10111000" // /* MW 6 */ + 7461 "00000010" // /* MW 5 */ + 7462 "00000000" // /* MW 4 */ + 7463 "01110000" // /* MW 3 */ + 7464 "00000111" // /* MW 2 */ + 7465 "11111110" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "blend.hpp" 163 48 +.delay_slot + 7466 "10111010" // VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7467 "01011000" // /* MW 9 */ + 7468 "00000000" // /* MW 8 */ + 7469 "10001000" // /* MW 7 */ + 7470 "10001010" // /* MW 6 */ + 7471 "00000000" // /* MW 5 */ + 7472 "00000000" // /* MW 4 */ + 7473 "01110000" // /* MW 3 */ + 7474 "10100111" // /* MW 2 */ + 7475 "11111110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1280 49 +.src_ref 5 "vector.hpp" 1287 41 +.src_ref 5 "vector.hpp" 1288 16 +.src_ref 5 "vector.hpp" 1292 26 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 226 22 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7476 "10111010" // LDA p1, [sp, #-4]; MOVXM r16, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7477 "10010000" // /* MW 9 */ + 7478 "11111111" // /* MW 8 */ + 7479 "00001111" // /* MW 7 */ + 7480 "00111110" // /* MW 6 */ + 7481 "00000000" // /* MW 5 */ + 7482 "00000000" // /* MW 4 */ + 7483 "00100000" // /* MW 3 */ + 7484 "10010011" // /* MW 2 */ + 7485 "11111111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7486 "01100100" // MOVX r21, #0; MOV m4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7487 "10000001" // /* MW 5 */ + 7488 "00000000" // /* MW 4 */ + 7489 "00101000" // /* MW 3 */ + 7490 "01000000" // /* MW 2 */ + 7491 "00000101" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7492 "00011000" // MOVX r17, #257 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7493 "00000101" // /* MW 3 */ + 7494 "00100010" // /* MW 2 */ + 7495 "00010001" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7496 "00100010" // VLDA x1, [sp, #-128]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7497 "00011100" // /* MW 7 */ + 7498 "00000000" // /* MW 6 */ + 7499 "00000000" // /* MW 5 */ + 7500 "00000100" // /* MW 4 */ + 7501 "01110000" // /* MW 3 */ + 7502 "00001111" // /* MW 2 */ + 7503 "11111111" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656 + 7504 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 7505 "00000000" // /* MW 5 */ + 7506 "00000000" // /* MW 4 */ + 7507 "10101000" // /* MW 3 */ + 7508 "00001100" // /* MW 2 */ + 7509 "00000000" // /* MW 1 */ +.delay_slot + 7510 "11111000" // MOV dc4, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7511 "11100000" // /* MW 3 */ + 7512 "11000001" // /* MW 2 */ + 7513 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7514 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7515 "11111110" // /* MW 5 */ + 7516 "10111111" // /* MW 4 */ + 7517 "11111000" // /* MW 3 */ + 7518 "00000000" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7520 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7521 "00100000" // /* MW 3 */ + 7522 "00000000" // /* MW 2 */ + 7523 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0 + 7527 "00000000" // /* MW 1 */ +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_reduce_mean_c8 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 8 "superkernels.cpp" 472 +.src_ref 8 "superkernels.cpp" 472 first +.function_start + 7536 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7537 "00000001" // /* MW 5 */ + 7538 "00000000" // /* MW 4 */ + 7539 "00000000" // /* MW 3 */ + 7540 "00010000" // /* MW 2 */ + 7541 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7542 "00111010" // ST p7, [sp, #-20]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7543 "00010001" // /* MW 9 */ + 7544 "01100000" // /* MW 8 */ + 7545 "10110010" // /* MW 7 */ + 7546 "11110011" // /* MW 6 */ + 7547 "00000001" // /* MW 5 */ + 7548 "00000000" // /* MW 4 */ + 7549 "10110000" // /* MW 3 */ + 7550 "11110011" // /* MW 2 */ + 7551 "11111101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7552 "10111010" // LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7553 "01110010" // /* MW 9 */ + 7554 "01110000" // /* MW 8 */ + 7555 "00001101" // /* MW 7 */ + 7556 "10000010" // /* MW 6 */ + 7557 "00011101" // /* MW 5 */ + 7558 "11100111" // /* MW 4 */ + 7559 "11010111" // /* MW 3 */ + 7560 "11000010" // /* MW 2 */ + 7561 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 22 first +.src_ref 8 "superkernels.cpp" 569 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 "00111010" // ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7563 "01111001" // /* MW 9 */ + 7564 "11110000" // /* MW 8 */ + 7565 "01101000" // /* MW 7 */ + 7566 "10000001" // /* MW 6 */ + 7567 "00000100" // /* MW 5 */ + 7568 "00100001" // /* MW 4 */ + 7569 "10110000" // /* MW 3 */ + 7570 "00101110" // /* MW 2 */ + 7571 "11111111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 30 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7572 "01011100" // ST r15, [sp, #-16]; ADD r17, r16, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7573 "11110110" // /* MW 5 */ + 7574 "01000111" // /* MW 4 */ + 7575 "10111000" // /* MW 3 */ + 7576 "00111110" // /* MW 2 */ + 7577 "11111110" // /* MW 1 */ + 7578 "10011000" // ST r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7579 "10110101" // /* MW 3 */ + 7580 "11101001" // /* MW 2 */ + 7581 "00001111" // /* MW 1 */ + 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7583 "00000000" // /* MW 1 */ + 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7585 "00000000" // /* MW 1 */ + 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7587 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 first +.src_ref 8 "superkernels.cpp" 477 16 first + 7588 "10000100" // JNZ r16, #8160 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8160 delay_slots=5 */ + 7589 "00000001" // /* MW 5 */ + 7590 "01000000" // /* MW 4 */ + 7591 "11110000" // /* MW 3 */ + 7592 "00001111" // /* MW 2 */ + 7593 "10000000" // /* MW 1 */ +.delay_slot + 7594 "10011000" // ST r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7595 "10010101" // /* MW 3 */ + 7596 "11111101" // /* MW 2 */ + 7597 "00001111" // /* MW 1 */ +.delay_slot + 7598 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7599 "11010101" // /* MW 3 */ + 7600 "11110101" // /* MW 2 */ + 7601 "00001111" // /* MW 1 */ +.delay_slot + 7602 "10011000" // ST p0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7603 "00011101" // /* MW 3 */ + 7604 "11100000" // /* MW 2 */ + 7605 "00001111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 11 +.delay_slot + 7606 "01000100" // MOVXM p6, #509128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7607 "10010000" // /* MW 5 */ + 7608 "11001001" // /* MW 4 */ + 7609 "11001100" // /* MW 3 */ + 7610 "00000111" // /* MW 2 */ + 7611 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 11 first +.delay_slot + 7612 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7613 "00110001" // /* MW 3 */ + 7614 "00000110" // /* MW 2 */ + 7615 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 5 "tile.hpp" 74 8 +.src_ref 5 "tile.hpp" 74 8 + 7616 "01110110" // MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7617 "00010000" // /* MW 11 */ + 7618 "01110110" // /* MW 10 */ + 7619 "00110010" // /* MW 9 */ + 7620 "11110001" // /* MW 8 */ + 7621 "00000001" // /* MW 7 */ + 7622 "00000000" // /* MW 6 */ + 7623 "10001011" // /* MW 5 */ + 7624 "10001000" // /* MW 4 */ + 7625 "00000111" // /* MW 3 */ + 7626 "00110001" // /* MW 2 */ + 7627 "00000000" // /* MW 1 */ +.src_ref 5 "tile.hpp" 74 8 first +.src_ref 5 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7628 "00111010" // ST r17, [p2]; MOVXM p2, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7629 "00010001" // /* MW 9 */ + 7630 "01111000" // /* MW 8 */ + 7631 "00110010" // /* MW 7 */ + 7632 "11110001" // /* MW 6 */ + 7633 "00000001" // /* MW 5 */ + 7634 "00000000" // /* MW 4 */ + 7635 "00110000" // /* MW 3 */ + 7636 "11000110" // /* MW 2 */ + 7637 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 44 +.src_ref 5 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7638 "11010100" // ST.s8 r16, [p2]; MOV p6, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7639 "10000001" // /* MW 5 */ + 7640 "11000101" // /* MW 4 */ + 7641 "11101100" // /* MW 3 */ + 7642 "11000000" // /* MW 2 */ + 7643 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 480 4 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7644 "00000100" // JL #2576 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2576 delay_slots=5 */ + 7645 "00000001" // /* MW 5 */ + 7646 "00000000" // /* MW 4 */ + 7647 "00001000" // /* MW 3 */ + 7648 "00000101" // /* MW 2 */ + 7649 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 480 4 +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7650 "01000100" // MOVXM p0, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7651 "10000000" // /* MW 5 */ + 7652 "11001000" // /* MW 4 */ + 7653 "11000000" // /* MW 3 */ + 7654 "00000111" // /* MW 2 */ + 7655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7659 "00000000" // /* MW 1 */ +.src_ref 5 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7660 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7661 "00110001" // /* MW 3 */ + 7662 "00100000" // /* MW 2 */ + 7663 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7665 "00000000" // /* MW 15 */ + 7666 "00000000" // /* MW 14 */ + 7667 "01111000" // /* MW 13 */ + 7668 "10100101" // /* MW 12 */ + 7669 "00000001" // /* MW 11 */ + 7670 "00000000" // /* MW 10 */ + 7671 "00000000" // /* MW 9 */ + 7672 "00000000" // /* MW 8 */ + 7673 "01011011" // /* MW 7 */ + 7674 "00000001" // /* MW 6 */ + 7675 "00100000" // /* MW 5 */ + 7676 "00000000" // /* MW 4 */ + 7677 "11110000" // /* MW 3 */ + 7678 "00101100" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 51 +.src_ref 8 "superkernels.cpp" 487 47 +.return_address + 7680 "10111010" // MOVA r17, #0; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7681 "00010000" // /* MW 9 */ + 7682 "00100000" // /* MW 8 */ + 7683 "00110010" // /* MW 7 */ + 7684 "11110001" // /* MW 6 */ + 7685 "00000001" // /* MW 5 */ + 7686 "00000000" // /* MW 4 */ + 7687 "00000000" // /* MW 3 */ + 7688 "00010001" // /* MW 2 */ + 7689 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 18 +.src_ref 8 "superkernels.cpp" 481 51 first + 7690 "10111010" // LDA r14, [p2]; MOVXM p2, #509128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7691 "00010000" // /* MW 9 */ + 7692 "01100100" // /* MW 8 */ + 7693 "00110010" // /* MW 7 */ + 7694 "11110001" // /* MW 6 */ + 7695 "00000001" // /* MW 5 */ + 7696 "00000000" // /* MW 4 */ + 7697 "11010000" // /* MW 3 */ + 7698 "10111010" // /* MW 2 */ + 7699 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 18 +.src_ref 8 "superkernels.cpp" 481 85 + 7700 "10111010" // LDA r18, [p2]; MOVXM p2, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7701 "00010000" // /* MW 9 */ + 7702 "00100010" // /* MW 8 */ + 7703 "00110010" // /* MW 7 */ + 7704 "11110001" // /* MW 6 */ + 7705 "00000001" // /* MW 5 */ + 7706 "00000000" // /* MW 4 */ + 7707 "11010000" // /* MW 3 */ + 7708 "11001010" // /* MW 2 */ + 7709 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 85 +.src_ref 8 "superkernels.cpp" 482 16 + 7710 "10111010" // LDA r13, [p2], #4; MOVXM p3, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7711 "00010000" // /* MW 9 */ + 7712 "01101000" // /* MW 8 */ + 7713 "10110010" // /* MW 7 */ + 7714 "11110001" // /* MW 6 */ + 7715 "00000001" // /* MW 5 */ + 7716 "00000000" // /* MW 4 */ + 7717 "11010000" // /* MW 3 */ + 7718 "10110110" // /* MW 2 */ + 7719 "01000011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 16 +.src_ref 8 "superkernels.cpp" 482 40 first + 7720 "10111010" // LDA el0, [p2, #4]; MOVXM p1, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7721 "00010000" // /* MW 9 */ + 7722 "01100110" // /* MW 8 */ + 7723 "10110010" // /* MW 7 */ + 7724 "11110000" // /* MW 6 */ + 7725 "00000001" // /* MW 5 */ + 7726 "00000000" // /* MW 4 */ + 7727 "11010000" // /* MW 3 */ + 7728 "10000101" // /* MW 2 */ + 7729 "01000010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 120 first +.src_ref 8 "superkernels.cpp" 483 44 + 7730 "11010100" // LDA r15, [p2]; MOV r16, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7731 "10000001" // /* MW 5 */ + 7732 "00111001" // /* MW 4 */ + 7733 "11011000" // /* MW 3 */ + 7734 "10111110" // /* MW 2 */ + 7735 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 44 + 7736 "00011000" // ADD.NC p2, r16, #40 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7737 "00010100" // /* MW 3 */ + 7738 "01101000" // /* MW 2 */ + 7739 "00011010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7740 "01000100" // MOVXM p6, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7741 "00000000" // /* MW 5 */ + 7742 "11001010" // /* MW 4 */ + 7743 "11001100" // /* MW 3 */ + 7744 "00000111" // /* MW 2 */ + 7745 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 13 + 7746 "01000100" // MOVXM p0, #509160 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7747 "11010000" // /* MW 5 */ + 7748 "11001001" // /* MW 4 */ + 7749 "11000000" // /* MW 3 */ + 7750 "00000111" // /* MW 2 */ + 7751 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 27 + 7752 "10011000" // MUL r18, r14, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7753 "00101111" // /* MW 3 */ + 7754 "10100101" // /* MW 2 */ + 7755 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7756 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7757 "00000000" // /* MW 5 */ + 7758 "00100000" // /* MW 4 */ + 7759 "00001000" // /* MW 3 */ + 7760 "00000000" // /* MW 2 */ + 7761 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 61 +.src_ref 8 "superkernels.cpp" 482 16 first + 7762 "01011100" // ST el0, [p3]; MUL r18, r13, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7763 "01011111" // /* MW 5 */ + 7764 "11001010" // /* MW 4 */ + 7765 "00110110" // /* MW 3 */ + 7766 "10000101" // /* MW 2 */ + 7767 "01100000" // /* MW 1 */ + 7768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7769 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 96 first + 7770 "10011000" // MUL r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7771 "00101111" // /* MW 3 */ + 7772 "11100101" // /* MW 2 */ + 7773 "00010011" // /* MW 1 */ + 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7775 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 16 + 7776 "10011000" // ST r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7777 "01010001" // /* MW 3 */ + 7778 "00000110" // /* MW 2 */ + 7779 "00001001" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 15 first + 7780 "10011000" // LDA el0, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7781 "00101110" // /* MW 3 */ + 7782 "01001100" // /* MW 2 */ + 7783 "00000010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 first + 7784 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7785 "00110001" // /* MW 3 */ + 7786 "00011110" // /* MW 2 */ + 7787 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7788 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7789 "00110001" // /* MW 3 */ + 7790 "00011110" // /* MW 2 */ + 7791 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7792 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7793 "00110001" // /* MW 3 */ + 7794 "00011110" // /* MW 2 */ + 7795 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7796 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7797 "00110001" // /* MW 3 */ + 7798 "00011110" // /* MW 2 */ + 7799 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7800 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7801 "00110001" // /* MW 3 */ + 7802 "00011110" // /* MW 2 */ + 7803 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7804 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7805 "00110001" // /* MW 3 */ + 7806 "00011110" // /* MW 2 */ + 7807 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 13 first + 7808 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7809 "00101001" // /* MW 3 */ + 7810 "00000100" // /* MW 2 */ + 7811 "00001000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 first + 7812 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7813 "00110001" // /* MW 3 */ + 7814 "00011110" // /* MW 2 */ + 7815 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7816 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7817 "00110001" // /* MW 3 */ + 7818 "00011110" // /* MW 2 */ + 7819 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7820 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7821 "00110001" // /* MW 3 */ + 7822 "00011110" // /* MW 2 */ + 7823 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 first + 7824 "10011000" // LDA r1, [p2], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7825 "00110110" // /* MW 3 */ + 7826 "11011100" // /* MW 2 */ + 7827 "00000010" // /* MW 1 */ + 7828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7829 "00000000" // /* MW 1 */ + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ + 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7837 "00000000" // /* MW 1 */ + 7838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7839 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7840 "10011000" // GEU r17, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7841 "00001011" // /* MW 3 */ + 7842 "01100011" // /* MW 2 */ + 7843 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7844 "10000100" // JNZ r17, #7920 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7920 delay_slots=5 */ + 7845 "00000001" // /* MW 5 */ + 7846 "01000000" // /* MW 4 */ + 7847 "01111000" // /* MW 3 */ + 7848 "00001111" // /* MW 2 */ + 7849 "10001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 +.delay_slot + 7850 "11111000" // MOV r12, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7851 "11000000" // /* MW 3 */ + 7852 "00011110" // /* MW 2 */ + 7853 "00011011" // /* MW 1 */ +.delay_slot + 7854 "10011000" // ST p2, [sp, #-40] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7855 "00011101" // /* MW 3 */ + 7856 "11011001" // /* MW 2 */ + 7857 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7863 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.no_stack_arguments + 7864 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */ + 7865 "00000001" // /* MW 5 */ + 7866 "00000000" // /* MW 4 */ + 7867 "01010000" // /* MW 3 */ + 7868 "00010101" // /* MW 2 */ + 7869 "00000000" // /* MW 1 */ +.delay_slot + 7870 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7871 "10010101" // /* MW 3 */ + 7872 "11011101" // /* MW 2 */ + 7873 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7880 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7881 "00011100" // /* MW 7 */ + 7882 "00000000" // /* MW 6 */ + 7883 "00000000" // /* MW 5 */ + 7884 "00000100" // /* MW 4 */ + 7885 "11110000" // /* MW 3 */ + 7886 "00101100" // /* MW 2 */ + 7887 "00000000" // /* MW 1 */ +.return_address + 7888 "10000100" // J #7984 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7984 delay_slots=5 */ + 7889 "00000000" // /* MW 5 */ + 7890 "00000000" // /* MW 4 */ + 7891 "10011000" // /* MW 3 */ + 7892 "00001111" // /* MW 2 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7894 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7895 "11100000" // /* MW 5 */ + 7896 "11001001" // /* MW 4 */ + 7897 "11001110" // /* MW 3 */ + 7898 "00000111" // /* MW 2 */ + 7899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7903 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7905 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7906 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7907 "00011100" // /* MW 13 */ + 7908 "00000000" // /* MW 12 */ + 7909 "00000000" // /* MW 11 */ + 7910 "01010111" // /* MW 10 */ + 7911 "00011010" // /* MW 9 */ + 7912 "01000000" // /* MW 8 */ + 7913 "00000000" // /* MW 7 */ + 7914 "00000000" // /* MW 6 */ + 7915 "10110110" // /* MW 5 */ + 7916 "00000010" // /* MW 4 */ + 7917 "11110000" // /* MW 3 */ + 7918 "00101100" // /* MW 2 */ + 7919 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384 +.src_ref 8 "superkernels.cpp" 491 40 +.no_stack_arguments + 7920 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */ + 7921 "00000001" // /* MW 5 */ + 7922 "00000000" // /* MW 4 */ + 7923 "01010000" // /* MW 3 */ + 7924 "00010101" // /* MW 2 */ + 7925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7932 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7933 "01100111" // /* MW 3 */ + 7934 "00000001" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7936 "11100001" // NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7937 "00000000" // /* MW 15 */ + 7938 "00000000" // /* MW 14 */ + 7939 "01111000" // /* MW 13 */ + 7940 "10100101" // /* MW 12 */ + 7941 "00000001" // /* MW 11 */ + 7942 "00001100" // /* MW 10 */ + 7943 "00011000" // /* MW 9 */ + 7944 "00000010" // /* MW 8 */ + 7945 "01011011" // /* MW 7 */ + 7946 "00000001" // /* MW 6 */ + 7947 "00100000" // /* MW 5 */ + 7948 "00000000" // /* MW 4 */ + 7949 "11110000" // /* MW 3 */ + 7950 "00101100" // /* MW 2 */ + 7951 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.return_address +.no_stack_arguments + 7952 "00000100" // JL #12416 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12416 delay_slots=5 */ + 7953 "00000001" // /* MW 5 */ + 7954 "00000000" // /* MW 4 */ + 7955 "01000000" // /* MW 3 */ + 7956 "00011000" // /* MW 2 */ + 7957 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7958 "11111000" // MOV r1, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7959 "00100000" // /* MW 3 */ + 7960 "01010000" // /* MW 2 */ + 7961 "00011000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7962 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7963 "11100000" // /* MW 5 */ + 7964 "11001001" // /* MW 4 */ + 7965 "11001110" // /* MW 3 */ + 7966 "00000111" // /* MW 2 */ + 7967 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7968 "01000100" // MOVXM r2, #1325400064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7969 "00000000" // /* MW 5 */ + 7970 "00100000" // /* MW 4 */ + 7971 "00000001" // /* MW 3 */ + 7972 "00000000" // /* MW 2 */ + 7973 "01001111" // /* MW 1 */ +.delay_slot + 7974 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "10010101" // /* MW 3 */ + 7976 "11011101" // /* MW 2 */ + 7977 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7978 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7979 "00100000" // /* MW 5 */ + 7980 "00000000" // /* MW 4 */ + 7981 "11110000" // /* MW 3 */ + 7982 "00101100" // /* MW 2 */ + 7983 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 491 40 +.return_address + 7984 "10111010" // LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7985 "10111000" // /* MW 9 */ + 7986 "00001000" // /* MW 8 */ + 7987 "00000000" // /* MW 7 */ + 7988 "00000000" // /* MW 6 */ + 7989 "11010010" // /* MW 5 */ + 7990 "00000010" // /* MW 4 */ + 7991 "01010000" // /* MW 3 */ + 7992 "11000000" // /* MW 2 */ + 7993 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 492 38 +.src_ref 8 "superkernels.cpp" 492 38 + 7994 "10111010" // MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7995 "01111000" // /* MW 9 */ + 7996 "01001001" // /* MW 8 */ + 7997 "00000000" // /* MW 7 */ + 7998 "00001000" // /* MW 6 */ + 7999 "10000000" // /* MW 5 */ + 8000 "00000001" // /* MW 4 */ + 8001 "10000000" // /* MW 3 */ + 8002 "01000000" // /* MW 2 */ + 8003 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 13 +.src_ref 8 "superkernels.cpp" 498 15 + 8004 "10111010" // LDA p2, [sp, #-40]; MOVXM p3, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8005 "00010000" // /* MW 9 */ + 8006 "01101010" // /* MW 8 */ + 8007 "10110010" // /* MW 7 */ + 8008 "11110001" // /* MW 6 */ + 8009 "00000001" // /* MW 5 */ + 8010 "00000000" // /* MW 4 */ + 8011 "00100000" // /* MW 3 */ + 8012 "00100011" // /* MW 2 */ + 8013 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 14 + 8014 "01000100" // MOVXM p1, #509144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8015 "10110000" // /* MW 5 */ + 8016 "11001001" // /* MW 4 */ + 8017 "11000010" // /* MW 3 */ + 8018 "00000111" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 7 +.src_ref 8 "superkernels.cpp" 508 7 +.src_ref 8 "superkernels.cpp" 511 7 + 8020 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8021 "10100000" // /* MW 5 */ + 8022 "11001001" // /* MW 4 */ + 8023 "11001110" // /* MW 3 */ + 8024 "00000111" // /* MW 2 */ + 8025 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8027 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 38 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 8028 "00011000" // ST.s16 r16, [p6], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "00010111" // /* MW 3 */ + 8030 "00011110" // /* MW 2 */ + 8031 "00000110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8032 "00011000" // MOVX crRnd, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "10000000" // /* MW 3 */ + 8034 "00111010" // /* MW 2 */ + 8035 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8036 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "00010110" // /* MW 3 */ + 8038 "01000000" // /* MW 2 */ + 8039 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8041 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8042 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8043 "00000001" // /* MW 3 */ + 8044 "00000001" // /* MW 2 */ + 8045 "00011100" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ + 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8049 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 492 38 first + 8050 "00011000" // ST.s8 r24, [p6], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8051 "00000111" // /* MW 3 */ + 8052 "00001011" // /* MW 2 */ + 8053 "00000110" // /* MW 1 */ + 8054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8055 "00000000" // /* MW 1 */ + 8056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8057 "00000000" // /* MW 1 */ + 8058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8059 "00000000" // /* MW 1 */ + 8060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8061 "00000000" // /* MW 1 */ + 8062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8063 "00000000" // /* MW 1 */ + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 494 25 first + 8066 "10011000" // ST r14, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8067 "11010001" // /* MW 3 */ + 8068 "00011101" // /* MW 2 */ + 8069 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 495 24 first + 8070 "10011000" // ST r15, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8071 "11110001" // /* MW 3 */ + 8072 "00000101" // /* MW 2 */ + 8073 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 496 24 first + 8074 "10011000" // ST r13, [p6, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8075 "10110001" // /* MW 3 */ + 8076 "00010101" // /* MW 2 */ + 8077 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 15 first + 8078 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8079 "00101110" // /* MW 3 */ + 8080 "00011100" // /* MW 2 */ + 8081 "00000010" // /* MW 1 */ + 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8083 "00000000" // /* MW 1 */ + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ + 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8087 "00000000" // /* MW 1 */ + 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8089 "00000000" // /* MW 1 */ + 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8091 "00000000" // /* MW 1 */ + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 13 + 8094 "10011000" // ST el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "00101001" // /* MW 3 */ + 8096 "00000100" // /* MW 2 */ + 8097 "00001011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 16 first + 8098 "10011000" // LDA el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8099 "00101110" // /* MW 3 */ + 8100 "00000100" // /* MW 2 */ + 8101 "00000010" // /* MW 1 */ + 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8103 "00000000" // /* MW 1 */ + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8105 "00000000" // /* MW 1 */ + 8106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8107 "00000000" // /* MW 1 */ + 8108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8109 "00000000" // /* MW 1 */ + 8110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8111 "00000000" // /* MW 1 */ + 8112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8113 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 14 + 8114 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8115 "00101001" // /* MW 3 */ + 8116 "00000100" // /* MW 2 */ + 8117 "00001001" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 15 first + 8118 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8119 "00101110" // /* MW 3 */ + 8120 "00010100" // /* MW 2 */ + 8121 "00000010" // /* MW 1 */ + 8122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8123 "00000000" // /* MW 1 */ + 8124 "10000100" // J #8176 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8176 delay_slots=5 */ + 8125 "00000000" // /* MW 5 */ + 8126 "00000000" // /* MW 4 */ + 8127 "11111000" // /* MW 3 */ + 8128 "00001111" // /* MW 2 */ + 8129 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 13 +.delay_slot + 8130 "01000100" // MOVXM p0, #509148 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8131 "10111000" // /* MW 5 */ + 8132 "11001001" // /* MW 4 */ + 8133 "11000000" // /* MW 3 */ + 8134 "00000111" // /* MW 2 */ + 8135 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8140 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8141 "01100111" // /* MW 3 */ + 8142 "00000001" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 13 +.delay_slot + 8144 "11100001" // NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "10000000" // /* MW 8 */ + 8153 "00101001" // /* MW 7 */ + 8154 "00000100" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624 +.src_ref 8 "superkernels.cpp" 505 7 +.src_ref 8 "superkernels.cpp" 508 7 +.src_ref 8 "superkernels.cpp" 511 7 + 8160 "00111010" // ST p2, [sp, #-36]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8161 "00010001" // /* MW 9 */ + 8162 "01101000" // /* MW 8 */ + 8163 "10110010" // /* MW 7 */ + 8164 "11110011" // /* MW 6 */ + 8165 "00000001" // /* MW 5 */ + 8166 "00000000" // /* MW 4 */ + 8167 "10110000" // /* MW 3 */ + 8168 "10100011" // /* MW 2 */ + 8169 "11111011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 8170 "11010100" // NOPA; MOV r12, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8171 "10000001" // /* MW 5 */ + 8172 "00101001" // /* MW 4 */ + 8173 "11110110" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640 +.src_ref 8 "superkernels.cpp" 505 7 first +.src_ref 8 "superkernels.cpp" 505 19 + 8176 "00101100" // LDA r16, [p7]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8177 "00001010" // /* MW 5 */ + 8178 "01000100" // /* MW 4 */ + 8179 "11010000" // /* MW 3 */ + 8180 "11000010" // /* MW 2 */ + 8181 "11100000" // /* MW 1 */ + 8182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8183 "00000000" // /* MW 1 */ + 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8185 "00000000" // /* MW 1 */ + 8186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8187 "00000000" // /* MW 1 */ + 8188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8189 "00000000" // /* MW 1 */ + 8190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8191 "00000000" // /* MW 1 */ + 8192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8193 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 19 + 8194 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8195 "00001000" // /* MW 3 */ + 8196 "01100011" // /* MW 2 */ + 8197 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 25 + 8198 "10000100" // JNZ r17, #8368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8368 delay_slots=5 */ + 8199 "00000001" // /* MW 5 */ + 8200 "01000000" // /* MW 4 */ + 8201 "01011000" // /* MW 3 */ + 8202 "00010000" // /* MW 2 */ + 8203 "10001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first +.delay_slot + 8204 "00011000" // ADD.NC p6, r12, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8205 "00000110" // /* MW 3 */ + 8206 "01100110" // /* MW 2 */ + 8207 "00011110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8213 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8215 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 29 + 8216 "01000100" // MOVXM p2, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8217 "10001000" // /* MW 5 */ + 8218 "11001001" // /* MW 4 */ + 8219 "11000100" // /* MW 3 */ + 8220 "00000111" // /* MW 2 */ + 8221 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 29 first +.src_ref 8 "superkernels.cpp" 505 65 + 8222 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8223 "00010000" // /* MW 9 */ + 8224 "00110000" // /* MW 8 */ + 8225 "00110010" // /* MW 7 */ + 8226 "11110001" // /* MW 6 */ + 8227 "00000001" // /* MW 5 */ + 8228 "00000000" // /* MW 4 */ + 8229 "11010000" // /* MW 3 */ + 8230 "11000010" // /* MW 2 */ + 8231 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 65 + 8232 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8233 "00111010" // /* MW 3 */ + 8234 "00000100" // /* MW 2 */ + 8235 "00000010" // /* MW 1 */ + 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8237 "00000000" // /* MW 1 */ + 8238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8239 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.no_stack_arguments + 8240 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8241 "00000001" // /* MW 5 */ + 8242 "00000000" // /* MW 4 */ + 8243 "11111000" // /* MW 3 */ + 8244 "00010011" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8246 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8247 "00000001" // /* MW 3 */ + 8248 "00011010" // /* MW 2 */ + 8249 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8251 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8252 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8253 "11011010" // /* MW 3 */ + 8254 "00110110" // /* MW 2 */ + 8255 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8256 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8257 "01000001" // /* MW 5 */ + 8258 "10111011" // /* MW 4 */ + 8259 "00110111" // /* MW 3 */ + 8260 "01100000" // /* MW 2 */ + 8261 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8262 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8263 "00010010" // /* MW 9 */ + 8264 "00000001" // /* MW 8 */ + 8265 "00000100" // /* MW 7 */ + 8266 "00000000" // /* MW 6 */ + 8267 "01011011" // /* MW 5 */ + 8268 "00000001" // /* MW 4 */ + 8269 "11110000" // /* MW 3 */ + 8270 "00101100" // /* MW 2 */ + 8271 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.src_ref 8 "superkernels.cpp" 505 41 +.return_address + 8272 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8273 "01000001" // /* MW 5 */ + 8274 "10101111" // /* MW 4 */ + 8275 "00111101" // /* MW 3 */ + 8276 "00000110" // /* MW 2 */ + 8277 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 + 8278 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8279 "00000010" // /* MW 3 */ + 8280 "11100001" // /* MW 2 */ + 8281 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 6 +.src_ref 8 "superkernels.cpp" 505 76 + 8282 "10000100" // JNZ r16, #8352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8352 delay_slots=5 */ + 8283 "00000001" // /* MW 5 */ + 8284 "01000000" // /* MW 4 */ + 8285 "01010000" // /* MW 3 */ + 8286 "00010000" // /* MW 2 */ + 8287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 395 8 + 8298 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8299 "10000001" // /* MW 5 */ + 8300 "11011001" // /* MW 4 */ + 8301 "10100100" // /* MW 3 */ + 8302 "00011111" // /* MW 2 */ + 8303 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 8304 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8305 "01110110" // /* MW 3 */ + 8306 "11111111" // /* MW 2 */ + 8307 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8308 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8309 "00110110" // /* MW 3 */ + 8310 "11111110" // /* MW 2 */ + 8311 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8312 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8313 "01010110" // /* MW 3 */ + 8314 "11111110" // /* MW 2 */ + 8315 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8317 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 8318 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8319 "00110110" // /* MW 3 */ + 8320 "01000110" // /* MW 2 */ + 8321 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8325 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8327 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8329 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8330 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8331 "00010010" // /* MW 3 */ + 8332 "10100011" // /* MW 2 */ + 8333 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8334 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8335 "00110001" // /* MW 3 */ + 8336 "00000110" // /* MW 2 */ + 8337 "00001010" // /* MW 1 */ + 8338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8339 "00000000" // /* MW 1 */ + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ + 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8345 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8346 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8347 "00010000" // /* MW 5 */ + 8348 "10100110" // /* MW 4 */ + 8349 "11111000" // /* MW 3 */ + 8350 "00101100" // /* MW 2 */ + 8351 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816 + 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8353 "00000000" // /* MW 1 */ + 8354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8355 "00000000" // /* MW 1 */ + 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8357 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 7 first + 8358 "10111010" // LDA r16, [p7]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8359 "01111110" // /* MW 9 */ + 8360 "10100101" // /* MW 8 */ + 8361 "00000001" // /* MW 7 */ + 8362 "00000000" // /* MW 6 */ + 8363 "00010000" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11010000" // /* MW 3 */ + 8366 "11000010" // /* MW 2 */ + 8367 "11100000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832 +.src_ref 8 "superkernels.cpp" 508 19 +.src_ref 8 "superkernels.cpp" 522 6 +.src_ref 8 "superkernels.cpp" 558 19 + 8368 "00011000" // MOVX r14, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8369 "00001001" // /* MW 3 */ + 8370 "00011100" // /* MW 2 */ + 8371 "00010000" // /* MW 1 */ + 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8373 "00000000" // /* MW 1 */ + 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8375 "00000000" // /* MW 1 */ + 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8377 "00000000" // /* MW 1 */ + 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8379 "00000000" // /* MW 1 */ + 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8381 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 19 + 8382 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8383 "00001000" // /* MW 3 */ + 8384 "10100001" // /* MW 2 */ + 8385 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 25 + 8386 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8387 "00000001" // /* MW 5 */ + 8388 "01000000" // /* MW 4 */ + 8389 "10110000" // /* MW 3 */ + 8390 "00010000" // /* MW 2 */ + 8391 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8401 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 29 + 8402 "01000100" // MOVXM p2, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8403 "11000000" // /* MW 5 */ + 8404 "11001001" // /* MW 4 */ + 8405 "11000100" // /* MW 3 */ + 8406 "00000111" // /* MW 2 */ + 8407 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 29 +.src_ref 8 "superkernels.cpp" 508 65 + 8408 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8409 "00010000" // /* MW 9 */ + 8410 "00110000" // /* MW 8 */ + 8411 "00110010" // /* MW 7 */ + 8412 "11110001" // /* MW 6 */ + 8413 "00000001" // /* MW 5 */ + 8414 "00000000" // /* MW 4 */ + 8415 "11010000" // /* MW 3 */ + 8416 "11000010" // /* MW 2 */ + 8417 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 65 + 8418 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00111010" // /* MW 3 */ + 8420 "00000100" // /* MW 2 */ + 8421 "00000010" // /* MW 1 */ + 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8423 "00000000" // /* MW 1 */ + 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8425 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.no_stack_arguments + 8426 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8427 "00000001" // /* MW 5 */ + 8428 "00000000" // /* MW 4 */ + 8429 "11111000" // /* MW 3 */ + 8430 "00010011" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8432 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8433 "00000001" // /* MW 3 */ + 8434 "00011010" // /* MW 2 */ + 8435 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8437 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8438 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8439 "11011010" // /* MW 3 */ + 8440 "00110110" // /* MW 2 */ + 8441 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8442 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8443 "01000001" // /* MW 5 */ + 8444 "10111011" // /* MW 4 */ + 8445 "00110111" // /* MW 3 */ + 8446 "01100000" // /* MW 2 */ + 8447 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8448 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "01111000" // /* MW 13 */ + 8452 "10100101" // /* MW 12 */ + 8453 "00000001" // /* MW 11 */ + 8454 "10010000" // /* MW 10 */ + 8455 "00001000" // /* MW 9 */ + 8456 "00100000" // /* MW 8 */ + 8457 "01011011" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00100000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.src_ref 8 "superkernels.cpp" 508 41 +.return_address + 8464 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8465 "01000001" // /* MW 5 */ + 8466 "10101111" // /* MW 4 */ + 8467 "00111101" // /* MW 3 */ + 8468 "00000110" // /* MW 2 */ + 8469 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 + 8470 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8471 "00000010" // /* MW 3 */ + 8472 "11100001" // /* MW 2 */ + 8473 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 6 +.src_ref 8 "superkernels.cpp" 508 76 + 8474 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8475 "00000001" // /* MW 5 */ + 8476 "01000000" // /* MW 4 */ + 8477 "10110000" // /* MW 3 */ + 8478 "00010000" // /* MW 2 */ + 8479 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8489 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 395 8 + 8490 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8491 "10000001" // /* MW 5 */ + 8492 "11011001" // /* MW 4 */ + 8493 "10100100" // /* MW 3 */ + 8494 "00011111" // /* MW 2 */ + 8495 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 8496 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8497 "01110110" // /* MW 3 */ + 8498 "11111111" // /* MW 2 */ + 8499 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8500 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8501 "00110110" // /* MW 3 */ + 8502 "11111110" // /* MW 2 */ + 8503 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8504 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "01010110" // /* MW 3 */ + 8506 "11111110" // /* MW 2 */ + 8507 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 8508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 8510 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8511 "00110110" // /* MW 3 */ + 8512 "01000110" // /* MW 2 */ + 8513 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8522 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00010010" // /* MW 3 */ + 8524 "10100011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8526 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8527 "00110001" // /* MW 3 */ + 8528 "00000110" // /* MW 2 */ + 8529 "00001010" // /* MW 1 */ + 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8531 "00000000" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ + 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8537 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8538 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8539 "00010000" // /* MW 5 */ + 8540 "10100110" // /* MW 4 */ + 8541 "11111000" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008 + 8544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8545 "00000000" // /* MW 1 */ + 8546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8547 "00000000" // /* MW 1 */ + 8548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8549 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 7 first +.src_ref 8 "superkernels.cpp" 511 29 + 8550 "10111010" // LDA r16, [p7]; MOVXM p7, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8551 "00010000" // /* MW 9 */ + 8552 "01110010" // /* MW 8 */ + 8553 "10110010" // /* MW 7 */ + 8554 "11110011" // /* MW 6 */ + 8555 "00000001" // /* MW 5 */ + 8556 "00000000" // /* MW 4 */ + 8557 "11010000" // /* MW 3 */ + 8558 "11000010" // /* MW 2 */ + 8559 "11100000" // /* MW 1 */ + 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8561 "00000000" // /* MW 1 */ + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ + 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8567 "00000000" // /* MW 1 */ + 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8569 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 19 + 8570 "00011000" // MOVX r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8571 "00010001" // /* MW 3 */ + 8572 "00100100" // /* MW 2 */ + 8573 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 19 + 8574 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8575 "00001000" // /* MW 3 */ + 8576 "10100001" // /* MW 2 */ + 8577 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 25 + 8578 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */ + 8579 "00000001" // /* MW 5 */ + 8580 "01000000" // /* MW 4 */ + 8581 "00100000" // /* MW 3 */ + 8582 "00010001" // /* MW 2 */ + 8583 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 66 +.delay_slot + 8584 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8585 "11000000" // /* MW 5 */ + 8586 "11001000" // /* MW 4 */ + 8587 "11000100" // /* MW 3 */ + 8588 "00000111" // /* MW 2 */ + 8589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8591 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8593 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8595 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8596 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8597 "00000001" // /* MW 3 */ + 8598 "00100010" // /* MW 2 */ + 8599 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 29 +.src_ref 8 "superkernels.cpp" 511 42 + 8600 "00101100" // LDA r16, [p7]; MOVX r13, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8601 "00000010" // /* MW 5 */ + 8602 "00110100" // /* MW 4 */ + 8603 "11010000" // /* MW 3 */ + 8604 "11000010" // /* MW 2 */ + 8605 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 66 + 8606 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8607 "00111010" // /* MW 3 */ + 8608 "00000100" // /* MW 2 */ + 8609 "00000010" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.no_stack_arguments + 8614 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8615 "00000001" // /* MW 5 */ + 8616 "00000000" // /* MW 4 */ + 8617 "11111000" // /* MW 3 */ + 8618 "00010011" // /* MW 2 */ + 8619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8621 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8623 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8624 "10011000" // LT r27, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8625 "00011010" // /* MW 3 */ + 8626 "00110111" // /* MW 2 */ + 8627 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8628 "11100100" // SUB r17, r17, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8629 "01000001" // /* MW 5 */ + 8630 "10111011" // /* MW 4 */ + 8631 "00110111" // /* MW 3 */ + 8632 "01100000" // /* MW 2 */ + 8633 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8634 "00101100" // NOPA; SEL.EQZ r0, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8635 "00100100" // /* MW 5 */ + 8636 "00000010" // /* MW 4 */ + 8637 "11111000" // /* MW 3 */ + 8638 "00101100" // /* MW 2 */ + 8639 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.src_ref 8 "superkernels.cpp" 511 42 +.return_address + 8640 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8641 "01000001" // /* MW 5 */ + 8642 "10101111" // /* MW 4 */ + 8643 "00111101" // /* MW 3 */ + 8644 "00000110" // /* MW 2 */ + 8645 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 + 8646 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00000010" // /* MW 3 */ + 8648 "11100001" // /* MW 2 */ + 8649 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 6 +.src_ref 8 "superkernels.cpp" 511 77 + 8650 "10000100" // JNZ r16, #8736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8736 delay_slots=5 */ + 8651 "00000001" // /* MW 5 */ + 8652 "01000000" // /* MW 4 */ + 8653 "00010000" // /* MW 3 */ + 8654 "00010001" // /* MW 2 */ + 8655 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 +.src_ref 1 "io_buffer_main.h" 218 49 first +.src_ref 1 "io_buffer_main.h" 395 8 + 8666 "10111010" // LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8667 "01011000" // /* MW 9 */ + 8668 "00000001" // /* MW 8 */ + 8669 "00001000" // /* MW 7 */ + 8670 "11101010" // /* MW 6 */ + 8671 "00010111" // /* MW 5 */ + 8672 "00111111" // /* MW 4 */ + 8673 "11010000" // /* MW 3 */ + 8674 "11101110" // /* MW 2 */ + 8675 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8676 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8677 "01010110" // /* MW 3 */ + 8678 "11111110" // /* MW 2 */ + 8679 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8680 "10011000" // LDA r19, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8681 "01110110" // /* MW 3 */ + 8682 "11111110" // /* MW 2 */ + 8683 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 8684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8685 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 8686 "10011000" // LDA r18, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8687 "01010110" // /* MW 3 */ + 8688 "01000110" // /* MW 2 */ + 8689 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8691 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8695 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8698 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8699 "00100010" // /* MW 3 */ + 8700 "11100101" // /* MW 2 */ + 8701 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8702 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "01010001" // /* MW 3 */ + 8704 "00000110" // /* MW 2 */ + 8705 "00001110" // /* MW 1 */ + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ + 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8709 "00000000" // /* MW 1 */ + 8710 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8711 "00000000" // /* MW 5 */ + 8712 "00000000" // /* MW 4 */ + 8713 "00101000" // /* MW 3 */ + 8714 "00010001" // /* MW 2 */ + 8715 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first +.delay_slot + 8718 "00011000" // ACQ r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8719 "00011000" // /* MW 3 */ + 8720 "10010011" // /* MW 2 */ + 8721 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8727 "01111110" // /* MW 9 */ + 8728 "10100101" // /* MW 8 */ + 8729 "00000001" // /* MW 7 */ + 8730 "00000000" // /* MW 6 */ + 8731 "00010000" // /* MW 5 */ + 8732 "00000000" // /* MW 4 */ + 8733 "11110000" // /* MW 3 */ + 8734 "00101100" // /* MW 2 */ + 8735 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200 + 8736 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8737 "00000000" // /* MW 5 */ + 8738 "00000000" // /* MW 4 */ + 8739 "00101000" // /* MW 3 */ + 8740 "00010001" // /* MW 2 */ + 8741 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 +.delay_slot + 8742 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8743 "00000101" // /* MW 3 */ + 8744 "00100000" // /* MW 2 */ + 8745 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8753 "00000000" // /* MW 15 */ + 8754 "00000000" // /* MW 14 */ + 8755 "01111000" // /* MW 13 */ + 8756 "10100101" // /* MW 12 */ + 8757 "00000001" // /* MW 11 */ + 8758 "00000000" // /* MW 10 */ + 8759 "00000000" // /* MW 9 */ + 8760 "00000000" // /* MW 8 */ + 8761 "01011011" // /* MW 7 */ + 8762 "00000001" // /* MW 6 */ + 8763 "00100000" // /* MW 5 */ + 8764 "00000000" // /* MW 4 */ + 8765 "11110000" // /* MW 3 */ + 8766 "00101100" // /* MW 2 */ + 8767 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 + 8768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "00000000" // /* MW 15 */ + 8770 "00000000" // /* MW 14 */ + 8771 "01111000" // /* MW 13 */ + 8772 "10100101" // /* MW 12 */ + 8773 "00000001" // /* MW 11 */ + 8774 "00101000" // /* MW 10 */ + 8775 "00000000" // /* MW 9 */ + 8776 "00000001" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 +.src_ref 8 "superkernels.cpp" 516 47 +.src_ref 1 "io_buffer_main.h" 125 25 + 8784 "10111010" // LDA p7, [sp, #-32]; MOVXM p6, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8785 "00010000" // /* MW 9 */ + 8786 "01100110" // /* MW 8 */ + 8787 "00110010" // /* MW 7 */ + 8788 "11110011" // /* MW 6 */ + 8789 "00000001" // /* MW 5 */ + 8790 "00000000" // /* MW 4 */ + 8791 "00100000" // /* MW 3 */ + 8792 "01110011" // /* MW 2 */ + 8793 "11111100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 47 first +.src_ref 8 "superkernels.cpp" 522 6 + 8794 "10111010" // LDA r21, [p6]; MOVXM p2, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8795 "00010000" // /* MW 9 */ + 8796 "01101000" // /* MW 8 */ + 8797 "00110010" // /* MW 7 */ + 8798 "11110001" // /* MW 6 */ + 8799 "00000001" // /* MW 5 */ + 8800 "00000000" // /* MW 4 */ + 8801 "11010000" // /* MW 3 */ + 8802 "11010110" // /* MW 2 */ + 8803 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 +.src_ref 8 "superkernels.cpp" 522 6 first + 8804 "10111010" // LDA r17, [p2]; MOVXM p6, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8805 "00010000" // /* MW 9 */ + 8806 "01100000" // /* MW 8 */ + 8807 "00110010" // /* MW 7 */ + 8808 "11110011" // /* MW 6 */ + 8809 "00000001" // /* MW 5 */ + 8810 "00000000" // /* MW 4 */ + 8811 "11010000" // /* MW 3 */ + 8812 "11000110" // /* MW 2 */ + 8813 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 first + 8814 "10011000" // LDA r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "10010110" // /* MW 3 */ + 8816 "00000110" // /* MW 2 */ + 8817 "00000110" // /* MW 1 */ + 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8819 "00000000" // /* MW 1 */ + 8820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8821 "00000000" // /* MW 1 */ + 8822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8823 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8824 "10011000" // LDA r19, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8825 "01110110" // /* MW 3 */ + 8826 "00000110" // /* MW 2 */ + 8827 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 first + 8828 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8829 "00001101" // /* MW 3 */ + 8830 "01101011" // /* MW 2 */ + 8831 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 first + 8832 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8833 "00000111" // /* MW 3 */ + 8834 "01100001" // /* MW 2 */ + 8835 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8836 "10000100" // JNZ r16, #9232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9232 delay_slots=5 */ + 8837 "00000001" // /* MW 5 */ + 8838 "01000000" // /* MW 4 */ + 8839 "00001000" // /* MW 3 */ + 8840 "00010010" // /* MW 2 */ + 8841 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 first +.delay_slot + 8842 "00011000" // ADD r20, r20, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8843 "00000111" // /* MW 3 */ + 8844 "00101000" // /* MW 2 */ + 8845 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 +.delay_slot + 8846 "10011000" // ST r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8847 "10010001" // /* MW 3 */ + 8848 "00000110" // /* MW 2 */ + 8849 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8851 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 first +.delay_slot + 8852 "01011000" // ADD.NC p0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8853 "11010101" // /* MW 3 */ + 8854 "01101001" // /* MW 2 */ + 8855 "00011000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 12 +.src_ref 8 "superkernels.cpp" 522 6 +.delay_slot + 8856 "01011100" // ST p0, [sp, #-68]; MOVX r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8857 "00100010" // /* MW 5 */ + 8858 "01001000" // /* MW 4 */ + 8859 "10110000" // /* MW 3 */ + 8860 "10000011" // /* MW 2 */ + 8861 "11110111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 first + 8862 "10011000" // EQ r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8863 "00100111" // /* MW 3 */ + 8864 "01100001" // /* MW 2 */ + 8865 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8866 "10000100" // JNZ r16, #9088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9088 delay_slots=5 */ + 8867 "00000001" // /* MW 5 */ + 8868 "01000000" // /* MW 4 */ + 8869 "11000000" // /* MW 3 */ + 8870 "00010001" // /* MW 2 */ + 8871 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8873 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8881 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8882 "10011000" // NE r16, r17, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8883 "11101000" // /* MW 3 */ + 8884 "01100000" // /* MW 2 */ + 8885 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8886 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */ + 8887 "00000001" // /* MW 5 */ + 8888 "01000000" // /* MW 4 */ + 8889 "10101000" // /* MW 3 */ + 8890 "00010001" // /* MW 2 */ + 8891 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 26 +.delay_slot + 8892 "01000100" // MOVXM p6, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8893 "11000000" // /* MW 5 */ + 8894 "11001001" // /* MW 4 */ + 8895 "11001100" // /* MW 3 */ + 8896 "00000111" // /* MW 2 */ + 8897 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8903 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8905 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 26 first +.src_ref 8 "superkernels.cpp" 523 61 + 8906 "10111010" // LDA r18, [p6]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8907 "00010000" // /* MW 9 */ + 8908 "00100100" // /* MW 8 */ + 8909 "00110010" // /* MW 7 */ + 8910 "11110011" // /* MW 6 */ + 8911 "00000001" // /* MW 5 */ + 8912 "00000000" // /* MW 4 */ + 8913 "11010000" // /* MW 3 */ + 8914 "11001010" // /* MW 2 */ + 8915 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 61 +.src_ref 8 "superkernels.cpp" 524 44 + 8916 "10111010" // LDA r16, [p6]; MOVXM p6, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8917 "00010000" // /* MW 9 */ + 8918 "01101010" // /* MW 8 */ + 8919 "00110010" // /* MW 7 */ + 8920 "11110011" // /* MW 6 */ + 8921 "00000001" // /* MW 5 */ + 8922 "00000000" // /* MW 4 */ + 8923 "11010000" // /* MW 3 */ + 8924 "11000010" // /* MW 2 */ + 8925 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 +.src_ref 8 "superkernels.cpp" 524 44 first + 8926 "00101100" // LDA r17, [p6]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8927 "00000010" // /* MW 5 */ + 8928 "01100000" // /* MW 4 */ + 8929 "11010000" // /* MW 3 */ + 8930 "11000110" // /* MW 2 */ + 8931 "11000000" // /* MW 1 */ + 8932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8933 "00000000" // /* MW 1 */ + 8934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8935 "00000000" // /* MW 1 */ + 8936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8937 "00000000" // /* MW 1 */ + 8938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8939 "00000000" // /* MW 1 */ + 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8941 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 37 first + 8942 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8943 "00001111" // /* MW 3 */ + 8944 "10100101" // /* MW 2 */ + 8945 "00010100" // /* MW 1 */ + 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8947 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 first +.src_ref 8 "superkernels.cpp" 524 30 first + 8948 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8949 "10000010" // /* MW 5 */ + 8950 "00110010" // /* MW 4 */ + 8951 "00111010" // /* MW 3 */ + 8952 "11100100" // /* MW 2 */ + 8953 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8954 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8955 "00011100" // /* MW 3 */ + 8956 "00110111" // /* MW 2 */ + 8957 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8958 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8959 "00000010" // /* MW 3 */ + 8960 "11100111" // /* MW 2 */ + 8961 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 42 + 8962 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8963 "00011100" // /* MW 3 */ + 8964 "10110111" // /* MW 2 */ + 8965 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8966 "00011000" // SEL.EQZ r17, r24, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8967 "00110010" // /* MW 3 */ + 8968 "00100011" // /* MW 2 */ + 8969 "00010110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 65 first + 8970 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8971 "00010001" // /* MW 3 */ + 8972 "00100101" // /* MW 2 */ + 8973 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 526 36 first + 8974 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8975 "00001000" // /* MW 3 */ + 8976 "01100001" // /* MW 2 */ + 8977 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 8978 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */ + 8979 "00000001" // /* MW 5 */ + 8980 "01000000" // /* MW 4 */ + 8981 "01000000" // /* MW 3 */ + 8982 "00010010" // /* MW 2 */ + 8983 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 32 +.delay_slot + 8984 "01000100" // MOVXM p6, #509200 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8985 "00100000" // /* MW 5 */ + 8986 "11001010" // /* MW 4 */ + 8987 "11001100" // /* MW 3 */ + 8988 "00000111" // /* MW 2 */ + 8989 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 32 first +.delay_slot + 8990 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8991 "01010001" // /* MW 3 */ + 8992 "00000110" // /* MW 2 */ + 8993 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8999 "00000000" // /* MW 1 */ + 9000 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */ + 9001 "00000000" // /* MW 5 */ + 9002 "00000000" // /* MW 4 */ + 9003 "11111000" // /* MW 3 */ + 9004 "00010001" // /* MW 2 */ + 9005 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.src_ref 8 "superkernels.cpp" 558 19 +.delay_slot + 9006 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9007 "00010000" // /* MW 9 */ + 9008 "01101000" // /* MW 8 */ + 9009 "10110010" // /* MW 7 */ + 9010 "11110011" // /* MW 6 */ + 9011 "00000001" // /* MW 5 */ + 9012 "00000000" // /* MW 4 */ + 9013 "00000000" // /* MW 3 */ + 9014 "01001110" // /* MW 2 */ + 9015 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9016 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9017 "00010000" // /* MW 9 */ + 9018 "00100000" // /* MW 8 */ + 9019 "00110010" // /* MW 7 */ + 9020 "11110001" // /* MW 6 */ + 9021 "00000001" // /* MW 5 */ + 9022 "00000000" // /* MW 4 */ + 9023 "00000000" // /* MW 3 */ + 9024 "00101111" // /* MW 2 */ + 9025 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9026 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9027 "00000001" // /* MW 3 */ + 9028 "00011010" // /* MW 2 */ + 9029 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9032 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9033 "00011100" // /* MW 7 */ + 9034 "00000000" // /* MW 6 */ + 9035 "00000000" // /* MW 5 */ + 9036 "00000100" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504 + 9040 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */ + 9041 "00000000" // /* MW 5 */ + 9042 "00000000" // /* MW 4 */ + 9043 "11111000" // /* MW 3 */ + 9044 "00010001" // /* MW 2 */ + 9045 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.src_ref 8 "superkernels.cpp" 558 19 +.delay_slot + 9046 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9047 "00010000" // /* MW 9 */ + 9048 "01101000" // /* MW 8 */ + 9049 "10110010" // /* MW 7 */ + 9050 "11110011" // /* MW 6 */ + 9051 "00000001" // /* MW 5 */ + 9052 "00000000" // /* MW 4 */ + 9053 "00000000" // /* MW 3 */ + 9054 "01001110" // /* MW 2 */ + 9055 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9056 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9057 "00010000" // /* MW 9 */ + 9058 "00100000" // /* MW 8 */ + 9059 "00110010" // /* MW 7 */ + 9060 "11110001" // /* MW 6 */ + 9061 "00000001" // /* MW 5 */ + 9062 "00000000" // /* MW 4 */ + 9063 "00000000" // /* MW 3 */ + 9064 "00101111" // /* MW 2 */ + 9065 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9066 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9067 "00000001" // /* MW 3 */ + 9068 "00011010" // /* MW 2 */ + 9069 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9073 "00000000" // /* MW 15 */ + 9074 "00000000" // /* MW 14 */ + 9075 "01111000" // /* MW 13 */ + 9076 "10100101" // /* MW 12 */ + 9077 "00000001" // /* MW 11 */ + 9078 "00000000" // /* MW 10 */ + 9079 "00000000" // /* MW 9 */ + 9080 "00000000" // /* MW 8 */ + 9081 "01011011" // /* MW 7 */ + 9082 "00000001" // /* MW 6 */ + 9083 "00100000" // /* MW 5 */ + 9084 "00000000" // /* MW 4 */ + 9085 "11110000" // /* MW 3 */ + 9086 "00101100" // /* MW 2 */ + 9087 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552 +.src_ref 8 "superkernels.cpp" 532 27 +.src_ref 8 "superkernels.cpp" 533 31 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 + 9088 "10111010" // MOVA r13, #0; MOVXM p6, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9089 "00010000" // /* MW 9 */ + 9090 "01110010" // /* MW 8 */ + 9091 "00110010" // /* MW 7 */ + 9092 "11110011" // /* MW 6 */ + 9093 "00000001" // /* MW 5 */ + 9094 "00000000" // /* MW 4 */ + 9095 "00000000" // /* MW 3 */ + 9096 "00001101" // /* MW 2 */ + 9097 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 27 first +.src_ref 8 "superkernels.cpp" 532 63 +.src_ref 8 "superkernels.cpp" 552 2 + 9098 "10111010" // LDA r18, [p6]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9099 "00010000" // /* MW 9 */ + 9100 "00100000" // /* MW 8 */ + 9101 "00110010" // /* MW 7 */ + 9102 "11110001" // /* MW 6 */ + 9103 "00000001" // /* MW 5 */ + 9104 "00000000" // /* MW 4 */ + 9105 "11010000" // /* MW 3 */ + 9106 "11001010" // /* MW 2 */ + 9107 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 63 +.src_ref 8 "superkernels.cpp" 533 46 + 9108 "10111010" // LDA r16, [p2]; MOVXM p6, #509144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9109 "00010000" // /* MW 9 */ + 9110 "01101100" // /* MW 8 */ + 9111 "00110010" // /* MW 7 */ + 9112 "11110011" // /* MW 6 */ + 9113 "00000001" // /* MW 5 */ + 9114 "00000000" // /* MW 4 */ + 9115 "11010000" // /* MW 3 */ + 9116 "11000010" // /* MW 2 */ + 9117 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 46 first +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 + 9118 "00101100" // LDA r17, [p6]; MOVX r15, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9119 "00001010" // /* MW 5 */ + 9120 "00111100" // /* MW 4 */ + 9121 "11010000" // /* MW 3 */ + 9122 "11000110" // /* MW 2 */ + 9123 "11000000" // /* MW 1 */ + 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9125 "00000000" // /* MW 1 */ + 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9127 "00000000" // /* MW 1 */ + 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9129 "00000000" // /* MW 1 */ + 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9131 "00000000" // /* MW 1 */ + 9132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9133 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 39 first + 9134 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9135 "00001111" // /* MW 3 */ + 9136 "10100101" // /* MW 2 */ + 9137 "00010100" // /* MW 1 */ + 9138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9139 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 first +.src_ref 8 "superkernels.cpp" 533 31 first + 9140 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9141 "10000010" // /* MW 5 */ + 9142 "00110010" // /* MW 4 */ + 9143 "00111010" // /* MW 3 */ + 9144 "11100100" // /* MW 2 */ + 9145 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9146 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9147 "00011100" // /* MW 3 */ + 9148 "00110111" // /* MW 2 */ + 9149 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9150 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9151 "00000010" // /* MW 3 */ + 9152 "11100111" // /* MW 2 */ + 9153 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 44 + 9154 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "00011100" // /* MW 3 */ + 9156 "10110111" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9158 "00011000" // SEL.EQZ r17, r13, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "00110010" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 67 first + 9162 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00010001" // /* MW 3 */ + 9164 "00100101" // /* MW 2 */ + 9165 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 535 37 first + 9166 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9167 "00001000" // /* MW 3 */ + 9168 "01100001" // /* MW 2 */ + 9169 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 9170 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */ + 9171 "00000001" // /* MW 5 */ + 9172 "01000000" // /* MW 4 */ + 9173 "01000000" // /* MW 3 */ + 9174 "00010010" // /* MW 2 */ + 9175 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 33 +.delay_slot + 9176 "01000100" // MOVXM p6, #509208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9177 "00110000" // /* MW 5 */ + 9178 "11001010" // /* MW 4 */ + 9179 "11001100" // /* MW 3 */ + 9180 "00000111" // /* MW 2 */ + 9181 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 33 first +.delay_slot + 9182 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "01010001" // /* MW 3 */ + 9184 "00000110" // /* MW 2 */ + 9185 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9189 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.delay_slot + 9190 "10111010" // NOPA; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9191 "00010000" // /* MW 9 */ + 9192 "01101000" // /* MW 8 */ + 9193 "10110010" // /* MW 7 */ + 9194 "11110011" // /* MW 6 */ + 9195 "00000001" // /* MW 5 */ + 9196 "00000000" // /* MW 4 */ + 9197 "11110000" // /* MW 3 */ + 9198 "00101100" // /* MW 2 */ + 9199 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 9200 "00111010" // MOVS p6, r12; J #9408 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */ + 9201 "00100001" // /* MW 9 */ + 9202 "00000000" // /* MW 8 */ + 9203 "00000000" // /* MW 7 */ + 9204 "10011000" // /* MW 6 */ + 9205 "00000100" // /* MW 5 */ + 9206 "00000000" // /* MW 4 */ + 9207 "01100000" // /* MW 3 */ + 9208 "10000001" // /* MW 2 */ + 9209 "11010001" // /* MW 1 */ +.delay_slot + 9210 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9211 "10010001" // /* MW 3 */ + 9212 "11100101" // /* MW 2 */ + 9213 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9217 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9220 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9221 "10000001" // /* MW 11 */ + 9222 "10101101" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "00000000" // /* MW 8 */ + 9225 "00000000" // /* MW 7 */ + 9226 "00000000" // /* MW 6 */ + 9227 "00100000" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696 +.src_ref 8 "superkernels.cpp" 541 26 + 9232 "01000100" // MOVXM p6, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10001000" // /* MW 5 */ + 9234 "11001001" // /* MW 4 */ + 9235 "11001100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 26 first +.src_ref 8 "superkernels.cpp" 541 61 + 9238 "10111010" // LDA r19, [p6]; MOVXM p6, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "00100010" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110011" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11001110" // /* MW 2 */ + 9247 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 61 +.src_ref 8 "superkernels.cpp" 542 44 + 9248 "10111010" // LDA r16, [p6]; MOVXM p6, #509148 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "01101110" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110011" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000010" // /* MW 2 */ + 9257 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 44 first + 9258 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9259 "01010110" // /* MW 3 */ + 9260 "00000110" // /* MW 2 */ + 9261 "00000110" // /* MW 1 */ + 9262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9263 "00000000" // /* MW 1 */ + 9264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9265 "00000000" // /* MW 1 */ + 9266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9267 "00000000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 37 first + 9272 "10011000" // MUL r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9273 "00001111" // /* MW 3 */ + 9274 "11100111" // /* MW 2 */ + 9275 "00010100" // /* MW 1 */ + 9276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 first +.src_ref 8 "superkernels.cpp" 542 30 first + 9278 "10100100" // SUB r20, r18, r19; ADD.NC r21, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "10000010" // /* MW 5 */ + 9280 "10110011" // /* MW 4 */ + 9281 "00111010" // /* MW 3 */ + 9282 "00100110" // /* MW 2 */ + 9283 "10010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9284 "10011000" // LTU r27, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9285 "00101100" // /* MW 3 */ + 9286 "01110111" // /* MW 2 */ + 9287 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9288 "00011000" // SEL.EQZ r20, r20, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9289 "00000010" // /* MW 3 */ + 9290 "00101001" // /* MW 2 */ + 9291 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 +.src_ref 8 "superkernels.cpp" 542 42 + 9292 "01100100" // LTU r27, r19, r18; MOV r17, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9293 "00000001" // /* MW 5 */ + 9294 "10100000" // /* MW 4 */ + 9295 "10011000" // /* MW 3 */ + 9296 "11100101" // /* MW 2 */ + 9297 "10011110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9298 "00011000" // SEL.EQZ r17, r17, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "01000010" // /* MW 3 */ + 9300 "01100011" // /* MW 2 */ + 9301 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 69 first + 9302 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9303 "00010001" // /* MW 3 */ + 9304 "00100101" // /* MW 2 */ + 9305 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 544 38 first + 9306 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9307 "00000111" // /* MW 3 */ + 9308 "01100001" // /* MW 2 */ + 9309 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 9310 "10000100" // JNZ r16, #10176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10176 delay_slots=5 */ + 9311 "00000001" // /* MW 5 */ + 9312 "01000000" // /* MW 4 */ + 9313 "11100000" // /* MW 3 */ + 9314 "00010011" // /* MW 2 */ + 9315 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 34 +.delay_slot + 9316 "01000100" // MOVXM p6, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9317 "01000000" // /* MW 5 */ + 9318 "11001010" // /* MW 4 */ + 9319 "11001100" // /* MW 3 */ + 9320 "00000111" // /* MW 2 */ + 9321 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 34 first +.delay_slot + 9322 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9323 "01010001" // /* MW 3 */ + 9324 "00000110" // /* MW 2 */ + 9325 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9327 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 9330 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9331 "00011100" // /* MW 13 */ + 9332 "00000000" // /* MW 12 */ + 9333 "00000000" // /* MW 11 */ + 9334 "01010111" // /* MW 10 */ + 9335 "00011010" // /* MW 9 */ + 9336 "01000000" // /* MW 8 */ + 9337 "00000000" // /* MW 7 */ + 9338 "00000000" // /* MW 6 */ + 9339 "10110110" // /* MW 5 */ + 9340 "00000010" // /* MW 4 */ + 9341 "11110000" // /* MW 3 */ + 9342 "00101100" // /* MW 2 */ + 9343 "00000000" // /* MW 1 */ +.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 8 "superkernels.cpp" 558 19 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 9344 "01110110" // LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9345 "01011000" // /* MW 11 */ + 9346 "00000001" // /* MW 10 */ + 9347 "11101000" // /* MW 9 */ + 9348 "01001001" // /* MW 8 */ + 9349 "11100000" // /* MW 7 */ + 9350 "00000000" // /* MW 6 */ + 9351 "00001011" // /* MW 5 */ + 9352 "10001100" // /* MW 4 */ + 9353 "00100110" // /* MW 3 */ + 9354 "10000011" // /* MW 2 */ + 9355 "11110111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9356 "00011000" // LDA p1, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9357 "10011001" // /* MW 3 */ + 9358 "10111100" // /* MW 2 */ + 9359 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9360 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9361 "10010001" // /* MW 3 */ + 9362 "11100101" // /* MW 2 */ + 9363 "00000111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 287 11 first +.aggressive_scheduled_block_id 7 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9364 "00000100" // JL #4176 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4176 delay_slots=5 */ + 9365 "00000001" // /* MW 5 */ + 9366 "00000000" // /* MW 4 */ + 9367 "00101000" // /* MW 3 */ + 9368 "00001000" // /* MW 2 */ + 9369 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9370 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9371 "11000000" // /* MW 3 */ + 9372 "01100000" // /* MW 2 */ + 9373 "00011111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9374 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9375 "00000001" // /* MW 3 */ + 9376 "00011010" // /* MW 2 */ + 9377 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 287 11 +.delay_slot + 9382 "10111010" // NOPA; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9383 "00010000" // /* MW 9 */ + 9384 "10000000" // /* MW 8 */ + 9385 "00110010" // /* MW 7 */ + 9386 "11110001" // /* MW 6 */ + 9387 "00000001" // /* MW 5 */ + 9388 "00000000" // /* MW 4 */ + 9389 "11110000" // /* MW 3 */ + 9390 "00101100" // /* MW 2 */ + 9391 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 552 2 +.return_address + 9392 "00111010" // MOVS p0, p7; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9393 "00010001" // /* MW 9 */ + 9394 "00100000" // /* MW 8 */ + 9395 "00110010" // /* MW 7 */ + 9396 "11110001" // /* MW 6 */ + 9397 "00000001" // /* MW 5 */ + 9398 "00000000" // /* MW 4 */ + 9399 "01100000" // /* MW 3 */ + 9400 "10010001" // /* MW 2 */ + 9401 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 + 9402 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9403 "10100000" // /* MW 5 */ + 9404 "11001001" // /* MW 4 */ + 9405 "11001110" // /* MW 3 */ + 9406 "00000111" // /* MW 2 */ + 9407 "00000000" // /* MW 1 */ +.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9408 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9409 "10011110" // /* MW 3 */ + 9410 "01011100" // /* MW 2 */ + 9411 "00000110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 first +.no_stack_arguments + 9412 "00000100" // JL #4848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4848 delay_slots=5 */ + 9413 "00000001" // /* MW 5 */ + 9414 "00000000" // /* MW 4 */ + 9415 "01111000" // /* MW 3 */ + 9416 "00001001" // /* MW 2 */ + 9417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9427 "00011100" // /* MW 13 */ + 9428 "00000000" // /* MW 12 */ + 9429 "00000000" // /* MW 11 */ + 9430 "01010111" // /* MW 10 */ + 9431 "00011010" // /* MW 9 */ + 9432 "01000000" // /* MW 8 */ + 9433 "00000000" // /* MW 7 */ + 9434 "00000000" // /* MW 6 */ + 9435 "10110110" // /* MW 5 */ + 9436 "00000010" // /* MW 4 */ + 9437 "11110000" // /* MW 3 */ + 9438 "00101100" // /* MW 2 */ + 9439 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 first +.return_address + 9440 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9441 "00010110" // /* MW 3 */ + 9442 "00000110" // /* MW 2 */ + 9443 "00000111" // /* MW 1 */ + 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9445 "00000000" // /* MW 1 */ + 9446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9447 "00000000" // /* MW 1 */ + 9448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9449 "00000000" // /* MW 1 */ + 9450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9451 "00000000" // /* MW 1 */ + 9452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9453 "00000000" // /* MW 1 */ + 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 19 + 9456 "10011000" // NE r17, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9457 "00001000" // /* MW 3 */ + 9458 "11100011" // /* MW 2 */ + 9459 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 25 + 9460 "10000100" // JNZ r17, #9664 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9664 delay_slots=5 */ + 9461 "00000001" // /* MW 5 */ + 9462 "01000000" // /* MW 4 */ + 9463 "11100000" // /* MW 3 */ + 9464 "00010010" // /* MW 2 */ + 9465 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 555 15 + 9476 "01000100" // MOVXM p7, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9477 "10001000" // /* MW 5 */ + 9478 "11001001" // /* MW 4 */ + 9479 "11001110" // /* MW 3 */ + 9480 "00000111" // /* MW 2 */ + 9481 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 554 67 + 9482 "10111010" // LDA r16, [p7]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9483 "00010000" // /* MW 9 */ + 9484 "00110000" // /* MW 8 */ + 9485 "00110010" // /* MW 7 */ + 9486 "11110001" // /* MW 6 */ + 9487 "00000001" // /* MW 5 */ + 9488 "00000000" // /* MW 4 */ + 9489 "11010000" // /* MW 3 */ + 9490 "11000010" // /* MW 2 */ + 9491 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 67 + 9492 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9493 "00111010" // /* MW 3 */ + 9494 "00000100" // /* MW 2 */ + 9495 "00000010" // /* MW 1 */ + 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9497 "00000000" // /* MW 1 */ + 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9499 "00000000" // /* MW 1 */ + 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9501 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.no_stack_arguments + 9502 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9503 "00000001" // /* MW 5 */ + 9504 "00000000" // /* MW 4 */ + 9505 "11111000" // /* MW 3 */ + 9506 "00010011" // /* MW 2 */ + 9507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.delay_slot + 9510 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00000111" // /* MW 3 */ + 9512 "00100000" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9514 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9515 "10110101" // /* MW 5 */ + 9516 "01101101" // /* MW 4 */ + 9517 "00111000" // /* MW 3 */ + 9518 "11000010" // /* MW 2 */ + 9519 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9520 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9521 "01000001" // /* MW 5 */ + 9522 "10111011" // /* MW 4 */ + 9523 "00110111" // /* MW 3 */ + 9524 "01100000" // /* MW 2 */ + 9525 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9526 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9527 "00010010" // /* MW 9 */ + 9528 "00000001" // /* MW 8 */ + 9529 "00000100" // /* MW 7 */ + 9530 "00000000" // /* MW 6 */ + 9531 "01011011" // /* MW 5 */ + 9532 "00000001" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 9536 "10111010" // LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9537 "01111000" // /* MW 9 */ + 9538 "11010000" // /* MW 8 */ + 9539 "01101011" // /* MW 7 */ + 9540 "10001111" // /* MW 6 */ + 9541 "00000001" // /* MW 5 */ + 9542 "00011011" // /* MW 4 */ + 9543 "00100000" // /* MW 3 */ + 9544 "10100011" // /* MW 2 */ + 9545 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 + 9546 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9547 "00000010" // /* MW 3 */ + 9548 "11100001" // /* MW 2 */ + 9549 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 6 +.src_ref 8 "superkernels.cpp" 554 78 + 9550 "10000100" // JNZ r16, #9632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9632 delay_slots=5 */ + 9551 "00000001" // /* MW 5 */ + 9552 "01000000" // /* MW 4 */ + 9553 "11010000" // /* MW 3 */ + 9554 "00010010" // /* MW 2 */ + 9555 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9556 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00000101" // /* MW 3 */ + 9558 "00011110" // /* MW 2 */ + 9559 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9567 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 555 15 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9568 "00001100" // LDA r16, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9569 "01100011" // /* MW 5 */ + 9570 "00001011" // /* MW 4 */ + 9571 "11011110" // /* MW 3 */ + 9572 "11000010" // /* MW 2 */ + 9573 "01001010" // /* MW 1 */ + 9574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9575 "00000000" // /* MW 1 */ + 9576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9577 "00000000" // /* MW 1 */ + 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9579 "00000000" // /* MW 1 */ + 9580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9581 "00000000" // /* MW 1 */ + 9582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9583 "00000000" // /* MW 1 */ + 9584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9585 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9586 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9587 "11111000" // /* MW 3 */ + 9588 "00010000" // /* MW 2 */ + 9589 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 7 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9590 "10111010" // LDA r16, [p6, #-8]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9591 "00010000" // /* MW 9 */ + 9592 "01101000" // /* MW 8 */ + 9593 "10110010" // /* MW 7 */ + 9594 "11110011" // /* MW 6 */ + 9595 "00000001" // /* MW 5 */ + 9596 "00000000" // /* MW 4 */ + 9597 "11010000" // /* MW 3 */ + 9598 "11000010" // /* MW 2 */ + 9599 "11011100" // /* MW 1 */ + 9600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9601 "00000000" // /* MW 1 */ + 9602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9603 "00000000" // /* MW 1 */ + 9604 "10000100" // J #9648 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9648 delay_slots=5 */ + 9605 "00000000" // /* MW 5 */ + 9606 "00000000" // /* MW 4 */ + 9607 "11011000" // /* MW 3 */ + 9608 "00010010" // /* MW 2 */ + 9609 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.delay_slot + 9616 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9617 "00000001" // /* MW 3 */ + 9618 "11100001" // /* MW 2 */ + 9619 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.delay_slot + 9620 "00110110" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9621 "11000001" // /* MW 11 */ + 9622 "00001000" // /* MW 10 */ + 9623 "01110011" // /* MW 9 */ + 9624 "00000011" // /* MW 8 */ + 9625 "00000000" // /* MW 7 */ + 9626 "00000000" // /* MW 6 */ + 9627 "00100000" // /* MW 5 */ + 9628 "00000000" // /* MW 4 */ + 9629 "11110000" // /* MW 3 */ + 9630 "00101100" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096 +.src_ref 8 "superkernels.cpp" 558 7 + 9632 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "00010000" // /* MW 13 */ + 9636 "01101000" // /* MW 12 */ + 9637 "10110010" // /* MW 11 */ + 9638 "11110011" // /* MW 10 */ + 9639 "00000001" // /* MW 9 */ + 9640 "00000000" // /* MW 8 */ + 9641 "01011011" // /* MW 7 */ + 9642 "00000001" // /* MW 6 */ + 9643 "00100000" // /* MW 5 */ + 9644 "00000000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112 +.src_ref 8 "superkernels.cpp" 558 7 first + 9648 "11100001" // LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "10100101" // /* MW 12 */ + 9653 "00000001" // /* MW 11 */ + 9654 "00000000" // /* MW 10 */ + 9655 "00000000" // /* MW 9 */ + 9656 "00000000" // /* MW 8 */ + 9657 "01011011" // /* MW 7 */ + 9658 "00000001" // /* MW 6 */ + 9659 "00100000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11010000" // /* MW 3 */ + 9662 "11000010" // /* MW 2 */ + 9663 "11100000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128 +.src_ref 8 "superkernels.cpp" 558 43 + 9664 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00000001" // /* MW 3 */ + 9666 "00100010" // /* MW 2 */ + 9667 "00010000" // /* MW 1 */ + 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9669 "00000000" // /* MW 1 */ + 9670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9671 "00000000" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 19 + 9678 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9679 "00001000" // /* MW 3 */ + 9680 "10100001" // /* MW 2 */ + 9681 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 25 + 9682 "10000100" // JNZ r16, #9872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9872 delay_slots=5 */ + 9683 "00000001" // /* MW 5 */ + 9684 "01000000" // /* MW 4 */ + 9685 "01001000" // /* MW 3 */ + 9686 "00010011" // /* MW 2 */ + 9687 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.src_ref 8 "superkernels.cpp" 559 15 +.delay_slot + 9688 "01000100" // MOVXM p7, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9689 "11000000" // /* MW 5 */ + 9690 "11001001" // /* MW 4 */ + 9691 "11001110" // /* MW 3 */ + 9692 "00000111" // /* MW 2 */ + 9693 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 67 +.delay_slot + 9694 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9695 "11000000" // /* MW 5 */ + 9696 "11001000" // /* MW 4 */ + 9697 "11000100" // /* MW 3 */ + 9698 "00000111" // /* MW 2 */ + 9699 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9703 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9705 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 + 9706 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00010110" // /* MW 3 */ + 9708 "00000110" // /* MW 2 */ + 9709 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 67 + 9710 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "00111010" // /* MW 3 */ + 9712 "00000100" // /* MW 2 */ + 9713 "00000010" // /* MW 1 */ + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9715 "00000000" // /* MW 1 */ + 9716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9717 "00000000" // /* MW 1 */ + 9718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9719 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.no_stack_arguments + 9720 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9721 "00000001" // /* MW 5 */ + 9722 "00000000" // /* MW 4 */ + 9723 "11111000" // /* MW 3 */ + 9724 "00010011" // /* MW 2 */ + 9725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9727 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.delay_slot + 9728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9729 "00000111" // /* MW 3 */ + 9730 "00100000" // /* MW 2 */ + 9731 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9732 "01011100" // ST r16, [p7]; LT r27, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9733 "00110101" // /* MW 5 */ + 9734 "01101110" // /* MW 4 */ + 9735 "00111000" // /* MW 3 */ + 9736 "11000010" // /* MW 2 */ + 9737 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9738 "11100100" // SUB r17, r17, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9739 "01000001" // /* MW 5 */ + 9740 "00111011" // /* MW 4 */ + 9741 "00110111" // /* MW 3 */ + 9742 "01100000" // /* MW 2 */ + 9743 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9744 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9745 "00000000" // /* MW 15 */ + 9746 "00000000" // /* MW 14 */ + 9747 "01111000" // /* MW 13 */ + 9748 "10100101" // /* MW 12 */ + 9749 "00000001" // /* MW 11 */ + 9750 "10010000" // /* MW 10 */ + 9751 "00001000" // /* MW 9 */ + 9752 "00100000" // /* MW 8 */ + 9753 "01011011" // /* MW 7 */ + 9754 "00000001" // /* MW 6 */ + 9755 "00100000" // /* MW 5 */ + 9756 "00000000" // /* MW 4 */ + 9757 "11110000" // /* MW 3 */ + 9758 "00101100" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 9760 "10111010" // LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9761 "01111000" // /* MW 9 */ + 9762 "10010000" // /* MW 8 */ + 9763 "01101011" // /* MW 7 */ + 9764 "10001111" // /* MW 6 */ + 9765 "00000001" // /* MW 5 */ + 9766 "00011011" // /* MW 4 */ + 9767 "00100000" // /* MW 3 */ + 9768 "10010011" // /* MW 2 */ + 9769 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 + 9770 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9771 "00000010" // /* MW 3 */ + 9772 "11100001" // /* MW 2 */ + 9773 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 6 +.src_ref 8 "superkernels.cpp" 558 78 + 9774 "10000100" // JNZ r16, #9840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9840 delay_slots=5 */ + 9775 "00000001" // /* MW 5 */ + 9776 "01000000" // /* MW 4 */ + 9777 "00111000" // /* MW 3 */ + 9778 "00010011" // /* MW 2 */ + 9779 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 7 +.delay_slot + 9780 "01000100" // MOVXM p2, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9781 "10100000" // /* MW 5 */ + 9782 "11001001" // /* MW 4 */ + 9783 "11000100" // /* MW 3 */ + 9784 "00000111" // /* MW 2 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9789 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9791 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9793 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 559 15 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9794 "00001100" // LDA r16, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9795 "01100011" // /* MW 5 */ + 9796 "00001011" // /* MW 4 */ + 9797 "11011110" // /* MW 3 */ + 9798 "11000010" // /* MW 2 */ + 9799 "00101010" // /* MW 1 */ + 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9801 "00000000" // /* MW 1 */ + 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9803 "00000000" // /* MW 1 */ + 9804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9805 "00000000" // /* MW 1 */ + 9806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9807 "00000000" // /* MW 1 */ + 9808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9809 "00000000" // /* MW 1 */ + 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9812 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "11111000" // /* MW 3 */ + 9814 "00010000" // /* MW 2 */ + 9815 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 9816 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "00010110" // /* MW 3 */ + 9818 "11100110" // /* MW 2 */ + 9819 "00000110" // /* MW 1 */ + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ + 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9823 "00000000" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 9832 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00000001" // /* MW 3 */ + 9834 "11100001" // /* MW 2 */ + 9835 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9836 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9837 "00010001" // /* MW 3 */ + 9838 "11100110" // /* MW 2 */ + 9839 "00001110" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304 + 9840 "10000100" // J #9888 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */ + 9841 "00000000" // /* MW 5 */ + 9842 "00000000" // /* MW 4 */ + 9843 "01010000" // /* MW 3 */ + 9844 "00010011" // /* MW 2 */ + 9845 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.delay_slot + 9846 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9847 "11000000" // /* MW 3 */ + 9848 "01100010" // /* MW 2 */ + 9849 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9856 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9857 "00000000" // /* MW 15 */ + 9858 "00000000" // /* MW 14 */ + 9859 "01111000" // /* MW 13 */ + 9860 "10100101" // /* MW 12 */ + 9861 "00000001" // /* MW 11 */ + 9862 "00000000" // /* MW 10 */ + 9863 "00000000" // /* MW 9 */ + 9864 "00000000" // /* MW 8 */ + 9865 "01011011" // /* MW 7 */ + 9866 "00000001" // /* MW 6 */ + 9867 "00100000" // /* MW 5 */ + 9868 "00000000" // /* MW 4 */ + 9869 "11110000" // /* MW 3 */ + 9870 "00101100" // /* MW 2 */ + 9871 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336 +.src_ref 8 "superkernels.cpp" 562 7 +.src_ref 1 "io_buffer_main.h" 324 51 + 9872 "11100001" // LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9873 "00000000" // /* MW 15 */ + 9874 "00000000" // /* MW 14 */ + 9875 "00010000" // /* MW 13 */ + 9876 "01101000" // /* MW 12 */ + 9877 "00110010" // /* MW 11 */ + 9878 "11110001" // /* MW 10 */ + 9879 "00000001" // /* MW 9 */ + 9880 "00000000" // /* MW 8 */ + 9881 "01011011" // /* MW 7 */ + 9882 "00000001" // /* MW 6 */ + 9883 "00100000" // /* MW 5 */ + 9884 "00000000" // /* MW 4 */ + 9885 "00100000" // /* MW 3 */ + 9886 "11110011" // /* MW 2 */ + 9887 "11111011" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352 +.src_ref 8 "superkernels.cpp" 562 7 first +.src_ref 8 "superkernels.cpp" 562 19 + 9888 "00101100" // LDA r16, [p2]; MOVX r17, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00100010" // /* MW 5 */ + 9890 "01000100" // /* MW 4 */ + 9891 "11010000" // /* MW 3 */ + 9892 "11000010" // /* MW 2 */ + 9893 "01000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 19 + 9906 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9907 "00001000" // /* MW 3 */ + 9908 "01100001" // /* MW 2 */ + 9909 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 25 + 9910 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */ + 9911 "00000001" // /* MW 5 */ + 9912 "01000000" // /* MW 4 */ + 9913 "10101000" // /* MW 3 */ + 9914 "00010011" // /* MW 2 */ + 9915 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.delay_slot + 9916 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11001000" // /* MW 5 */ + 9918 "11001001" // /* MW 4 */ + 9919 "11000100" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9929 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.src_ref 8 "superkernels.cpp" 562 68 + 9930 "10111010" // LDA r16, [p2]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9931 "00010000" // /* MW 9 */ + 9932 "00110000" // /* MW 8 */ + 9933 "10110010" // /* MW 7 */ + 9934 "11110000" // /* MW 6 */ + 9935 "00000001" // /* MW 5 */ + 9936 "00000000" // /* MW 4 */ + 9937 "11010000" // /* MW 3 */ + 9938 "11000010" // /* MW 2 */ + 9939 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 68 + 9940 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9941 "00111010" // /* MW 3 */ + 9942 "00000100" // /* MW 2 */ + 9943 "00000001" // /* MW 1 */ + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ + 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9947 "00000000" // /* MW 1 */ + 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9949 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.no_stack_arguments + 9950 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9951 "00000001" // /* MW 5 */ + 9952 "00000000" // /* MW 4 */ + 9953 "11111000" // /* MW 3 */ + 9954 "00010011" // /* MW 2 */ + 9955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9957 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.delay_slot + 9958 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9959 "00000111" // /* MW 3 */ + 9960 "00100000" // /* MW 2 */ + 9961 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9962 "01011100" // ST r16, [p2]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9963 "10110101" // /* MW 5 */ + 9964 "01101101" // /* MW 4 */ + 9965 "00111000" // /* MW 3 */ + 9966 "11000010" // /* MW 2 */ + 9967 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9968 "11100100" // SUB r17, r13, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9969 "01000001" // /* MW 5 */ + 9970 "00111011" // /* MW 4 */ + 9971 "00110111" // /* MW 3 */ + 9972 "01100000" // /* MW 2 */ + 9973 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9974 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9975 "00010010" // /* MW 9 */ + 9976 "00000001" // /* MW 8 */ + 9977 "00000100" // /* MW 7 */ + 9978 "00000000" // /* MW 6 */ + 9979 "01011011" // /* MW 5 */ + 9980 "00000001" // /* MW 4 */ + 9981 "11110000" // /* MW 3 */ + 9982 "00101100" // /* MW 2 */ + 9983 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 562 44 +.return_address + 9984 "11100100" // SUB r16, r13, r3; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9985 "01000001" // /* MW 5 */ + 9986 "10101110" // /* MW 4 */ + 9987 "00111101" // /* MW 3 */ + 9988 "00000110" // /* MW 2 */ + 9989 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 + 9990 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9991 "00000010" // /* MW 3 */ + 9992 "11100001" // /* MW 2 */ + 9993 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 6 +.src_ref 8 "superkernels.cpp" 562 79 + 9994 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */ + 9995 "00000001" // /* MW 5 */ + 9996 "01000000" // /* MW 4 */ + 9997 "10101000" // /* MW 3 */ + 9998 "00010011" // /* MW 2 */ + 9999 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 563 16 +.delay_slot + 10000 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10001 "11001000" // /* MW 5 */ + 10002 "11001001" // /* MW 4 */ + 10003 "11000100" // /* MW 3 */ + 10004 "00000111" // /* MW 2 */ + 10005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10013 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 563 16 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 10014 "00001100" // LDA r16, [p7, #20]; ST r13, [p2] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "01100011" // /* MW 5 */ + 10016 "00001011" // /* MW 4 */ + 10017 "11010100" // /* MW 3 */ + 10018 "11000010" // /* MW 2 */ + 10019 "11101010" // /* MW 1 */ + 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10021 "00000000" // /* MW 1 */ + 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10023 "00000000" // /* MW 1 */ + 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10025 "00000000" // /* MW 1 */ + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10032 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10033 "11111000" // /* MW 3 */ + 10034 "00010000" // /* MW 2 */ + 10035 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 10036 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10037 "00010110" // /* MW 3 */ + 10038 "11100110" // /* MW 2 */ + 10039 "00000110" // /* MW 1 */ + 10040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10041 "00000000" // /* MW 1 */ + 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10043 "00000000" // /* MW 1 */ + 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10045 "00000000" // /* MW 1 */ + 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10047 "00000000" // /* MW 1 */ + 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10049 "00000000" // /* MW 1 */ + 10050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10051 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 10052 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10053 "00000001" // /* MW 3 */ + 10054 "11100001" // /* MW 2 */ + 10055 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10056 "00000010" // ST r16, [p6, #-8]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10057 "01110000" // /* MW 7 */ + 10058 "10100101" // /* MW 6 */ + 10059 "00000001" // /* MW 5 */ + 10060 "00000000" // /* MW 4 */ + 10061 "00110000" // /* MW 3 */ + 10062 "11000010" // /* MW 2 */ + 10063 "11011100" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528 +.src_ref 8 "superkernels.cpp" 566 6 +.src_ref 8 "superkernels.cpp" 567 14 + 10064 "01000100" // MOVXM p6, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10065 "10000000" // /* MW 5 */ + 10066 "11001001" // /* MW 4 */ + 10067 "11001100" // /* MW 3 */ + 10068 "00000111" // /* MW 2 */ + 10069 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 6 first +.src_ref 8 "superkernels.cpp" 566 19 + 10070 "10111010" // LDA r16, [p6]; MOVXM p2, #509160 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10071 "00010000" // /* MW 9 */ + 10072 "01110100" // /* MW 8 */ + 10073 "00110010" // /* MW 7 */ + 10074 "11110001" // /* MW 6 */ + 10075 "00000001" // /* MW 5 */ + 10076 "00000000" // /* MW 4 */ + 10077 "11010000" // /* MW 3 */ + 10078 "11000010" // /* MW 2 */ + 10079 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 19 + 10080 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10081 "00110110" // /* MW 3 */ + 10082 "00000110" // /* MW 2 */ + 10083 "00000010" // /* MW 1 */ + 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10085 "00000000" // /* MW 1 */ + 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10087 "00000000" // /* MW 1 */ + 10088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10089 "00000000" // /* MW 1 */ + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ + 10092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10093 "00000000" // /* MW 1 */ + 10094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10095 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 16 + 10096 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10097 "00001000" // /* MW 3 */ + 10098 "01100001" // /* MW 2 */ + 10099 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 6 + 10100 "10000100" // JNZ r16, #10128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10128 delay_slots=5 */ + 10101 "00000001" // /* MW 5 */ + 10102 "01000000" // /* MW 4 */ + 10103 "11001000" // /* MW 3 */ + 10104 "00010011" // /* MW 2 */ + 10105 "10000000" // /* MW 1 */ +.delay_slot + 10106 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10107 "10011001" // /* MW 3 */ + 10108 "11101111" // /* MW 2 */ + 10109 "00000111" // /* MW 1 */ +.delay_slot + 10110 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10111 "11110001" // /* MW 3 */ + 10112 "11110001" // /* MW 2 */ + 10113 "00000111" // /* MW 1 */ +.delay_slot + 10114 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10115 "11010001" // /* MW 3 */ + 10116 "11110101" // /* MW 2 */ + 10117 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 567 14 first + 10122 "00001100" // NOPA; ST r13, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100011" // /* MW 5 */ + 10124 "00001011" // /* MW 4 */ + 10125 "11111100" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592 +.src_ref 8 "superkernels.cpp" 569 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 10128 "11010100" // LDA r11, [sp, #-8]; MOV lr, r11 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10129 "01000001" // /* MW 5 */ + 10130 "11101011" // /* MW 4 */ + 10131 "00101110" // /* MW 3 */ + 10132 "00101110" // /* MW 2 */ + 10133 "11111111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.noswbrkpt + 10134 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10135 "10010001" // /* MW 3 */ + 10136 "11111101" // /* MW 2 */ + 10137 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10138 "00011000" // LDA r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10139 "10110001" // /* MW 3 */ + 10140 "11101001" // /* MW 2 */ + 10141 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 569 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10143 "00000000" // /* MW 3 */ + 10144 "00101000" // /* MW 2 */ + 10145 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10146 "11111000" // MOV p6, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10147 "00100000" // /* MW 3 */ + 10148 "01100110" // /* MW 2 */ + 10149 "00011110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 569 +.delay_slot + 10150 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10151 "00000001" // /* MW 5 */ + 10152 "00000000" // /* MW 4 */ + 10153 "00000000" // /* MW 3 */ + 10154 "11110000" // /* MW 2 */ + 10155 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "00100000" // /* MW 5 */ + 10172 "00000000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 10176 "01110110" // MOVA r13, #0; MOVS p6, r12; J #9408 /* MW 12 */ /* control_operation: words=12 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */ + 10177 "00100000" // /* MW 11 */ + 10178 "00000000" // /* MW 10 */ + 10179 "00000000" // /* MW 9 */ + 10180 "10011000" // /* MW 8 */ + 10181 "00000100" // /* MW 7 */ + 10182 "00000000" // /* MW 6 */ + 10183 "00001011" // /* MW 5 */ + 10184 "10001100" // /* MW 4 */ + 10185 "00000110" // /* MW 3 */ + 10186 "00001101" // /* MW 2 */ + 10187 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 8 "superkernels.cpp" 558 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 10188 "01100100" // MOVX r15, #1; MOV r14, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10189 "00001001" // /* MW 5 */ + 10190 "00100000" // /* MW 4 */ + 10191 "10100111" // /* MW 3 */ + 10192 "11000000" // /* MW 2 */ + 10193 "00000011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.delay_slot + 10194 "01000100" // MOVXM p2, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10195 "10000000" // /* MW 5 */ + 10196 "11001000" // /* MW 4 */ + 10197 "11000100" // /* MW 3 */ + 10198 "00000111" // /* MW 2 */ + 10199 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.delay_slot + 10200 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10201 "10100000" // /* MW 5 */ + 10202 "11001001" // /* MW 4 */ + 10203 "11001110" // /* MW 3 */ + 10204 "00000111" // /* MW 2 */ + 10205 "00000000" // /* MW 1 */ +.delay_slot + 10206 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10207 "10010001" // /* MW 3 */ + 10208 "11100101" // /* MW 2 */ + 10209 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10211 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 9 "me_div.c" 108 19 +.src_ref 9 "me_div.c" 108 19 +.src_ref 9 "me_div.c" 115 4 first +.function_start + 10224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10225 "01000001" // /* MW 5 */ + 10226 "10100000" // /* MW 4 */ + 10227 "00101111" // /* MW 3 */ + 10228 "11000000" // /* MW 2 */ + 10229 "00000000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10231 "00011100" // /* MW 3 */ + 10232 "11000110" // /* MW 2 */ + 10233 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10235 "00011100" // /* MW 3 */ + 10236 "11000110" // /* MW 2 */ + 10237 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10239 "00011100" // /* MW 3 */ + 10240 "11000110" // /* MW 2 */ + 10241 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10243 "00011100" // /* MW 3 */ + 10244 "11000110" // /* MW 2 */ + 10245 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10247 "00011100" // /* MW 3 */ + 10248 "11000110" // /* MW 2 */ + 10249 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10251 "00011100" // /* MW 3 */ + 10252 "11000110" // /* MW 2 */ + 10253 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10255 "00011100" // /* MW 3 */ + 10256 "11000110" // /* MW 2 */ + 10257 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10259 "00011100" // /* MW 3 */ + 10260 "11000110" // /* MW 2 */ + 10261 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10263 "00011100" // /* MW 3 */ + 10264 "11000110" // /* MW 2 */ + 10265 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10267 "00011100" // /* MW 3 */ + 10268 "11000110" // /* MW 2 */ + 10269 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10271 "00011100" // /* MW 3 */ + 10272 "11000110" // /* MW 2 */ + 10273 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10275 "00011100" // /* MW 3 */ + 10276 "11000110" // /* MW 2 */ + 10277 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10279 "00011100" // /* MW 3 */ + 10280 "11000110" // /* MW 2 */ + 10281 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10283 "00011100" // /* MW 3 */ + 10284 "11000110" // /* MW 2 */ + 10285 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10287 "00011100" // /* MW 3 */ + 10288 "11000110" // /* MW 2 */ + 10289 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10291 "00011100" // /* MW 3 */ + 10292 "11000110" // /* MW 2 */ + 10293 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10295 "00011100" // /* MW 3 */ + 10296 "11000110" // /* MW 2 */ + 10297 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10299 "00011100" // /* MW 3 */ + 10300 "11000110" // /* MW 2 */ + 10301 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10303 "00011100" // /* MW 3 */ + 10304 "11000110" // /* MW 2 */ + 10305 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10307 "00011100" // /* MW 3 */ + 10308 "11000110" // /* MW 2 */ + 10309 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "00011100" // /* MW 3 */ + 10312 "11000110" // /* MW 2 */ + 10313 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10315 "00011100" // /* MW 3 */ + 10316 "11000110" // /* MW 2 */ + 10317 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10319 "00011100" // /* MW 3 */ + 10320 "11000110" // /* MW 2 */ + 10321 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10323 "00011100" // /* MW 3 */ + 10324 "11000110" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "00011100" // /* MW 3 */ + 10328 "11000110" // /* MW 2 */ + 10329 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "00011100" // /* MW 3 */ + 10332 "11000110" // /* MW 2 */ + 10333 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10335 "00011100" // /* MW 3 */ + 10336 "11000110" // /* MW 2 */ + 10337 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10339 "00011100" // /* MW 3 */ + 10340 "11000110" // /* MW 2 */ + 10341 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 119 first + 10342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10343 "00000000" // /* MW 3 */ + 10344 "00101000" // /* MW 2 */ + 10345 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 first +.delay_slot + 10346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10347 "00011100" // /* MW 3 */ + 10348 "11000110" // /* MW 2 */ + 10349 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10351 "00011100" // /* MW 3 */ + 10352 "11000110" // /* MW 2 */ + 10353 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10355 "00011100" // /* MW 3 */ + 10356 "11000110" // /* MW 2 */ + 10357 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10359 "00011100" // /* MW 3 */ + 10360 "11000110" // /* MW 2 */ + 10361 "00010000" // /* MW 1 */ +.delay_slot + 10362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10363 "10100000" // /* MW 3 */ + 10364 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 10365 "00011000" // /* MW 1 */ +.label _ZL19propagateFloat32NaNjj +.function propagateFloat32NaN _ZL19propagateFloat32NaNjj +.src_ref 10 "softfloat-specialize" 78 24 +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 143 4 first +.function_start + 10368 "10111010" // MOVA r3, #-22; MOVXM r18, #-16777216 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10369 "00010000" // /* MW 9 */ + 10370 "00000000" // /* MW 8 */ + 10371 "01001000" // /* MW 7 */ + 10372 "00000010" // /* MW 6 */ + 10373 "11000000" // /* MW 5 */ + 10374 "00111111" // /* MW 4 */ + 10375 "00000000" // /* MW 3 */ + 10376 "01000011" // /* MW 2 */ + 10377 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 140 6 +.src_ref 10 "softfloat-specialize" 141 6 + 10378 "10111010" // MOVA r7, #511; MOVXM r0, #4194304 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10379 "00010000" // /* MW 9 */ + 10380 "00000000" // /* MW 8 */ + 10381 "00001000" // /* MW 7 */ + 10382 "00000000" // /* MW 6 */ + 10383 "00010000" // /* MW 5 */ + 10384 "00000000" // /* MW 4 */ + 10385 "00000000" // /* MW 3 */ + 10386 "11100111" // /* MW 2 */ + 10387 "00111111" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 38 +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 140 6 first + 10388 "10111010" // MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10389 "01011000" // /* MW 9 */ + 10390 "11111110" // /* MW 8 */ + 10391 "10101001" // /* MW 7 */ + 10392 "00101100" // /* MW 6 */ + 10393 "01000000" // /* MW 5 */ + 10394 "00000010" // /* MW 4 */ + 10395 "00000000" // /* MW 3 */ + 10396 "00110000" // /* MW 2 */ + 10397 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 141 6 first + 10398 "10011000" // OR r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10399 "00000101" // /* MW 3 */ + 10400 "10000000" // /* MW 2 */ + 10401 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10402 "10011000" // LSHL r6, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10403 "00111101" // /* MW 3 */ + 10404 "01001100" // /* MW 2 */ + 10405 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first + 10406 "10011000" // LSHL r3, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10407 "00111101" // /* MW 3 */ + 10408 "10000110" // /* MW 2 */ + 10409 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 + 10410 "10011000" // AND r3, r7, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10411 "00110100" // /* MW 3 */ + 10412 "11000110" // /* MW 2 */ + 10413 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10414 "10011000" // AND r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10415 "01100100" // /* MW 3 */ + 10416 "11001100" // /* MW 2 */ + 10417 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 + 10418 "10011000" // EQ r6, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10419 "01100111" // /* MW 3 */ + 10420 "01001100" // /* MW 2 */ + 10421 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 38 first + 10422 "10011000" // LSHL r17, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10423 "00001101" // /* MW 3 */ + 10424 "10100011" // /* MW 2 */ + 10425 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 24 + 10426 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10427 "00011100" // /* MW 3 */ + 10428 "10110111" // /* MW 2 */ + 10429 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 62 first + 10430 "00011000" // SEL.EQZ r17, r4, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10431 "00000010" // /* MW 3 */ + 10432 "00100010" // /* MW 2 */ + 10433 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 + 10434 "01000100" // MOVXM r16, #4194303 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10435 "11111110" // /* MW 5 */ + 10436 "00111111" // /* MW 4 */ + 10437 "11111000" // /* MW 3 */ + 10438 "00111111" // /* MW 2 */ + 10439 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first + 10440 "10011000" // AND r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10441 "00000100" // /* MW 3 */ + 10442 "10000101" // /* MW 2 */ + 10443 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 + 10444 "00011000" // NEZ r2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10445 "11110000" // /* MW 3 */ + 10446 "10000100" // /* MW 2 */ + 10447 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10448 "10011000" // AND r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10449 "00000100" // /* MW 3 */ + 10450 "01000011" // /* MW 2 */ + 10451 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 + 10452 "00011000" // NEZ r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10453 "11110000" // /* MW 3 */ + 10454 "01000010" // /* MW 2 */ + 10455 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 4 first + 10456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10457 "00000000" // /* MW 3 */ + 10458 "00101000" // /* MW 2 */ + 10459 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first +.delay_slot + 10460 "10011000" // AND r27, r1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10461 "01100100" // /* MW 3 */ + 10462 "01110110" // /* MW 2 */ + 10463 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first +.delay_slot + 10464 "10011000" // EQ r1, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10465 "01010111" // /* MW 3 */ + 10466 "11000010" // /* MW 2 */ + 10467 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 49 first +.delay_slot + 10468 "00011000" // SEL.EQZ r3, r17, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10469 "01000010" // /* MW 3 */ + 10470 "01000110" // /* MW 2 */ + 10471 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first +.delay_slot + 10472 "10011000" // AND r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10473 "00100100" // /* MW 3 */ + 10474 "01110110" // /* MW 2 */ + 10475 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 27 first +.delay_slot + 10476 "00011000" // SEL.EQZ r0, r3, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10477 "00000010" // /* MW 3 */ + 10478 "11000000" // /* MW 2 */ +.label _ZL19propagateFloat32NaNjj__end + 10479 "00010000" // /* MW 1 */ +.label _ZL19roundAndPackFloat32iij +.function roundAndPackFloat32 _ZL19roundAndPackFloat32iij +.src_ref 10 "softfloat.c" 154 first +.src_ref 10 "softfloat.c" 161 19 +.src_ref 10 "softfloat.c" 203 30 +.function_start + 10480 "10111010" // MOVA r0, #64; MOVXM p0, #509172 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10481 "00010000" // /* MW 9 */ + 10482 "01111010" // /* MW 8 */ + 10483 "00110010" // /* MW 7 */ + 10484 "11110000" // /* MW 6 */ + 10485 "00000001" // /* MW 5 */ + 10486 "00000000" // /* MW 4 */ + 10487 "00000000" // /* MW 3 */ + 10488 "00000000" // /* MW 2 */ + 10489 "00001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 161 19 first +.src_ref 10 "softfloat.c" 171 16 +.src_ref 10 "softfloat.c" 174 16 +.src_ref 10 "softfloat.c" 178 21 +.src_ref 10 "softfloat.c" 194 29 + 10490 "00101100" // LDA r4, [p0]; MOVX r6, #127 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10491 "11111010" // /* MW 5 */ + 10492 "10011001" // /* MW 4 */ + 10493 "11010000" // /* MW 3 */ + 10494 "10010010" // /* MW 2 */ + 10495 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10501 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10503 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10505 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10507 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 162 36 first +.src_ref 10 "softfloat.c" 164 4 first + 10508 "10000100" // JZ r4, #10576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10576 delay_slots=5 */ + 10509 "00000001" // /* MW 5 */ + 10510 "00000000" // /* MW 4 */ + 10511 "10101000" // /* MW 3 */ + 10512 "00010100" // /* MW 2 */ + 10513 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 40 +.src_ref 10 "softfloat.c" 185 68 +.src_ref 10 "softfloat.c" 202 18 +.delay_slot + 10514 "00011000" // MOVX r5, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10515 "00000001" // /* MW 3 */ + 10516 "01001010" // /* MW 2 */ + 10517 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10525 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 8 +.src_ref 10 "softfloat.c" 171 16 +.src_ref 10 "softfloat.c" 171 34 +.src_ref 10 "softfloat.c" 174 16 +.src_ref 10 "softfloat.c" 174 34 + 10526 "10111010" // MOVA r16, #3; MOVX r7, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10527 "01011000" // /* MW 9 */ + 10528 "00000000" // /* MW 8 */ + 10529 "00001000" // /* MW 7 */ + 10530 "01001011" // /* MW 6 */ + 10531 "01110000" // /* MW 5 */ + 10532 "00000000" // /* MW 4 */ + 10533 "00000000" // /* MW 3 */ + 10534 "01110000" // /* MW 2 */ + 10535 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 26 +.src_ref 10 "softfloat.c" 171 34 first + 10536 "01100100" // EQ r27, r7, r4; MOV r5, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10537 "00000101" // /* MW 5 */ + 10538 "10100000" // /* MW 4 */ + 10539 "11110010" // /* MW 3 */ + 10540 "11001000" // /* MW 2 */ + 10541 "00111110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 171 16 + 10542 "00011000" // SEL.EQZ r7, r6, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10543 "10000010" // /* MW 3 */ + 10544 "10001111" // /* MW 2 */ + 10545 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 174 34 first + 10546 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10547 "00000111" // /* MW 3 */ + 10548 "00110111" // /* MW 2 */ + 10549 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 170 12 +.src_ref 10 "softfloat.c" 174 16 + 10550 "11100100" // SEL.EQZ r16, r6, r24, r27; MOV r27, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10551 "01000001" // /* MW 5 */ + 10552 "10100001" // /* MW 4 */ + 10553 "01001101" // /* MW 3 */ + 10554 "00110000" // /* MW 2 */ + 10555 "00110100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 170 12 first +.src_ref 10 "softfloat.c" 170 12 first + 10556 "00011000" // SEL.EQZ r7, r16, r7, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10557 "01110010" // /* MW 3 */ + 10558 "00001110" // /* MW 2 */ + 10559 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 26 first + 10560 "10011000" // EQ r27, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10561 "01000111" // /* MW 3 */ + 10562 "01110110" // /* MW 2 */ + 10563 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 8 + 10564 "00110110" // NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10565 "10000001" // /* MW 11 */ + 10566 "10101101" // /* MW 10 */ + 10567 "00000000" // /* MW 9 */ + 10568 "00010000" // /* MW 8 */ + 10569 "01011100" // /* MW 7 */ + 10570 "00001110" // /* MW 6 */ + 10571 "00100000" // /* MW 5 */ + 10572 "00000000" // /* MW 4 */ + 10573 "11110000" // /* MW 3 */ + 10574 "00101100" // /* MW 2 */ + 10575 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_96 +.src_ref 10 "softfloat.c" 179 14 +.src_ref 10 "softfloat.c" 179 17 first +.src_ref 10 "softfloat.c" 180 23 +.src_ref 10 "softfloat.c" 181 28 + 10576 "01100100" // EXTEND.u16 r18, r2; MOV r16, #253 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10577 "11110101" // /* MW 5 */ + 10578 "00100011" // /* MW 4 */ + 10579 "00001000" // /* MW 3 */ + 10580 "10010110" // /* MW 2 */ + 10581 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 179 14 + 10582 "10011000" // LT r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10583 "00001010" // /* MW 3 */ + 10584 "10100101" // /* MW 2 */ + 10585 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 179 4 + 10586 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */ + 10587 "00000001" // /* MW 5 */ + 10588 "01000000" // /* MW 4 */ + 10589 "00001000" // /* MW 3 */ + 10590 "00010101" // /* MW 2 */ + 10591 "10010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 178 21 first +.delay_slot + 10592 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10593 "01100100" // /* MW 3 */ + 10594 "11100010" // /* MW 2 */ + 10595 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 +.src_ref 10 "softfloat.c" 128 31 +.delay_slot + 10596 "00011000" // MOVX r7, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10597 "01111101" // /* MW 3 */ + 10598 "00001110" // /* MW 2 */ + 10599 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 10600 "10011000" // LSHL r1, r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10601 "01111101" // /* MW 3 */ + 10602 "01000010" // /* MW 2 */ + 10603 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10607 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 181 28 first +.src_ref 10 "softfloat.c" 182 40 first +.src_ref 10 "softfloat.c" 182 59 + 10608 "10111010" // MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10609 "10101000" // /* MW 9 */ + 10610 "11001010" // /* MW 8 */ + 10611 "10001000" // /* MW 7 */ + 10612 "00111110" // /* MW 6 */ + 10613 "00111000" // /* MW 5 */ + 10614 "00000101" // /* MW 4 */ + 10615 "00000000" // /* MW 3 */ + 10616 "00010010" // /* MW 2 */ + 10617 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 59 + 10618 "10011000" // LT r20, r20, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10619 "00101010" // /* MW 3 */ + 10620 "00101001" // /* MW 2 */ + 10621 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 180 23 first + 10622 "10011000" // LT r16, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10623 "00101010" // /* MW 3 */ + 10624 "00100000" // /* MW 2 */ + 10625 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 18 first + 10626 "10011000" // AND r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10627 "01000100" // /* MW 3 */ + 10628 "11100111" // /* MW 2 */ + 10629 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 181 13 first + 10630 "10011000" // OR r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10631 "00000101" // /* MW 3 */ + 10632 "11100111" // /* MW 2 */ + 10633 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 180 8 first + 10634 "10000100" // JNZ r19, #10848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10848 delay_slots=5 */ + 10635 "00000001" // /* MW 5 */ + 10636 "01000000" // /* MW 4 */ + 10637 "00110000" // /* MW 3 */ + 10638 "00010101" // /* MW 2 */ + 10639 "10011000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 187 18 +.src_ref 10 "softfloat.c" 192 39 +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 +.delay_slot + 10640 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10641 "00000001" // /* MW 3 */ + 10642 "00100000" // /* MW 2 */ + 10643 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10651 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 187 18 first + 10652 "10011000" // GE r19, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10653 "00001001" // /* MW 3 */ + 10654 "10100111" // /* MW 2 */ + 10655 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 187 8 + 10656 "10000100" // JNZ r19, #10784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10784 delay_slots=5 */ + 10657 "00000001" // /* MW 5 */ + 10658 "01000000" // /* MW 4 */ + 10659 "00010000" // /* MW 3 */ + 10660 "00010101" // /* MW 2 */ + 10661 "10011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10671 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 192 39 first + 10672 "10011000" // SUB r2, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10673 "00100001" // /* MW 3 */ + 10674 "00000100" // /* MW 2 */ + 10675 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 10676 "10000100" // JZ r2, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */ + 10677 "00000001" // /* MW 5 */ + 10678 "00000000" // /* MW 4 */ + 10679 "11111000" // /* MW 3 */ + 10680 "00010100" // /* MW 2 */ + 10681 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10683 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10691 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 10692 "01100100" // SUB r17, r16, r2; MOV r19, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10693 "10000001" // /* MW 5 */ + 10694 "10100000" // /* MW 4 */ + 10695 "00111001" // /* MW 3 */ + 10696 "01000100" // /* MW 2 */ + 10697 "10000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 10698 "10011000" // AND r7, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10699 "00010100" // /* MW 3 */ + 10700 "11001111" // /* MW 2 */ + 10701 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 10702 "10011000" // LSHL r7, r3, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10703 "01111101" // /* MW 3 */ + 10704 "11001110" // /* MW 2 */ + 10705 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 10706 "10011000" // LSHL r17, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10707 "00011101" // /* MW 3 */ + 10708 "11100011" // /* MW 2 */ + 10709 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 10710 "10011000" // LT r27, r2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10711 "00111010" // /* MW 3 */ + 10712 "10110111" // /* MW 2 */ + 10713 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 10714 "00011000" // NEZ r7, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10715 "11110000" // /* MW 3 */ + 10716 "11001110" // /* MW 2 */ + 10717 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 10718 "00011000" // NEZ r3, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10719 "11110000" // /* MW 3 */ + 10720 "11000110" // /* MW 2 */ + 10721 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 10722 "10011000" // OR r2, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10723 "00010101" // /* MW 3 */ + 10724 "11000101" // /* MW 2 */ + 10725 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 10726 "01111010" // NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10727 "00100010" // /* MW 9 */ + 10728 "11000110" // /* MW 8 */ + 10729 "00000000" // /* MW 7 */ + 10730 "00000000" // /* MW 6 */ + 10731 "01011011" // /* MW 5 */ + 10732 "00000001" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_256 + 10736 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */ + 10737 "00000000" // /* MW 5 */ + 10738 "00000000" // /* MW 4 */ + 10739 "00010000" // /* MW 3 */ + 10740 "00010101" // /* MW 2 */ + 10741 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 194 29 first +.delay_slot + 10742 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10743 "01100100" // /* MW 3 */ + 10744 "11100010" // /* MW 2 */ + 10745 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 +.delay_slot + 10746 "00011000" // MOVX r2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10747 "00000001" // /* MW 3 */ + 10748 "00000100" // /* MW 2 */ + 10749 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10754 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10755 "00011100" // /* MW 13 */ + 10756 "00000000" // /* MW 12 */ + 10757 "00000000" // /* MW 11 */ + 10758 "01010111" // /* MW 10 */ + 10759 "00011010" // /* MW 9 */ + 10760 "01000000" // /* MW 8 */ + 10761 "00000000" // /* MW 7 */ + 10762 "00000000" // /* MW 6 */ + 10763 "10110110" // /* MW 5 */ + 10764 "00000010" // /* MW 4 */ + 10765 "11110000" // /* MW 3 */ + 10766 "00101100" // /* MW 2 */ + 10767 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_288 +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 + 10768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10769 "00000000" // /* MW 15 */ + 10770 "00000000" // /* MW 14 */ + 10771 "01111000" // /* MW 13 */ + 10772 "10100101" // /* MW 12 */ + 10773 "00000001" // /* MW 11 */ + 10774 "00001000" // /* MW 10 */ + 10775 "00000000" // /* MW 9 */ + 10776 "00000001" // /* MW 8 */ + 10777 "01011011" // /* MW 7 */ + 10778 "00000001" // /* MW 6 */ + 10779 "00100000" // /* MW 5 */ + 10780 "00000000" // /* MW 4 */ + 10781 "11110000" // /* MW 3 */ + 10782 "00101100" // /* MW 2 */ + 10783 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_304 +.src_ref 10 "softfloat.c" 202 18 first +.src_ref 10 "softfloat.c" 202 36 +.src_ref 10 "softfloat.c" 203 30 first + 10784 "10111010" // MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10785 "10101000" // /* MW 9 */ + 10786 "11001010" // /* MW 8 */ + 10787 "10101000" // /* MW 7 */ + 10788 "00110100" // /* MW 6 */ + 10789 "00110000" // /* MW 5 */ + 10790 "00100010" // /* MW 4 */ + 10791 "00000000" // /* MW 3 */ + 10792 "00100000" // /* MW 2 */ + 10793 "11111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 59 +.src_ref 10 "softfloat.c" 203 12 +.src_ref 10 "softfloat.c" 203 46 + 10794 "10111010" // MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10795 "01011000" // /* MW 9 */ + 10796 "11111111" // /* MW 8 */ + 10797 "10001111" // /* MW 7 */ + 10798 "00101100" // /* MW 6 */ + 10799 "01100010" // /* MW 5 */ + 10800 "00000110" // /* MW 4 */ + 10801 "00000000" // /* MW 3 */ + 10802 "11100011" // /* MW 2 */ + 10803 "00000010" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 46 + 10804 "00011000" // EQZ r6, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10805 "11010000" // /* MW 3 */ + 10806 "10001100" // /* MW 2 */ + 10807 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 202 36 + 10808 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00001101" // /* MW 3 */ + 10810 "01000000" // /* MW 2 */ + 10811 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 12 + 10812 "10011000" // XOR r4, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10813 "01000110" // /* MW 3 */ + 10814 "10001000" // /* MW 2 */ + 10815 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 205 4 first + 10816 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10817 "00000000" // /* MW 3 */ + 10818 "00101000" // /* MW 2 */ + 10819 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 9 first +.delay_slot + 10820 "10011000" // AND r27, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10821 "00000100" // /* MW 3 */ + 10822 "00110110" // /* MW 2 */ + 10823 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 204 4 first +.src_ref 10 "softfloat.c" 204 14 first +.delay_slot + 10824 "00011000" // SEL.EQZ r2, r16, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10825 "00100010" // /* MW 3 */ + 10826 "00000100" // /* MW 2 */ + 10827 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 59 first +.delay_slot + 10828 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10829 "00111101" // /* MW 3 */ + 10830 "10000100" // /* MW 2 */ + 10831 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 10832 "10011000" // ADD r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10833 "00100000" // /* MW 3 */ + 10834 "01000100" // /* MW 2 */ + 10835 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 66 +.delay_slot + 10836 "00110110" // NOPA; NOPB; NOPS; ADD r0, r27, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10837 "10000001" // /* MW 11 */ + 10838 "10101101" // /* MW 10 */ + 10839 "00000000" // /* MW 9 */ + 10840 "00000100" // /* MW 8 */ + 10841 "00000001" // /* MW 7 */ + 10842 "00110110" // /* MW 6 */ + 10843 "00100000" // /* MW 5 */ + 10844 "00000000" // /* MW 4 */ + 10845 "11110000" // /* MW 3 */ + 10846 "00101100" // /* MW 2 */ + 10847 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_368 +.src_ref 10 "softfloat.c" 185 12 first + 10848 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10849 "00000000" // /* MW 3 */ + 10850 "00101000" // /* MW 2 */ + 10851 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 10852 "01000100" // MOVXM r2, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10853 "00000000" // /* MW 5 */ + 10854 "00100000" // /* MW 4 */ + 10855 "00000001" // /* MW 3 */ + 10856 "10000000" // /* MW 2 */ + 10857 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 first +.delay_slot + 10858 "10011000" // ADD r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10859 "00100000" // /* MW 3 */ + 10860 "01000110" // /* MW 2 */ + 10861 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 185 68 first +.delay_slot + 10862 "00011000" // EQZ r2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10863 "11010000" // /* MW 3 */ + 10864 "01000100" // /* MW 2 */ + 10865 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 185 49 +.delay_slot + 10866 "10011000" // SUB r0, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10867 "00100001" // /* MW 3 */ + 10868 "11000000" // /* MW 2 */ + 10869 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19roundAndPackFloat32iij__end + 10871 "00000000" // /* MW 1 */ +.label _ZL28normalizeRoundAndPackFloat32iij +.function normalizeRoundAndPackFloat32 _ZL28normalizeRoundAndPackFloat32iij +.src_ref 10 "softfloat.c" 218 first +.src_ref 10 "softfloat.c" 224 11 first +.tail_call +.function_start + 10880 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10881 "00000000" // /* MW 5 */ + 10882 "00000000" // /* MW 4 */ + 10883 "01111000" // /* MW 3 */ + 10884 "00010100" // /* MW 2 */ + 10885 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 552 53 first +.delay_slot + 10886 "00011000" // CLZ r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10887 "00110000" // /* MW 3 */ + 10888 "11100000" // /* MW 2 */ + 10889 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 223 45 first +.delay_slot + 10890 "00011000" // ADD r16, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "11111111" // /* MW 3 */ + 10892 "00100001" // /* MW 2 */ + 10893 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 224 44 first +.delay_slot + 10894 "10011000" // SUB r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10895 "00000001" // /* MW 3 */ + 10896 "10000101" // /* MW 2 */ + 10897 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 224 62 +.delay_slot + 10898 "10011000" // LSHL r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10899 "00001101" // /* MW 3 */ + 10900 "11000111" // /* MW 2 */ + 10901 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL28normalizeRoundAndPackFloat32iij__end + 10903 "00000000" // /* MW 1 */ +.label int32_to_float32 +.function int32_to_float32 int32_to_float32 +.src_ref 10 "softfloat.c" 477 first +.src_ref 10 "softfloat.c" 481 4 +.src_ref 10 "softfloat.c" 481 11 first +.function_start + 10912 "10000100" // JZ r1, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */ + 10913 "00000001" // /* MW 5 */ + 10914 "00000000" // /* MW 4 */ + 10915 "01111000" // /* MW 3 */ + 10916 "00010101" // /* MW 2 */ + 10917 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 11 + 10928 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10929 "00000000" // /* MW 5 */ + 10930 "00100000" // /* MW 4 */ + 10931 "00001000" // /* MW 3 */ + 10932 "00000000" // /* MW 2 */ + 10933 "10000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 11 first + 10934 "10011000" // EQ r16, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10935 "00000111" // /* MW 3 */ + 10936 "01100001" // /* MW 2 */ + 10937 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 4 + 10938 "10000100" // JNZ r16, #11008 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11008 delay_slots=5 */ + 10939 "00000001" // /* MW 5 */ + 10940 "01000000" // /* MW 4 */ + 10941 "10000000" // /* MW 3 */ + 10942 "00010101" // /* MW 2 */ + 10943 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10949 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 484 11 +.src_ref 10 "softfloat.c" 484 11 first +.tail_call + 10954 "10111010" // MOVA r2, #156; J #10880 /* MW 10 */ /* control_operation: words=10 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */ + 10955 "00100000" // /* MW 9 */ + 10956 "00000000" // /* MW 8 */ + 10957 "00000000" // /* MW 7 */ + 10958 "01010000" // /* MW 6 */ + 10959 "00000101" // /* MW 5 */ + 10960 "00000000" // /* MW 4 */ + 10961 "00000000" // /* MW 3 */ + 10962 "10000010" // /* MW 2 */ + 10963 "00010011" // /* MW 1 */ +.src_ref 10 "softfloat.c" 484 60 +.src_ref 10 "softfloat.c" 484 62 +.delay_slot + 10964 "00011000" // ABS r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00010000" // /* MW 3 */ + 10966 "01000111" // /* MW 2 */ + 10967 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 483 16 +.delay_slot + 10968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10969 "00000001" // /* MW 3 */ + 10970 "00100000" // /* MW 2 */ + 10971 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 483 16 first +.delay_slot + 10972 "10011000" // LT r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10973 "00001010" // /* MW 3 */ + 10974 "01000011" // /* MW 2 */ + 10975 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10978 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10979 "00011100" // /* MW 13 */ + 10980 "00000000" // /* MW 12 */ + 10981 "00000000" // /* MW 11 */ + 10982 "01010111" // /* MW 10 */ + 10983 "00011010" // /* MW 9 */ + 10984 "01000000" // /* MW 8 */ + 10985 "00000000" // /* MW 7 */ + 10986 "00000000" // /* MW 6 */ + 10987 "10110110" // /* MW 5 */ + 10988 "00000010" // /* MW 4 */ + 10989 "11110000" // /* MW 3 */ + 10990 "00101100" // /* MW 2 */ + 10991 "00000000" // /* MW 1 */ +.label TGT_Fint32_to_float32_80 +.src_ref 10 "softfloat.c" 481 18 first +.return_address + 10992 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10993 "00000000" // /* MW 3 */ + 10994 "00101000" // /* MW 2 */ + 10995 "00010000" // /* MW 1 */ +.delay_slot + 10996 "00011000" // MOVX r0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10997 "00000001" // /* MW 3 */ + 10998 "00000000" // /* MW 2 */ + 10999 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11001 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11007 "00000000" // /* MW 1 */ +.label TGT_Fint32_to_float32_96 +.src_ref 10 "softfloat.c" 482 37 first + 11008 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11009 "00000000" // /* MW 3 */ + 11010 "00101000" // /* MW 2 */ + 11011 "00010000" // /* MW 1 */ +.delay_slot + 11012 "01000100" // MOVXM r0, #-822083584 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11013 "00000000" // /* MW 5 */ + 11014 "00100000" // /* MW 4 */ + 11015 "00000000" // /* MW 3 */ + 11016 "00000000" // /* MW 2 */ + 11017 "11001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label int32_to_float32__end + 11025 "00000000" // /* MW 1 */ +.label _ZL14addFloat32Sigsjji +.function addFloat32Sigs _ZL14addFloat32Sigsjji +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 81 14 +.src_ref 10 "softfloat.c" 734 first +.function_start + 11040 "10111010" // MOVA r18, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11041 "10010000" // /* MW 9 */ + 11042 "11111111" // /* MW 8 */ + 11043 "00001111" // /* MW 7 */ + 11044 "11111110" // /* MW 6 */ + 11045 "00011111" // /* MW 5 */ + 11046 "00000000" // /* MW 4 */ + 11047 "00000000" // /* MW 3 */ + 11048 "00110010" // /* MW 2 */ + 11049 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 first + 11050 "10011000" // LSHL r17, r1, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11051 "00101101" // /* MW 3 */ + 11052 "01100011" // /* MW 2 */ + 11053 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 + 11054 "10011000" // LSHL r4, r2, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11055 "00101101" // /* MW 3 */ + 11056 "10001001" // /* MW 2 */ + 11057 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11058 "00011000" // EXTEND.u8 r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11059 "10010000" // /* MW 3 */ + 11060 "01110110" // /* MW 2 */ + 11061 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11062 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11063 "10010000" // /* MW 3 */ + 11064 "00110010" // /* MW 2 */ + 11065 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 744 19 first +.src_ref 10 "softfloat.c" 747 11 +.src_ref 10 "softfloat.c" 761 22 +.src_ref 10 "softfloat.c" 772 35 +.src_ref 10 "softfloat.c" 788 24 + 11066 "01100100" // SUB r17, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11067 "00000001" // /* MW 5 */ + 11068 "00100000" // /* MW 4 */ + 11069 "00111100" // /* MW 3 */ + 11070 "01110010" // /* MW 2 */ + 11071 "11011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 747 11 first + 11072 "10011000" // LT r4, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11073 "00011010" // /* MW 3 */ + 11074 "00001001" // /* MW 2 */ + 11075 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 747 4 + 11076 "10000100" // JNZ r4, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */ + 11077 "00000001" // /* MW 5 */ + 11078 "01000000" // /* MW 4 */ + 11079 "11111000" // /* MW 3 */ + 11080 "00010101" // /* MW 2 */ + 11081 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first +.delay_slot + 11082 "10011000" // AND r19, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11083 "00000100" // /* MW 3 */ + 11084 "01100111" // /* MW 2 */ + 11085 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 745 9 +.src_ref 10 "softfloat.c" 746 9 +.delay_slot + 11086 "01100100" // AND r16, r2, r16; MOV r0, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11087 "00011001" // /* MW 5 */ + 11088 "00100000" // /* MW 4 */ + 11089 "10010000" // /* MW 3 */ + 11090 "00100000" // /* MW 2 */ + 11091 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 745 9 first +.delay_slot + 11092 "10011000" // LSHL r19, r19, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11093 "00001101" // /* MW 3 */ + 11094 "11100110" // /* MW 2 */ + 11095 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 746 9 first +.src_ref 10 "softfloat.c" 748 18 +.src_ref 10 "softfloat.c" 762 18 +.delay_slot + 11096 "01100100" // LSHL r16, r16, r0; MOV r20, #255 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11097 "11111101" // /* MW 5 */ + 11098 "00100011" // /* MW 4 */ + 11099 "10111010" // /* MW 3 */ + 11100 "00000001" // /* MW 2 */ + 11101 "10000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 +.src_ref 10 "softfloat.c" 128 31 +.src_ref 10 "softfloat.c" 748 18 first +.delay_slot + 11102 "01100100" // EQ r0, r27, r20; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11103 "01111101" // /* MW 5 */ + 11104 "00100000" // /* MW 4 */ + 11105 "11111001" // /* MW 3 */ + 11106 "00101000" // /* MW 2 */ + 11107 "11011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 761 22 first + 11108 "10011000" // GE r5, r17, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11109 "10001001" // /* MW 3 */ + 11110 "01001011" // /* MW 2 */ + 11111 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 761 9 + 11112 "10000100" // JNZ r5, #11440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11440 delay_slots=5 */ + 11113 "00000001" // /* MW 5 */ + 11114 "01000000" // /* MW 4 */ + 11115 "01011000" // /* MW 3 */ + 11116 "00010110" // /* MW 2 */ + 11117 "00101000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 11118 "10011000" // LSHL r4, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11119 "00101101" // /* MW 3 */ + 11120 "11001001" // /* MW 2 */ + 11121 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11129 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 762 18 first + 11130 "10011000" // EQ r20, r25, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11131 "01000111" // /* MW 3 */ + 11132 "01101001" // /* MW 2 */ + 11133 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 762 8 + 11134 "10000100" // JNZ r20, #11392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11392 delay_slots=5 */ + 11135 "00000001" // /* MW 5 */ + 11136 "01000000" // /* MW 4 */ + 11137 "01000000" // /* MW 3 */ + 11138 "00010110" // /* MW 2 */ + 11139 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11143 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11149 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11150 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11151 "10100000" // /* MW 3 */ + 11152 "01010001" // /* MW 2 */ + 11153 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 787 4 + 11154 "11111000" // MOV r2, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11155 "10100000" // /* MW 3 */ + 11156 "10011100" // /* MW 2 */ + 11157 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 767 12 first + 11158 "00011000" // ADD r0, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11159 "00000111" // /* MW 3 */ + 11160 "01000000" // /* MW 2 */ + 11161 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 766 8 first + 11162 "00011000" // SEL.EQZ r17, r0, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11163 "00010010" // /* MW 3 */ + 11164 "00100011" // /* MW 2 */ + 11165 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 772 35 first + 11166 "10011000" // SUB r17, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11167 "00010001" // /* MW 3 */ + 11168 "00100011" // /* MW 2 */ + 11169 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11170 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */ + 11171 "00000001" // /* MW 5 */ + 11172 "00000000" // /* MW 4 */ + 11173 "00101000" // /* MW 3 */ + 11174 "00010110" // /* MW 2 */ + 11175 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 770 17 +.src_ref 10 "softfloat.c" 785 9 +.delay_slot + 11176 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11177 "00000000" // /* MW 5 */ + 11178 "00100000" // /* MW 4 */ + 11179 "00001010" // /* MW 3 */ + 11180 "00000000" // /* MW 2 */ + 11181 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 770 17 first +.delay_slot + 11182 "10011000" // OR r3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "01000101" // /* MW 3 */ + 11184 "11000111" // /* MW 2 */ + 11185 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 766 8 first +.delay_slot + 11186 "00011000" // SEL.EQZ r19, r19, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11187 "00110010" // /* MW 3 */ + 11188 "11100110" // /* MW 2 */ + 11189 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11193 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 11194 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11195 "10000001" // /* MW 5 */ + 11196 "00100000" // /* MW 4 */ + 11197 "00110000" // /* MW 3 */ + 11198 "11100010" // /* MW 2 */ + 11199 "11000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11200 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11201 "00100100" // /* MW 3 */ + 11202 "11100101" // /* MW 2 */ + 11203 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11204 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11205 "00101101" // /* MW 3 */ + 11206 "11100101" // /* MW 2 */ + 11207 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11208 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11209 "00001010" // /* MW 3 */ + 11210 "01110110" // /* MW 2 */ + 11211 "00010100" // /* MW 1 */ + 11212 "10000100" // J #11344 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11344 delay_slots=5 */ + 11213 "00000000" // /* MW 5 */ + 11214 "00000000" // /* MW 4 */ + 11215 "00101000" // /* MW 3 */ + 11216 "00010110" // /* MW 2 */ + 11217 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 first +.delay_slot + 11218 "10011000" // LSHL r3, r19, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11219 "00111101" // /* MW 3 */ + 11220 "11000110" // /* MW 2 */ + 11221 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 +.delay_slot + 11222 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11223 "11110000" // /* MW 3 */ + 11224 "10100100" // /* MW 2 */ + 11225 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first +.delay_slot + 11226 "00011000" // NEZ r17, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11227 "11110000" // /* MW 3 */ + 11228 "11100010" // /* MW 2 */ + 11229 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first +.delay_slot + 11230 "10011000" // OR r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11231 "00100101" // /* MW 3 */ + 11232 "11100101" // /* MW 2 */ + 11233 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first +.delay_slot + 11234 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11235 "01100000" // /* MW 13 */ + 11236 "00101011" // /* MW 12 */ + 11237 "00000000" // /* MW 11 */ + 11238 "10101111" // /* MW 10 */ + 11239 "00110100" // /* MW 9 */ + 11240 "00000000" // /* MW 8 */ + 11241 "00100010" // /* MW 7 */ + 11242 "01100111" // /* MW 6 */ + 11243 "00100100" // /* MW 5 */ + 11244 "00000000" // /* MW 4 */ + 11245 "11110000" // /* MW 3 */ + 11246 "00101100" // /* MW 2 */ + 11247 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_208 +.src_ref 10 "softfloat.c" 748 8 first + 11248 "10000100" // JNZ r0, #11504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11504 delay_slots=5 */ + 11249 "00000001" // /* MW 5 */ + 11250 "01000000" // /* MW 4 */ + 11251 "01111000" // /* MW 3 */ + 11252 "00010110" // /* MW 2 */ + 11253 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 756 17 +.src_ref 10 "softfloat.c" 785 9 +.delay_slot + 11254 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11255 "00000000" // /* MW 5 */ + 11256 "00100000" // /* MW 4 */ + 11257 "00001010" // /* MW 3 */ + 11258 "00000000" // /* MW 2 */ + 11259 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11265 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11267 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11268 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11269 "10100000" // /* MW 3 */ + 11270 "01010001" // /* MW 2 */ + 11271 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 753 12 first +.src_ref 10 "softfloat.c" 787 4 + 11272 "11100100" // ADD r3, r17, #-1; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11273 "01000001" // /* MW 5 */ + 11274 "00111011" // /* MW 4 */ + 11275 "11100001" // /* MW 3 */ + 11276 "11111111" // /* MW 2 */ + 11277 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 +.src_ref 10 "softfloat.c" 752 18 + 11278 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "10100000" // /* MW 3 */ + 11280 "11011100" // /* MW 2 */ + 11281 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 first +.src_ref 10 "softfloat.c" 752 18 first + 11282 "00011000" // SEL.EQZ r17, r3, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11283 "00010010" // /* MW 3 */ + 11284 "11100011" // /* MW 2 */ + 11285 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11286 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */ + 11287 "00000001" // /* MW 5 */ + 11288 "00000000" // /* MW 4 */ + 11289 "00101000" // /* MW 3 */ + 11290 "00010110" // /* MW 2 */ + 11291 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 756 17 first +.delay_slot + 11292 "10011000" // OR r0, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "00000101" // /* MW 3 */ + 11294 "00000001" // /* MW 2 */ + 11295 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 first +.src_ref 10 "softfloat.c" 752 18 first +.delay_slot + 11296 "00011000" // SEL.EQZ r16, r16, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00000010" // /* MW 3 */ + 11298 "00100000" // /* MW 2 */ + 11299 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11305 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 11306 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11307 "10000001" // /* MW 5 */ + 11308 "00100000" // /* MW 4 */ + 11309 "00110000" // /* MW 3 */ + 11310 "11100010" // /* MW 2 */ + 11311 "11000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11312 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "00100100" // /* MW 3 */ + 11314 "11100101" // /* MW 2 */ + 11315 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11316 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00101101" // /* MW 3 */ + 11318 "00100101" // /* MW 2 */ + 11319 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 11320 "10011000" // LSHL r3, r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "00111101" // /* MW 3 */ + 11322 "00000110" // /* MW 2 */ + 11323 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11324 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11325 "00001010" // /* MW 3 */ + 11326 "01110110" // /* MW 2 */ + 11327 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 11328 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11329 "11110000" // /* MW 3 */ + 11330 "10100100" // /* MW 2 */ + 11331 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11332 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11333 "11110000" // /* MW 3 */ + 11334 "00100000" // /* MW 2 */ + 11335 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 11336 "10011000" // OR r17, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11337 "00100101" // /* MW 3 */ + 11338 "11100011" // /* MW 2 */ + 11339 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 11340 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11341 "00010010" // /* MW 3 */ + 11342 "00100001" // /* MW 2 */ + 11343 "00010100" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_304 +.src_ref 10 "softfloat.c" 785 9 first +.src_ref 10 "softfloat.c" 786 26 +.src_ref 10 "softfloat.c" 787 4 first + 11344 "10111010" // MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11345 "11001000" // /* MW 9 */ + 11346 "10111111" // /* MW 8 */ + 11347 "00101000" // /* MW 7 */ + 11348 "00101110" // /* MW 6 */ + 11349 "00111010" // /* MW 5 */ + 11350 "00100111" // /* MW 4 */ + 11351 "00000000" // /* MW 3 */ + 11352 "00110010" // /* MW 2 */ + 11353 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 786 18 first +.src_ref 10 "softfloat.c" 790 8 first + 11354 "00100100" // ADD r19, r19, r16; ADD.NC r16, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11355 "00000001" // /* MW 5 */ + 11356 "00110001" // /* MW 4 */ + 11357 "00011000" // /* MW 3 */ + 11358 "11100000" // /* MW 2 */ + 11359 "10011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 786 26 + 11360 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11361 "00101101" // /* MW 3 */ + 11362 "11100101" // /* MW 2 */ + 11363 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 24 first + 11364 "10011000" // LT r27, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11365 "10001010" // /* MW 3 */ + 11366 "10110111" // /* MW 2 */ + 11367 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 4 + 11368 "00011000" // SEL.EQZ r2, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11369 "00000010" // /* MW 3 */ + 11370 "01000101" // /* MW 2 */ + 11371 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 4 + 11372 "00011000" // SEL.EQZ r3, r18, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11373 "00110010" // /* MW 3 */ + 11374 "10000111" // /* MW 2 */ + 11375 "00010100" // /* MW 1 */ +.label __ll1__ZL14addFloat32Sigsjji +.src_ref 10 "softfloat.c" 793 11 first +.tail_call + 11376 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 11377 "00000000" // /* MW 5 */ + 11378 "00000000" // /* MW 4 */ + 11379 "01111000" // /* MW 3 */ + 11380 "00010100" // /* MW 2 */ + 11381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11391 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_352 +.src_ref 10 "softfloat.c" 763 12 first +.return_address + 11392 "10000100" // JNZ r16, #11536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11536 delay_slots=5 */ + 11393 "00000001" // /* MW 5 */ + 11394 "01000000" // /* MW 4 */ + 11395 "10001000" // /* MW 3 */ + 11396 "00010110" // /* MW 2 */ + 11397 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 764 12 first + 11408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11409 "00000000" // /* MW 3 */ + 11410 "00101000" // /* MW 2 */ + 11411 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 11412 "01000100" // MOVXM r16, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11413 "00000000" // /* MW 5 */ + 11414 "00100000" // /* MW 4 */ + 11415 "00001000" // /* MW 3 */ + 11416 "10000000" // /* MW 2 */ + 11417 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 first +.delay_slot + 11418 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11419 "00000000" // /* MW 3 */ + 11420 "00000001" // /* MW 2 */ + 11421 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11427 "00011100" // /* MW 13 */ + 11428 "00000000" // /* MW 12 */ + 11429 "00000000" // /* MW 11 */ + 11430 "01010111" // /* MW 10 */ + 11431 "00011010" // /* MW 9 */ + 11432 "01000000" // /* MW 8 */ + 11433 "00000000" // /* MW 7 */ + 11434 "00000000" // /* MW 6 */ + 11435 "10110110" // /* MW 5 */ + 11436 "00000010" // /* MW 4 */ + 11437 "11110000" // /* MW 3 */ + 11438 "00101100" // /* MW 2 */ + 11439 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_400 +.src_ref 10 "softfloat.c" 776 8 first + 11440 "10000100" // JNZ r0, #11552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11552 delay_slots=5 */ + 11441 "00000001" // /* MW 5 */ + 11442 "01000000" // /* MW 4 */ + 11443 "10010000" // /* MW 3 */ + 11444 "00010110" // /* MW 2 */ + 11445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 8 first + 11456 "10000100" // JZ r27, #11600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11600 delay_slots=5 */ + 11457 "00000001" // /* MW 5 */ + 11458 "00000000" // /* MW 4 */ + 11459 "10101000" // /* MW 3 */ + 11460 "00010110" // /* MW 2 */ + 11461 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11472 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11473 "10100000" // /* MW 3 */ + 11474 "01010001" // /* MW 2 */ + 11475 "00011000" // /* MW 1 */ + 11476 "10000100" // J #11376 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11376 delay_slots=5 */ + 11477 "00000000" // /* MW 5 */ + 11478 "00000000" // /* MW 4 */ + 11479 "00111000" // /* MW 3 */ + 11480 "00010110" // /* MW 2 */ + 11481 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 26 +.delay_slot + 11482 "01000100" // MOVXM r17, #1073741824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11483 "00000000" // /* MW 5 */ + 11484 "10100000" // /* MW 4 */ + 11485 "00001000" // /* MW 3 */ + 11486 "00000000" // /* MW 2 */ + 11487 "01000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 26 first +.src_ref 10 "softfloat.c" 793 11 +.delay_slot + 11488 "11100100" // ADD r17, r19, r17; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11489 "01000001" // /* MW 5 */ + 11490 "00111011" // /* MW 4 */ + 11491 "00010001" // /* MW 3 */ + 11492 "01100010" // /* MW 2 */ + 11493 "10011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 33 +.delay_slot + 11494 "10011000" // ADD r3, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11495 "00000000" // /* MW 3 */ + 11496 "01000111" // /* MW 2 */ + 11497 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11500 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11501 "01100111" // /* MW 3 */ + 11502 "00000001" // /* MW 2 */ + 11503 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_464 +.src_ref 10 "softfloat.c" 749 12 first + 11504 "10000100" // JNZ r19, #11632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11632 delay_slots=5 */ + 11505 "00000001" // /* MW 5 */ + 11506 "01000000" // /* MW 4 */ + 11507 "10111000" // /* MW 3 */ + 11508 "00010110" // /* MW 2 */ + 11509 "10011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 750 12 first + 11520 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11521 "00000000" // /* MW 3 */ + 11522 "00101000" // /* MW 2 */ + 11523 "00010000" // /* MW 1 */ +.delay_slot + 11524 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "10100000" // /* MW 3 */ + 11526 "00010000" // /* MW 2 */ + 11527 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11535 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_496 +.src_ref 10 "softfloat.c" 763 31 first +.tail_call + 11536 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11537 "00000000" // /* MW 5 */ + 11538 "00000000" // /* MW 4 */ + 11539 "01000000" // /* MW 3 */ + 11540 "00010100" // /* MW 2 */ + 11541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11551 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_512 +.src_ref 10 "softfloat.c" 777 22 first +.return_address + 11552 "10011000" // OR r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11553 "00000101" // /* MW 3 */ + 11554 "11100001" // /* MW 2 */ + 11555 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 777 12 + 11556 "10000100" // JNZ r16, #11648 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11648 delay_slots=5 */ + 11557 "00000001" // /* MW 5 */ + 11558 "01000000" // /* MW 4 */ + 11559 "11000000" // /* MW 3 */ + 11560 "00010110" // /* MW 2 */ + 11561 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11567 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11571 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 778 12 first + 11572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11573 "00000000" // /* MW 3 */ + 11574 "00101000" // /* MW 2 */ + 11575 "00010000" // /* MW 1 */ +.delay_slot + 11576 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11577 "10100000" // /* MW 3 */ + 11578 "00010000" // /* MW 2 */ + 11579 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11583 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11585 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11586 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11587 "00011100" // /* MW 13 */ + 11588 "00000000" // /* MW 12 */ + 11589 "00000000" // /* MW 11 */ + 11590 "01010111" // /* MW 10 */ + 11591 "00011010" // /* MW 9 */ + 11592 "01000000" // /* MW 8 */ + 11593 "00000000" // /* MW 7 */ + 11594 "00000000" // /* MW 6 */ + 11595 "10110110" // /* MW 5 */ + 11596 "00000010" // /* MW 4 */ + 11597 "11110000" // /* MW 3 */ + 11598 "00101100" // /* MW 2 */ + 11599 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_560 +.src_ref 10 "softfloat.c" 780 25 first +.src_ref 10 "softfloat.c" 780 62 first + 11600 "10100100" // RET lr; ADD.NC r16, r19, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11601 "10000010" // /* MW 5 */ + 11602 "00110011" // /* MW 4 */ + 11603 "00001000" // /* MW 3 */ + 11604 "00000000" // /* MW 2 */ + 11605 "00000101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 70 +.delay_slot + 11606 "00011000" // MOVX r17, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11607 "11101001" // /* MW 3 */ + 11608 "11100010" // /* MW 2 */ + 11609 "00010111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 70 +.delay_slot + 11610 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11611 "00011101" // /* MW 3 */ + 11612 "00100001" // /* MW 2 */ + 11613 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 66 first +.delay_slot + 11614 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11615 "00000000" // /* MW 3 */ + 11616 "00000001" // /* MW 2 */ + 11617 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11620 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11621 "10000001" // /* MW 11 */ + 11622 "10101101" // /* MW 10 */ + 11623 "00000000" // /* MW 9 */ + 11624 "00000000" // /* MW 8 */ + 11625 "00000000" // /* MW 7 */ + 11626 "00000000" // /* MW 6 */ + 11627 "00100000" // /* MW 5 */ + 11628 "00000000" // /* MW 4 */ + 11629 "11110000" // /* MW 3 */ + 11630 "00101100" // /* MW 2 */ + 11631 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_592 +.src_ref 10 "softfloat.c" 749 31 first +.tail_call + 11632 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11633 "00000000" // /* MW 5 */ + 11634 "00000000" // /* MW 4 */ + 11635 "01000000" // /* MW 3 */ + 11636 "00010100" // /* MW 2 */ + 11637 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11639 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11647 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_608 +.src_ref 10 "softfloat.c" 777 38 first +.tail_call +.return_address + 11648 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11649 "00000000" // /* MW 5 */ + 11650 "00000000" // /* MW 4 */ + 11651 "01000000" // /* MW 3 */ + 11652 "00010100" // /* MW 2 */ + 11653 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL14addFloat32Sigsjji__end + 11663 "00000000" // /* MW 1 */ +.label _ZL14subFloat32Sigsjji +.function subFloat32Sigs _ZL14subFloat32Sigsjji +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 81 14 +.src_ref 10 "softfloat.c" 805 first +.function_start + 11664 "10111010" // MOVA r17, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11665 "10010000" // /* MW 9 */ + 11666 "11111111" // /* MW 8 */ + 11667 "00001111" // /* MW 7 */ + 11668 "11111110" // /* MW 6 */ + 11669 "00011111" // /* MW 5 */ + 11670 "00000000" // /* MW 4 */ + 11671 "00000000" // /* MW 3 */ + 11672 "00110001" // /* MW 2 */ + 11673 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 first + 11674 "10011000" // LSHL r4, r2, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11675 "00011101" // /* MW 3 */ + 11676 "10001001" // /* MW 2 */ + 11677 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 + 11678 "10011000" // LSHL r18, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11679 "00011101" // /* MW 3 */ + 11680 "01100101" // /* MW 2 */ + 11681 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first + 11682 "10011000" // AND r20, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11683 "00000100" // /* MW 3 */ + 11684 "01101001" // /* MW 2 */ + 11685 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 first + 11686 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11687 "10010000" // /* MW 3 */ + 11688 "00110010" // /* MW 2 */ + 11689 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11690 "00011000" // EXTEND.u8 r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11691 "10010000" // /* MW 3 */ + 11692 "10110110" // /* MW 2 */ + 11693 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first +.src_ref 10 "softfloat.c" 816 9 +.src_ref 10 "softfloat.c" 817 9 + 11694 "01100100" // AND r16, r2, r16; MOV r19, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11695 "00011101" // /* MW 5 */ + 11696 "10100000" // /* MW 4 */ + 11697 "10011001" // /* MW 3 */ + 11698 "00100000" // /* MW 2 */ + 11699 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 816 9 first + 11700 "10011000" // LSHL r17, r20, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11701 "00111101" // /* MW 3 */ + 11702 "00100011" // /* MW 2 */ + 11703 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 815 19 first +.src_ref 10 "softfloat.c" 818 11 +.src_ref 10 "softfloat.c" 819 17 +.src_ref 10 "softfloat.c" 843 31 + 11704 "01100100" // SUB r18, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11705 "00000001" // /* MW 5 */ + 11706 "00100000" // /* MW 4 */ + 11707 "00111100" // /* MW 3 */ + 11708 "10110010" // /* MW 2 */ + 11709 "11011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 818 11 first + 11710 "10011000" // LT r5, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11711 "00101010" // /* MW 3 */ + 11712 "00001011" // /* MW 2 */ + 11713 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 818 4 + 11714 "10000100" // JNZ r5, #11904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11904 delay_slots=5 */ + 11715 "00000001" // /* MW 5 */ + 11716 "01000000" // /* MW 4 */ + 11717 "01000000" // /* MW 3 */ + 11718 "00010111" // /* MW 2 */ + 11719 "00101000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 817 9 first +.delay_slot + 11720 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11721 "00111101" // /* MW 3 */ + 11722 "00100001" // /* MW 2 */ + 11723 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 14 +.src_ref 10 "softfloat.c" 851 14 +.src_ref 10 "softfloat.c" 859 13 +.src_ref 10 "softfloat.c" 862 9 +.delay_slot + 11724 "10111010" // MOVA r0, #255; MOVXM r4, #1073741824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11725 "00010000" // /* MW 9 */ + 11726 "00000000" // /* MW 8 */ + 11727 "10001000" // /* MW 7 */ + 11728 "00000000" // /* MW 6 */ + 11729 "00000000" // /* MW 5 */ + 11730 "00010000" // /* MW 4 */ + 11731 "00000000" // /* MW 3 */ + 11732 "11100000" // /* MW 2 */ + 11733 "00011111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 851 14 first +.delay_slot + 11734 "10011000" // EQ r20, r27, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "00000111" // /* MW 3 */ + 11736 "11101000" // /* MW 2 */ + 11737 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 862 9 first +.delay_slot + 11738 "10011000" // OR r19, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11739 "01000101" // /* MW 3 */ + 11740 "01100110" // /* MW 2 */ + 11741 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 859 13 first +.delay_slot + 11742 "10011000" // OR r4, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11743 "00000101" // /* MW 3 */ + 11744 "00001001" // /* MW 2 */ + 11745 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 819 17 first + 11746 "10011000" // GE r6, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11747 "10001001" // /* MW 3 */ + 11748 "10001101" // /* MW 2 */ + 11749 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 819 4 + 11750 "10000100" // JNZ r6, #12064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12064 delay_slots=5 */ + 11751 "00000001" // /* MW 5 */ + 11752 "01000000" // /* MW 4 */ + 11753 "10010000" // /* MW 3 */ + 11754 "00010111" // /* MW 2 */ + 11755 "00110000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.src_ref 10 "softfloat.c" 835 34 +.delay_slot + 11756 "00011000" // MOVX r5, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11757 "00000101" // /* MW 3 */ + 11758 "00001010" // /* MW 2 */ + 11759 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 835 34 first +.delay_slot + 11760 "10011000" // XOR r7, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11761 "01010110" // /* MW 3 */ + 11762 "11001110" // /* MW 2 */ + 11763 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11767 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11769 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 14 first + 11770 "10011000" // EQ r20, r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11771 "00000111" // /* MW 3 */ + 11772 "01101000" // /* MW 2 */ + 11773 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 4 + 11774 "10000100" // JNZ r20, #12176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12176 delay_slots=5 */ + 11775 "00000001" // /* MW 5 */ + 11776 "01000000" // /* MW 4 */ + 11777 "11001000" // /* MW 3 */ + 11778 "00010111" // /* MW 2 */ + 11779 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 + 11790 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11791 "10100000" // /* MW 3 */ + 11792 "01010011" // /* MW 2 */ + 11793 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 838 8 first + 11794 "00011000" // ADD r16, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11795 "00000111" // /* MW 3 */ + 11796 "10100000" // /* MW 2 */ + 11797 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 837 4 first + 11798 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11799 "00100010" // /* MW 3 */ + 11800 "00100001" // /* MW 2 */ + 11801 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 843 31 first + 11802 "10011000" // SUB r16, r24, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11803 "00000001" // /* MW 3 */ + 11804 "00100001" // /* MW 2 */ + 11805 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11806 "10000100" // JZ r16, #11872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11872 delay_slots=5 */ + 11807 "00000001" // /* MW 5 */ + 11808 "00000000" // /* MW 4 */ + 11809 "00110000" // /* MW 3 */ + 11810 "00010111" // /* MW 2 */ + 11811 "10000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 837 4 first +.delay_slot + 11812 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00110010" // /* MW 3 */ + 11814 "01100011" // /* MW 2 */ + 11815 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11817 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11819 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11821 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11823 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first +.src_ref 10 "softfloat-macros" 50 48 + 11824 "10111010" // MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11825 "01011000" // /* MW 9 */ + 11826 "00011111" // /* MW 8 */ + 11827 "01001000" // /* MW 7 */ + 11828 "00001110" // /* MW 6 */ + 11829 "00111000" // /* MW 5 */ + 11830 "00110000" // /* MW 4 */ + 11831 "00000000" // /* MW 3 */ + 11832 "00010100" // /* MW 2 */ + 11833 "00000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11834 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11835 "00100100" // /* MW 3 */ + 11836 "11100101" // /* MW 2 */ + 11837 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11838 "10011000" // LSHL r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11839 "00101101" // /* MW 3 */ + 11840 "01100101" // /* MW 2 */ + 11841 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11842 "00011000" // NEZ r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11843 "11110000" // /* MW 3 */ + 11844 "01100110" // /* MW 2 */ + 11845 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11846 "10011000" // LT r27, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11847 "01001010" // /* MW 3 */ + 11848 "00110111" // /* MW 2 */ + 11849 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 first + 11850 "10011000" // LSHL r17, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11851 "00111101" // /* MW 3 */ + 11852 "01100010" // /* MW 2 */ + 11853 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 + 11854 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11855 "11110000" // /* MW 3 */ + 11856 "10100100" // /* MW 2 */ + 11857 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 + 11858 "10011000" // OR r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11859 "00100101" // /* MW 3 */ + 11860 "01100001" // /* MW 2 */ + 11861 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 11862 "01111010" // NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11863 "00000010" // /* MW 9 */ + 11864 "11100011" // /* MW 8 */ + 11865 "00000100" // /* MW 7 */ + 11866 "00000000" // /* MW 6 */ + 11867 "01011011" // /* MW 5 */ + 11868 "00000001" // /* MW 4 */ + 11869 "11110000" // /* MW 3 */ + 11870 "00101100" // /* MW 2 */ + 11871 "00000000" // /* MW 1 */ +.label __ll2__ZL14subFloat32Sigsjji + 11872 "10000100" // J #12032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */ + 11873 "00000000" // /* MW 5 */ + 11874 "00000000" // /* MW 4 */ + 11875 "10000000" // /* MW 3 */ + 11876 "00010111" // /* MW 2 */ + 11877 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 846 16 first +.delay_slot + 11878 "10011000" // SUB r3, r4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11879 "00010001" // /* MW 3 */ + 11880 "00000111" // /* MW 2 */ + 11881 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11889 "00000000" // /* MW 15 */ + 11890 "00000000" // /* MW 14 */ + 11891 "01111000" // /* MW 13 */ + 11892 "10100101" // /* MW 12 */ + 11893 "00000001" // /* MW 11 */ + 11894 "00000000" // /* MW 10 */ + 11895 "00000000" // /* MW 9 */ + 11896 "00000000" // /* MW 8 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00000001" // /* MW 6 */ + 11899 "00100000" // /* MW 5 */ + 11900 "00000000" // /* MW 4 */ + 11901 "11110000" // /* MW 3 */ + 11902 "00101100" // /* MW 2 */ + 11903 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_240 +.src_ref 10 "softfloat.c" 851 4 first + 11904 "10000100" // JNZ r20, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */ + 11905 "00000001" // /* MW 5 */ + 11906 "01000000" // /* MW 4 */ + 11907 "11100000" // /* MW 3 */ + 11908 "00010111" // /* MW 2 */ + 11909 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11911 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11913 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11919 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 + 11920 "11111000" // MOV r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11921 "10100000" // /* MW 3 */ + 11922 "00011101" // /* MW 2 */ + 11923 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 + 11924 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "10100000" // /* MW 3 */ + 11926 "01010001" // /* MW 2 */ + 11927 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 +.src_ref 10 "softfloat.c" 855 14 + 11928 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "10100000" // /* MW 3 */ + 11930 "11011100" // /* MW 2 */ + 11931 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 + 11932 "11111000" // MOV r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11933 "00100000" // /* MW 3 */ + 11934 "01010000" // /* MW 2 */ + 11935 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 856 8 first + 11936 "00011000" // ADD r17, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11937 "11111111" // /* MW 3 */ + 11938 "10100011" // /* MW 2 */ + 11939 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 first +.src_ref 10 "softfloat.c" 855 14 first + 11940 "00011000" // SEL.EQZ r17, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11941 "00100010" // /* MW 3 */ + 11942 "01100011" // /* MW 2 */ + 11943 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11944 "10000100" // JZ r17, #12016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12016 delay_slots=5 */ + 11945 "00000001" // /* MW 5 */ + 11946 "00000000" // /* MW 4 */ + 11947 "01111000" // /* MW 3 */ + 11948 "00010111" // /* MW 2 */ + 11949 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 first +.src_ref 10 "softfloat.c" 855 14 first +.delay_slot + 11950 "00011000" // SEL.EQZ r16, r16, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11951 "01000010" // /* MW 3 */ + 11952 "00100000" // /* MW 2 */ + 11953 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11959 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11961 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first +.src_ref 10 "softfloat-macros" 50 48 + 11962 "10111010" // MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11963 "01011000" // /* MW 9 */ + 11964 "00011111" // /* MW 8 */ + 11965 "10001000" // /* MW 7 */ + 11966 "10001110" // /* MW 6 */ + 11967 "00101000" // /* MW 5 */ + 11968 "00110001" // /* MW 4 */ + 11969 "00000000" // /* MW 3 */ + 11970 "00000011" // /* MW 2 */ + 11971 "00000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11972 "10011000" // AND r20, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11973 "01000100" // /* MW 3 */ + 11974 "10101001" // /* MW 2 */ + 11975 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11976 "10011000" // LSHL r20, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11977 "01001101" // /* MW 3 */ + 11978 "00101001" // /* MW 2 */ + 11979 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 11980 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11981 "00101101" // /* MW 3 */ + 11982 "00100101" // /* MW 2 */ + 11983 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11984 "10011000" // LT r27, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11985 "00111010" // /* MW 3 */ + 11986 "01110110" // /* MW 2 */ + 11987 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 11988 "00011000" // NEZ r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11989 "11110000" // /* MW 3 */ + 11990 "00101000" // /* MW 2 */ + 11991 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11992 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11993 "11110000" // /* MW 3 */ + 11994 "00100000" // /* MW 2 */ + 11995 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 11996 "10011000" // OR r17, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11997 "01000101" // /* MW 3 */ + 11998 "10100011" // /* MW 2 */ + 11999 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 12000 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "10010000" // /* MW 10 */ + 12007 "00001000" // /* MW 9 */ + 12008 "00100001" // /* MW 8 */ + 12009 "01011011" // /* MW 7 */ + 12010 "00000001" // /* MW 6 */ + 12011 "00100000" // /* MW 5 */ + 12012 "00000000" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00101100" // /* MW 2 */ + 12015 "00000000" // /* MW 1 */ +.label __ll1__ZL14subFloat32Sigsjji +.src_ref 10 "softfloat.c" 864 16 first + 12016 "11100001" // NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12017 "00000000" // /* MW 15 */ + 12018 "00000000" // /* MW 14 */ + 12019 "01111000" // /* MW 13 */ + 12020 "10100101" // /* MW 12 */ + 12021 "00000001" // /* MW 11 */ + 12022 "00001100" // /* MW 10 */ + 12023 "00111000" // /* MW 9 */ + 12024 "00100110" // /* MW 8 */ + 12025 "01011011" // /* MW 7 */ + 12026 "00000001" // /* MW 6 */ + 12027 "00100000" // /* MW 5 */ + 12028 "00000000" // /* MW 4 */ + 12029 "11110000" // /* MW 3 */ + 12030 "00101100" // /* MW 2 */ + 12031 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_368 +.src_ref 10 "softfloat.c" 868 11 first +.tail_call + 12032 "10000100" // J #10880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */ + 12033 "00000000" // /* MW 5 */ + 12034 "00000000" // /* MW 4 */ + 12035 "01000000" // /* MW 3 */ + 12036 "00010101" // /* MW 2 */ + 12037 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 first +.delay_slot + 12038 "00011000" // ADD r2, r25, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "11111111" // /* MW 3 */ + 12040 "01000101" // /* MW 2 */ + 12041 "00010110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12045 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12049 "00000000" // /* MW 15 */ + 12050 "00000000" // /* MW 14 */ + 12051 "01111000" // /* MW 13 */ + 12052 "10100101" // /* MW 12 */ + 12053 "00000001" // /* MW 11 */ + 12054 "00000000" // /* MW 10 */ + 12055 "00000000" // /* MW 9 */ + 12056 "00000000" // /* MW 8 */ + 12057 "01011011" // /* MW 7 */ + 12058 "00000001" // /* MW 6 */ + 12059 "00100000" // /* MW 5 */ + 12060 "00000000" // /* MW 4 */ + 12061 "11110000" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_400 +.src_ref 10 "softfloat.c" 820 4 first +.return_address + 12064 "10000100" // JNZ r20, #12256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12256 delay_slots=5 */ + 12065 "00000001" // /* MW 5 */ + 12066 "01000000" // /* MW 4 */ + 12067 "11110000" // /* MW 3 */ + 12068 "00010111" // /* MW 2 */ + 12069 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12079 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 829 14 first + 12080 "10011000" // LTU r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12081 "00011100" // /* MW 3 */ + 12082 "00100111" // /* MW 2 */ + 12083 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 829 4 + 12084 "10000100" // JNZ r19, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */ + 12085 "00000001" // /* MW 5 */ + 12086 "01000000" // /* MW 4 */ + 12087 "00001000" // /* MW 3 */ + 12088 "00011000" // /* MW 2 */ + 12089 "10011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 first +.delay_slot + 12090 "00011000" // SEL.EQZ r24, r5, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12091 "10010010" // /* MW 3 */ + 12092 "01110001" // /* MW 2 */ + 12093 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.delay_slot + 12094 "11111000" // MOV r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12095 "10100000" // /* MW 3 */ + 12096 "10011101" // /* MW 2 */ + 12097 "00011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.delay_slot + 12098 "00011000" // SEL.EQZ r25, r5, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12099 "00100010" // /* MW 3 */ + 12100 "01110011" // /* MW 2 */ + 12101 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12105 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 830 14 first + 12106 "10011000" // LTU r18, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00001100" // /* MW 3 */ + 12108 "01100101" // /* MW 2 */ + 12109 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 830 4 + 12110 "10000100" // JNZ r18, #12336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12336 delay_slots=5 */ + 12111 "00000001" // /* MW 5 */ + 12112 "01000000" // /* MW 4 */ + 12113 "00011000" // /* MW 3 */ + 12114 "00011000" // /* MW 2 */ + 12115 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12125 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 + 12126 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12127 "01111101" // /* MW 3 */ + 12128 "00100000" // /* MW 2 */ + 12129 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 24 + 12130 "01000100" // MOVXM p0, #509172 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12131 "11101000" // /* MW 5 */ + 12132 "11001001" // /* MW 4 */ + 12133 "11000000" // /* MW 3 */ + 12134 "00000111" // /* MW 2 */ + 12135 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 24 first + 12136 "10011000" // LDA r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "01010110" // /* MW 3 */ + 12138 "00000110" // /* MW 2 */ + 12139 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12141 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 12142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12143 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 4 + 12144 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12145 "00000000" // /* MW 3 */ + 12146 "00101000" // /* MW 2 */ + 12147 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 44 +.delay_slot + 12148 "00011000" // MOVX r17, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12149 "00001101" // /* MW 3 */ + 12150 "00100010" // /* MW 2 */ + 12151 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12153 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12155 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 44 +.delay_slot + 12156 "10011000" // EQ r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12157 "00100111" // /* MW 3 */ + 12158 "01100011" // /* MW 2 */ + 12159 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 12160 "11100001" // NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12161 "00000000" // /* MW 15 */ + 12162 "00000000" // /* MW 14 */ + 12163 "01111000" // /* MW 13 */ + 12164 "10100101" // /* MW 12 */ + 12165 "00000001" // /* MW 11 */ + 12166 "01101100" // /* MW 10 */ + 12167 "00001000" // /* MW 9 */ + 12168 "00100010" // /* MW 8 */ + 12169 "01011011" // /* MW 7 */ + 12170 "00000001" // /* MW 6 */ + 12171 "00100000" // /* MW 5 */ + 12172 "00000000" // /* MW 4 */ + 12173 "11110000" // /* MW 3 */ + 12174 "00101100" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_512 +.src_ref 10 "softfloat.c" 834 8 first + 12176 "10000100" // JNZ r16, #12368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12368 delay_slots=5 */ + 12177 "00000001" // /* MW 5 */ + 12178 "01000000" // /* MW 4 */ + 12179 "00101000" // /* MW 3 */ + 12180 "00011000" // /* MW 2 */ + 12181 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12191 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 + 12192 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12193 "01111101" // /* MW 3 */ + 12194 "00100000" // /* MW 2 */ + 12195 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 835 8 first + 12196 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12197 "00000000" // /* MW 3 */ + 12198 "00101000" // /* MW 2 */ + 12199 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 12200 "10011000" // LSHL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12201 "00001101" // /* MW 3 */ + 12202 "11100001" // /* MW 2 */ + 12203 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 12204 "01000100" // MOVXM r17, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12205 "00000000" // /* MW 5 */ + 12206 "10100000" // /* MW 4 */ + 12207 "00001000" // /* MW 3 */ + 12208 "10000000" // /* MW 2 */ + 12209 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 12210 "10011000" // ADD r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12211 "00000000" // /* MW 3 */ + 12212 "01000001" // /* MW 2 */ + 12213 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12216 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12217 "00011100" // /* MW 7 */ + 12218 "00000000" // /* MW 6 */ + 12219 "00000000" // /* MW 5 */ + 12220 "00000100" // /* MW 4 */ + 12221 "11110000" // /* MW 3 */ + 12222 "00101100" // /* MW 2 */ + 12223 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_560 +.src_ref 10 "softfloat.c" 852 8 first + 12224 "10000100" // JNZ r17, #12384 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12384 delay_slots=5 */ + 12225 "00000001" // /* MW 5 */ + 12226 "01000000" // /* MW 4 */ + 12227 "00110000" // /* MW 3 */ + 12228 "00011000" // /* MW 2 */ + 12229 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12239 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 853 8 first + 12240 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12241 "00000000" // /* MW 3 */ + 12242 "00101000" // /* MW 2 */ + 12243 "00010000" // /* MW 1 */ +.delay_slot + 12244 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12245 "10100000" // /* MW 3 */ + 12246 "00010000" // /* MW 2 */ + 12247 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_592 +.src_ref 10 "softfloat.c" 821 18 first + 12256 "10011000" // OR r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12257 "00000101" // /* MW 3 */ + 12258 "01100001" // /* MW 2 */ + 12259 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 821 8 + 12260 "10000100" // JNZ r16, #12400 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12400 delay_slots=5 */ + 12261 "00000001" // /* MW 5 */ + 12262 "01000000" // /* MW 4 */ + 12263 "00111000" // /* MW 3 */ + 12264 "00011000" // /* MW 2 */ + 12265 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12267 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12271 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12275 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 823 8 first + 12276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12277 "00000000" // /* MW 3 */ + 12278 "00101000" // /* MW 2 */ + 12279 "00010000" // /* MW 1 */ +.delay_slot + 12280 "01000100" // MOVXM r0, #2147483647 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12281 "11111110" // /* MW 5 */ + 12282 "00111111" // /* MW 4 */ + 12283 "11110000" // /* MW 3 */ + 12284 "11111111" // /* MW 2 */ + 12285 "01111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12292 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12293 "10000001" // /* MW 11 */ + 12294 "10101101" // /* MW 10 */ + 12295 "00000000" // /* MW 9 */ + 12296 "00000000" // /* MW 8 */ + 12297 "00000000" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00100000" // /* MW 5 */ + 12300 "00000000" // /* MW 4 */ + 12301 "11110000" // /* MW 3 */ + 12302 "00101100" // /* MW 2 */ + 12303 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_640 + 12304 "10000100" // J #12016 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12016 delay_slots=5 */ + 12305 "00000000" // /* MW 5 */ + 12306 "00000000" // /* MW 4 */ + 12307 "01111000" // /* MW 3 */ + 12308 "00010111" // /* MW 2 */ + 12309 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 +.delay_slot + 12310 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12311 "10100000" // /* MW 3 */ + 12312 "01010001" // /* MW 2 */ + 12313 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 864 16 +.delay_slot + 12314 "11111000" // MOV r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12315 "10100000" // /* MW 3 */ + 12316 "11011000" // /* MW 2 */ + 12317 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12322 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12323 "00011100" // /* MW 13 */ + 12324 "00000000" // /* MW 12 */ + 12325 "00000000" // /* MW 11 */ + 12326 "01010111" // /* MW 10 */ + 12327 "00011010" // /* MW 9 */ + 12328 "01000000" // /* MW 8 */ + 12329 "00000000" // /* MW 7 */ + 12330 "00000000" // /* MW 6 */ + 12331 "10110110" // /* MW 5 */ + 12332 "00000010" // /* MW 4 */ + 12333 "11110000" // /* MW 3 */ + 12334 "00101100" // /* MW 2 */ + 12335 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_672 + 12336 "10000100" // J #11872 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11872 delay_slots=5 */ + 12337 "00000000" // /* MW 5 */ + 12338 "00000000" // /* MW 4 */ + 12339 "00110000" // /* MW 3 */ + 12340 "00010111" // /* MW 2 */ + 12341 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 846 16 +.delay_slot + 12342 "11111000" // MOV r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12343 "00100000" // /* MW 3 */ + 12344 "00011000" // /* MW 2 */ + 12345 "00011001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 +.delay_slot + 12346 "11111000" // MOV r25, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12347 "00100000" // /* MW 3 */ + 12348 "01011100" // /* MW 2 */ + 12349 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 +.delay_slot + 12350 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12351 "10100000" // /* MW 3 */ + 12352 "01010011" // /* MW 2 */ + 12353 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12356 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12357 "10000001" // /* MW 11 */ + 12358 "10101101" // /* MW 10 */ + 12359 "00000000" // /* MW 9 */ + 12360 "00000000" // /* MW 8 */ + 12361 "00000000" // /* MW 7 */ + 12362 "00000000" // /* MW 6 */ + 12363 "00100000" // /* MW 5 */ + 12364 "00000000" // /* MW 4 */ + 12365 "11110000" // /* MW 3 */ + 12366 "00101100" // /* MW 2 */ + 12367 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_704 +.src_ref 10 "softfloat.c" 834 27 first +.tail_call + 12368 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12369 "00000000" // /* MW 5 */ + 12370 "00000000" // /* MW 4 */ + 12371 "01000000" // /* MW 3 */ + 12372 "00010100" // /* MW 2 */ + 12373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12383 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_720 +.src_ref 10 "softfloat.c" 852 27 first +.tail_call +.return_address + 12384 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12385 "00000000" // /* MW 5 */ + 12386 "00000000" // /* MW 4 */ + 12387 "01000000" // /* MW 3 */ + 12388 "00010100" // /* MW 2 */ + 12389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12399 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_736 +.src_ref 10 "softfloat.c" 821 34 first +.tail_call +.return_address + 12400 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12401 "00000000" // /* MW 5 */ + 12402 "00000000" // /* MW 4 */ + 12403 "01000000" // /* MW 3 */ + 12404 "00010100" // /* MW 2 */ + 12405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL14subFloat32Sigsjji__end + 12415 "00000000" // /* MW 1 */ +.label float32_add +.function float32_add float32_add +.src_ref 10 "softfloat.c" 92 12 +.src_ref 10 "softfloat.c" 878 first +.function_start + 12416 "00011000" // MOVX r16, #-31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12417 "10000101" // /* MW 3 */ + 12418 "11100000" // /* MW 2 */ + 12419 "00010111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 92 12 first + 12420 "10011000" // LSHL r3, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12421 "00001101" // /* MW 3 */ + 12422 "01000111" // /* MW 2 */ + 12423 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 92 12 + 12424 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12425 "00001101" // /* MW 3 */ + 12426 "10100001" // /* MW 2 */ + 12427 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 884 15 first + 12428 "10011000" // EQ r16, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12429 "00000111" // /* MW 3 */ + 12430 "11100001" // /* MW 2 */ + 12431 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 884 4 + 12432 "10000100" // JNZ r16, #12464 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12464 delay_slots=5 */ + 12433 "00000001" // /* MW 5 */ + 12434 "01000000" // /* MW 4 */ + 12435 "01011000" // /* MW 3 */ + 12436 "00011000" // /* MW 2 */ + 12437 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12447 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 888 15 first +.tail_call + 12448 "10000100" // J #11664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11664 delay_slots=5 */ + 12449 "00000000" // /* MW 5 */ + 12450 "00000000" // /* MW 4 */ + 12451 "11001000" // /* MW 3 */ + 12452 "00010110" // /* MW 2 */ + 12453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12463 "00000000" // /* MW 1 */ +.label TGT_Ffloat32_add_48 +.src_ref 10 "softfloat.c" 885 15 first +.tail_call +.return_address + 12464 "10000100" // J #11040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */ + 12465 "00000000" // /* MW 5 */ + 12466 "00000000" // /* MW 4 */ + 12467 "10010000" // /* MW 3 */ + 12468 "00010101" // /* MW 2 */ + 12469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label float32_add__end + 12479 "00000000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 8 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 9 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/softfloat" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.lst new file mode 100644 index 0000000000000000000000000000000000000000..da538ba51f010cb935d6faf7c98cc539440d5b5d --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.lst @@ -0,0 +1,4815 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2352 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2352 0x00 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p0]; MOV r0, r15 + 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2364 0xff 0x73 0xb0 0x01 0xe8 0x50 0x70 0x02 ST p7, [sp, #-8]; MOV r15, r1 + 2372 0xff 0x82 0xb0 0x1f 0xa7 0x83 0xb0 0x60 0x79 0x3a ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 + 2382 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] + 2386 0x00 0x00 NOPX + 2388 0x00 0x00 NOPX + 2390 0x18 0x68 0x02 0x18 ADD.NC p0, r16, #4 + 2394 0x00 0x1e 0x16 0x98 LDA r16, [p0], #4 + 2398 0x00 0x3e 0x56 0x98 LDA r18, [p0], #12 + 2402 0x00 0xee 0x36 0x98 LDA r17, [p0], #-8 + 2406 0x00 0x07 0x76 0x98 LDA r27, [p0] + 2410 0x00 0x00 NOPX + 2412 0x00 0x00 NOPX + 2414 0x00 0x00 NOPX + 2416 0x00 0x00 NOPX + 2418 0x00 0x00 NOPX + 2420 0x00 0x00 NOPX + 2422 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 2426 0x08 0xd6 0x11 0x98 ST r16, [p0, #-12] + 2430 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 2434 0x00 0x00 NOPX + 2436 0x00 0x00 NOPX + 2438 0x00 0x00 NOPX + 2440 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 2444 0x00 0x00 NOPX + 2446 0x00 0x00 NOPX + 2448 0x00 0x00 NOPX + 2450 0x07 0x2c 0x1e 0x98 LDA p0, [p7], #8 + 2454 0x07 0xfc 0x9e 0x98 LDA p1, [p7], #-4 + 2458 0x07 0x05 0x1e 0x98 LDA p2, [p7] +.no_stack_arguments + 2462 0x00 0x0e 0xb8 0x00 0x01 0x04 JL #7536 +.delay_slot + 2468 0x0f 0xf3 0x55 0x98 ST r26, [sp, #-16] +.delay_slot +.swstall delay_slot + 2472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2474 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2476 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2478 0x00 0x00 NOPX +.return_address + 2480 0x07 0xf6 0x16 0x98 LDA r16, [p7, #-4] + 2484 0x07 0xf3 0x51 0x18 LDA r26, [sp, #-16] + 2488 0x00 0x00 NOPX + 2490 0x00 0x00 NOPX + 2492 0x00 0x00 NOPX + 2494 0x00 0x00 NOPX + 2496 0x00 0x00 NOPX + 2498 0x18 0x68 0x08 0x18 ADD.NC p0, r16, #16 + 2502 0x00 0x06 0x16 0x98 LDA r16, [p0] + 2506 0x10 0x22 0x05 0x18 MOVX r17, #1 + 2510 0x00 0x00 NOPX + 2512 0x00 0x00 NOPX + 2514 0x00 0x00 NOPX + 2516 0x00 0x00 NOPX + 2518 0x00 0x00 NOPX + 2520 0x14 0x15 0x18 0x18 REL.COND r16, r17, r26 + 2524 0xfe 0x87 0x2d 0xaf 0x41 0xd4 LDA lr, [sp, #-12]; MOV r27, r15 + 2530 0x00 0xf6 0x16 0x98 LDA r16, [p0, #-4] + 2534 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 2538 0x00 0x00 NOPX + 2540 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 2544 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 2550 0x00 0x00 NOPX + 2552 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 2556 0x14 0x63 0x01 0x98 SUB r17, r17, r16 +.delay_slot + 2560 0x14 0x21 0x12 0x18 SEL.EQZ r16, r16, r17, r27 +.delay_slot + 2564 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4] +.delay_slot +.swstall delay_slot + 2568 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2570 0x00 0x00 NOPX +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 2576 +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.function_start + 2576 0x23 0x8e 0xd3 0x80 0x8b 0x3e 0x67 0x68 0x09 0x60 0x78 0x76 LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 + 2588 0x02 0x07 0x00 0x3e 0x25 0x09 0x30 0x07 0x08 0xba MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 + 2598 0x00 0x7e 0x00 0x3e 0x17 0xa8 0x08 0x60 0x78 0xba MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 + 2608 0xff 0xe5 0x00 0x00 0x00 0x3c 0x8f 0xfc 0x10 0xba MOVA r5, #-1; MOVXM r4, #65528 + 2618 0xff 0x90 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r16, #-4; PADDXM [sp], #64 + 2628 0x1c 0x60 0x17 0x18 ADD.NC p4, r0, #46 + 2632 0x00 0x00 NOPX + 2634 0x08 0x1c 0x71 0x98 ST r3, [p0], #4 + 2638 0x01 0x1f 0x56 0x98 LDA r26, [p1], #4 + 2642 0x00 0x00 NOPX + 2644 0x00 0x00 NOPX + 2646 0x00 0x00 NOPX + 2648 0x00 0x00 NOPX + 2650 0x00 0x00 NOPX + 2652 0x00 0x00 NOPX + 2654 0x03 0xea 0x3d 0x44 0x89 0x5c ST r26, [p0], #4; AND r17, r26, r4 + 2660 0x23 0xf6 0xd0 0x06 0x4d 0x7e 0xcc 0x48 0xa8 0xba LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 + 2670 0x16 0xa4 0x6d 0x98 LSHL r18, r26, r6 + 2674 0x11 0x0c 0x1d 0x98 LSHL r6, r4, r1 + 2678 0xd4 0x43 0xb0 0xb2 0xff 0x24 LSHL r17, r26, r1; ADD.NC r1, r18, #-1 + 2684 0x00 0x00 NOPX + 2686 0x00 0x00 NOPX + 2688 0x00 0x00 NOPX + 2690 0x03 0xf6 0x3e 0x9c 0x4c 0x5c ST r29, [p0], #4; MAC r7, r7, r29, r2 + 2696 0x23 0x8a 0xd7 0xff 0xb5 0x80 0x07 0x49 0xaf 0xfa LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 + 2706 0x10 0xe9 0xdf 0x98 MUL r20, r3, r29 + 2710 0x10 0xf8 0x4f 0x98 MUL r28, r3, r4 + 2714 0x17 0x6b 0xed 0x98 LSHL r21, r29, r30 + 2718 0xec 0x8b 0xbd 0xb5 0xd0 0x24 LSHL r18, r29, r5; ADD.NC r27, r21, #-48 + 2724 0x14 0xaf 0xff 0x18 ADD r23, r18, #-1 + 2728 0x17 0x7b 0x6f 0x98 MUL r29, r29, r22 + 2732 0x03 0x8a 0x3f 0x60 0x55 0x5c ST r2, [p0], #4; LT r24, r30, r2 + 2738 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 2742 0x00 0x00 NOPX + 2744 0x00 0x00 NOPX + 2746 0x00 0x00 NOPX + 2748 0x00 0x00 NOPX + 2750 0x00 0x00 NOPX + 2752 0x00 0x00 NOPX + 2754 0x03 0x85 0x30 0x03 0xf0 0x0e 0x70 0x02 ST el0, [p0], #4; MOV r31, el0 + 2762 0x01 0x04 0x0e 0x98 LDA eh0, [p1] + 2766 0x00 0x00 NOPX + 2768 0x00 0x00 NOPX + 2770 0x00 0x00 NOPX + 2772 0x00 0x00 NOPX + 2774 0x00 0x00 NOPX + 2776 0x00 0x00 NOPX + 2778 0x00 0x81 0x30 0x03 0x30 0x8e 0x70 0x02 ST eh0, [p0]; MOV r25, eh0 + 2786 0x01 0x17 0xd6 0x98 LDA r30, [p1, #4] + 2790 0x00 0x00 NOPX + 2792 0xc0 0x05 0xb0 0x40 0x01 0x84 JNZ r24, #2912 +.delay_slot + 2798 0x17 0x27 0x0d 0x98 LSHL r19, r28, r16 +.delay_slot + 2802 0x17 0xf3 0x9f 0x98 MUL r25, r31, r25 +.delay_slot + 2806 0xa5 0x0b 0xb2 0xb1 0xff 0x24 LSHL r20, r20, r5; ADD.NC r5, r17, #-1 +.delay_slot + 2812 0x11 0x21 0x0d 0x98 LSHL r16, r4, r16 +.delay_slot + 2816 0x02 0xfa 0x3c 0xff 0xdf 0x5c ST r30, [p0, #4]; MUL r31, r25, r30 + 2822 0x10 0x38 0x05 0x18 MOVX r28, #1 + 2826 0x10 0xb9 0xc7 0x98 EQ r28, r2, r28 + 2830 0xe0 0x07 0xe0 0x40 0x01 0x84 JNZ r28, #4032 +.delay_slot +.swstall delay_slot + 2836 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2838 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2840 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2842 0x00 0x00 NOPX +.delay_slot + 2844 0x10 0xed 0xff 0x18 ADD r22, r3, #-1 + 2848 0x10 0x22 0x09 0x18 MOVX r17, #2 + 2852 0x14 0x62 0x27 0x98 EQ r17, r17, r2 + 2856 0x88 0x07 0xa0 0x40 0x01 0x84 JNZ r17, #3904 +.delay_slot +.swstall delay_slot + 2862 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2864 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2866 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2868 0x00 0x00 NOPX +.delay_slot + 2870 0x10 0x0e 0x0d 0x18 MOVX r7, #3 + 2874 0x11 0xc4 0x27 0x98 EQ r2, r7, r2 + 2878 0x10 0x07 0x50 0x40 0x01 0x84 JNZ r2, #3744 +.delay_slot +.swstall delay_slot + 2884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2886 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2888 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2890 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2892 0x00 0x00 NOPX + 2894 0x00 0x06 0xf0 0x00 0x00 0x84 J #3552 +.delay_slot + 2900 0x10 0x34 0x11 0x18 MOVX r26, #4 +.delay_slot +.swstall delay_slot + 2904 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2906 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2908 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2910 0x00 0x00 NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336 + 2912 0x10 0x3a 0x15 0x18 MOVX r29, #5 + 2916 0x17 0x70 0x2a 0x98 LT r24, r29, r2 + 2920 0xc0 0x06 0x50 0x40 0x01 0x84 JNZ r24, #3232 +.delay_slot +.swstall delay_slot + 2926 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2928 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2930 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2932 0x00 0x00 NOPX +.delay_slot + 2934 0x10 0x34 0x11 0x18 MOVX r26, #4 + 2938 0x16 0xa2 0x27 0x98 EQ r17, r26, r2 + 2942 0x88 0x06 0x10 0x40 0x01 0x84 JNZ r17, #3104 +.delay_slot +.swstall delay_slot + 2948 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2950 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2952 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2954 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2956 0x00 0x00 NOPX + 2958 0x17 0x44 0x28 0x98 NE r2, r29, r2 + 2962 0x10 0x06 0xf0 0x40 0x01 0x84 JNZ r2, #3552 +.delay_slot +.swstall delay_slot + 2968 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2970 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2972 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2974 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2976 0x00 0x00 NOPX + 2978 0x83 0xd6 0xe0 0x00 0x22 0x08 0x07 0xec 0x58 0xba ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 + 2988 0x1f 0x9c 0xa0 0xf8 MOV r30, r25 + 2992 0x00 0x00 NOPX + 2994 0x00 0x00 NOPX + 2996 0x00 0x00 NOPX + 2998 0x00 0x00 NOPX + 3000 0x00 0x00 NOPX + 3002 0x04 0x1c 0xf7 0x18 ST.s16 r7, [p4], #2 + 3006 0x00 0x00 NOPX + 3008 0x00 0x00 NOPX + 3010 0x00 0x00 NOPX + 3012 0x00 0x00 NOPX + 3014 0x00 0x00 NOPX + 3016 0x00 0x00 NOPX + 3018 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2 + 3022 0x00 0x00 NOPX + 3024 0x00 0x00 NOPX + 3026 0x00 0x00 NOPX + 3028 0x00 0x00 NOPX + 3030 0x00 0x00 NOPX + 3032 0x00 0x00 NOPX + 3034 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3038 0x00 0x00 NOPX + 3040 0x00 0x00 NOPX + 3042 0x00 0x00 NOPX + 3044 0x00 0x00 NOPX + 3046 0x00 0x00 NOPX + 3048 0x00 0x00 NOPX + 3050 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2 + 3054 0x00 0x00 NOPX + 3056 0x00 0x00 NOPX + 3058 0x00 0x00 NOPX + 3060 0x00 0x00 NOPX + 3062 0x00 0x00 NOPX + 3064 0x00 0x00 NOPX + 3066 0x04 0x08 0x57 0x18 ST.s16 r2, [p4], m0 + 3070 0x00 0x00 NOPX + 3072 0x00 0x00 NOPX + 3074 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.swstall delay_slot + 3080 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3082 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3084 0x00 0x00 NOPX +.delay_slot + 3086 0x0c 0x06 0x51 0x98 ST r18, [p4] +.delay_slot + 3090 0x00 0x2c 0xf8 0x29 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r6, [p4, #4]; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528 + 3104 0x83 0x92 0xe0 0x3e 0x67 0xa8 0x48 0x10 0x58 0xba ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 + 3114 0xfd 0x80 0x80 0x0c 0x22 0x33 0xd0 0x0e 0x78 0xba MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 + 3124 0x00 0x00 NOPX + 3126 0x00 0x00 NOPX + 3128 0x00 0x00 NOPX + 3130 0x00 0x00 NOPX + 3132 0x00 0x00 NOPX + 3134 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3138 0x00 0x00 NOPX + 3140 0x00 0x00 NOPX + 3142 0x00 0x00 NOPX + 3144 0x00 0x00 NOPX + 3146 0x00 0x00 NOPX + 3148 0x00 0x00 NOPX + 3150 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2 + 3154 0x00 0x00 NOPX + 3156 0x00 0x00 NOPX + 3158 0x00 0x00 NOPX + 3160 0x00 0x00 NOPX + 3162 0x00 0x00 NOPX + 3164 0x00 0x00 NOPX + 3166 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3170 0x00 0x00 NOPX + 3172 0x00 0x00 NOPX + 3174 0x00 0x00 NOPX + 3176 0x00 0x00 NOPX + 3178 0x00 0x00 NOPX + 3180 0x00 0x00 NOPX + 3182 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2 + 3186 0x00 0x00 NOPX + 3188 0x00 0x00 NOPX + 3190 0x00 0x00 NOPX + 3192 0x00 0x00 NOPX + 3194 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3196 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3198 0x04 0x08 0x37 0x18 ST.s16 r1, [p4], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3202 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3204 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3206 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3212 0x10 0x02 0x41 0x18 MOVX r1, #16 +.delay_slot +.swstall delay_slot + 3216 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3218 0x00 0x00 NOPX +.delay_slot + 3220 0x0c 0x14 0x71 0x98 ST r3, [p4, #4] +.delay_slot + 3224 0x80 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p4]; NOPM +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656 + 3232 0xff 0x8e 0x20 0x10 0x32 0x2c LDA r3, [sp, #-4]; MOVX r4, #6 + 3238 0x10 0x88 0x47 0x98 EQ r4, r2, r4 + 3242 0x20 0x06 0xa8 0x40 0x01 0x84 JNZ r4, #3408 +.delay_slot + 3248 0x10 0x02 0x41 0x18 MOVX r1, #16 +.delay_slot +.swstall delay_slot + 3252 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3254 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3256 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3258 0x00 0x00 NOPX + 3260 0x10 0x06 0x1d 0x18 MOVX r3, #7 + 3264 0x10 0xc4 0x28 0x98 NE r2, r3, r2 + 3268 0x10 0x06 0xf0 0x40 0x01 0x84 JNZ r2, #3552 +.delay_slot +.swstall delay_slot + 3274 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3276 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3278 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3280 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3282 0x00 0x00 NOPX + 3284 0x83 0x86 0xe0 0x26 0x2f 0xf8 0x07 0xec 0x58 0xba ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 + 3294 0xff 0x43 0x00 0x00 0x00 0x40 0x40 0x00 0x10 0xba MOVA r3, #-6; MOVXM dj0, #65536 + 3304 0xe0 0xc7 0xbc 0x20 0x01 0x64 LSHL r3, r28, r3; MOV r24, #0 + 3310 0x00 0x00 NOPX + 3312 0x00 0x00 NOPX + 3314 0x00 0x00 NOPX + 3316 0x00 0x00 NOPX + 3318 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2 + 3322 0x00 0x00 NOPX + 3324 0x00 0x00 NOPX + 3326 0x00 0x00 NOPX + 3328 0x00 0x00 NOPX + 3330 0x00 0x00 NOPX + 3332 0x00 0x00 NOPX + 3334 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3338 0x00 0x00 NOPX + 3340 0x00 0x00 NOPX + 3342 0x00 0x00 NOPX + 3344 0x00 0x00 NOPX + 3346 0x00 0x00 NOPX + 3348 0x00 0x00 NOPX + 3350 0x0c 0x1c 0x41 0x98 ST dj0, [p4], #4 + 3354 0x04 0x0b 0x17 0x18 ST.s16 r24, [p4], m0 + 3358 0x00 0x00 NOPX + 3360 0x00 0x00 NOPX + 3362 0x00 0x00 NOPX + 3364 0x00 0x00 NOPX + 3366 0x00 0x00 NOPX + 3368 0x00 0x00 NOPX + 3370 0x0c 0x07 0x51 0x98 ST r26, [p4] + 3374 0x0c 0x14 0x71 0x98 ST r3, [p4, #4] + 3378 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot + 3384 0x1f 0x9f 0xa0 0xf8 MOV r30, r31 +.delay_slot +.swstall delay_slot + 3388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3390 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3392 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3394 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832 + 3408 0x83 0x86 0xe0 0x06 0x2b 0x70 0x48 0x10 0x58 0xba ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 + 3418 0xfd 0x80 0x80 0x3e 0x47 0xa8 0xd0 0x0e 0x78 0xba MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 + 3428 0x10 0xc8 0x4d 0x98 LSHL r4, r3, r4 + 3432 0xf7 0x8d 0xf1 0xa4 0xff 0x24 MUL r30, r30, r6; ADD.NC r3, r4, #-1 + 3438 0x00 0x00 NOPX + 3440 0x00 0x00 NOPX + 3442 0x00 0x00 NOPX + 3444 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2 + 3448 0x00 0x00 NOPX + 3450 0x00 0x00 NOPX + 3452 0x00 0x00 NOPX + 3454 0x00 0x00 NOPX + 3456 0x00 0x00 NOPX + 3458 0x00 0x00 NOPX + 3460 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2 + 3464 0x00 0x00 NOPX + 3466 0x00 0x00 NOPX + 3468 0x00 0x00 NOPX + 3470 0x00 0x00 NOPX + 3472 0x00 0x00 NOPX + 3474 0x00 0x00 NOPX + 3476 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3480 0x00 0x00 NOPX + 3482 0x00 0x00 NOPX + 3484 0x00 0x00 NOPX + 3486 0x00 0x00 NOPX + 3488 0x00 0x00 NOPX + 3490 0x00 0x00 NOPX + 3492 0x04 0x1c 0x77 0x18 ST.s16 r3, [p4], #2 + 3496 0x00 0x00 NOPX + 3498 0x00 0x00 NOPX + 3500 0x00 0x00 NOPX + 3502 0x00 0x00 NOPX + 3504 0x00 0x00 NOPX + 3506 0x00 0x00 NOPX + 3508 0x04 0x08 0x37 0x18 ST.s16 r1, [p4], m0 + 3512 0x00 0x00 NOPX + 3514 0x00 0x00 NOPX + 3516 0x00 0x00 NOPX + 3518 0x00 0x00 NOPX + 3520 0x00 0x00 NOPX + 3522 0x00 0x00 NOPX + 3524 0x0c 0x06 0x31 0x98 ST r17, [p4] + 3528 0x82 0xd2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r20, [p4, #4]; NOPM +.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 3536 0x18 0x80 0x40 0xb8 MOV dj0, #32 + 3540 0x60 0x7a 0xe0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX +.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 3552 0x03 0x08 0x80 0xc0 0x1e 0x14 MOVA m2, #24; ADD.NC p0, r0, #30 + 3558 0x43 0x8a 0xd0 0x00 0x02 0x08 0x07 0xe2 0x58 0xba LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 + 3568 0x40 0x8e 0x52 0x00 0x99 0x54 LDA.s16 r3, [p2]; MOV m1, #38 + 3574 0x02 0x14 0x36 0x98 LDA r1, [p2, #4] + 3578 0x00 0x00 NOPX + 3580 0x00 0x00 NOPX + 3582 0x00 0x2f 0xf7 0x18 ST.s16 r31, [p0], #4 + 3586 0x00 0x00 NOPX + 3588 0x00 0x00 NOPX + 3590 0x00 0x00 NOPX + 3592 0x00 0x00 NOPX + 3594 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3596 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3598 0x00 0x5f 0x17 0x18 ST.s16 r24, [p0], #10 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3602 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3604 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3606 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3608 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3610 0x10 0x30 0x01 0x18 MOVX r24, #0 + 3614 0x00 0x00 NOPX + 3616 0x00 0xcf 0x17 0x18 ST.s16 r24, [p0], #-8 + 3620 0x00 0x48 0x9a 0x98 LDA.u16 r4, [p0], m2 + 3624 0x00 0x00 NOPX + 3626 0x00 0x00 NOPX + 3628 0x00 0x00 NOPX + 3630 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3632 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3634 0x00 0xfc 0x17 0x18 ST.s16 r0, [p0], #-2 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3638 0x21 0x35 0xb2 0xa4 0xff 0x24 LSHL r4, r4, r26; ADD.NC r5, r4, #-1 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3644 0x10 0x00 0x41 0x98 SUB r0, r0, r4 + 3648 0x00 0x00 NOPX + 3650 0x00 0x00 NOPX + 3652 0x00 0x00 NOPX + 3654 0x00 0x00 NOPX + 3656 0x00 0x08 0xb7 0x18 ST.s16 r5, [p0], m0 + 3660 0x00 0x00 NOPX + 3662 0x00 0x00 NOPX + 3664 0x00 0x00 NOPX + 3666 0x00 0x00 NOPX + 3668 0x00 0x00 NOPX + 3670 0x00 0x00 NOPX + 3672 0x00 0x2a 0x77 0x18 ST.s16 r19, [p0], m1 + 3676 0x00 0x00 NOPX + 3678 0x00 0x00 NOPX + 3680 0x00 0x00 NOPX + 3682 0x00 0x00 NOPX + 3684 0x00 0x00 NOPX + 3686 0x00 0x00 NOPX + 3688 0x00 0xec 0x47 0x18 ST.s8 r2, [p0], #-2 + 3692 0x00 0x00 NOPX + 3694 0x00 0x00 NOPX + 3696 0x00 0x00 NOPX + 3698 0x00 0x00 NOPX + 3700 0x00 0x00 NOPX + 3702 0x00 0x00 NOPX + 3704 0x00 0x04 0x77 0x18 ST.s16 r3, [p0] + 3708 0x00 0x00 NOPX + 3710 0x00 0x00 NOPX + 3712 0x00 0x00 NOPX + 3714 0x00 0x00 NOPX + 3716 0x00 0x00 NOPX + 3718 0x00 0x00 NOPX + 3720 0x00 0xe4 0x27 0x18 ST.s8 r1, [p0, #-2] + 3724 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3728 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3734 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3736 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3738 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3740 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168 + 3744 0x83 0xd6 0xe0 0x00 0x00 0x3c 0xaf 0xf4 0x10 0xba ST.s16 r21, [p4], #2; MOVXM r5, #65512 + 3754 0xff 0x8a 0x20 0x0a 0x7d 0x04 0x07 0xec 0x58 0xba LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 + 3764 0x00 0x9a 0x00 0x00 0x00 0x3c 0xcf 0xff 0x90 0xba MOVA r26, #4; MOVXM r6, #65535 + 3774 0x10 0xe2 0x60 0x98 ADD r17, r3, r6 + 3778 0x14 0x7a 0x46 0x18 MAC r29, r29, r17, r4 + 3782 0x14 0x6a 0x4e 0x18 MSC r21, r21, r17, r4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3786 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3788 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3792 0x11 0xc4 0x2f 0x98 MUL r2, r7, r2 + 3796 0x00 0x00 NOPX + 3798 0x00 0x00 NOPX + 3800 0x00 0x00 NOPX + 3802 0x00 0x00 NOPX + 3804 0x00 0x00 NOPX + 3806 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2 + 3810 0x00 0x00 NOPX + 3812 0x00 0x00 NOPX + 3814 0x00 0x00 NOPX + 3816 0x00 0x00 NOPX + 3818 0x00 0x00 NOPX + 3820 0x00 0x00 NOPX + 3822 0x04 0x1e 0xb7 0x18 ST.s16 r21, [p4], #2 + 3826 0x00 0x00 NOPX + 3828 0x00 0x00 NOPX + 3830 0x00 0x00 NOPX + 3832 0x00 0x00 NOPX + 3834 0x00 0x00 NOPX + 3836 0x00 0x00 NOPX + 3838 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2 + 3842 0x00 0x00 NOPX + 3844 0x00 0x00 NOPX + 3846 0x00 0x00 NOPX + 3848 0x00 0x00 NOPX + 3850 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3852 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3854 0x04 0x08 0x57 0x18 ST.s16 r2, [p4], m0 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3858 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3860 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3862 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3868 0x10 0x0a 0x41 0x18 MOVX r5, #16 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3872 0x11 0x45 0xd1 0x98 SUB r2, r5, r29 +.delay_slot + 3876 0x19 0xa1 0x1c 0xf8 MOV r6, eh0 +.delay_slot + 3880 0x80 0x8e 0x30 0x00 0x01 0xa5 0x70 0x02 ST r3, [p4]; NOPM +.delay_slot + 3888 0x00 0x2c 0xf0 0x00 0x24 0x16 0x11 0xbd 0xe3 0x7c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328 + 3904 0x83 0x92 0xe0 0x00 0x42 0x08 0x07 0xec 0x58 0xba ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 + 3914 0xff 0x86 0x20 0x06 0x2d 0x70 0x48 0x08 0x58 0xba LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 + 3924 0x00 0xc8 0x2d 0x20 0x11 0x64 MOVX r3, #16; MOV r26, #4 + 3930 0x00 0x00 NOPX + 3932 0x00 0x00 NOPX + 3934 0x00 0x00 NOPX + 3936 0x00 0x00 NOPX + 3938 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2 + 3942 0x00 0x00 NOPX + 3944 0x00 0x00 NOPX + 3946 0x00 0x00 NOPX + 3948 0x00 0x00 NOPX + 3950 0x00 0x00 NOPX + 3952 0x00 0x00 NOPX + 3954 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2 + 3958 0x00 0x00 NOPX + 3960 0x00 0x00 NOPX + 3962 0x00 0x00 NOPX + 3964 0x00 0x00 NOPX + 3966 0x00 0x00 NOPX + 3968 0x00 0x00 NOPX + 3970 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2 + 3974 0x00 0x00 NOPX + 3976 0x00 0x00 NOPX + 3978 0x00 0x00 NOPX + 3980 0x00 0x00 NOPX + 3982 0x00 0x00 NOPX + 3984 0x00 0x00 NOPX + 3986 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2 + 3990 0x00 0x00 NOPX + 3992 0x00 0x00 NOPX + 3994 0x00 0x00 NOPX + 3996 0x00 0x00 NOPX + 3998 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 4000 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.noswbrkpt + 4002 0x04 0x08 0x77 0x18 ST.s16 r3, [p4], m0 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4006 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4008 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4010 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4016 0x10 0x46 0x26 0x18 MAC r3, r3, r1, r2 +.delay_slot +.swstall delay_slot + 4020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4022 0x00 0x00 NOPX +.delay_slot + 4024 0x0c 0x04 0xd1 0x98 ST r6, [p4] +.delay_slot + 4028 0x0c 0x16 0x51 0x98 ST r18, [p4, #4] +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456 + 4032 0x04 0x1e 0xb7 0x18 ST.s16 r21, [p4], #2 + 4036 0x00 0x00 NOPX + 4038 0x00 0x00 NOPX + 4040 0x00 0x00 NOPX + 4042 0x00 0x00 NOPX + 4044 0x00 0x00 NOPX + 4046 0x00 0x00 NOPX + 4048 0x04 0x1c 0xf7 0x18 ST.s16 r7, [p4], #2 + 4052 0x00 0x00 NOPX + 4054 0x00 0x00 NOPX + 4056 0x00 0x00 NOPX + 4058 0x00 0x00 NOPX + 4060 0x00 0x00 NOPX + 4062 0x00 0x00 NOPX + 4064 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2 + 4068 0x00 0x00 NOPX + 4070 0x00 0x00 NOPX + 4072 0x00 0x00 NOPX + 4074 0x07 0xfc 0x71 0x18 LDA r3, [sp, #-4] + 4078 0x00 0x00 NOPX +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 4080 0x00 0x00 NOPX +.aggressive_scheduled_block_id 7 +.noswbrkpt + 4082 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4086 0x00 0x00 NOPX +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4088 0x00 0x00 0xf0 0xbf 0xc0 0x44 MOVXM r1, #65504 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4094 0x10 0x45 0xa0 0x98 ADD r2, r1, r26 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4098 0x18 0x44 0xc0 0xa0 0x41 0x64 MAC r1, r1, r3, r2; MOV r1, #16 + 4104 0x00 0x00 NOPX + 4106 0x00 0x00 NOPX + 4108 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2 + 4112 0x00 0x00 NOPX + 4114 0x00 0x00 NOPX + 4116 0x00 0x00 NOPX + 4118 0x00 0x00 NOPX + 4120 0x00 0x00 NOPX +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 4122 0x18 0x0f 0xd8 0xb8 MOV m0, #-20 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 4126 0x04 0x08 0xb7 0x18 ST.s16 r5, [p4], m0 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4130 0x00 0x00 NOPX +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4132 0x00 0x00 NOPX +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4134 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4140 0xb1 0x49 0xc2 0xa0 0x41 0x64 MSC r5, r5, r22, r4; MOV r5, #16 +.delay_slot +.swstall delay_slot + 4146 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4148 0x00 0x00 NOPX +.delay_slot + 4150 0x0c 0x06 0x91 0x98 ST r20, [p4] +.delay_slot + 4154 0x82 0xc6 0x30 0x01 0xa0 0x8b 0xd0 0x8e 0x79 0x3a ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0 + +.text_segment PM 4176 +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0 +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.function_start + 4176 0x42 0x82 0xd0 0x3e 0x47 0xc8 0x87 0xe8 0x58 0xba LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 + 4186 0x45 0x86 0xd0 0x3e 0x27 0xaa 0x08 0x06 0x58 0xba LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 + 4196 0x4f 0x96 0xd0 0x01 0x80 0x08 0x68 0x60 0x78 0xba LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 + 4206 0x02 0x2a 0x52 0x98 LDA.s16 r18, [p2], m1 + 4210 0x02 0x1c 0xd6 0x98 LDA r6, [p2], #4 + 4214 0x02 0x2c 0xf6 0x98 LDA r7, [p2], #8 + 4218 0x02 0x06 0x36 0x98 LDA r17, [p2] + 4222 0x10 0x26 0x4e 0x98 ASHL r19, r0, r4 + 4226 0x02 0x24 0x96 0x98 LDA r4, [p2, #8] + 4230 0x11 0x68 0x2e 0x98 ASHL r20, r5, r2 + 4234 0x18 0x49 0x72 0xf8 VBCST.16 x0, r18 + 4238 0x00 0x00 NOPX + 4240 0x14 0xe5 0x4f 0x98 MUL r18, r19, r20 + 4244 0x10 0x67 0x11 0x98 SUB r19, r1, r17 + 4248 0x14 0xe7 0x2f 0x98 MUL r19, r19, r18 + 4252 0x14 0x63 0x2f 0x98 MUL r17, r17, r18 + 4256 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 + 4260 0xc4 0x23 0x34 0xc3 0x82 0xa4 GE r16, r24, r17; ADD.NC p2, r3, r16 + 4266 0x80 0x08 0xa0 0x40 0x01 0x84 JNZ r16, #4416 +.delay_slot + 4272 0x18 0x00 0x92 0xf8 VMOV bmll0, x0 +.delay_slot +.swstall delay_slot + 4276 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4278 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4280 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4282 0x00 0x00 NOPX + 4284 0x00 0x00 0x11 0xe2 0x60 0x44 MOVXM ls, #4400 + 4290 0x00 0x00 0x16 0xe2 0x60 0x44 MOVXM le, #4400 + 4296 0x00 0x2b 0x60 0x02 0xbc 0x50 0x70 0x02 NOPS; MOV lc, r17 + 4304 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4336 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4352 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4384 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 4400 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240 +.loop_nesting 0 + 4416 0x00 0x86 0x00 0x0b 0x00 0xfe 0x29 0xcc 0xa8 0xba MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 + 4426 0x04 0x62 0x32 0x87 0xff 0x24 SUB r17, r0, r17; ADD.NC dn1, r7, #-1 + 4432 0x14 0x62 0x6d 0x98 LSHL r17, r17, r6 + 4436 0x04 0x4e 0x32 0x11 0x10 0x24 SUB r17, r0, r7; ADD.NC m1, r17, #16 + 4442 0x11 0xe1 0x0f 0x98 MUL r16, r7, r16 + 4446 0x14 0x4c 0x6d 0x98 LSHL r6, r17, r6 + 4450 0x81 0x85 0xd4 0xc3 0x32 0xa4 ASHL r6, r16, r2; ADD.NC p2, r3, r6 + 4456 0x16 0x0e 0x69 0x98 GE r7, r24, r6 + 4460 0x38 0x09 0x08 0x40 0x01 0x84 JNZ r7, #4624 +.delay_slot +.swstall delay_slot + 4466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4474 0x00 0x00 NOPX + 4476 0x00 0x07 0x80 0x00 0x00 0x04 0x79 0x00 0x10 0xba MOVA dc1, #0; MOVXM ls, #4608 + 4486 0x02 0x06 0x80 0x00 0x00 0x05 0xb9 0x00 0x10 0xba MOVA dj1, #16; MOVXM le, #4608 + 4496 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb9 0x90 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV + 4512 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4528 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4544 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4592 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 4608 0x00 0x2c 0xf0 0x00 0x22 0x30 0x2e 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448 +.loop_nesting 0 + 4624 0x7f 0xff 0xf3 0xbf 0xf0 0x44 MOVXM r7, #2147483640 + 4630 0x11 0xce 0x44 0x98 AND r7, r7, r4 + 4634 0x11 0x4e 0x71 0x98 SUB r7, r5, r7 + 4638 0x11 0xce 0x0f 0x98 MUL r7, r7, r0 + 4642 0x11 0x04 0x2e 0x98 ASHL r2, r4, r2 + 4646 0x11 0x48 0x41 0x98 SUB r4, r5, r4 + 4650 0x10 0x84 0x0f 0x98 MUL r2, r2, r0 + 4654 0x11 0x00 0x0f 0x98 MUL r0, r4, r0 + 4658 0x08 0x45 0xf3 0x20 0x05 0x64 MUL r1, r1, r2; MOV r6, #1 + 4664 0x10 0x00 0x6d 0x98 LSHL r0, r0, r6 + 4668 0xc0 0x03 0x34 0xc3 0x02 0xa4 GE r0, r24, r1; ADD.NC p2, r3, r0 + 4674 0x00 0x09 0x70 0x40 0x01 0x84 JNZ r0, #4832 +.delay_slot +.swstall delay_slot + 4680 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4682 0x00 0x00 NOPX +.delay_slot + 4684 0x11 0xc8 0x6d 0x98 LSHL r4, r7, r6 +.delay_slot + 4688 0x18 0x02 0x08 0x18 ADD.NC m0, r4, #16 +.delay_slot + 4692 0x18 0x41 0x7f 0x98 ADD.NC dn0, r2, #-1 + 4696 0x00 0x03 0x80 0x00 0x00 0x04 0x79 0x68 0x10 0xba MOVA dc0, #0; MOVXM ls, #4816 + 4706 0x02 0x02 0x80 0x00 0x00 0x05 0xb9 0x68 0x10 0xba MOVA dj0, #16; MOVXM le, #4816 + 4716 0x1d 0x70 0xa0 0xf8 MOV lc, r1 + 4720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4784 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4800 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 4816 0x00 0x2c 0xf0 0x00 0x22 0x10 0x2e 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656 +.loop_nesting 0 + 4832 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 4836 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4838 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4840 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4842 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4844 0x00 0x00 NOPX +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0 + +.text_segment PM 4848 +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.function_start + 4848 0x18 0xd4 0xc0 0xf8 MOV r3, p2 + 4852 0x6b 0x91 0x60 0x03 0xb0 0xcb 0x00 0x02 MOVS dn3, p7; ADD.NC p7, r3, #44 + 4860 0x07 0x8c 0x1a 0x98 LDA.u16 r0, [p7], #-16 + 4864 0x00 0x00 NOPX + 4866 0x00 0x00 NOPX + 4868 0x00 0x00 NOPX + 4870 0x00 0x00 NOPX + 4872 0x00 0x00 NOPX + 4874 0x00 0x00 NOPX + 4876 0x00 0x09 0xf0 0x40 0x01 0x84 JNZ r0, #5088 +.delay_slot + 4882 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot + 4886 0x18 0xc2 0x72 0xf8 VBCST.32 x1, r16 +.delay_slot +.swstall delay_slot + 4890 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4892 0x00 0x00 NOPX +.delay_slot + 4894 0x00 0x20 0x00 0x00 0x01 0xc4 PADDXM [sp], #256 + 4900 0x1a 0x80 0x48 0xb8 MOV dj2, #36 + 4904 0x02 0x40 0x36 0x98 LDA r1, [p2, dj2] + 4908 0x00 0x00 NOPX + 4910 0x00 0x00 NOPX + 4912 0x00 0x00 NOPX + 4914 0x00 0x00 NOPX + 4916 0x00 0x00 NOPX + 4918 0x00 0x00 NOPX + 4920 0x14 0x04 0x19 0x98 GE r2, r16, r1 + 4924 0x10 0x09 0xf0 0x40 0x01 0x84 JNZ r2, #5088 +.delay_slot + 4930 0x1a 0x02 0x92 0xf8 VMOV bmll2, x1 +.delay_slot +.swstall delay_slot + 4934 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4940 0x00 0x00 NOPX + 4942 0x00 0x2c 0xf3 0x84 0x8b 0x00 0x00 0x04 0x79 0xe8 0x10 0x76 NOPA; MOVS p3, p1; MOVXM ls, #5072 + 4954 0x00 0x00 0x16 0xe7 0xa0 0x44 MOVXM le, #5072 + 4960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb8 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV + 4976 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4992 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 5008 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 5024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 5040 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 5056 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 5072 0x00 0x2c 0xf0 0x00 0x23 0x1d 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240 +.loop_nesting 0 + 5088 0x1c 0x00 0x40 0xb8 MOV m4, #32 + 5092 0x07 0x8a 0x3a 0x98 LDA.u16 r17, [p7], m4 + 5096 0xff 0xda 0x5a 0x1f 0x19 0x54 LDA.s16 r22, [p7], #-2; MOV m5, #-58 + 5102 0xf5 0x6b 0x51 0x00 0xb9 0x54 LDA.u16 r26, [p7], m5; MOV dj0, #46 + 5108 0xe0 0x52 0x59 0xbd 0x81 0xd4 LDA.s16 r20, [p7, dj0]; MOV r19, p7 + 5114 0xe0 0x4e 0x56 0xd3 0x38 0x14 LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 + 5120 0x03 0xde 0xb2 0x98 LDA.s16 r21, [p3], #-6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5124 0x03 0xff 0x9a 0x98 LDA.u16 r28, [p3], #-2 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5128 0x67 0xc6 0x50 0x1c 0x12 0x2c LDA.s16 r17, [p3], #6; MOVX r7, #2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5134 0x7e 0xca 0x50 0x3f 0x27 0xca 0x60 0x00 0x58 0xba LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5144 0xe0 0x1e 0x52 0x10 0x4b 0x23 0x29 0x6c 0xc8 0x01 0x58 0x76 LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5156 0x60 0xef 0x52 0x5a 0x0b 0x2c 0x73 0xec 0x48 0x3c 0x58 0x76 LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5168 0x08 0x0a 0x83 0x84 0x8b 0x29 0x43 0x6d 0x01 0xd0 0x78 0x76 MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5180 0x6a 0x12 0xb0 0x27 0x33 0x6e 0x85 0x10 0x78 0xba VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5190 0x15 0x41 0x30 0x2b 0x33 0x6f 0x04 0xd0 0x78 0xba VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5200 0x8c 0x4d 0xba 0xf2 0xfe 0x24 LSHL r17, r17, r6; ADD.NC lc, r18, #-2 + 5206 0x94 0x4d 0xb1 0x11 0x41 0xe4 LSHL r17, r18, r6; MOV dj0, r17 + 5212 0x19 0x01 0x30 0x10 0x4b 0x0e 0x63 0x6c 0x04 0xd0 0x78 0x76 VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 + 5224 0x0b 0x81 0x67 0x03 0x20 0xe4 0x14 0x30 0x3d 0x4a MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 + 5234 0x1d 0x21 0x34 0x5b 0x0b 0x02 0x44 0x50 0x72 0xba VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 + 5244 0x03 0x31 0x33 0x93 0x01 0xd4 VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5250 0x15 0x41 0x30 0x04 0x11 0x80 0x3d 0x62 VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5258 0x03 0x50 0x95 0x98 VLDA.2D bmll1, [p3], d2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5262 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5264 0x00 0x00 0x00 0x8f 0x4c 0x02 0x10 0x28 0x3d 0x5a MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5274 0x19 0x01 0x37 0x10 0x01 0xd4 VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5280 0x08 0x06 0x80 0x00 0x24 0x84 0x8b 0x00 0x44 0x08 0x82 0x00 0x78 0xa1 0x81 0xeb MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5296 0x1d 0x21 0x30 0x00 0x21 0x5a 0x0b 0x00 0x00 0x05 0xba 0x90 0x10 0x90 0x61 0xeb VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5312 0x00 0x19 0x89 0x98 VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5316 0x6a 0x12 0xb0 0x00 0x20 0x00 0xad 0x8e 0x11 0x80 0x3d 0x66 VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5328 0x15 0x41 0x30 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5344 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5360 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x81 0x41 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5376 0x19 0x01 0x30 0x00 0x24 0x31 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5392 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0xa1 0x81 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5408 0x1d 0x21 0x30 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x90 0x61 0xeb VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5424 0xec 0x07 0x50 0x00 0x00 0x0c 0xaf 0xc0 0x10 0xba LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5434 0x03 0x31 0x32 0x15 0x72 0xe2 0x11 0x80 0x3d 0x4a VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5444 0x18 0x41 0x72 0xf8 VBCST.16 x0, r16 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5448 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5450 0x10 0x28 0x3d 0x48 VADD.f dm0, dm1, dm2, r2 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5454 0x0c 0x31 0x06 0x98 VST.2D bmll2, [p4], d1 + 5458 0x00 0x00 NOPX + 5460 0x00 0x02 0x5f 0xf9 0x12 0x0c 0x3d 0x62 ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 + 5468 0x11 0x40 0x08 0x98 NE r0, r5, r0 + 5472 0x00 0x0c 0x70 0x40 0x01 0x84 JNZ r0, #6368 +.delay_slot +.swstall delay_slot + 5478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5480 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5482 0x00 0x00 NOPX +.delay_slot + 5484 0x0c 0x31 0x06 0x98 VST.2D bmll2, [p4], d1 +.delay_slot +.swstall delay_slot + 5488 0x00 0x00 NOPX + 5490 0x46 0x9a 0xd0 0x14 0x1a 0x2c LDA r6, [p2, #12]; MOVX r5, #3 + 5496 0x00 0x00 NOPX + 5498 0x00 0x00 NOPX + 5500 0x00 0x00 NOPX + 5502 0x00 0x00 NOPX + 5504 0x00 0x00 NOPX + 5506 0x00 0x00 NOPX + 5508 0x11 0x4e 0x69 0x98 GE r7, r5, r6 + 5512 0x38 0x0e 0x40 0x40 0x01 0x84 JNZ r7, #7296 +.delay_slot + 5518 0x10 0x00 0x11 0x18 MOVX r0, #4 +.delay_slot +.swstall delay_slot + 5522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5524 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5526 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5528 0x00 0x00 NOPX + 5530 0x11 0x8a 0x08 0x98 NE r5, r6, r0 + 5534 0x28 0x0c 0xb8 0x40 0x01 0x84 JNZ r5, #6512 +.delay_slot +.swstall delay_slot + 5540 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5542 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5544 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5546 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5548 0x00 0x00 NOPX + 5550 0x24 0x40 0xa9 0x83 0xc1 0xe4 MOVX r17, #257; MOV dc4, lr + 5556 0x00 0x00 0xfa 0xbf 0xfe 0x44 MOVXM r21, #65535 + 5562 0x00 0x2c 0xf0 0x50 0x02 0x2c NOPA; MOVX r20, #0 +.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 5568 0x08 0x0a 0x82 0x83 0x0b 0x00 0x52 0x08 0x48 0x3c 0x58 0x76 MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 + 5580 0x48 0x1a 0x50 0x11 0x02 0x2c LDA.s16 r6, [p2, dj2]; MOVX r4, #32 + 5586 0x00 0x00 NOPX + 5588 0x00 0x00 NOPX + 5590 0x00 0x00 NOPX + 5592 0x00 0x00 NOPX + 5594 0x00 0x00 NOPX + 5596 0x00 0x01 0x67 0x98 NOPA + 5600 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0c 0x52 0xf4 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV +.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 5616 0x04 0x8a 0x87 0xfd 0xa5 0x80 0x01 0xf3 0xb2 0x78 0x10 0x76 MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 + 5628 0xe0 0xdc 0x57 0xfa 0x65 0x80 0x50 0x08 0x8b 0x39 0x78 0x76 LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 + 5640 0x48 0x1a 0xd7 0x84 0x8b 0x3f 0x67 0xe8 0x02 0x49 0x78 0x76 LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 + 5652 0x03 0xf8 0x00 0x02 0xd2 0x01 0x02 0x49 0x78 0xba MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 + 5662 0x02 0x19 0x00 0x00 0x00 0x04 0x7b 0x40 0x10 0xba MOVA r25, #16; MOVXM ls, #5760 + 5672 0xff 0x94 0xb0 0x00 0x00 0x05 0xbc 0x60 0x10 0xba VLDA wl2, [sp, #-32]; MOVXM le, #6336 + 5682 0x10 0x74 0x01 0x18 MOVX r26, #64 + 5686 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5688 0x15 0xfa 0x80 0x18 MOVX crRnd, r23 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5692 0x08 0x02 0xc0 0x02 0xb9 0x80 0x00 0x02 VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5700 0x19 0xa0 0x92 0xf8 VMOV x3, x0 + 5704 0x02 0xa6 0x92 0xe6 0x10 0x40 0x83 0x62 VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 + 5712 0x1b 0x2a 0x92 0xf8 VMOV x6, x5 + 5716 0x00 0x00 NOPX + 5718 0x00 0x00 NOPX + 5720 0x00 0x00 NOPX + 5722 0x00 0x00 NOPX + 5724 0x09 0xc0 0x16 0x18 VCONV.bf16.fp32 wl3, bmll0 + 5728 0x00 0x00 NOPX + 5730 0x10 0x06 0x83 0x48 VMSC.f dm0, dm0, x3, x4, r2 + 5734 0x00 0x00 NOPX + 5736 0x00 0x00 NOPX + 5738 0x00 0x00 NOPX + 5740 0x00 0x00 NOPX + 5742 0x00 0x00 NOPX + 5744 0x00 0x2c 0xf0 0x00 0x22 0xc0 0x16 0x00 0x71 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912 +.loop_nesting 1 +.begin_of_loop + 5760 0x23 0xbe 0x89 0xa5 0x25 0xf4 VLDB x7, [p1], #64; VMOV bmhh4, x9 + 5766 0x1b 0xd6 0x92 0xf8 VMOV bmhh3, x11 + 5770 0x1f 0x1e 0xc0 0xf8 MOV r28, p7 + 5774 0x17 0x3b 0x84 0x98 AND r29, r28, r24 + 5778 0xee 0xc9 0x5e 0x3d 0xe0 0x24 LT r27, r29, r4; ADD.NC r28, r29, #-32 + 5784 0x15 0xbd 0xdd 0x98 LSHL r30, r22, r29 + 5788 0x16 0xbf 0xd1 0x98 SUB r31, r26, r29 + 5792 0x2f 0xbc 0x48 0x70 0xcd 0xa4 SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 + 5798 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8 + 5802 0x1c 0x4e 0x22 0xf8 VMOV wl8, wh7 + 5806 0x1d 0x4f 0x22 0xf8 VMOV wl10, wl7 + 5810 0x1c 0x90 0x92 0xf8 VMOV bmhl4, x8 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5814 0x1b 0x94 0x92 0xf8 VMOV bmhl3, x10 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5818 0x02 0x12 0x8a 0xe6 0x13 0x28 0x3d 0x62 VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5826 0x19 0x0e 0x8a 0xf8 VMOV cml1, cmh3 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5830 0x04 0x4e 0x22 0xe6 0x12 0x50 0x3d 0x62 VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5838 0x1a 0x0e 0x92 0xf8 VMOV bmll2, x7 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5842 0x1c 0xc0 0x66 0xd8 VSHIFT x9, x8, x0, r25 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5846 0x01 0x10 0x92 0xe6 0x14 0x30 0x3d 0x62 VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5854 0x1c 0x12 0x92 0xf8 VMOV bmll4, x9 + 5858 0x1c 0x2c 0x12 0xf8 VMOV x8, bmll3 + 5862 0x1c 0xd1 0x22 0xf8 VMOV wl9, wl8 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5866 0x1c 0x48 0x66 0xd8 VSHIFT x8, x9, x0, r25 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5870 0x01 0x10 0x92 0xe6 0x11 0x64 0x3d 0x62 VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5878 0x1b 0x12 0x92 0xf8 VMOV bmll3, x9 + 5882 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 5886 0x1d 0x40 0x1e 0xd8 VSHIFT x10, x8, x0, r7 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5890 0x04 0x30 0x12 0xe6 0x12 0x4c 0x3d 0x62 VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 5898 0x1b 0x14 0x92 0xf8 VMOV bmll3, x10 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5902 0x04 0x40 0x1e 0xc6 0x13 0x8c 0x3d 0x62 VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5910 0x1b 0x10 0x92 0xf8 VMOV bmll3, x8 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 5914 0x1c 0x24 0x12 0xf8 VMOV x8, bmll1 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 5918 0x04 0x40 0x1e 0xc6 0x11 0x30 0x3d 0x62 VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5926 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 5930 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 5934 0x04 0x40 0x02 0xc6 0x12 0x50 0x3d 0x62 VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5942 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8 + 5946 0x1c 0x2c 0x12 0xf8 VMOV x8, bmll3 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 5950 0x1c 0x40 0x02 0xd8 VSHIFT x8, x8, x0, r0 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5954 0x05 0x24 0x12 0xe6 0x13 0x70 0x3d 0x62 VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 +.aggressive_scheduled_block_id 10 +.nohwbrkpt +.noswbrkpt + 5962 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5966 0x05 0x50 0x02 0xc6 0x10 0x30 0x3d 0x62 VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5974 0x1c 0x14 0x92 0xf8 VMOV bmll4, x10 + 5978 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2 + 5982 0x1d 0xe2 0x01 0xb8 VEXTRACT.32 r23, x8, #0, vaddSign0 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 5986 0x1d 0x2c 0x12 0xf8 VMOV x10, bmll3 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 5990 0xe2 0xd0 0x83 0x54 0x03 0x74 VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 5996 0x1d 0xa0 0x12 0xf8 VMOV x11, bmll0 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6000 0xe0 0xd4 0x8a 0xb4 0x06 0xb4 VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6006 0x1c 0xd4 0xa0 0x38 VSEL.32 x9, x10, x9, r20 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6010 0x1d 0x10 0xd1 0x78 VINSERT.32 x10, x2, #0, r6 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6014 0x1c 0x12 0xf1 0x78 VINSERT.32 x8, x2, #0, r23 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6018 0x1d 0xd3 0x22 0xf8 VMOV wl11, wl9 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6022 0x1d 0x93 0x22 0xf8 VMOV wh11, wl9 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6026 0x1c 0x15 0x22 0xf8 VMOV wh8, wl10 + 6030 0x1c 0x5c 0x00 0x38 VSEL.32 x8, x11, x8, r16 + 6034 0x1c 0x0c 0x08 0x38 VSEL.32 x8, x1, x8, r17 + 6038 0x1b 0xc3 0xa8 0x38 VSEL.32 x7, x8, x7, r21 + 6042 0x18 0x0e 0x92 0xf8 VMOV bmll0, x7 + 6046 0x1c 0xac 0x92 0xf8 VMOV x9, x6 + 6050 0x68 0x02 0xc0 0x01 0x07 0x49 0x70 0x02 VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 + 6058 0x1c 0x32 0x92 0xf8 VMOV x8, x9 + 6062 0x05 0xbb 0xcd 0xed 0xea 0x0f 0x12 0x4c 0x83 0x5a LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 6072 0x00 0x0b 0x3e 0x91 0x11 0xec 0xa1 0x62 SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 6080 0x05 0xa5 0xe2 0x33 0x09 0x2f 0x10 0xec 0x61 0x5a SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6090 0x13 0xec 0x01 0x48 VMUL.f dm3, x6, x0, r2 + 6094 0x00 0x00 NOPX + 6096 0x00 0x00 NOPX + 6098 0x0c 0xc1 0x16 0x18 VCONV.bf16.fp32 wl9, bmll2 + 6102 0x00 0x00 NOPX + 6104 0x12 0x52 0x83 0x48 VMSC.f dm2, dm2, x9, x4, r2 + 6108 0x00 0x00 NOPX + 6110 0x00 0x00 NOPX + 6112 0x00 0x00 NOPX + 6114 0x00 0x00 NOPX + 6116 0x00 0x00 NOPX + 6118 0x0c 0x41 0x16 0x18 VCONV.bf16.fp32 wl8, bmll2 + 6122 0x00 0x00 NOPX + 6124 0x14 0xf0 0xa1 0x48 VMUL.f dm4, x8, x5, r2 + 6128 0x12 0xf0 0x61 0x48 VMUL.f dm2, x8, x3, r2 + 6132 0x00 0x00 NOPX + 6134 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 6136 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.noswbrkpt + 6138 0x12 0xf2 0xa1 0x48 VMUL.f dm2, x9, x5, r2 +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6142 0x19 0x70 0x12 0xf8 VMOV lfl0, bmll4 +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6146 0x14 0x88 0x3d 0x48 VADD.f dm4, dm4, dm2, r2 +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6150 0x1c 0x05 0x92 0xf8 VMOV bmll4, lfl0 + 6154 0x00 0x00 NOPX + 6156 0x00 0x00 NOPX +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id first + 6158 0x00 0x00 NOPX +.aggressive_scheduled_block_id 14 +.noswbrkpt + 6160 0x12 0xe1 0x01 0x48 VMUL.f dm2, x0, x8, r2 +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6164 0x18 0x70 0x12 0xf8 VMOV lfh0, bmll4 +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6168 0x14 0x88 0x3d 0x48 VADD.f dm4, dm4, dm2, r2 +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6172 0x1c 0x01 0x92 0xf8 VMOV bmll4, lfh0 + 6176 0x00 0x00 NOPX + 6178 0x00 0x00 NOPX +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id first + 6180 0x00 0x00 NOPX +.aggressive_scheduled_block_id 15 +.noswbrkpt + 6182 0x11 0xf2 0x61 0x48 VMUL.f dm1, x9, x3, r2 +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6186 0x19 0x70 0x12 0xf8 VMOV lfl0, bmll4 +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6190 0x14 0x84 0x3d 0x48 VADD.f dm4, dm4, dm1, r2 +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6194 0x1c 0x05 0x92 0xf8 VMOV bmll4, lfl0 + 6198 0x00 0x00 NOPX + 6200 0x00 0x00 NOPX +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id first + 6202 0x00 0x00 NOPX +.aggressive_scheduled_block_id 16 +.noswbrkpt + 6204 0x11 0xf2 0x01 0x48 VMUL.f dm1, x9, x0, r2 +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6208 0x18 0x70 0x12 0xf8 VMOV lfh0, bmll4 +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6212 0x14 0x84 0x3d 0x48 VADD.f dm4, dm4, dm1, r2 +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6216 0x1c 0x01 0x92 0xf8 VMOV bmll4, lfh0 + 6220 0x00 0x00 NOPX + 6222 0x00 0x00 NOPX + 6224 0x00 0x00 NOPX + 6226 0x00 0x00 NOPX +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id first + 6228 0x1d 0x70 0x12 0xf8 VMOV lfl1, bmll4 +.aggressive_scheduled_block_id 17 +.noswbrkpt + 6232 0x12 0x88 0x3d 0x48 VADD.f dm2, dm4, dm2, r2 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6236 0x1c 0x15 0x92 0xf8 VMOV bmll4, lfl1 + 6240 0x00 0x00 NOPX + 6242 0x00 0x00 NOPX + 6244 0x00 0x00 NOPX + 6246 0x00 0x00 NOPX +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id first + 6248 0x1c 0x68 0x12 0xf8 VMOV lfh1, bmll2 +.aggressive_scheduled_block_id 18 +.noswbrkpt + 6252 0x12 0x44 0x3d 0x48 VADD.f dm2, dm2, dm1, r2 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 0x1a 0x11 0x92 0xf8 VMOV bmll2, lfh1 + 6260 0x00 0x00 NOPX + 6262 0x00 0x00 NOPX + 6264 0x00 0x00 NOPX + 6266 0x00 0x00 NOPX +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id first + 6268 0x1d 0x68 0x12 0xf8 VMOV lfl1, bmll2 +.aggressive_scheduled_block_id 19 +.noswbrkpt + 6272 0x10 0x20 0x3d 0x48 VADD.f dm0, dm1, dm0, r2 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6276 0x19 0x15 0x92 0xf8 VMOV bmll1, lfl1 + 6280 0x00 0x00 NOPX + 6282 0x00 0x00 NOPX + 6284 0x00 0x00 NOPX + 6286 0x00 0x00 NOPX +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id first + 6288 0x1c 0x60 0x12 0xf8 VMOV lfh1, bmll0 +.aggressive_scheduled_block_id 20 +.noswbrkpt + 6292 0x10 0x0c 0x3d 0x48 VADD.f dm0, dm0, dm3, r2 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6296 0x18 0x11 0x92 0xf8 VMOV bmll0, lfh1 + 6300 0x00 0x00 NOPX + 6302 0x00 0x00 NOPX + 6304 0x00 0x00 NOPX + 6306 0x00 0x00 NOPX + 6308 0x0d 0xc0 0x16 0x18 VCONV.bf16.fp32 wl11, bmll0 + 6312 0x00 0x00 NOPX + 6314 0x1d 0x85 0xfe 0xd8 VSHIFT x11, x0, x11, r31 + 6318 0x1d 0xd5 0xcc 0x38 VSEL.8 x11, x10, x11, r19:r18 + 6322 0x00 0x00 NOPX + 6324 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x8b 0x65 0x41 0x36 NOPA; NOPB; VST wh11, [p7, #32]; NOPX +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488 +.end_of_loop + 6336 0x00 0x2c 0xf0 0x00 0x27 0x8a 0xea 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV +.loop_nesting 0 + 6352 0x00 0x0c 0x78 0x00 0x00 0x84 J #6384 +.delay_slot +.swstall delay_slot + 6358 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6360 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6362 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6364 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6366 0x00 0x00 NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520 + 6368 0xff 0xb4 0xb0 0xb4 0x80 0x5c ST dn3, [sp, #-4]; MOVX vaddSign0, #1 + 6374 0x00 0x2c 0xf7 0xf8 0x3d 0x80 0x00 0x00 0x00 0x7a NOPA; ST lr, [sp, #-8]; NOPX +.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 6384 0x1f 0x61 0x91 0x18 ADD.NC p7, r3, #34 + 6388 0xe0 0x8f 0x5b 0x64 0xc1 0xd4 LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id first + 6394 0x18 0x7b 0x60 0xf8 MOV crSCDEn, crMCDEn +.aggressive_scheduled_block_id 21 +.noswbrkpt + 6398 0x07 0x04 0x77 0x18 ST.s16 r3, [p7] +.aggressive_scheduled_block_id 21 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6402 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6412 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6414 0x10 0xc6 0x07 0x18 ADD r3, r3, #1 +.delay_slot + 6418 0x00 0x2c 0xf0 0x00 0x20 0xc0 0xb0 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM +.return_address + 6432 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 6436 0x07 0xfc 0x99 0x18 LDA p1, [sp, #-4] + 6440 0x07 0x54 0x77 0x18 ST.s16 r3, [p7, #10] + 6444 0xff 0xe0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-256 + 6450 0x00 0x00 NOPX + 6452 0x00 0x00 NOPX + 6454 0x00 0x00 NOPX + 6456 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 6460 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.delay_slot +.swstall delay_slot + 6464 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6470 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 6480 0x1f 0xf4 0x00 0x00 0x02 0xb8 0x00 0x00 0x20 0xba MOVA r20, #255; J #5568 +.delay_slot + 6490 0x10 0x2a 0x01 0x18 MOVX r21, #0 +.delay_slot +.swstall delay_slot + 6494 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6500 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664 + 6512 0x10 0x0a 0x15 0x18 MOVX r5, #5 + 6516 0x11 0x4a 0x67 0x98 EQ r5, r5, r6 + 6520 0x28 0x0e 0x30 0x40 0x01 0x84 JNZ r5, #7264 +.delay_slot +.swstall delay_slot + 6526 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6530 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6534 0x00 0x00 NOPX + 6536 0x10 0x0e 0x19 0x18 MOVX r7, #6 + 6540 0x11 0xce 0x67 0x98 EQ r7, r7, r6 + 6544 0x38 0x0e 0xa8 0x40 0x01 0x84 JNZ r7, #7504 +.delay_slot + 6550 0x10 0x0a 0x41 0x18 MOVX r5, #16 +.delay_slot +.swstall delay_slot + 6554 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6556 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6558 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 6576 0x48 0x1e 0x50 0x01 0x10 0xea 0x60 0xf0 0x78 0xba LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr + 6586 0x89 0x8d 0x18 0xa4 0x05 0x64 NE r6, r17, r6; MOV r17, #257 + 6592 0x30 0x0e 0x20 0x40 0x01 0x84 JNZ r6, #7232 +.delay_slot +.swstall delay_slot + 6598 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6600 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6602 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6604 0x00 0x00 NOPX +.delay_slot + 6606 0x11 0xca 0x5e 0x98 ASHL r5, r7, r5 + 6610 0x04 0x8a 0x80 0x84 0x8b 0x00 0x00 0x04 0x7d 0x08 0x10 0x76 MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 + 6622 0x48 0x1e 0xd7 0xfd 0xa5 0x80 0x00 0x05 0xbd 0x38 0x10 0x76 LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 + 6634 0x00 0x1d 0x15 0x98 VLDA bmll2, [p0], #64 + 6638 0x00 0x00 NOPX + 6640 0x1c 0xc2 0x92 0xf8 VMOV bmhh4, x1 + 6644 0x00 0x00 NOPX + 6646 0x03 0x13 0x12 0xe6 0x11 0x68 0x3d 0x62 VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 + 6654 0x00 0x00 NOPX + 6656 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb9 0xff 0xc8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824 +.loop_nesting 1 +.begin_of_loop + 6672 0x03 0xa2 0xb0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV + 6688 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 6704 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 6720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id first + 6736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0x62 0x09 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV +.aggressive_scheduled_block_id 22 +.noswbrkpt + 6752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x8b 0x41 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920 +.end_of_loop +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x89 0x89 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id first + 6784 0x02 0x10 0x00 0x00 0x01 0xf3 0xb2 0x78 0x10 0xba MOVA r16, #16; MOVXM p7, #509168 +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6794 0xe0 0x90 0x50 0x00 0x61 0x08 0x98 0x01 0x58 0xba LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6804 0x10 0x22 0x05 0x18 MOVX r17, #1 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6808 0x00 0x00 NOPX +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6810 0x1c 0xc4 0x12 0xf8 VMOV bmhh4, bmll1 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6814 0x00 0x00 NOPX +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6816 0x19 0x33 0x12 0xf8 VMOV x2, bmhh4 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6820 0x19 0x10 0x12 0xd8 VSHIFT x2, x2, x0, r4 +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6824 0x01 0x3a 0x80 0x00 0x49 0x2f 0x10 0x40 0x3d 0x5a MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6834 0x1a 0x13 0x12 0xf8 VMOV bmll2, bmhh4 + 6838 0x19 0x16 0x72 0xf8 VBCST.32 x2, r5 + 6842 0x19 0x04 0x92 0xf8 VMOV bmll1, x2 + 6846 0x00 0x00 NOPX + 6848 0x1a 0x04 0x12 0xf8 VMOV bmll2, bmll1 +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id first + 6852 0x19 0x20 0x12 0xf8 VMOV x2, bmll0 +.aggressive_scheduled_block_id 24 +.noswbrkpt + 6856 0x01 0x10 0x42 0xc6 0x10 0x0c 0x3d 0x62 VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6864 0x1b 0x04 0x92 0xf8 VMOV bmll3, x2 + 6868 0x19 0x20 0x92 0xf8 VMOV x2, x0 + 6872 0x00 0x00 NOPX + 6874 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1 + 6878 0x00 0x00 NOPX +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id first + 6880 0x01 0xa0 0x12 0xe6 0x14 0x40 0x83 0x62 VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 +.aggressive_scheduled_block_id 25 +.noswbrkpt + 6888 0x01 0x98 0x1a 0xc6 0x10 0x08 0x3d 0x62 VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6896 0x1a 0x06 0x92 0xf8 VMOV bmll2, x3 + 6900 0x19 0xa4 0x92 0xf8 VMOV x3, x2 + 6904 0x00 0x00 NOPX + 6906 0x00 0x00 NOPX + 6908 0x09 0x42 0x16 0x18 VCONV.bf16.fp32 wl2, bmll4 +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id first + 6912 0x1a 0xa0 0x12 0xf8 VMOV x5, bmll0 +.aggressive_scheduled_block_id 26 +.noswbrkpt + 6916 0x03 0x28 0x02 0xc6 0x10 0x08 0x3d 0x62 VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6924 0x1a 0x0c 0x92 0xf8 VMOV bmll2, x6 + 6928 0x1a 0xa6 0x92 0xf8 VMOV x5, x3 + 6932 0x00 0x00 NOPX + 6934 0x00 0x00 NOPX + 6936 0x00 0x00 NOPX + 6938 0x1b 0x20 0x12 0xf8 VMOV x6, bmll0 + 6942 0x18 0x1a 0x01 0xb8 VEXTRACT.32 r0, x6, #0, vaddSign0 + 6946 0x00 0x00 NOPX + 6948 0x1b 0x00 0x11 0x78 VINSERT.32 x6, x0, #0, r0 + 6952 0x18 0x8b 0x08 0x38 VSEL.32 x1, x1, x6, r17 + 6956 0x1a 0x02 0x92 0xf8 VMOV bmll2, x1 + 6960 0x18 0xaa 0x92 0xf8 VMOV x1, x5 + 6964 0x58 0x22 0xc0 0x01 0x91 0x49 0x70 0x02 VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 + 6972 0x00 0x00 NOPX + 6974 0x11 0x4a 0x83 0x48 VMSC.f dm1, dm2, x5, x4, r2 + 6978 0x10 0xea 0x41 0x48 VMUL.f dm0, x5, x2, r2 + 6982 0x00 0x00 NOPX + 6984 0x00 0x00 NOPX + 6986 0x00 0x00 NOPX + 6988 0x00 0x00 NOPX + 6990 0x08 0xc0 0x96 0x18 VCONV.bf16.fp32 wl1, bmll1 + 6994 0x14 0x84 0x83 0x48 VMSC.f dm4, dm4, x2, x4, r2 + 6998 0x13 0x22 0x83 0x48 VMSC.f dm3, dm1, x1, x4, r2 + 7002 0x00 0x00 NOPX + 7004 0x00 0x00 NOPX + 7006 0x00 0x00 NOPX + 7008 0x00 0x00 NOPX + 7010 0x09 0xc2 0x16 0x18 VCONV.bf16.fp32 wl3, bmll4 + 7014 0x0b 0x41 0x96 0x18 VCONV.bf16.fp32 wl6, bmll3 + 7018 0x00 0x00 NOPX + 7020 0x12 0xec 0x61 0x48 VMUL.f dm2, x6, x3, r2 + 7024 0x13 0xec 0x41 0x48 VMUL.f dm3, x6, x2, r2 + 7028 0x00 0x00 NOPX + 7030 0x00 0x00 NOPX +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id first + 7032 0x00 0x00 NOPX +.aggressive_scheduled_block_id 27 +.noswbrkpt + 7034 0x13 0xe2 0x61 0x48 VMUL.f dm3, x1, x3, r2 +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7038 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2 +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7042 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7046 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0 + 7050 0x00 0x00 NOPX + 7052 0x00 0x00 NOPX +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id first + 7054 0x00 0x00 NOPX +.aggressive_scheduled_block_id 28 +.noswbrkpt + 7056 0x13 0xea 0x61 0x48 VMUL.f dm3, x5, x3, r2 +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7060 0x18 0x68 0x12 0xf8 VMOV lfh0, bmll2 +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7064 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7068 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0 + 7072 0x00 0x00 NOPX + 7074 0x00 0x00 NOPX +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id first + 7076 0x00 0x00 NOPX +.aggressive_scheduled_block_id 29 +.noswbrkpt + 7078 0x13 0xe2 0x41 0x48 VMUL.f dm3, x1, x2, r2 +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7082 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2 +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7086 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7090 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0 + 7094 0x00 0x00 NOPX + 7096 0x00 0x00 NOPX + 7098 0x00 0x00 NOPX + 7100 0x00 0x00 NOPX +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id first + 7102 0x18 0x68 0x12 0xf8 VMOV lfh0, bmll2 +.aggressive_scheduled_block_id 30 +.noswbrkpt + 7106 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7110 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0 + 7114 0x00 0x00 NOPX + 7116 0x00 0x00 NOPX + 7118 0x13 0xe0 0xc1 0x48 VMUL.f dm3, x0, x6, r2 + 7122 0x00 0x00 NOPX +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id first + 7124 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2 +.aggressive_scheduled_block_id 31 +.noswbrkpt + 7128 0x13 0x4c 0x3d 0x48 VADD.f dm3, dm2, dm3, r2 +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7132 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0 + 7136 0x00 0x00 NOPX + 7138 0x00 0x00 NOPX + 7140 0x11 0xe2 0x01 0x48 VMUL.f dm1, x1, x0, r2 + 7144 0x00 0x00 NOPX +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id first + 7146 0x18 0x6c 0x12 0xf8 VMOV lfh0, bmll3 +.aggressive_scheduled_block_id 32 +.noswbrkpt + 7150 0x11 0x44 0x3d 0x48 VADD.f dm1, dm2, dm1, r2 +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7154 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0 + 7158 0x00 0x00 NOPX + 7160 0x00 0x00 NOPX + 7162 0x00 0x00 NOPX +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id first + 7164 0x00 0x00 NOPX +.aggressive_scheduled_block_id 33 +.noswbrkpt + 7166 0x00 0x24 0x12 0xe6 0x10 0x40 0x3d 0x62 VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7174 0x02 0x00 0x92 0xe6 0x14 0xea 0x01 0x62 VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 + 7182 0x00 0x00 NOPX + 7184 0x00 0x00 NOPX +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id first + 7186 0x00 0x00 NOPX +.aggressive_scheduled_block_id 34 +.noswbrkpt + 7188 0x10 0x50 0x3d 0x48 VADD.f dm0, dm2, dm4, r2 +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 0x1a 0x00 0x12 0xf8 VMOV bmll2, bmll0 + 7196 0x00 0x00 NOPX + 7198 0x00 0x0c 0x78 0x00 0x00 0x84 J #6384 +.delay_slot + 7204 0x0f 0xfa 0x65 0x98 ST dc4, [sp, #-8] +.delay_slot +.swstall delay_slot + 7208 0x00 0x00 NOPX +.delay_slot + 7210 0x1a 0x00 0x12 0xf8 VMOV bmll2, bmll0 +.delay_slot +.swstall delay_slot + 7214 0x00 0x00 NOPX +.delay_slot + 7216 0x00 0x2c 0xf0 0x00 0x21 0x05 0x12 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384 + 7232 0x00 0x14 0x00 0x00 0x02 0xbe 0x00 0x00 0x20 0xba MOVA r20, #0; J #5616 +.delay_slot + 7242 0x10 0x2a 0x01 0x18 MOVX r21, #0 +.delay_slot +.swstall delay_slot + 7246 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7248 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7250 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7252 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416 + 7264 0x00 0x0e 0x90 0x00 0x00 0x84 J #7456 +.delay_slot + 7270 0xff 0x93 0xb0 0x02 0x60 0xf0 0x70 0x02 ST p1, [sp, #-4]; MOV dc4, lr +.delay_slot +.swstall delay_slot + 7278 0x00 0x00 NOPX +.delay_slot + 7280 0x0f 0xf0 0x33 0x18 VST x0, [sp, #-256] +.delay_slot + 7284 0x0f 0xf5 0x33 0x18 VST x4, [sp, #-192] +.delay_slot + 7288 0xff 0x0e 0x60 0x00 0x01 0xa5 0x70 0x02 VST x1, [sp, #-128]; NOPM +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448 + 7296 0x10 0x22 0x05 0x18 MOVX r17, #1 + 7300 0x14 0x62 0x67 0x98 EQ r17, r17, r6 + 7304 0x88 0x0e 0x90 0x40 0x01 0x84 JNZ r17, #7456 +.delay_slot +.swstall delay_slot + 7310 0x00 0x00 NOPX +.delay_slot + 7312 0x0f 0xf0 0x33 0x18 VST x0, [sp, #-256] +.delay_slot + 7316 0x0f 0xf5 0x33 0x18 VST x4, [sp, #-192] +.delay_slot + 7320 0x0f 0xf8 0x73 0x18 VST x1, [sp, #-128] +.delay_slot + 7324 0xff 0x93 0xb0 0x00 0x70 0x4a 0x60 0xf0 0x79 0x3a ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr + 7334 0x11 0xce 0x67 0x98 EQ r7, r7, r6 + 7338 0x38 0x0e 0x80 0x40 0x01 0x84 JNZ r7, #7424 +.delay_slot +.swstall delay_slot + 7344 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7346 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7348 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7350 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7352 0x00 0x00 NOPX + 7354 0x11 0x4e 0x67 0x98 EQ r7, r5, r6 + 7358 0x38 0x0e 0x70 0x40 0x01 0x84 JNZ r7, #7392 +.delay_slot + 7364 0x10 0x0a 0x41 0x18 MOVX r5, #16 +.delay_slot +.swstall delay_slot + 7368 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7374 0x00 0x00 NOPX + 7376 0x00 0x0c 0xd8 0x00 0x00 0x84 J #6576 +.delay_slot +.swstall delay_slot + 7382 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7384 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7386 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7390 0x00 0x00 NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544 + 7392 0x20 0x31 0x00 0x00 0x02 0xb8 0x00 0x00 0x20 0xba MOVA r17, #257; J #5568 +.delay_slot + 7402 0x05 0x40 0x28 0x00 0x41 0x64 MOVX r21, #0; MOV m4, #16 +.delay_slot + 7408 0x10 0x28 0x01 0x18 MOVX r20, #0 +.delay_slot +.swstall delay_slot + 7412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7416 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576 + 7424 0x00 0x0c 0xa8 0x00 0x00 0x84 J #6480 +.delay_slot + 7430 0x00 0x00 0xf8 0xbf 0xfe 0x44 MOVXM r17, #65535 +.delay_slot + 7436 0x1c 0x00 0x20 0xb8 MOV m4, #16 +.delay_slot +.swstall delay_slot + 7440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7442 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7444 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 7456 0xfe 0x07 0x70 0x00 0x02 0xb8 0x00 0x00 0x20 0xba VLDA x0, [sp, #-256]; J #5568 +.delay_slot + 7466 0xfe 0xa7 0x70 0x00 0x00 0x8a 0x88 0x00 0x58 0xba VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 +.delay_slot + 7476 0xff 0x93 0x20 0x00 0x00 0x3e 0x0f 0xff 0x90 0xba LDA p1, [sp, #-4]; MOVXM r16, #65535 +.delay_slot + 7486 0x05 0x40 0x28 0x00 0x81 0x64 MOVX r21, #0; MOV m4, #32 +.delay_slot + 7492 0x11 0x22 0x05 0x18 MOVX r17, #257 +.delay_slot + 7496 0xff 0x0f 0x70 0x04 0x00 0x00 0x1c 0x22 VLDA x1, [sp, #-128]; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656 + 7504 0x00 0x0c 0xa8 0x00 0x00 0x84 J #6480 +.delay_slot + 7510 0x1c 0xc1 0xe0 0xf8 MOV dc4, lr +.delay_slot + 7514 0x00 0x00 0xf8 0xbf 0xfe 0x44 MOVXM r17, #65535 +.delay_slot + 7520 0x1c 0x00 0x20 0xb8 MOV m4, #16 +.delay_slot +.swstall delay_slot + 7524 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7526 0x00 0x00 NOPX +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0 + +.text_segment PM 7536 +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 7536 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7542 0xfd 0xf3 0xb0 0x00 0x01 0xf3 0xb2 0x60 0x11 0x3a ST p7, [sp, #-20]; MOVXM p7, #509120 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7552 0xe0 0xc2 0xd7 0xe7 0x1d 0x82 0x0d 0x70 0x72 0xba LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 0xff 0x2e 0xb0 0x21 0x04 0x81 0x68 0xf0 0x79 0x3a ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7572 0xfe 0x3e 0xb8 0x47 0xf6 0x5c ST r15, [sp, #-16]; ADD r17, r16, #-2 + 7578 0x0f 0xe9 0xb5 0x98 ST r13, [sp, #-24] + 7582 0x00 0x00 NOPX + 7584 0x00 0x00 NOPX + 7586 0x00 0x00 NOPX + 7588 0x80 0x0f 0xf0 0x40 0x01 0x84 JNZ r16, #8160 +.delay_slot + 7594 0x0f 0xfd 0x95 0x98 ST r12, [sp, #-4] +.delay_slot + 7598 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] +.delay_slot + 7602 0x0f 0xe0 0x1d 0x98 ST p0, [sp, #-32] +.delay_slot + 7606 0x00 0x07 0xcc 0xc9 0x90 0x44 MOVXM p6, #509128 +.delay_slot + 7612 0x0e 0x06 0x31 0x98 ST r17, [p6] + 7616 0x00 0x31 0x07 0x88 0x8b 0x00 0x01 0xf1 0x32 0x76 0x10 0x76 MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7628 0x40 0xc6 0x30 0x00 0x01 0xf1 0x32 0x78 0x11 0x3a ST r17, [p2]; MOVXM p2, #509168 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7638 0x40 0xc0 0xec 0xc5 0x81 0xd4 ST.s8 r16, [p2]; MOV p6, p1 +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7644 0x00 0x05 0x08 0x00 0x01 0x04 JL #2576 +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7650 0x00 0x07 0xc0 0xc8 0x80 0x44 MOVXM p0, #508992 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7658 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7660 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot +.swstall delay_slot + 7664 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.return_address + 7680 0x00 0x11 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r17, #0; MOVXM p2, #508992 + 7690 0x40 0xba 0xd0 0x00 0x01 0xf1 0x32 0x64 0x10 0xba LDA r14, [p2]; MOVXM p2, #509128 + 7700 0x40 0xca 0xd0 0x00 0x01 0xf1 0x32 0x22 0x10 0xba LDA r18, [p2]; MOVXM p2, #508996 + 7710 0x43 0xb6 0xd0 0x00 0x01 0xf1 0xb2 0x68 0x10 0xba LDA r13, [p2], #4; MOVXM p3, #509136 + 7720 0x42 0x85 0xd0 0x00 0x01 0xf0 0xb2 0x66 0x10 0xba LDA el0, [p2, #4]; MOVXM p1, #509132 + 7730 0x40 0xbe 0xd8 0x39 0x81 0xd4 LDA r15, [p2]; MOV r16, p6 + 7736 0x1a 0x68 0x14 0x18 ADD.NC p2, r16, #40 + 7740 0x00 0x07 0xcc 0xca 0x00 0x44 MOVXM p6, #509184 + 7746 0x00 0x07 0xc0 0xc9 0xd0 0x44 MOVXM p0, #509160 + 7752 0x13 0xa5 0x2f 0x98 MUL r18, r14, r18 + 7756 0x80 0x00 0x08 0x20 0x00 0x44 MOVXM r16, #-2147483648 + 7762 0x60 0x85 0x36 0xca 0x5f 0x5c ST el0, [p3]; MUL r18, r13, r18 + 7768 0x00 0x00 NOPX + 7770 0x13 0xe5 0x2f 0x98 MUL r18, r15, r18 + 7774 0x00 0x00 NOPX + 7776 0x09 0x06 0x51 0x98 ST r18, [p1] + 7780 0x02 0x4c 0x2e 0x98 LDA el0, [p2], #16 + 7784 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7788 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7792 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7796 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7800 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7804 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7808 0x08 0x04 0x29 0x98 ST el0, [p0] + 7812 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7816 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7820 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7824 0x02 0xdc 0x36 0x98 LDA r1, [p2], #-12 + 7828 0x00 0x00 NOPX + 7830 0x00 0x00 NOPX + 7832 0x00 0x00 NOPX + 7834 0x00 0x00 NOPX + 7836 0x00 0x00 NOPX + 7838 0x00 0x00 NOPX + 7840 0x10 0x63 0x0b 0x98 GEU r17, r1, r16 + 7844 0x88 0x0f 0x78 0x40 0x01 0x84 JNZ r17, #7920 +.delay_slot + 7850 0x1b 0x1e 0xc0 0xf8 MOV r12, p7 +.delay_slot + 7854 0x0f 0xd9 0x1d 0x98 ST p2, [sp, #-40] +.delay_slot +.swstall delay_slot + 7858 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7860 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7862 0x00 0x00 NOPX +.no_stack_arguments + 7864 0x00 0x15 0x50 0x00 0x01 0x04 JL #10912 +.delay_slot + 7870 0x0f 0xdd 0x95 0x98 ST r12, [sp, #-36] +.delay_slot +.swstall delay_slot + 7874 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7876 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7878 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7880 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.return_address + 7888 0x00 0x0f 0x98 0x00 0x00 0x84 J #7984 +.delay_slot + 7894 0x00 0x07 0xce 0xc9 0xe0 0x44 MOVXM p7, #509168 +.delay_slot +.swstall delay_slot + 7900 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7902 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7904 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7906 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384 +.no_stack_arguments + 7920 0x00 0x15 0x50 0x00 0x01 0x04 JL #10912 +.delay_slot +.swstall delay_slot + 7926 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7928 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7930 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7932 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 7936 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0x18 0x0c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV +.return_address +.no_stack_arguments + 7952 0x00 0x18 0x40 0x00 0x01 0x04 JL #12416 +.delay_slot + 7958 0x18 0x50 0x20 0xf8 MOV r1, r0 +.delay_slot + 7962 0x00 0x07 0xce 0xc9 0xe0 0x44 MOVXM p7, #509168 +.delay_slot + 7968 0x4f 0x00 0x01 0x20 0x00 0x44 MOVXM r2, #1325400064 +.delay_slot + 7974 0x0f 0xdd 0x95 0x98 ST r12, [sp, #-36] +.delay_slot +.swstall delay_slot + 7978 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.return_address + 7984 0xe0 0xc0 0x50 0x02 0xd2 0x00 0x00 0x08 0xb8 0xba LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 + 7994 0xfb 0x40 0x80 0x01 0x80 0x08 0x00 0x49 0x78 0xba MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 + 8004 0xfb 0x23 0x20 0x00 0x01 0xf1 0xb2 0x6a 0x10 0xba LDA p2, [sp, #-40]; MOVXM p3, #509140 + 8014 0x00 0x07 0xc2 0xc9 0xb0 0x44 MOVXM p1, #509144 + 8020 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 8026 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 8028 0x06 0x1e 0x17 0x18 ST.s16 r16, [p6], #2 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8032 0x14 0x3a 0x80 0x18 MOVX crRnd, r16 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8036 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8040 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8042 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0 + 8046 0x00 0x00 NOPX + 8048 0x00 0x00 NOPX + 8050 0x06 0x0b 0x07 0x18 ST.s8 r24, [p6], m0 + 8054 0x00 0x00 NOPX + 8056 0x00 0x00 NOPX + 8058 0x00 0x00 NOPX + 8060 0x00 0x00 NOPX + 8062 0x00 0x00 NOPX + 8064 0x00 0x00 NOPX + 8066 0x0e 0x1d 0xd1 0x98 ST r14, [p6], #4 + 8070 0x0e 0x05 0xf1 0x98 ST r15, [p6] + 8074 0x0e 0x15 0xb1 0x98 ST r13, [p6, #4] + 8078 0x02 0x1c 0x2e 0x98 LDA el0, [p2], #4 + 8082 0x00 0x00 NOPX + 8084 0x00 0x00 NOPX + 8086 0x00 0x00 NOPX + 8088 0x00 0x00 NOPX + 8090 0x00 0x00 NOPX + 8092 0x00 0x00 NOPX + 8094 0x0b 0x04 0x29 0x98 ST el0, [p3] + 8098 0x02 0x04 0x2e 0x98 LDA el0, [p2] + 8102 0x00 0x00 NOPX + 8104 0x00 0x00 NOPX + 8106 0x00 0x00 NOPX + 8108 0x00 0x00 NOPX + 8110 0x00 0x00 NOPX + 8112 0x00 0x00 NOPX + 8114 0x09 0x04 0x29 0x98 ST el0, [p1] + 8118 0x02 0x14 0x2e 0x98 LDA el0, [p2, #4] + 8122 0x00 0x00 NOPX + 8124 0x00 0x0f 0xf8 0x00 0x00 0x84 J #8176 +.delay_slot + 8130 0x00 0x07 0xc0 0xc9 0xb8 0x44 MOVXM p0, #509148 +.delay_slot +.swstall delay_slot + 8136 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8138 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8140 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 8144 0x00 0x2c 0xf0 0x00 0x20 0x04 0x29 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624 + 8160 0xfb 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0x68 0x11 0x3a ST p2, [sp, #-36]; MOVXM p7, #509136 + 8170 0x00 0x2c 0xf6 0x29 0x81 0xd4 NOPA; MOV r12, p2 +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640 + 8176 0xe0 0xc2 0xd0 0x44 0x0a 0x2c LDA r16, [p7]; MOVX r17, #1 + 8182 0x00 0x00 NOPX + 8184 0x00 0x00 NOPX + 8186 0x00 0x00 NOPX + 8188 0x00 0x00 NOPX + 8190 0x00 0x00 NOPX + 8192 0x00 0x00 NOPX + 8194 0x14 0x63 0x08 0x98 NE r17, r17, r16 + 8198 0x88 0x10 0x58 0x40 0x01 0x84 JNZ r17, #8368 +.delay_slot + 8204 0x1e 0x66 0x06 0x18 ADD.NC p6, r12, #12 +.delay_slot +.swstall delay_slot + 8208 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8210 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8212 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8214 0x00 0x00 NOPX + 8216 0x00 0x07 0xc4 0xc9 0x88 0x44 MOVXM p2, #509124 + 8222 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p2]; MOVXM p2, #509024 + 8232 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 8236 0x00 0x00 NOPX + 8238 0x00 0x00 NOPX +.no_stack_arguments + 8240 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot + 8246 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 8250 0x00 0x00 NOPX +.delay_slot + 8252 0x14 0x36 0xda 0x98 LT r27, r16, r13 +.delay_slot + 8256 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 8262 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 8272 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 8278 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 8282 0x80 0x10 0x50 0x40 0x01 0x84 JNZ r16, #8352 +.delay_slot +.swstall delay_slot + 8288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8294 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8296 0x00 0x00 NOPX + 8298 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6 + 8304 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 8308 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 8312 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 8316 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.noswbrkpt + 8318 0x02 0x46 0x36 0x98 LDA r17, [p2, #16] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8322 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8324 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8326 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8328 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8330 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8334 0x0a 0x06 0x31 0x98 ST r17, [p2] + 8338 0x00 0x00 NOPX + 8340 0x00 0x00 NOPX + 8342 0x00 0x00 NOPX + 8344 0x00 0x00 NOPX + 8346 0x00 0x2c 0xf8 0xa6 0x10 0x2c NOPA; ACQ r17, r16 +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816 + 8352 0x00 0x00 NOPX + 8354 0x00 0x00 NOPX + 8356 0x00 0x00 NOPX + 8358 0xe0 0xc2 0xd0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba LDA r16, [p7]; NOPB; NOPM +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832 + 8368 0x10 0x1c 0x09 0x18 MOVX r14, #2 + 8372 0x00 0x00 NOPX + 8374 0x00 0x00 NOPX + 8376 0x00 0x00 NOPX + 8378 0x00 0x00 NOPX + 8380 0x00 0x00 NOPX + 8382 0x13 0xa1 0x08 0x98 NE r16, r14, r16 + 8386 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544 +.delay_slot +.swstall delay_slot + 8392 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8394 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8396 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8398 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8400 0x00 0x00 NOPX + 8402 0x00 0x07 0xc4 0xc9 0xc0 0x44 MOVXM p2, #509152 + 8408 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p2]; MOVXM p2, #509024 + 8418 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 8422 0x00 0x00 NOPX + 8424 0x00 0x00 NOPX +.no_stack_arguments + 8426 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot + 8432 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 8436 0x00 0x00 NOPX +.delay_slot + 8438 0x14 0x36 0xda 0x98 LT r27, r16, r13 +.delay_slot + 8442 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 8448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV +.return_address + 8464 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 8470 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 8474 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544 +.delay_slot +.swstall delay_slot + 8480 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8484 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8486 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8488 0x00 0x00 NOPX + 8490 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6 + 8496 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 8500 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 8504 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 8508 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 8510 0x02 0x46 0x36 0x98 LDA r17, [p2, #16] +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8514 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8516 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8518 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8520 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8522 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8526 0x0a 0x06 0x31 0x98 ST r17, [p2] + 8530 0x00 0x00 NOPX + 8532 0x00 0x00 NOPX + 8534 0x00 0x00 NOPX + 8536 0x00 0x00 NOPX + 8538 0x00 0x2c 0xf8 0xa6 0x10 0x2c NOPA; ACQ r17, r16 +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008 + 8544 0x00 0x00 NOPX + 8546 0x00 0x00 NOPX + 8548 0x00 0x00 NOPX + 8550 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x72 0x10 0xba LDA r16, [p7]; MOVXM p7, #509156 + 8560 0x00 0x00 NOPX + 8562 0x00 0x00 NOPX + 8564 0x00 0x00 NOPX + 8566 0x00 0x00 NOPX + 8568 0x00 0x00 NOPX + 8570 0x10 0x24 0x11 0x18 MOVX r18, #4 + 8574 0x14 0xa1 0x08 0x98 NE r16, r18, r16 + 8578 0x80 0x11 0x20 0x40 0x01 0x84 JNZ r16, #8768 +.delay_slot + 8584 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024 +.delay_slot +.swstall delay_slot + 8590 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8592 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8594 0x00 0x00 NOPX +.delay_slot + 8596 0x10 0x22 0x01 0x18 MOVX r17, #0 + 8600 0xe0 0xc2 0xd0 0x34 0x02 0x2c LDA r16, [p7]; MOVX r13, #0 + 8606 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 8610 0x00 0x00 NOPX + 8612 0x00 0x00 NOPX +.no_stack_arguments + 8614 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot + 8620 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8622 0x00 0x00 NOPX +.delay_slot + 8624 0x14 0x37 0x1a 0x98 LT r27, r16, r17 +.delay_slot + 8628 0x8c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r17, r16; MOV r15, r27 +.delay_slot + 8634 0x00 0x2c 0xf8 0x02 0x24 0x2c NOPA; SEL.EQZ r0, r16, r17, r27 +.return_address + 8640 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 8646 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 8650 0x80 0x11 0x10 0x40 0x01 0x84 JNZ r16, #8736 +.delay_slot +.swstall delay_slot + 8656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8658 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8660 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8662 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8664 0x00 0x00 NOPX + 8666 0xdf 0xee 0xd0 0x3f 0x17 0xea 0x08 0x01 0x58 0xba LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 + 8676 0x06 0xfe 0x56 0x98 LDA r18, [p6], #-4 + 8680 0x06 0xfe 0x76 0x98 LDA r19, [p6], #-4 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 8684 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.noswbrkpt + 8686 0x06 0x46 0x56 0x98 LDA r18, [p6, #16] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8690 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8692 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8694 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8696 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8698 0x14 0xe5 0x22 0x18 SEL.EQZ r18, r19, r18, r27 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8702 0x0e 0x06 0x51 0x98 ST r18, [p6] + 8706 0x00 0x00 NOPX + 8708 0x00 0x00 NOPX + 8710 0x00 0x11 0x28 0x00 0x00 0x84 J #8784 +.delay_slot +.swstall delay_slot + 8716 0x00 0x00 NOPX +.delay_slot + 8718 0x14 0x93 0x18 0x18 ACQ r18, r17 +.delay_slot +.swstall delay_slot + 8722 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8724 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8726 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200 + 8736 0x00 0x11 0x28 0x00 0x00 0x84 J #8784 +.delay_slot + 8742 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot +.swstall delay_slot + 8746 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8748 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8750 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 + 8768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x28 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 + 8784 0xfc 0x73 0x20 0x00 0x01 0xf3 0x32 0x66 0x10 0xba LDA p7, [sp, #-32]; MOVXM p6, #509132 + 8794 0xc0 0xd6 0xd0 0x00 0x01 0xf1 0x32 0x68 0x10 0xba LDA r21, [p6]; MOVXM p2, #509136 + 8804 0x40 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p6, #509120 + 8814 0x06 0x06 0x96 0x98 LDA r20, [p6] + 8818 0x00 0x00 NOPX + 8820 0x00 0x00 NOPX + 8822 0x00 0x00 NOPX + 8824 0x07 0x06 0x76 0x98 LDA r19, [p7] + 8828 0x15 0x6b 0x0d 0x98 LSHL r21, r21, r16 + 8832 0x14 0x61 0x07 0x98 EQ r16, r17, r16 + 8836 0x80 0x12 0x08 0x40 0x01 0x84 JNZ r16, #9232 +.delay_slot + 8842 0x15 0x28 0x07 0x18 ADD r20, r20, #1 +.delay_slot + 8846 0x0e 0x06 0x91 0x98 ST r20, [p6] +.delay_slot +.swstall delay_slot + 8850 0x00 0x00 NOPX +.delay_slot + 8852 0x18 0x69 0xd5 0x58 ADD.NC p0, r19, r21 +.delay_slot + 8856 0xf7 0x83 0xb0 0x48 0x22 0x5c ST p0, [sp, #-68]; MOVX r18, #4 + 8862 0x14 0x61 0x27 0x98 EQ r16, r17, r18 + 8866 0x80 0x11 0xc0 0x40 0x01 0x84 JNZ r16, #9088 +.delay_slot +.swstall delay_slot + 8872 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8874 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8876 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8878 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8880 0x00 0x00 NOPX + 8882 0x14 0x60 0xe8 0x98 NE r16, r17, r14 + 8886 0x80 0x11 0xa8 0x40 0x01 0x84 JNZ r16, #9040 +.delay_slot + 8892 0x00 0x07 0xcc 0xc9 0xc0 0x44 MOVXM p6, #509152 +.delay_slot +.swstall delay_slot + 8898 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8900 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8902 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8904 0x00 0x00 NOPX + 8906 0xc0 0xca 0xd0 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r18, [p6]; MOVXM p6, #509000 + 8916 0xc0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6a 0x10 0xba LDA r16, [p6]; MOVXM p6, #509140 + 8926 0xc0 0xc6 0xd0 0x60 0x02 0x2c LDA r17, [p6]; MOVX r24, #0 + 8932 0x00 0x00 NOPX + 8934 0x00 0x00 NOPX + 8936 0x00 0x00 NOPX + 8938 0x00 0x00 NOPX + 8940 0x00 0x00 NOPX + 8942 0x14 0xa5 0x0f 0x98 MUL r18, r18, r16 + 8946 0x00 0x00 NOPX + 8948 0x8c 0xe4 0x3a 0x32 0x82 0xa4 SUB r19, r17, r18; ADD.NC r20, r18, r16 + 8954 0x15 0x37 0x1c 0x98 LTU r27, r20, r17 + 8958 0x14 0xe7 0x02 0x18 SEL.EQZ r19, r19, r16, r27 + 8962 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17 + 8966 0x16 0x23 0x32 0x18 SEL.EQZ r17, r24, r19, r27 + 8970 0x14 0x25 0x11 0x98 SUB r18, r16, r17 + 8974 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 8978 0x80 0x12 0x40 0x40 0x01 0x84 JNZ r16, #9344 +.delay_slot + 8984 0x00 0x07 0xcc 0xca 0x20 0x44 MOVXM p6, #509200 +.delay_slot + 8990 0x0e 0x06 0x51 0x98 ST r18, [p6] +.delay_slot +.swstall delay_slot + 8994 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8996 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8998 0x00 0x00 NOPX + 9000 0x00 0x11 0xf8 0x00 0x00 0x84 J #9200 +.delay_slot + 9006 0x00 0x4e 0x00 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba MOVA r14, #2; MOVXM p7, #509136 +.delay_slot + 9016 0x00 0x2f 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r15, #1; MOVXM p2, #508992 +.delay_slot + 9026 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 9030 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9032 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504 + 9040 0x00 0x11 0xf8 0x00 0x00 0x84 J #9200 +.delay_slot + 9046 0x00 0x4e 0x00 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba MOVA r14, #2; MOVXM p7, #509136 +.delay_slot + 9056 0x00 0x2f 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r15, #1; MOVXM p2, #508992 +.delay_slot + 9066 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 9070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9072 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552 + 9088 0x00 0x0d 0x00 0x00 0x01 0xf3 0x32 0x72 0x10 0xba MOVA r13, #0; MOVXM p6, #509156 + 9098 0xc0 0xca 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r18, [p6]; MOVXM p2, #508992 + 9108 0x40 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6c 0x10 0xba LDA r16, [p2]; MOVXM p6, #509144 + 9118 0xc0 0xc6 0xd0 0x3c 0x0a 0x2c LDA r17, [p6]; MOVX r15, #1 + 9124 0x00 0x00 NOPX + 9126 0x00 0x00 NOPX + 9128 0x00 0x00 NOPX + 9130 0x00 0x00 NOPX + 9132 0x00 0x00 NOPX + 9134 0x14 0xa5 0x0f 0x98 MUL r18, r18, r16 + 9138 0x00 0x00 NOPX + 9140 0x8c 0xe4 0x3a 0x32 0x82 0xa4 SUB r19, r17, r18; ADD.NC r20, r18, r16 + 9146 0x15 0x37 0x1c 0x98 LTU r27, r20, r17 + 9150 0x14 0xe7 0x02 0x18 SEL.EQZ r19, r19, r16, r27 + 9154 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17 + 9158 0x13 0x63 0x32 0x18 SEL.EQZ r17, r13, r19, r27 + 9162 0x14 0x25 0x11 0x98 SUB r18, r16, r17 + 9166 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 9170 0x80 0x12 0x40 0x40 0x01 0x84 JNZ r16, #9344 +.delay_slot + 9176 0x00 0x07 0xcc 0xca 0x30 0x44 MOVXM p6, #509208 +.delay_slot + 9182 0x0e 0x06 0x51 0x98 ST r18, [p6] +.delay_slot +.swstall delay_slot + 9186 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9188 0x00 0x00 NOPX +.delay_slot + 9190 0x00 0x2c 0xf0 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba NOPA; MOVXM p7, #509136 +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 + 9200 0xd1 0x81 0x60 0x00 0x04 0x98 0x00 0x00 0x21 0x3a MOVS p6, r12; J #9408 +.delay_slot + 9210 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28] +.delay_slot +.swstall delay_slot + 9214 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9216 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9218 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9220 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696 + 9232 0x00 0x07 0xcc 0xc9 0x88 0x44 MOVXM p6, #509124 + 9238 0xc0 0xce 0xd0 0x00 0x01 0xf3 0x32 0x22 0x10 0xba LDA r19, [p6]; MOVXM p6, #508996 + 9248 0xc0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6e 0x10 0xba LDA r16, [p6]; MOVXM p6, #509148 + 9258 0x06 0x06 0x56 0x98 LDA r18, [p6] + 9262 0x00 0x00 NOPX + 9264 0x00 0x00 NOPX + 9266 0x00 0x00 NOPX + 9268 0x00 0x00 NOPX + 9270 0x00 0x00 NOPX + 9272 0x14 0xe7 0x0f 0x98 MUL r19, r19, r16 + 9276 0x00 0x00 NOPX + 9278 0x95 0x26 0x3a 0xb3 0x82 0xa4 SUB r20, r18, r19; ADD.NC r21, r19, r16 + 9284 0x15 0x77 0x2c 0x98 LTU r27, r21, r18 + 9288 0x15 0x29 0x02 0x18 SEL.EQZ r20, r20, r16, r27 + 9292 0x9e 0xe5 0x98 0xa0 0x01 0x64 LTU r27, r19, r18; MOV r17, #0 + 9298 0x14 0x63 0x42 0x18 SEL.EQZ r17, r17, r20, r27 + 9302 0x14 0x25 0x11 0x98 SUB r18, r16, r17 + 9306 0x14 0x61 0x07 0x98 EQ r16, r17, r16 + 9310 0x80 0x13 0xe0 0x40 0x01 0x84 JNZ r16, #10176 +.delay_slot + 9316 0x00 0x07 0xcc 0xca 0x40 0x44 MOVXM p6, #509216 +.delay_slot + 9322 0x0e 0x06 0x51 0x98 ST r18, [p6] +.delay_slot +.swstall delay_slot + 9326 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9328 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 9330 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.aggressive_scheduled_block_id 7 +.noswbrkpt + 9344 0xf7 0x83 0x26 0x8c 0x0b 0x00 0xe0 0x49 0xe8 0x01 0x58 0x76 LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9356 0x07 0xbc 0x99 0x18 LDA p1, [sp, #-68] +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9360 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28] +.aggressive_scheduled_block_id 7 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9364 0x00 0x08 0x28 0x00 0x01 0x04 JL #4176 +.delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9370 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot + 9374 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 9378 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9380 0x00 0x00 NOPX +.delay_slot + 9382 0x00 0x2c 0xf0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba NOPA; MOVXM p2, #509184 +.return_address + 9392 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x20 0x11 0x3a MOVS p0, p7; MOVXM p2, #508992 + 9402 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136 +.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 9408 0x06 0x5c 0x9e 0x98 LDA p1, [p6], #20 +.no_stack_arguments + 9412 0x00 0x09 0x78 0x00 0x01 0x04 JL #4848 +.delay_slot +.swstall delay_slot + 9418 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9420 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9422 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9426 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.return_address + 9440 0x07 0x06 0x16 0x98 LDA r16, [p7] + 9444 0x00 0x00 NOPX + 9446 0x00 0x00 NOPX + 9448 0x00 0x00 NOPX + 9450 0x00 0x00 NOPX + 9452 0x00 0x00 NOPX + 9454 0x00 0x00 NOPX + 9456 0x13 0xe3 0x08 0x98 NE r17, r15, r16 + 9460 0x88 0x12 0xe0 0x40 0x01 0x84 JNZ r17, #9664 +.delay_slot +.swstall delay_slot + 9466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9474 0x00 0x00 NOPX + 9476 0x00 0x07 0xce 0xc9 0x88 0x44 MOVXM p7, #509124 + 9482 0xe0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p2, #509024 + 9492 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 9496 0x00 0x00 NOPX + 9498 0x00 0x00 NOPX + 9500 0x00 0x00 NOPX +.no_stack_arguments + 9502 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot + 9508 0x00 0x00 NOPX +.delay_slot + 9510 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 9514 0xe0 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p7]; LT r27, r16, r13 +.delay_slot + 9520 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 9526 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 9536 0xfb 0xa3 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 + 9546 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 9550 0x80 0x12 0xd0 0x40 0x01 0x84 JNZ r16, #9632 +.delay_slot + 9556 0x10 0x1e 0x05 0x18 MOVX r15, #1 +.delay_slot +.swstall delay_slot + 9560 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9562 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9564 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9566 0x00 0x00 NOPX + 9568 0x4a 0xc2 0xde 0x0b 0x63 0x0c LDA r16, [p2, #20]; ST r13, [p7] + 9574 0x00 0x00 NOPX + 9576 0x00 0x00 NOPX + 9578 0x00 0x00 NOPX + 9580 0x00 0x00 NOPX + 9582 0x00 0x00 NOPX + 9584 0x00 0x00 NOPX + 9586 0x14 0x10 0xf8 0x18 REL r16, r15 + 9590 0xdc 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba LDA r16, [p6, #-8]; MOVXM p7, #509136 + 9600 0x00 0x00 NOPX + 9602 0x00 0x00 NOPX + 9604 0x00 0x12 0xd8 0x00 0x00 0x84 J #9648 +.delay_slot +.swstall delay_slot + 9610 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9612 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9614 0x00 0x00 NOPX +.delay_slot + 9616 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 +.delay_slot + 9620 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x73 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p6, #-8]; NOPX +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096 + 9632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf3 0xb2 0x68 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112 + 9648 0xe0 0xc2 0xd0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128 + 9664 0x10 0x22 0x01 0x18 MOVX r17, #0 + 9668 0x00 0x00 NOPX + 9670 0x00 0x00 NOPX + 9672 0x00 0x00 NOPX + 9674 0x00 0x00 NOPX + 9676 0x00 0x00 NOPX + 9678 0x13 0xa1 0x08 0x98 NE r16, r14, r16 + 9682 0x80 0x13 0x48 0x40 0x01 0x84 JNZ r16, #9872 +.delay_slot + 9688 0x00 0x07 0xce 0xc9 0xc0 0x44 MOVXM p7, #509152 +.delay_slot + 9694 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024 +.delay_slot +.swstall delay_slot + 9700 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9702 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9704 0x00 0x00 NOPX + 9706 0x07 0x06 0x16 0x98 LDA r16, [p7] + 9710 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 9714 0x00 0x00 NOPX + 9716 0x00 0x00 NOPX + 9718 0x00 0x00 NOPX +.no_stack_arguments + 9720 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot + 9726 0x00 0x00 NOPX +.delay_slot + 9728 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 9732 0xe0 0xc2 0x38 0x6e 0x35 0x5c ST r16, [p7]; LT r27, r16, r17 +.delay_slot + 9738 0x8c 0x60 0x37 0x3b 0x41 0xe4 SUB r17, r17, r16; MOV r14, r27 +.delay_slot + 9744 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV +.return_address + 9760 0xfb 0x93 0x20 0x1b 0x01 0x8f 0x6b 0x90 0x78 0xba LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 + 9770 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 9774 0x80 0x13 0x38 0x40 0x01 0x84 JNZ r16, #9840 +.delay_slot + 9780 0x00 0x07 0xc4 0xc9 0xa0 0x44 MOVXM p2, #509136 +.delay_slot +.swstall delay_slot + 9786 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9788 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9790 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9792 0x00 0x00 NOPX + 9794 0x2a 0xc2 0xde 0x0b 0x63 0x0c LDA r16, [p1, #20]; ST r13, [p7] + 9800 0x00 0x00 NOPX + 9802 0x00 0x00 NOPX + 9804 0x00 0x00 NOPX + 9806 0x00 0x00 NOPX + 9808 0x00 0x00 NOPX + 9810 0x00 0x00 NOPX + 9812 0x14 0x10 0xf8 0x18 REL r16, r15 + 9816 0x06 0xe6 0x16 0x98 LDA r16, [p6, #-8] + 9820 0x00 0x00 NOPX + 9822 0x00 0x00 NOPX + 9824 0x00 0x00 NOPX + 9826 0x00 0x00 NOPX + 9828 0x00 0x00 NOPX + 9830 0x00 0x00 NOPX + 9832 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 9836 0x0e 0xe6 0x11 0x98 ST r16, [p6, #-8] +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304 + 9840 0x00 0x13 0x50 0x00 0x00 0x84 J #9888 +.delay_slot + 9846 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.delay_slot +.swstall delay_slot + 9850 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9854 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9856 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336 + 9872 0xfb 0xf3 0x20 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0x32 0x68 0x10 0x00 0x00 0xe1 LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352 + 9888 0x40 0xc2 0xd0 0x44 0x22 0x2c LDA r16, [p2]; MOVX r17, #4 + 9894 0x00 0x00 NOPX + 9896 0x00 0x00 NOPX + 9898 0x00 0x00 NOPX + 9900 0x00 0x00 NOPX + 9902 0x00 0x00 NOPX + 9904 0x00 0x00 NOPX + 9906 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 9910 0x80 0x13 0xa8 0x40 0x01 0x84 JNZ r16, #10064 +.delay_slot + 9916 0x00 0x07 0xc4 0xc9 0xc8 0x44 MOVXM p2, #509156 +.delay_slot +.swstall delay_slot + 9922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9926 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9928 0x00 0x00 NOPX + 9930 0x40 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2]; MOVXM p1, #509024 + 9940 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 9944 0x00 0x00 NOPX + 9946 0x00 0x00 NOPX + 9948 0x00 0x00 NOPX +.no_stack_arguments + 9950 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot + 9956 0x00 0x00 NOPX +.delay_slot + 9958 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 9962 0x40 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p2]; LT r27, r16, r13 +.delay_slot + 9968 0x6c 0x60 0x37 0x3b 0x41 0xe4 SUB r17, r13, r16; MOV r14, r27 +.delay_slot + 9974 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 9984 0x6c 0x06 0x3d 0xae 0x41 0xe4 SUB r16, r13, r3; MOV r27, r14 + 9990 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 9994 0x80 0x13 0xa8 0x40 0x01 0x84 JNZ r16, #10064 +.delay_slot + 10000 0x00 0x07 0xc4 0xc9 0xc8 0x44 MOVXM p2, #509156 +.delay_slot +.swstall delay_slot + 10006 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10008 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10010 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10012 0x00 0x00 NOPX + 10014 0xea 0xc2 0xd4 0x0b 0x63 0x0c LDA r16, [p7, #20]; ST r13, [p2] + 10020 0x00 0x00 NOPX + 10022 0x00 0x00 NOPX + 10024 0x00 0x00 NOPX + 10026 0x00 0x00 NOPX + 10028 0x00 0x00 NOPX + 10030 0x00 0x00 NOPX + 10032 0x14 0x10 0xf8 0x18 REL r16, r15 + 10036 0x06 0xe6 0x16 0x98 LDA r16, [p6, #-8] + 10040 0x00 0x00 NOPX + 10042 0x00 0x00 NOPX + 10044 0x00 0x00 NOPX + 10046 0x00 0x00 NOPX + 10048 0x00 0x00 NOPX + 10050 0x00 0x00 NOPX + 10052 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 10056 0xdc 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p6, #-8]; NOPM +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528 + 10064 0x00 0x07 0xcc 0xc9 0x80 0x44 MOVXM p6, #509120 + 10070 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x74 0x10 0xba LDA r16, [p6]; MOVXM p2, #509160 + 10080 0x02 0x06 0x36 0x98 LDA r17, [p2] + 10084 0x00 0x00 NOPX + 10086 0x00 0x00 NOPX + 10088 0x00 0x00 NOPX + 10090 0x00 0x00 NOPX + 10092 0x00 0x00 NOPX + 10094 0x00 0x00 NOPX + 10096 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 10100 0x80 0x13 0xc8 0x40 0x01 0x84 JNZ r16, #10128 +.delay_slot + 10106 0x07 0xef 0x99 0x18 LDA p7, [sp, #-20] +.delay_slot + 10110 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] +.delay_slot + 10114 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] +.delay_slot +.swstall delay_slot + 10118 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10120 0x00 0x00 NOPX + 10122 0x00 0x2c 0xfc 0x0b 0x63 0x0c NOPA; ST r13, [p6] +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 10128 0xff 0x2e 0x2e 0xeb 0x41 0xd4 LDA r11, [sp, #-8]; MOV lr, r11 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 10134 0x07 0xfd 0x91 0x18 LDA r12, [sp, #-4] +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10138 0x07 0xe9 0xb1 0x18 LDA r13, [sp, #-24] +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10142 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10146 0x1e 0x66 0x20 0xf8 MOV p6, r12 +.delay_slot + 10150 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 10156 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10158 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640 + 10176 0x00 0x0d 0x06 0x8c 0x0b 0x00 0x04 0x98 0x00 0x00 0x20 0x76 MOVA r13, #0; MOVS p6, r12; J #9408 +.delay_slot + 10188 0x03 0xc0 0xa7 0x20 0x09 0x64 MOVX r15, #1; MOV r14, #2 +.delay_slot + 10194 0x00 0x07 0xc4 0xc8 0x80 0x44 MOVXM p2, #508992 +.delay_slot + 10200 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136 +.delay_slot + 10206 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28] +.delay_slot +.swstall delay_slot + 10210 0x00 0x00 NOPX +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 10224 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 10224 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 10230 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10234 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10238 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10242 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10246 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10250 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10254 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10258 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10262 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10266 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10270 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10274 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10278 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10282 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10286 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10290 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10294 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10298 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10302 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10306 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10310 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10314 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10318 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10322 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10326 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10330 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10334 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10338 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10342 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10346 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 10350 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 10354 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 10358 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 10362 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.text_segment PM 10368 +.label _ZL19propagateFloat32NaNjj +.function_start + 10368 0xfd 0x43 0x00 0x3f 0xc0 0x02 0x48 0x00 0x10 0xba MOVA r3, #-22; MOVXM r18, #-16777216 + 10378 0x3f 0xe7 0x00 0x00 0x10 0x00 0x08 0x00 0x10 0xba MOVA r7, #511; MOVXM r0, #4194304 + 10388 0x00 0x30 0x00 0x02 0x40 0x2c 0xa9 0xfe 0x58 0xba MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 + 10398 0x10 0x80 0x05 0x98 OR r0, r2, r0 + 10402 0x10 0x4c 0x3d 0x98 LSHL r6, r1, r3 + 10406 0x10 0x86 0x3d 0x98 LSHL r3, r2, r3 + 10410 0x11 0xc6 0x34 0x98 AND r3, r7, r3 + 10414 0x11 0xcc 0x64 0x98 AND r6, r7, r6 + 10418 0x11 0x4c 0x67 0x98 EQ r6, r5, r6 + 10422 0x10 0xa3 0x0d 0x98 LSHL r17, r2, r16 + 10426 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17 + 10430 0x11 0x22 0x02 0x18 SEL.EQZ r17, r4, r0, r27 + 10434 0x00 0x3f 0xf8 0x3f 0xfe 0x44 MOVXM r16, #4194303 + 10440 0x10 0x85 0x04 0x98 AND r2, r2, r16 + 10444 0x10 0x84 0xf0 0x18 NEZ r2, r2 + 10448 0x10 0x43 0x04 0x98 AND r1, r1, r16 + 10452 0x10 0x42 0xf0 0x18 NEZ r1, r1 + 10456 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10460 0x10 0x76 0x64 0x98 AND r27, r1, r6 +.delay_slot + 10464 0x10 0xc2 0x57 0x98 EQ r1, r3, r5 +.delay_slot + 10468 0x14 0x46 0x42 0x18 SEL.EQZ r3, r17, r4, r27 +.delay_slot + 10472 0x10 0x76 0x24 0x98 AND r27, r1, r2 +.delay_slot + 10476 0x10 0xc0 0x02 0x18 SEL.EQZ r0, r3, r0, r27 +.label _ZL19propagateFloat32NaNjj__end +.label _ZL19roundAndPackFloat32iij +.function_start + 10480 0x08 0x00 0x00 0x00 0x01 0xf0 0x32 0x7a 0x10 0xba MOVA r0, #64; MOVXM p0, #509172 + 10490 0x00 0x92 0xd0 0x99 0xfa 0x2c LDA r4, [p0]; MOVX r6, #127 +.swstall __RAW__R_1948 + 10496 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10498 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10500 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10502 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10504 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10506 0x00 0x00 NOPX + 10508 0x20 0x14 0xa8 0x00 0x01 0x84 JZ r4, #10576 +.delay_slot + 10514 0x10 0x4a 0x01 0x18 MOVX r5, #64 +.delay_slot +.swstall delay_slot + 10518 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10520 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10524 0x00 0x00 NOPX + 10526 0x00 0x70 0x00 0x00 0x70 0x4b 0x08 0x00 0x58 0xba MOVA r16, #3; MOVX r7, #2; MOV r24, #0 + 10536 0x3e 0xc8 0xf2 0xa0 0x05 0x64 EQ r27, r7, r4; MOV r5, #1 + 10542 0x11 0x8f 0x82 0x18 SEL.EQZ r7, r6, r24, r27 + 10546 0x11 0x37 0x07 0x98 EQ r27, r4, r16 + 10550 0x34 0x30 0x4d 0xa1 0x41 0xe4 SEL.EQZ r16, r6, r24, r27; MOV r27, r1 + 10556 0x14 0x0e 0x72 0x18 SEL.EQZ r7, r16, r7, r27 + 10560 0x11 0x76 0x47 0x98 EQ r27, r5, r4 + 10564 0x00 0x2c 0xf0 0x00 0x20 0x0e 0x5c 0x10 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 +.label TGT_F_ZL19roundAndPackFloat32iij_96 + 10576 0x14 0x96 0x08 0x23 0xf5 0x64 EXTEND.u16 r18, r2; MOV r16, #253 + 10582 0x14 0xa5 0x0a 0x98 LT r18, r18, r16 + 10586 0x90 0x15 0x08 0x40 0x01 0x84 JNZ r18, #10768 +.delay_slot + 10592 0x10 0xe2 0x64 0x98 AND r17, r3, r6 +.delay_slot + 10596 0x10 0x0e 0x7d 0x18 MOVX r7, #31 +.delay_slot + 10600 0x10 0x42 0x7d 0x98 LSHL r1, r1, r7 +.delay_slot +.swstall delay_slot + 10604 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10606 0x00 0x00 NOPX + 10608 0x00 0x12 0x00 0x05 0x38 0x3e 0x88 0xca 0xa8 0xba MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 + 10618 0x15 0x29 0x2a 0x98 LT r20, r20, r18 + 10622 0x14 0x20 0x2a 0x98 LT r16, r16, r2 + 10626 0x14 0xe7 0x44 0x98 AND r19, r19, r20 + 10630 0x14 0xe7 0x05 0x98 OR r19, r19, r16 + 10634 0x98 0x15 0x30 0x40 0x01 0x84 JNZ r19, #10848 +.delay_slot + 10640 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.swstall delay_slot + 10644 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10646 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10648 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10650 0x00 0x00 NOPX + 10652 0x10 0xa7 0x09 0x98 GE r19, r2, r16 + 10656 0x98 0x15 0x10 0x40 0x01 0x84 JNZ r19, #10784 +.delay_slot +.swstall delay_slot + 10662 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10664 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10666 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10668 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10670 0x00 0x00 NOPX + 10672 0x14 0x04 0x21 0x98 SUB r2, r16, r2 + 10676 0x10 0x14 0xf8 0x00 0x01 0x84 JZ r2, #10736 +.delay_slot +.swstall delay_slot + 10682 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10684 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10686 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10688 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10690 0x00 0x00 NOPX + 10692 0x84 0x44 0x39 0xa0 0x81 0x64 SUB r17, r16, r2; MOV r19, #32 + 10698 0x11 0xcf 0x14 0x98 AND r7, r7, r17 + 10702 0x10 0xce 0x7d 0x98 LSHL r7, r3, r7 + 10706 0x10 0xe3 0x1d 0x98 LSHL r17, r3, r17 + 10710 0x10 0xb7 0x3a 0x98 LT r27, r2, r19 + 10714 0x11 0xce 0xf0 0x18 NEZ r7, r7 + 10718 0x10 0xc6 0xf0 0x18 NEZ r3, r3 + 10722 0x11 0xc5 0x15 0x98 OR r2, r7, r17 + 10726 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0xc6 0x22 0x7a NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 +.label TGT_F_ZL19roundAndPackFloat32iij_256 + 10736 0x00 0x15 0x10 0x00 0x00 0x84 J #10784 +.delay_slot + 10742 0x10 0xe2 0x64 0x98 AND r17, r3, r6 +.delay_slot + 10746 0x10 0x04 0x01 0x18 MOVX r2, #0 +.delay_slot +.swstall delay_slot + 10750 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10754 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZL19roundAndPackFloat32iij_288 + 10768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV +.label TGT_F_ZL19roundAndPackFloat32iij_304 + 10784 0xff 0x20 0x00 0x22 0x30 0x34 0xa8 0xca 0xa8 0xba MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 + 10794 0x02 0xe3 0x00 0x06 0x62 0x2c 0x8f 0xff 0x58 0xba MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 + 10804 0x11 0x8c 0xd0 0x18 EQZ r6, r6 + 10808 0x11 0x40 0x0d 0x98 LSHL r0, r5, r0 + 10812 0x11 0x88 0x46 0x98 XOR r4, r6, r4 + 10816 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10820 0x11 0x36 0x04 0x98 AND r27, r4, r0 +.delay_slot + 10824 0x14 0x04 0x22 0x18 SEL.EQZ r2, r16, r2, r27 +.delay_slot + 10828 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3 +.delay_slot + 10832 0x10 0x44 0x20 0x98 ADD r2, r1, r2 +.delay_slot + 10836 0x00 0x2c 0xf0 0x00 0x20 0x36 0x01 0x04 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; ADD r0, r27, r2 +.label TGT_F_ZL19roundAndPackFloat32iij_368 + 10848 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10852 0x7f 0x80 0x01 0x20 0x00 0x44 MOVXM r2, #2139095040 +.delay_slot + 10858 0x10 0x46 0x20 0x98 ADD r3, r1, r2 +.delay_slot + 10862 0x11 0x44 0xd0 0x18 EQZ r2, r5 +.delay_slot + 10866 0x10 0xc0 0x21 0x98 SUB r0, r3, r2 +.delay_slot +.swstall delay_slot + 10870 0x00 0x00 NOPX +.label _ZL19roundAndPackFloat32iij__end + +.text_segment PM 10880 +.label _ZL28normalizeRoundAndPackFloat32iij +.tail_call +.function_start + 10880 0x00 0x14 0x78 0x00 0x00 0x84 J #10480 +.delay_slot + 10886 0x10 0xe0 0x30 0x18 CLZ r16, r3 +.delay_slot + 10890 0x14 0x21 0xff 0x18 ADD r16, r16, #-1 +.delay_slot + 10894 0x10 0x85 0x01 0x98 SUB r2, r2, r16 +.delay_slot + 10898 0x10 0xc7 0x0d 0x98 LSHL r3, r3, r16 +.delay_slot +.swstall delay_slot + 10902 0x00 0x00 NOPX +.label _ZL28normalizeRoundAndPackFloat32iij__end + +.text_segment PM 10912 +.label int32_to_float32 +.function_start + 10912 0x08 0x15 0x78 0x00 0x01 0x84 JZ r1, #10992 +.delay_slot +.swstall delay_slot + 10918 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10920 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10926 0x00 0x00 NOPX + 10928 0x80 0x00 0x08 0x20 0x00 0x44 MOVXM r16, #-2147483648 + 10934 0x10 0x61 0x07 0x98 EQ r16, r1, r16 + 10938 0x80 0x15 0x80 0x40 0x01 0x84 JNZ r16, #11008 +.delay_slot +.swstall delay_slot + 10944 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10946 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10948 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10950 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10952 0x00 0x00 NOPX +.tail_call + 10954 0x13 0x82 0x00 0x00 0x05 0x50 0x00 0x00 0x20 0xba MOVA r2, #156; J #10880 +.delay_slot + 10964 0x10 0x47 0x10 0x18 ABS r3, r1 +.delay_slot + 10968 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot + 10972 0x10 0x43 0x0a 0x98 LT r1, r1, r16 +.delay_slot +.swstall delay_slot + 10976 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10978 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_Fint32_to_float32_80 +.return_address + 10992 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10996 0x10 0x00 0x01 0x18 MOVX r0, #0 +.delay_slot +.swstall delay_slot + 11000 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11002 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11004 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11006 0x00 0x00 NOPX +.label TGT_Fint32_to_float32_96 + 11008 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11012 0xcf 0x00 0x00 0x20 0x00 0x44 MOVXM r0, #-822083584 +.delay_slot +.swstall delay_slot + 11018 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11024 0x00 0x00 NOPX +.label int32_to_float32__end + +.text_segment PM 11040 +.label _ZL14addFloat32Sigsjji +.function_start + 11040 0xfd 0x32 0x00 0x00 0x1f 0xfe 0x0f 0xff 0x90 0xba MOVA r18, #-23; MOVXM r16, #8388607 + 11050 0x10 0x63 0x2d 0x98 LSHL r17, r1, r18 + 11054 0x10 0x89 0x2d 0x98 LSHL r4, r2, r18 + 11058 0x14 0x76 0x90 0x18 EXTEND.u8 r27, r17 + 11062 0x11 0x32 0x90 0x18 EXTEND.u8 r25, r4 + 11066 0xdc 0x72 0x3c 0x20 0x01 0x64 SUB r17, r27, r25; MOV r24, #0 + 11072 0x16 0x09 0x1a 0x98 LT r4, r24, r17 + 11076 0x20 0x15 0xf8 0x40 0x01 0x84 JNZ r4, #11248 +.delay_slot + 11082 0x10 0x67 0x04 0x98 AND r19, r1, r16 +.delay_slot + 11086 0x14 0x20 0x90 0x20 0x19 0x64 AND r16, r2, r16; MOV r0, #6 +.delay_slot + 11092 0x14 0xe6 0x0d 0x98 LSHL r19, r19, r0 +.delay_slot + 11096 0x84 0x01 0xba 0x23 0xfd 0x64 LSHL r16, r16, r0; MOV r20, #255 +.delay_slot + 11102 0xd8 0x28 0xf9 0x20 0x7d 0x64 EQ r0, r27, r20; MOV r18, #31 + 11108 0x14 0x4b 0x89 0x98 GE r5, r17, r24 + 11112 0x28 0x16 0x58 0x40 0x01 0x84 JNZ r5, #11440 +.delay_slot + 11118 0x10 0xc9 0x2d 0x98 LSHL r4, r3, r18 +.delay_slot +.swstall delay_slot + 11122 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11128 0x00 0x00 NOPX + 11130 0x16 0x69 0x47 0x98 EQ r20, r25, r20 + 11134 0xa0 0x16 0x40 0x40 0x01 0x84 JNZ r20, #11392 +.delay_slot +.swstall delay_slot + 11140 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11142 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11144 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11146 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11148 0x00 0x00 NOPX + 11150 0x18 0x51 0xa0 0xf8 MOV r1, r3 + 11154 0x18 0x9c 0xa0 0xf8 MOV r2, r25 + 11158 0x14 0x40 0x07 0x18 ADD r0, r17, #1 + 11162 0x10 0x23 0x12 0x18 SEL.EQZ r17, r0, r17, r27 + 11166 0x16 0x23 0x11 0x98 SUB r17, r24, r17 + 11170 0x88 0x16 0x28 0x00 0x01 0x84 JZ r17, #11344 +.delay_slot + 11176 0x20 0x00 0x0a 0x20 0x00 0x44 MOVXM r20, #536870912 +.delay_slot + 11182 0x14 0xc7 0x45 0x98 OR r3, r19, r20 +.delay_slot + 11186 0x14 0xe6 0x32 0x18 SEL.EQZ r19, r19, r3, r27 +.delay_slot +.swstall delay_slot + 11190 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11192 0x00 0x00 NOPX + 11194 0xc0 0xe2 0x30 0x20 0x81 0x64 SUB r3, r24, r17; MOV r0, #32 + 11200 0x10 0xe5 0x24 0x98 AND r18, r3, r18 + 11204 0x14 0xe5 0x2d 0x98 LSHL r18, r19, r18 + 11208 0x14 0x76 0x0a 0x98 LT r27, r17, r0 + 11212 0x00 0x16 0x28 0x00 0x00 0x84 J #11344 +.delay_slot + 11218 0x14 0xc6 0x3d 0x98 LSHL r3, r19, r3 +.delay_slot + 11222 0x14 0xa4 0xf0 0x18 NEZ r18, r18 +.delay_slot + 11226 0x14 0xe2 0xf0 0x18 NEZ r17, r19 +.delay_slot + 11230 0x10 0xe5 0x25 0x98 OR r18, r3, r18 +.delay_slot + 11234 0x00 0x2c 0xf0 0x00 0x24 0x67 0x22 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM +.label TGT_F_ZL14addFloat32Sigsjji_208 + 11248 0x00 0x16 0x78 0x40 0x01 0x84 JNZ r0, #11504 +.delay_slot + 11254 0x20 0x00 0x0a 0x20 0x00 0x44 MOVXM r20, #536870912 +.delay_slot +.swstall delay_slot + 11260 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11262 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11264 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11266 0x00 0x00 NOPX + 11268 0x18 0x51 0xa0 0xf8 MOV r1, r3 + 11272 0x88 0xff 0xe1 0x3b 0x41 0xe4 ADD r3, r17, #-1; MOV r2, r27 + 11278 0x1e 0xdc 0xa0 0xf8 MOV r27, r25 + 11282 0x10 0xe3 0x12 0x18 SEL.EQZ r17, r3, r17, r27 + 11286 0x88 0x16 0x28 0x00 0x01 0x84 JZ r17, #11344 +.delay_slot + 11292 0x15 0x01 0x05 0x98 OR r0, r20, r16 +.delay_slot + 11296 0x14 0x20 0x02 0x18 SEL.EQZ r16, r16, r0, r27 +.delay_slot +.swstall delay_slot + 11300 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11302 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11304 0x00 0x00 NOPX + 11306 0xc0 0xe2 0x30 0x20 0x81 0x64 SUB r3, r24, r17; MOV r0, #32 + 11312 0x10 0xe5 0x24 0x98 AND r18, r3, r18 + 11316 0x14 0x25 0x2d 0x98 LSHL r18, r16, r18 + 11320 0x14 0x06 0x3d 0x98 LSHL r3, r16, r3 + 11324 0x14 0x76 0x0a 0x98 LT r27, r17, r0 + 11328 0x14 0xa4 0xf0 0x18 NEZ r18, r18 + 11332 0x14 0x20 0xf0 0x18 NEZ r16, r16 + 11336 0x10 0xe3 0x25 0x98 OR r17, r3, r18 + 11340 0x14 0x21 0x12 0x18 SEL.EQZ r16, r16, r17, r27 +.label TGT_F_ZL14addFloat32Sigsjji_304 + 11344 0x00 0x32 0x00 0x27 0x3a 0x2e 0x28 0xbf 0xc8 0xba MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 + 11354 0x9c 0xe0 0x18 0x31 0x01 0x24 ADD r19, r19, r16; ADD.NC r16, r17, #1 + 11360 0x14 0xe5 0x2d 0x98 LSHL r18, r19, r18 + 11364 0x14 0xb7 0x8a 0x98 LT r27, r18, r24 + 11368 0x14 0x45 0x02 0x18 SEL.EQZ r2, r17, r16, r27 + 11372 0x14 0x87 0x32 0x18 SEL.EQZ r3, r18, r19, r27 +.label __ll1__ZL14addFloat32Sigsjji +.tail_call + 11376 0x00 0x14 0x78 0x00 0x00 0x84 J #10480 +.delay_slot +.swstall delay_slot + 11382 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11384 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11386 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11390 0x00 0x00 NOPX +.label TGT_F_ZL14addFloat32Sigsjji_352 +.return_address + 11392 0x80 0x16 0x88 0x40 0x01 0x84 JNZ r16, #11536 +.delay_slot +.swstall delay_slot + 11398 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11404 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11406 0x00 0x00 NOPX + 11408 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11412 0x7f 0x80 0x08 0x20 0x00 0x44 MOVXM r16, #2139095040 +.delay_slot + 11418 0x11 0x01 0x00 0x98 ADD r0, r4, r16 +.delay_slot +.swstall delay_slot + 11422 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11426 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZL14addFloat32Sigsjji_400 + 11440 0x00 0x16 0x90 0x40 0x01 0x84 JNZ r0, #11552 +.delay_slot +.swstall delay_slot + 11446 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11448 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11450 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11452 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11454 0x00 0x00 NOPX + 11456 0xd8 0x16 0xa8 0x00 0x01 0x84 JZ r27, #11600 +.delay_slot +.swstall delay_slot + 11462 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11464 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11470 0x00 0x00 NOPX + 11472 0x18 0x51 0xa0 0xf8 MOV r1, r3 + 11476 0x00 0x16 0x38 0x00 0x00 0x84 J #11376 +.delay_slot + 11482 0x40 0x00 0x08 0xa0 0x00 0x44 MOVXM r17, #1073741824 +.delay_slot + 11488 0x9c 0x62 0x11 0x3b 0x41 0xe4 ADD r17, r19, r17; MOV r2, r27 +.delay_slot + 11494 0x14 0x47 0x00 0x98 ADD r3, r17, r16 +.delay_slot +.swstall delay_slot + 11498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11500 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_ZL14addFloat32Sigsjji_464 + 11504 0x98 0x16 0xb8 0x40 0x01 0x84 JNZ r19, #11632 +.delay_slot +.swstall delay_slot + 11510 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11512 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11514 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11516 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11518 0x00 0x00 NOPX + 11520 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11524 0x18 0x10 0xa0 0xf8 MOV r0, r1 +.delay_slot +.swstall delay_slot + 11528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11530 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11534 0x00 0x00 NOPX +.label TGT_F_ZL14addFloat32Sigsjji_496 +.tail_call + 11536 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 11542 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11544 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11546 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11548 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11550 0x00 0x00 NOPX +.label TGT_F_ZL14addFloat32Sigsjji_512 +.return_address + 11552 0x14 0xe1 0x05 0x98 OR r16, r19, r16 + 11556 0x80 0x16 0xc0 0x40 0x01 0x84 JNZ r16, #11648 +.delay_slot +.swstall delay_slot + 11562 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11564 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11566 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11568 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11570 0x00 0x00 NOPX + 11572 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11576 0x18 0x10 0xa0 0xf8 MOV r0, r1 +.delay_slot +.swstall delay_slot + 11580 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11582 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11584 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11586 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZL14addFloat32Sigsjji_560 + 11600 0x05 0x00 0x08 0x33 0x82 0xa4 RET lr; ADD.NC r16, r19, r16 +.delay_slot + 11606 0x17 0xe2 0xe9 0x18 MOVX r17, #-6 +.delay_slot + 11610 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17 +.delay_slot + 11614 0x11 0x01 0x00 0x98 ADD r0, r4, r16 +.delay_slot +.swstall delay_slot + 11618 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11620 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL14addFloat32Sigsjji_592 +.tail_call + 11632 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 11638 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11640 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11642 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11644 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11646 0x00 0x00 NOPX +.label TGT_F_ZL14addFloat32Sigsjji_608 +.tail_call +.return_address + 11648 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 11654 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11658 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11660 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11662 0x00 0x00 NOPX +.label _ZL14addFloat32Sigsjji__end +.label _ZL14subFloat32Sigsjji +.function_start + 11664 0xfd 0x31 0x00 0x00 0x1f 0xfe 0x0f 0xff 0x90 0xba MOVA r17, #-23; MOVXM r16, #8388607 + 11674 0x10 0x89 0x1d 0x98 LSHL r4, r2, r17 + 11678 0x10 0x65 0x1d 0x98 LSHL r18, r1, r17 + 11682 0x10 0x69 0x04 0x98 AND r20, r1, r16 + 11686 0x11 0x32 0x90 0x18 EXTEND.u8 r25, r4 + 11690 0x14 0xb6 0x90 0x18 EXTEND.u8 r27, r18 + 11694 0x14 0x20 0x99 0xa0 0x1d 0x64 AND r16, r2, r16; MOV r19, #7 + 11700 0x15 0x23 0x3d 0x98 LSHL r17, r20, r19 + 11704 0xdc 0xb2 0x3c 0x20 0x01 0x64 SUB r18, r27, r25; MOV r24, #0 + 11710 0x16 0x0b 0x2a 0x98 LT r5, r24, r18 + 11714 0x28 0x17 0x40 0x40 0x01 0x84 JNZ r5, #11904 +.delay_slot + 11720 0x14 0x21 0x3d 0x98 LSHL r16, r16, r19 +.delay_slot + 11724 0x1f 0xe0 0x00 0x10 0x00 0x00 0x88 0x00 0x10 0xba MOVA r0, #255; MOVXM r4, #1073741824 +.delay_slot + 11734 0x16 0xe8 0x07 0x98 EQ r20, r27, r0 +.delay_slot + 11738 0x14 0x66 0x45 0x98 OR r19, r17, r4 +.delay_slot + 11742 0x11 0x09 0x05 0x98 OR r4, r4, r16 + 11746 0x14 0x8d 0x89 0x98 GE r6, r18, r24 + 11750 0x30 0x17 0x90 0x40 0x01 0x84 JNZ r6, #12064 +.delay_slot + 11756 0x10 0x0a 0x05 0x18 MOVX r5, #1 +.delay_slot + 11760 0x10 0xce 0x56 0x98 XOR r7, r3, r5 +.delay_slot +.swstall delay_slot + 11764 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11766 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11768 0x00 0x00 NOPX + 11770 0x16 0x68 0x07 0x98 EQ r20, r25, r0 + 11774 0xa0 0x17 0xc8 0x40 0x01 0x84 JNZ r20, #12176 +.delay_slot +.swstall delay_slot + 11780 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11782 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11786 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11788 0x00 0x00 NOPX + 11790 0x18 0x53 0xa0 0xf8 MOV r1, r7 + 11794 0x14 0xa0 0x07 0x18 ADD r16, r18, #1 + 11798 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 11802 0x16 0x21 0x01 0x98 SUB r16, r24, r16 + 11806 0x80 0x17 0x30 0x00 0x01 0x84 JZ r16, #11872 +.delay_slot + 11812 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27 +.delay_slot +.swstall delay_slot + 11816 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11818 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11820 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11822 0x00 0x00 NOPX + 11824 0x04 0x14 0x00 0x30 0x38 0x0e 0x48 0x1f 0x58 0xba MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 + 11834 0x10 0xe5 0x24 0x98 AND r18, r3, r18 + 11838 0x14 0x65 0x2d 0x98 LSHL r18, r17, r18 + 11842 0x14 0x66 0xf0 0x18 NEZ r19, r17 + 11846 0x14 0x37 0x4a 0x98 LT r27, r16, r20 + 11850 0x14 0x62 0x3d 0x98 LSHL r17, r17, r3 + 11854 0x14 0xa4 0xf0 0x18 NEZ r18, r18 + 11858 0x14 0x61 0x25 0x98 OR r16, r17, r18 + 11862 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0xe3 0x02 0x7a NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 +.label __ll2__ZL14subFloat32Sigsjji + 11872 0x00 0x17 0x80 0x00 0x00 0x84 J #12032 +.delay_slot + 11878 0x11 0x07 0x11 0x98 SUB r3, r4, r17 +.delay_slot +.swstall delay_slot + 11882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11886 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11888 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_240 + 11904 0xa0 0x17 0xe0 0x40 0x01 0x84 JNZ r20, #12224 +.delay_slot +.swstall delay_slot + 11910 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11912 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11914 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11916 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11918 0x00 0x00 NOPX + 11920 0x18 0x1d 0xa0 0xf8 MOV r0, r27 + 11924 0x18 0x51 0xa0 0xf8 MOV r1, r3 + 11928 0x1e 0xdc 0xa0 0xf8 MOV r27, r25 + 11932 0x1e 0x50 0x20 0xf8 MOV r25, r0 + 11936 0x14 0xa3 0xff 0x18 ADD r17, r18, #-1 + 11940 0x14 0x63 0x22 0x18 SEL.EQZ r17, r17, r18, r27 + 11944 0x88 0x17 0x78 0x00 0x01 0x84 JZ r17, #12016 +.delay_slot + 11950 0x14 0x20 0x42 0x18 SEL.EQZ r16, r16, r4, r27 +.delay_slot +.swstall delay_slot + 11954 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11956 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11958 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11960 0x00 0x00 NOPX + 11962 0x04 0x03 0x00 0x31 0x28 0x8e 0x88 0x1f 0x58 0xba MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 + 11972 0x14 0xa9 0x44 0x98 AND r20, r18, r20 + 11976 0x14 0x29 0x4d 0x98 LSHL r20, r16, r20 + 11980 0x14 0x25 0x2d 0x98 LSHL r18, r16, r18 + 11984 0x14 0x76 0x3a 0x98 LT r27, r17, r3 + 11988 0x15 0x28 0xf0 0x18 NEZ r20, r20 + 11992 0x14 0x20 0xf0 0x18 NEZ r16, r16 + 11996 0x14 0xa3 0x45 0x98 OR r17, r18, r20 + 12000 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x21 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV +.label __ll1__ZL14subFloat32Sigsjji + 12016 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x26 0x38 0x0c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_368 +.tail_call + 12032 0x00 0x15 0x40 0x00 0x00 0x84 J #10880 +.delay_slot + 12038 0x16 0x45 0xff 0x18 ADD r2, r25, #-1 +.delay_slot +.swstall delay_slot + 12042 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12044 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12046 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12048 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_400 +.return_address + 12064 0xa0 0x17 0xf0 0x40 0x01 0x84 JNZ r20, #12256 +.delay_slot +.swstall delay_slot + 12070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12072 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12074 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12076 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12078 0x00 0x00 NOPX + 12080 0x14 0x27 0x1c 0x98 LTU r19, r16, r17 + 12084 0x98 0x18 0x08 0x40 0x01 0x84 JNZ r19, #12304 +.delay_slot + 12090 0x11 0x71 0x92 0x18 SEL.EQZ r24, r5, r25, r27 +.delay_slot + 12094 0x1c 0x9d 0xa0 0xf8 MOV r18, r27 +.delay_slot + 12098 0x11 0x73 0x22 0x18 SEL.EQZ r25, r5, r18, r27 +.delay_slot +.swstall delay_slot + 12102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12104 0x00 0x00 NOPX + 12106 0x14 0x65 0x0c 0x98 LTU r18, r17, r16 + 12110 0x90 0x18 0x18 0x40 0x01 0x84 JNZ r18, #12336 +.delay_slot +.swstall delay_slot + 12116 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12118 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12120 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12122 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12124 0x00 0x00 NOPX + 12126 0x10 0x20 0x7d 0x18 MOVX r16, #31 + 12130 0x00 0x07 0xc0 0xc9 0xe8 0x44 MOVXM p0, #509172 + 12136 0x00 0x06 0x56 0x98 LDA r18, [p0] +.swstall __RAW__R_1948 + 12140 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 12142 0x00 0x00 NOPX + 12144 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12148 0x10 0x22 0x0d 0x18 MOVX r17, #3 +.delay_slot +.swstall delay_slot + 12152 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12154 0x00 0x00 NOPX +.delay_slot + 12156 0x14 0x63 0x27 0x98 EQ r17, r17, r18 +.delay_slot + 12160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x22 0x08 0x6c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_512 + 12176 0x80 0x18 0x28 0x40 0x01 0x84 JNZ r16, #12368 +.delay_slot +.swstall delay_slot + 12182 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12184 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12186 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12188 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12190 0x00 0x00 NOPX + 12192 0x10 0x20 0x7d 0x18 MOVX r16, #31 + 12196 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12200 0x11 0xe1 0x0d 0x98 LSHL r16, r7, r16 +.delay_slot + 12204 0x7f 0x80 0x08 0xa0 0x00 0x44 MOVXM r17, #2139095040 +.delay_slot + 12210 0x14 0x41 0x00 0x98 ADD r0, r17, r16 +.delay_slot +.swstall delay_slot + 12214 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12216 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_560 + 12224 0x88 0x18 0x30 0x40 0x01 0x84 JNZ r17, #12384 +.delay_slot +.swstall delay_slot + 12230 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12232 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12234 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12236 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12238 0x00 0x00 NOPX + 12240 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12244 0x18 0x10 0xa0 0xf8 MOV r0, r1 +.delay_slot +.swstall delay_slot + 12248 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12250 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12252 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12254 0x00 0x00 NOPX +.label TGT_F_ZL14subFloat32Sigsjji_592 + 12256 0x14 0x61 0x05 0x98 OR r16, r17, r16 + 12260 0x80 0x18 0x38 0x40 0x01 0x84 JNZ r16, #12400 +.delay_slot +.swstall delay_slot + 12266 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12268 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12270 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12272 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12274 0x00 0x00 NOPX + 12276 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12280 0x7f 0xff 0xf0 0x3f 0xfe 0x44 MOVXM r0, #2147483647 +.delay_slot +.swstall delay_slot + 12286 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12292 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL14subFloat32Sigsjji_640 + 12304 0x00 0x17 0x78 0x00 0x00 0x84 J #12016 +.delay_slot + 12310 0x18 0x51 0xa0 0xf8 MOV r1, r3 +.delay_slot + 12314 0x1c 0xd8 0xa0 0xf8 MOV r19, r17 +.delay_slot +.swstall delay_slot + 12318 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12320 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12322 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_672 + 12336 0x00 0x17 0x30 0x00 0x00 0x84 J #11872 +.delay_slot + 12342 0x19 0x18 0x20 0xf8 MOV r4, r16 +.delay_slot + 12346 0x1e 0x5c 0x20 0xf8 MOV r25, r24 +.delay_slot + 12350 0x18 0x53 0xa0 0xf8 MOV r1, r7 +.delay_slot +.swstall delay_slot + 12354 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12356 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL14subFloat32Sigsjji_704 +.tail_call + 12368 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 12374 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12376 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12378 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12380 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12382 0x00 0x00 NOPX +.label TGT_F_ZL14subFloat32Sigsjji_720 +.tail_call +.return_address + 12384 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 12390 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12392 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12394 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12396 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12398 0x00 0x00 NOPX +.label TGT_F_ZL14subFloat32Sigsjji_736 +.tail_call +.return_address + 12400 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 12406 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12414 0x00 0x00 NOPX +.label _ZL14subFloat32Sigsjji__end +.label float32_add +.function_start + 12416 0x17 0xe0 0x85 0x18 MOVX r16, #-31 + 12420 0x10 0x47 0x0d 0x98 LSHL r3, r1, r16 + 12424 0x10 0xa1 0x0d 0x98 LSHL r16, r2, r16 + 12428 0x10 0xe1 0x07 0x98 EQ r16, r3, r16 + 12432 0x80 0x18 0x58 0x40 0x01 0x84 JNZ r16, #12464 +.delay_slot +.swstall delay_slot + 12438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12442 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12444 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12446 0x00 0x00 NOPX +.tail_call + 12448 0x00 0x16 0xc8 0x00 0x00 0x84 J #11664 +.delay_slot +.swstall delay_slot + 12454 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12462 0x00 0x00 NOPX +.label TGT_Ffloat32_add_48 +.tail_call +.return_address + 12464 0x00 0x15 0x90 0x00 0x00 0x84 J #11040 +.delay_slot +.swstall delay_slot + 12470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12474 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12476 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12478 0x00 0x00 NOPX +.label float32_add__end + +.data_segment DMb 508992 +.label reduce_mean_c8_params + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x7 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 509120 40 + +.data_segment DMb 509160 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 509164 4 + +.bss_segment DMb 509168 1 + +.bss_segment DMb 509172 4 + +.bss_segment DMb 509184 64 + +.stack DM_stack 506560 508928 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.map new file mode 100644 index 0000000000000000000000000000000000000000..a0123fcd2abb0ee7d6fe767c4cfeb9204c35f584 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.map @@ -0,0 +1,177 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 2368 + + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 2613 + + 0x00000000..0x0007babf ( 506560 items) : Reserved + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + 0x0007c400..0x0007c43f ( 64 items) : Reserved + 0x0007c440..0x0007c4bf ( 128 items) : ../Release/0_0_reloadable2.o::reduce_mean_c8_params (Data, Global, .data.DMb.64) + 0x0007c4c0..0x0007c4c3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c4c4..0x0007c4c7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL10depth_iter (Data, Local, .bss.DMb.4) + 0x0007c4c8..0x0007c4cb ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c4cc..0x0007c4cf ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c4d0..0x0007c4d3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11reduce_axis (Data, Local, .bss.DMb.4) + 0x0007c4d4..0x0007c4d7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8l3_width (Data, Local, .bss.DMb.4) + 0x0007c4d8..0x0007c4db ( 4 items) : ../Release/0_0_reloadable2.o::_ZL9l3_height (Data, Local, .bss.DMb.4) + 0x0007c4dc..0x0007c4df ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8l3_depth (Data, Local, .bss.DMb.4) + 0x0007c4e0..0x0007c4e3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL10width_iter (Data, Local, .bss.DMb.4) + 0x0007c4e4..0x0007c4e7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11height_iter (Data, Local, .bss.DMb.4) + 0x0007c4e8..0x0007c4eb ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c4ec..0x0007c4ef ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c4f0..0x0007c4f0 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c4f4..0x0007c4f7 ( 4 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::float_rounding_mode (Data, Global, .bss.DMb.4) + 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable2.o::pad_3d_params (Data, Global, .bss.DMb.64) + 0x0007ccc0..0x000fffff ( 537408 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 10058 + + 0x00000000..0x0000092f ( 2352 items) : Reserved + 0x00000930..0x00000a0b ( 220 items) : ../Release/0_0_reloadable2.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Called functions : _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00000a10..0x00001043 ( 1588 items) : ../Release/0_0_reloadable2.o::_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + 0x00001050..0x000012ed ( 670 items) : ../Release/0_0_reloadable2.o::_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t (Function, Weak, .text) (stack frame size = 0) + 0x000012f0..0x00001d67 ( 2680 items) : ../Release/0_0_reloadable2.o::_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E (Function, Weak, .text) (stack frame size = 256) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001d70..0x000027e3 ( 2676 items) : ../Release/0_0_reloadable2.o::_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128) + + Called functions : _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + int32_to_float32 + float32_add + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t + _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_satE + _ZN12me_primitive11control_rndE + reduce_mean_c8_params + _ZL11reduce_axis + _ZL11ifm1_offset + pad_3d_params + _ZL8num_iter + _ZL8l3_width + _ZL9l3_height + _ZL8l3_depth + _ZL10depth_iter + _ZL10width_iter + _ZL11height_iter + + 0x000027f0..0x0000287d ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + 0x00002880..0x000028ef ( 112 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL19propagateFloat32NaNjj (Function, Local, .text) (stack frame size = 0) + 0x000028f0..0x00002a77 ( 392 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL19roundAndPackFloat32iij (Function, Local, .text) (stack frame size = 0) + + Referenced symbols: float_rounding_mode + + 0x00002a80..0x00002a97 ( 24 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL28normalizeRoundAndPackFloat32iij (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL19roundAndPackFloat32iij + + 0x00002aa0..0x00002b11 ( 114 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::int32_to_float32 (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZL28normalizeRoundAndPackFloat32iij + + 0x00002b20..0x00002d8f ( 624 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL14addFloat32Sigsjji (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL19roundAndPackFloat32iij + _ZL19propagateFloat32NaNjj + + 0x00002d90..0x0000307f ( 752 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL14subFloat32Sigsjji (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL28normalizeRoundAndPackFloat32iij + _ZL19propagateFloat32NaNjj + + Referenced symbols: float_rounding_mode + + 0x00003080..0x000030bf ( 64 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::float32_add (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZL14subFloat32Sigsjji + _ZL14addFloat32Sigsjji + + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x30c0 + _pc_start = 0x930 + _sp_end_DM_stack = 0x7c400 + _sp_start_DM_stack = 0x7bac0 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 2368 + ---------- ---------- + 2368 Total + +Section summary for memory 'DMb': + + .bss .data File + ---------- ---------- ---------- + 104 132 ../Release/0_0_reloadable2.o + 4 0 softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a) + 5 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- + 113 132 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 7834 ../Release/0_0_reloadable2.o + 2082 softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a) + 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- + 10058 Total + +File summary: + +../Release/0_0_reloadable2.o + DMb 236 + PM 7834 + +me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a) + DMb 4 + PM 2082 + +me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + PM 142 + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.sdr new file mode 100644 index 0000000000000000000000000000000000000000..efa1bd1f1f0feebb4e1aac96628ff9f168810f9e --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.sdr @@ -0,0 +1,96 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol reduce_mean_c8_params 0x0007c440 +_symbol _ZN12me_primitive11control_satE 0x0007c4ec +_symbol _ZN12me_primitive11control_rndE 0x0007c4f0 +_symbol float_rounding_mode 0x0007c4f4 +_symbol pad_3d_params 0x0007c500 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x00000930 +_symbol _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv 0x00000a10 +_symbol _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t 0x00001050 +_symbol _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E 0x000012f0 +_symbol _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001d70 +_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x000027f0 +_symbol int32_to_float32 0x00002aa0 +_symbol float32_add 0x00003080 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.srv new file mode 100644 index 0000000000000000000000000000000000000000..d037f49ea23915d17f1d140dbcf225735acc1af1 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.srv @@ -0,0 +1,14427 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:41 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable2.cc" 29 first +.src_ref 0 "0_0_reloadable2.cc" 31 60 first +.function_start + 2352 "11010100" // LDA r16, [p0]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00101111" // /* MW 4 */ + 2355 "11010000" // /* MW 3 */ + 2356 "11000010" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 29 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 31 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2364 "00000010" // ST p7, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "01010000" // /* MW 6 */ + 2367 "11101000" // /* MW 5 */ + 2368 "00000001" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "01110011" // /* MW 2 */ + 2371 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 17 79 +.src_ref 0 "0_0_reloadable2.cc" 31 110 first + 2372 "00111010" // ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2373 "01111001" // /* MW 9 */ + 2374 "01100000" // /* MW 8 */ + 2375 "10110000" // /* MW 7 */ + 2376 "10000011" // /* MW 6 */ + 2377 "10100111" // /* MW 5 */ + 2378 "00011111" // /* MW 4 */ + 2379 "10110000" // /* MW 3 */ + 2380 "10000010" // /* MW 2 */ + 2381 "11111111" // /* MW 1 */ + 2382 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2383 "00111101" // /* MW 3 */ + 2384 "11110100" // /* MW 2 */ + 2385 "00001111" // /* MW 1 */ + 2386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2387 "00000000" // /* MW 1 */ + 2388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2389 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2390 "00011000" // ADD.NC p0, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2391 "00000010" // /* MW 3 */ + 2392 "01101000" // /* MW 2 */ + 2393 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2394 "10011000" // LDA r16, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2395 "00010110" // /* MW 3 */ + 2396 "00011110" // /* MW 2 */ + 2397 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2398 "10011000" // LDA r18, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2399 "01010110" // /* MW 3 */ + 2400 "00111110" // /* MW 2 */ + 2401 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2402 "10011000" // LDA r17, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2403 "00110110" // /* MW 3 */ + 2404 "11101110" // /* MW 2 */ + 2405 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2406 "10011000" // LDA r27, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2407 "01110110" // /* MW 3 */ + 2408 "00000111" // /* MW 2 */ + 2409 "00000000" // /* MW 1 */ + 2410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2411 "00000000" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2422 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2423 "00100010" // /* MW 3 */ + 2424 "00100001" // /* MW 2 */ + 2425 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2426 "10011000" // ST r16, [p0, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2427 "00010001" // /* MW 3 */ + 2428 "11010110" // /* MW 2 */ + 2429 "00001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2430 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2431 "11111101" // /* MW 3 */ + 2432 "11100000" // /* MW 2 */ + 2433 "00010111" // /* MW 1 */ + 2434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2435 "00000000" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2440 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2441 "00001000" // /* MW 3 */ + 2442 "01010111" // /* MW 2 */ + 2443 "00010100" // /* MW 1 */ + 2444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2445 "00000000" // /* MW 1 */ + 2446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2447 "00000000" // /* MW 1 */ + 2448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2449 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 17 79 first + 2450 "10011000" // LDA p0, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00011110" // /* MW 3 */ + 2452 "00101100" // /* MW 2 */ + 2453 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 18 47 first + 2454 "10011000" // LDA p1, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "10011110" // /* MW 3 */ + 2456 "11111100" // /* MW 2 */ + 2457 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 19 81 first + 2458 "10011000" // LDA p2, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2459 "00011110" // /* MW 3 */ + 2460 "00000101" // /* MW 2 */ + 2461 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 16 4 first +.no_stack_arguments + 2462 "00000100" // JL #7536 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7536 delay_slots=5 */ + 2463 "00000001" // /* MW 5 */ + 2464 "00000000" // /* MW 4 */ + 2465 "10111000" // /* MW 3 */ + 2466 "00001110" // /* MW 2 */ + 2467 "00000000" // /* MW 1 */ +.delay_slot + 2468 "10011000" // ST r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2469 "01010101" // /* MW 3 */ + 2470 "11110011" // /* MW 2 */ + 2471 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2479 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 38 60 first +.return_address + 2480 "10011000" // LDA r16, [p7, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2481 "00010110" // /* MW 3 */ + 2482 "11110110" // /* MW 2 */ + 2483 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2484 "00011000" // LDA r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2485 "01010001" // /* MW 3 */ + 2486 "11110011" // /* MW 2 */ + 2487 "00000111" // /* MW 1 */ + 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2489 "00000000" // /* MW 1 */ + 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2491 "00000000" // /* MW 1 */ + 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2493 "00000000" // /* MW 1 */ + 2494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2495 "00000000" // /* MW 1 */ + 2496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2497 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2498 "00011000" // ADD.NC p0, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2499 "00001000" // /* MW 3 */ + 2500 "01101000" // /* MW 2 */ + 2501 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2502 "10011000" // LDA r16, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2503 "00010110" // /* MW 3 */ + 2504 "00000110" // /* MW 2 */ + 2505 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2506 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2507 "00000101" // /* MW 3 */ + 2508 "00100010" // /* MW 2 */ + 2509 "00010000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ + 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2517 "00000000" // /* MW 1 */ + 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2519 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2520 "00011000" // REL.COND r16, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "00011000" // /* MW 3 */ + 2522 "00010101" // /* MW 2 */ + 2523 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2524 "11010100" // LDA lr, [sp, #-12]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2525 "01000001" // /* MW 5 */ + 2526 "10101111" // /* MW 4 */ + 2527 "00101101" // /* MW 3 */ + 2528 "10000111" // /* MW 2 */ + 2529 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2530 "10011000" // LDA r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2531 "00010110" // /* MW 3 */ + 2532 "11110110" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ + 2534 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2535 "10011001" // /* MW 3 */ + 2536 "11111011" // /* MW 2 */ + 2537 "00000111" // /* MW 1 */ + 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2539 "00000000" // /* MW 1 */ + 2540 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "11110001" // /* MW 3 */ + 2542 "11111101" // /* MW 2 */ + 2543 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 first + 2544 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000001" // /* MW 5 */ + 2546 "00000000" // /* MW 4 */ + 2547 "00000000" // /* MW 3 */ + 2548 "11111000" // /* MW 2 */ + 2549 "11111111" // /* MW 1 */ + 2550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2551 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 + 2552 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2553 "00000000" // /* MW 3 */ + 2554 "00101000" // /* MW 2 */ + 2555 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2556 "10011000" // SUB r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2557 "00000001" // /* MW 3 */ + 2558 "01100011" // /* MW 2 */ + 2559 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2560 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "00010010" // /* MW 3 */ + 2562 "00100001" // /* MW 2 */ + 2563 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2564 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00010001" // /* MW 3 */ + 2566 "11110110" // /* MW 2 */ + 2567 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2571 "00000000" // /* MW 1 */ +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.function setup _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.src_ref 2 "reduce_base_c8.h" 218 first +.src_ref 2 "reduce_base_c8.h" 220 27 first +.src_ref 2 "reduce_base_c8.h" 290 63 +.src_ref 2 "reduce_base_c8.h" 348 46 +.function_start + 2576 "01110110" // LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2577 "01111000" // /* MW 11 */ + 2578 "01100000" // /* MW 10 */ + 2579 "00001001" // /* MW 9 */ + 2580 "01101000" // /* MW 8 */ + 2581 "01100111" // /* MW 7 */ + 2582 "00111110" // /* MW 6 */ + 2583 "10001011" // /* MW 5 */ + 2584 "10000000" // /* MW 4 */ + 2585 "11010011" // /* MW 3 */ + 2586 "10001110" // /* MW 2 */ + 2587 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 287 40 +.src_ref 2 "reduce_base_c8.h" 287 40 +.src_ref 2 "reduce_base_c8.h" 348 46 first + 2588 "10111010" // MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2589 "00001000" // /* MW 9 */ + 2590 "00000111" // /* MW 8 */ + 2591 "00110000" // /* MW 7 */ + 2592 "00001001" // /* MW 6 */ + 2593 "00100101" // /* MW 5 */ + 2594 "00111110" // /* MW 4 */ + 2595 "00000000" // /* MW 3 */ + 2596 "00000111" // /* MW 2 */ + 2597 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 293 77 +.src_ref 2 "reduce_base_c8.h" 298 44 +.src_ref 2 "reduce_base_c8.h" 299 40 +.src_ref 2 "reduce_base_c8.h" 300 59 +.src_ref 2 "reduce_base_c8.h" 326 79 + 2598 "10111010" // MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2599 "01111000" // /* MW 9 */ + 2600 "01100000" // /* MW 8 */ + 2601 "00001000" // /* MW 7 */ + 2602 "10101000" // /* MW 6 */ + 2603 "00010111" // /* MW 5 */ + 2604 "00111110" // /* MW 4 */ + 2605 "00000000" // /* MW 3 */ + 2606 "01111110" // /* MW 2 */ + 2607 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 57 +.src_ref 2 "reduce_base_c8.h" 301 81 +.src_ref 2 "reduce_base_c8.h" 305 77 + 2608 "10111010" // MOVA r5, #-1; MOVXM r4, #65528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2609 "00010000" // /* MW 9 */ + 2610 "11111100" // /* MW 8 */ + 2611 "10001111" // /* MW 7 */ + 2612 "00111100" // /* MW 6 */ + 2613 "00000000" // /* MW 5 */ + 2614 "00000000" // /* MW 4 */ + 2615 "00000000" // /* MW 3 */ + 2616 "11100101" // /* MW 2 */ + 2617 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 218 +.src_ref 2 "reduce_base_c8.h" 280 76 +.src_ref 2 "reduce_base_c8.h" 312 98 + 2618 "10111010" // MOVA r16, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2619 "01110000" // /* MW 9 */ + 2620 "00000000" // /* MW 8 */ + 2621 "00000000" // /* MW 7 */ + 2622 "00000000" // /* MW 6 */ + 2623 "00000010" // /* MW 5 */ + 2624 "00000000" // /* MW 4 */ + 2625 "00000000" // /* MW 3 */ + 2626 "10010000" // /* MW 2 */ + 2627 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 298 44 first + 2628 "00011000" // ADD.NC p4, r0, #46 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2629 "00010111" // /* MW 3 */ + 2630 "01100000" // /* MW 2 */ + 2631 "00011100" // /* MW 1 */ + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 220 25 first + 2634 "10011000" // ST r3, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "01110001" // /* MW 3 */ + 2636 "00011100" // /* MW 2 */ + 2637 "00001000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 221 28 first + 2638 "10011000" // LDA r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "01010110" // /* MW 3 */ + 2640 "00011111" // /* MW 2 */ + 2641 "00000001" // /* MW 1 */ + 2642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2643 "00000000" // /* MW 1 */ + 2644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2645 "00000000" // /* MW 1 */ + 2646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2647 "00000000" // /* MW 1 */ + 2648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2649 "00000000" // /* MW 1 */ + 2650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2651 "00000000" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 221 26 +.src_ref 2 "reduce_base_c8.h" 301 81 first + 2654 "01011100" // ST r26, [p0], #4; AND r17, r26, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2655 "10001001" // /* MW 5 */ + 2656 "01000100" // /* MW 4 */ + 2657 "00111101" // /* MW 3 */ + 2658 "11101010" // /* MW 2 */ + 2659 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 222 26 first +.src_ref 2 "reduce_base_c8.h" 293 58 first +.src_ref 2 "reduce_base_c8.h" 301 81 + 2660 "10111010" // LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2661 "10101000" // /* MW 9 */ + 2662 "01001000" // /* MW 8 */ + 2663 "11001100" // /* MW 7 */ + 2664 "01111110" // /* MW 6 */ + 2665 "01001101" // /* MW 5 */ + 2666 "00000110" // /* MW 4 */ + 2667 "11010000" // /* MW 3 */ + 2668 "11110110" // /* MW 2 */ + 2669 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 63 first + 2670 "10011000" // LSHL r18, r26, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "01101101" // /* MW 3 */ + 2672 "10100100" // /* MW 2 */ + 2673 "00010110" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 293 77 first + 2674 "10011000" // LSHL r6, r4, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "00011101" // /* MW 3 */ + 2676 "00001100" // /* MW 2 */ + 2677 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 41 first +.src_ref 2 "reduce_base_c8.h" 300 59 first + 2678 "00100100" // LSHL r17, r26, r1; ADD.NC r1, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2679 "11111111" // /* MW 5 */ + 2680 "10110010" // /* MW 4 */ + 2681 "10110000" // /* MW 3 */ + 2682 "01000011" // /* MW 2 */ + 2683 "11010100" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 222 24 first +.src_ref 2 "reduce_base_c8.h" 287 40 first + 2690 "01011100" // ST r29, [p0], #4; MAC r7, r7, r29, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2691 "01001100" // /* MW 5 */ + 2692 "10011100" // /* MW 4 */ + 2693 "00111110" // /* MW 3 */ + 2694 "11110110" // /* MW 2 */ + 2695 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 223 29 first +.src_ref 2 "reduce_base_c8.h" 312 60 first + 2696 "11111010" // LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2697 "10101111" // /* MW 9 */ + 2698 "01001001" // /* MW 8 */ + 2699 "00000111" // /* MW 7 */ + 2700 "10000000" // /* MW 6 */ + 2701 "10110101" // /* MW 5 */ + 2702 "11111111" // /* MW 4 */ + 2703 "11010111" // /* MW 3 */ + 2704 "10001010" // /* MW 2 */ + 2705 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 305 57 first + 2706 "10011000" // MUL r20, r3, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11011111" // /* MW 3 */ + 2708 "11101001" // /* MW 2 */ + 2709 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 78 first + 2710 "10011000" // MUL r28, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2711 "01001111" // /* MW 3 */ + 2712 "11111000" // /* MW 2 */ + 2713 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 299 40 first + 2714 "10011000" // LSHL r21, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2715 "11101101" // /* MW 3 */ + 2716 "01101011" // /* MW 2 */ + 2717 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 57 first +.src_ref 2 "reduce_base_c8.h" 299 40 + 2718 "00100100" // LSHL r18, r29, r5; ADD.NC r27, r21, #-48 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2719 "11010000" // /* MW 5 */ + 2720 "10110101" // /* MW 4 */ + 2721 "10111101" // /* MW 3 */ + 2722 "10001011" // /* MW 2 */ + 2723 "11101100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 41 + 2724 "00011000" // ADD r23, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "11111111" // /* MW 3 */ + 2726 "10101111" // /* MW 2 */ + 2727 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 301 85 first + 2728 "10011000" // MUL r29, r29, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "01101111" // /* MW 3 */ + 2730 "01111011" // /* MW 2 */ + 2731 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 223 27 first +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2732 "01011100" // ST r2, [p0], #4; LT r24, r30, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "01010101" // /* MW 5 */ + 2734 "01100000" // /* MW 4 */ + 2735 "00111111" // /* MW 3 */ + 2736 "10001010" // /* MW 2 */ + 2737 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 224 33 first + 2738 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2739 "00101110" // /* MW 3 */ + 2740 "00011100" // /* MW 2 */ + 2741 "00000001" // /* MW 1 */ + 2742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2743 "00000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 224 31 +.src_ref 2 "reduce_base_c8.h" 318 64 + 2754 "00000010" // ST el0, [p0], #4; MOV r31, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2755 "01110000" // /* MW 7 */ + 2756 "00001110" // /* MW 6 */ + 2757 "11110000" // /* MW 5 */ + 2758 "00000011" // /* MW 4 */ + 2759 "00110000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 225 34 first + 2762 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2763 "00001110" // /* MW 3 */ + 2764 "00000100" // /* MW 2 */ + 2765 "00000001" // /* MW 1 */ + 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2767 "00000000" // /* MW 1 */ + 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2769 "00000000" // /* MW 1 */ + 2770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2771 "00000000" // /* MW 1 */ + 2772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2773 "00000000" // /* MW 1 */ + 2774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2775 "00000000" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 225 32 +.src_ref 2 "reduce_base_c8.h" 318 64 + 2778 "00000010" // ST eh0, [p0]; MOV r25, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2779 "01110000" // /* MW 7 */ + 2780 "10001110" // /* MW 6 */ + 2781 "00110000" // /* MW 5 */ + 2782 "00000011" // /* MW 4 */ + 2783 "00110000" // /* MW 3 */ + 2784 "10000001" // /* MW 2 */ + 2785 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 226 32 first + 2786 "10011000" // LDA r30, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2787 "11010110" // /* MW 3 */ + 2788 "00010111" // /* MW 2 */ + 2789 "00000001" // /* MW 1 */ + 2790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2791 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2792 "10000100" // JNZ r24, #2912 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=2912 delay_slots=5 */ + 2793 "00000001" // /* MW 5 */ + 2794 "01000000" // /* MW 4 */ + 2795 "10110000" // /* MW 3 */ + 2796 "00000101" // /* MW 2 */ + 2797 "11000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 98 first +.delay_slot + 2798 "10011000" // LSHL r19, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2799 "00001101" // /* MW 3 */ + 2800 "00100111" // /* MW 2 */ + 2801 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 318 64 first +.delay_slot + 2802 "10011000" // MUL r25, r31, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2803 "10011111" // /* MW 3 */ + 2804 "11110011" // /* MW 2 */ + 2805 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 300 41 first +.src_ref 2 "reduce_base_c8.h" 305 77 first +.delay_slot + 2806 "00100100" // LSHL r20, r20, r5; ADD.NC r5, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "11111111" // /* MW 5 */ + 2808 "10110001" // /* MW 4 */ + 2809 "10110010" // /* MW 3 */ + 2810 "00001011" // /* MW 2 */ + 2811 "10100101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 280 76 first +.delay_slot + 2812 "10011000" // LSHL r16, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00001101" // /* MW 3 */ + 2814 "00100001" // /* MW 2 */ + 2815 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 226 30 first +.src_ref 2 "reduce_base_c8.h" 318 88 first +.delay_slot + 2816 "01011100" // ST r30, [p0, #4]; MUL r31, r25, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "11011111" // /* MW 5 */ + 2818 "11111111" // /* MW 4 */ + 2819 "00111100" // /* MW 3 */ + 2820 "11111010" // /* MW 2 */ + 2821 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2822 "00011000" // MOVX r28, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00000101" // /* MW 3 */ + 2824 "00111000" // /* MW 2 */ + 2825 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2826 "10011000" // EQ r28, r2, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "11000111" // /* MW 3 */ + 2828 "10111001" // /* MW 2 */ + 2829 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2830 "10000100" // JNZ r28, #4032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4032 delay_slots=5 */ + 2831 "00000001" // /* MW 5 */ + 2832 "01000000" // /* MW 4 */ + 2833 "11100000" // /* MW 3 */ + 2834 "00000111" // /* MW 2 */ + 2835 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2841 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2843 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 242 41 first +.delay_slot + 2844 "00011000" // ADD r22, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11111111" // /* MW 3 */ + 2846 "11101101" // /* MW 2 */ + 2847 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2848 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00001001" // /* MW 3 */ + 2850 "00100010" // /* MW 2 */ + 2851 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2852 "10011000" // EQ r17, r17, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00100111" // /* MW 3 */ + 2854 "01100010" // /* MW 2 */ + 2855 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2856 "10000100" // JNZ r17, #3904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3904 delay_slots=5 */ + 2857 "00000001" // /* MW 5 */ + 2858 "01000000" // /* MW 4 */ + 2859 "10100000" // /* MW 3 */ + 2860 "00000111" // /* MW 2 */ + 2861 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2869 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.delay_slot + 2870 "00011000" // MOVX r7, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001101" // /* MW 3 */ + 2872 "00001110" // /* MW 2 */ + 2873 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2874 "10011000" // EQ r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00100111" // /* MW 3 */ + 2876 "11000100" // /* MW 2 */ + 2877 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2878 "10000100" // JNZ r2, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */ + 2879 "00000001" // /* MW 5 */ + 2880 "01000000" // /* MW 4 */ + 2881 "01010000" // /* MW 3 */ + 2882 "00000111" // /* MW 2 */ + 2883 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2893 "00000000" // /* MW 1 */ + 2894 "10000100" // J #3552 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3552 delay_slots=5 */ + 2895 "00000000" // /* MW 5 */ + 2896 "00000000" // /* MW 4 */ + 2897 "11110000" // /* MW 3 */ + 2898 "00000110" // /* MW 2 */ + 2899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 2900 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "00010001" // /* MW 3 */ + 2902 "00110100" // /* MW 2 */ + 2903 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2911 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336 +.src_ref 2 "reduce_base_c8.h" 236 8 + 2912 "00011000" // MOVX r29, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "00010101" // /* MW 3 */ + 2914 "00111010" // /* MW 2 */ + 2915 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2916 "10011000" // LT r24, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "00101010" // /* MW 3 */ + 2918 "01110000" // /* MW 2 */ + 2919 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2920 "10000100" // JNZ r24, #3232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3232 delay_slots=5 */ + 2921 "00000001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "01010000" // /* MW 3 */ + 2924 "00000110" // /* MW 2 */ + 2925 "11000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 316 38 +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 2934 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2935 "00010001" // /* MW 3 */ + 2936 "00110100" // /* MW 2 */ + 2937 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2938 "10011000" // EQ r17, r26, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2939 "00100111" // /* MW 3 */ + 2940 "10100010" // /* MW 2 */ + 2941 "00010110" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2942 "10000100" // JNZ r17, #3104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3104 delay_slots=5 */ + 2943 "00000001" // /* MW 5 */ + 2944 "01000000" // /* MW 4 */ + 2945 "00010000" // /* MW 3 */ + 2946 "00000110" // /* MW 2 */ + 2947 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2949 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2958 "10011000" // NE r2, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00101000" // /* MW 3 */ + 2960 "01000100" // /* MW 2 */ + 2961 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2962 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */ + 2963 "00000001" // /* MW 5 */ + 2964 "01000000" // /* MW 4 */ + 2965 "11110000" // /* MW 3 */ + 2966 "00000110" // /* MW 2 */ + 2967 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2969 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2977 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 286 44 first +.src_ref 2 "reduce_base_c8.h" 289 38 +.src_ref 2 "reduce_base_c8.h" 291 40 +.src_ref 2 "reduce_base_c8.h" 291 40 + 2978 "10111010" // ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2979 "01011000" // /* MW 9 */ + 2980 "11101100" // /* MW 8 */ + 2981 "00000111" // /* MW 7 */ + 2982 "00001000" // /* MW 6 */ + 2983 "00100010" // /* MW 5 */ + 2984 "00000000" // /* MW 4 */ + 2985 "11100000" // /* MW 3 */ + 2986 "11010110" // /* MW 2 */ + 2987 "10000011" // /* MW 1 */ + 2988 "11111000" // MOV r30, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "10100000" // /* MW 3 */ + 2990 "10011100" // /* MW 2 */ + 2991 "00011111" // /* MW 1 */ + 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2993 "00000000" // /* MW 1 */ + 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2995 "00000000" // /* MW 1 */ + 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2997 "00000000" // /* MW 1 */ + 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2999 "00000000" // /* MW 1 */ + 3000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3001 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 287 38 first + 3002 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3003 "11110111" // /* MW 3 */ + 3004 "00011100" // /* MW 2 */ + 3005 "00000100" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 288 39 first + 3018 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "11110111" // /* MW 3 */ + 3020 "00011110" // /* MW 2 */ + 3021 "00000100" // /* MW 1 */ + 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3023 "00000000" // /* MW 1 */ + 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3025 "00000000" // /* MW 1 */ + 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3027 "00000000" // /* MW 1 */ + 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3031 "00000000" // /* MW 1 */ + 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3033 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 289 38 first + 3034 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "01010111" // /* MW 3 */ + 3036 "00011100" // /* MW 2 */ + 3037 "00000100" // /* MW 1 */ + 3038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3039 "00000000" // /* MW 1 */ + 3040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3041 "00000000" // /* MW 1 */ + 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3043 "00000000" // /* MW 1 */ + 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3045 "00000000" // /* MW 1 */ + 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3047 "00000000" // /* MW 1 */ + 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3049 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 39 first + 3050 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3051 "00110111" // /* MW 3 */ + 3052 "00011100" // /* MW 2 */ + 3053 "00000100" // /* MW 1 */ + 3054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3055 "00000000" // /* MW 1 */ + 3056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3057 "00000000" // /* MW 1 */ + 3058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3059 "00000000" // /* MW 1 */ + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ + 3062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3063 "00000000" // /* MW 1 */ + 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3065 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 291 40 first + 3066 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "01010111" // /* MW 3 */ + 3068 "00001000" // /* MW 2 */ + 3069 "00000100" // /* MW 1 */ + 3070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3071 "00000000" // /* MW 1 */ + 3072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3073 "00000000" // /* MW 1 */ + 3074 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3075 "00000000" // /* MW 5 */ + 3076 "00000000" // /* MW 4 */ + 3077 "11101000" // /* MW 3 */ + 3078 "00000110" // /* MW 2 */ + 3079 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3081 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3085 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 292 38 first +.delay_slot + 3086 "10011000" // ST r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3087 "01010001" // /* MW 3 */ + 3088 "00000110" // /* MW 2 */ + 3089 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 293 38 first +.delay_slot + 3090 "00101110" // NOPA; ST r6, [p4, #4]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3091 "00011100" // /* MW 13 */ + 3092 "00000000" // /* MW 12 */ + 3093 "00000000" // /* MW 11 */ + 3094 "01010111" // /* MW 10 */ + 3095 "00011010" // /* MW 9 */ + 3096 "01000000" // /* MW 8 */ + 3097 "00000000" // /* MW 7 */ + 3098 "00000000" // /* MW 6 */ + 3099 "10100011" // /* MW 5 */ + 3100 "00101001" // /* MW 4 */ + 3101 "11111000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528 +.src_ref 2 "reduce_base_c8.h" 274 44 first +.src_ref 2 "reduce_base_c8.h" 275 40 +.src_ref 2 "reduce_base_c8.h" 275 40 + 3104 "10111010" // ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00010000" // /* MW 8 */ + 3107 "01001000" // /* MW 7 */ + 3108 "10101000" // /* MW 6 */ + 3109 "01100111" // /* MW 5 */ + 3110 "00111110" // /* MW 4 */ + 3111 "11100000" // /* MW 3 */ + 3112 "10010010" // /* MW 2 */ + 3113 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 40 first +.src_ref 2 "reduce_base_c8.h" 279 40 + 3114 "10111010" // MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01111000" // /* MW 9 */ + 3116 "00001110" // /* MW 8 */ + 3117 "11010000" // /* MW 7 */ + 3118 "00110011" // /* MW 6 */ + 3119 "00100010" // /* MW 5 */ + 3120 "00001100" // /* MW 4 */ + 3121 "10000000" // /* MW 3 */ + 3122 "10000000" // /* MW 2 */ + 3123 "11111101" // /* MW 1 */ + 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3125 "00000000" // /* MW 1 */ + 3126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3127 "00000000" // /* MW 1 */ + 3128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3129 "00000000" // /* MW 1 */ + 3130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3131 "00000000" // /* MW 1 */ + 3132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 38 + 3134 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3135 "01010111" // /* MW 3 */ + 3136 "00011100" // /* MW 2 */ + 3137 "00000100" // /* MW 1 */ + 3138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3139 "00000000" // /* MW 1 */ + 3140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3141 "00000000" // /* MW 1 */ + 3142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3143 "00000000" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 39 first + 3150 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "11110111" // /* MW 3 */ + 3152 "00011110" // /* MW 2 */ + 3153 "00000100" // /* MW 1 */ + 3154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3155 "00000000" // /* MW 1 */ + 3156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3157 "00000000" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 38 first +.src_ref 2 "reduce_base_c8.h" 277 38 first + 3166 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3167 "01010111" // /* MW 3 */ + 3168 "00011100" // /* MW 2 */ + 3169 "00000100" // /* MW 1 */ + 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3171 "00000000" // /* MW 1 */ + 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3173 "00000000" // /* MW 1 */ + 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3175 "00000000" // /* MW 1 */ + 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3177 "00000000" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 278 39 first + 3182 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3183 "10110111" // /* MW 3 */ + 3184 "00011100" // /* MW 2 */ + 3185 "00000100" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ + 3190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3191 "00000000" // /* MW 1 */ + 3192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3193 "00000000" // /* MW 1 */ + 3194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3195 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3197 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 279 40 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3198 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3199 "00110111" // /* MW 3 */ + 3200 "00001000" // /* MW 2 */ + 3201 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3203 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3205 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3206 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3207 "00000000" // /* MW 5 */ + 3208 "00000000" // /* MW 4 */ + 3209 "11101000" // /* MW 3 */ + 3210 "00000110" // /* MW 2 */ + 3211 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 279 40 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3212 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3213 "01000001" // /* MW 3 */ + 3214 "00000010" // /* MW 2 */ + 3215 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3217 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3219 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 281 38 first +.delay_slot + 3220 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3221 "01110001" // /* MW 3 */ + 3222 "00010100" // /* MW 2 */ + 3223 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 280 38 first +.delay_slot + 3224 "00000010" // ST r16, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3225 "01110000" // /* MW 7 */ + 3226 "10100101" // /* MW 6 */ + 3227 "00000001" // /* MW 5 */ + 3228 "00000000" // /* MW 4 */ + 3229 "00110000" // /* MW 3 */ + 3230 "11000010" // /* MW 2 */ + 3231 "10000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656 +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 301 40 +.src_ref 2 "reduce_base_c8.h" 302 76 + 3232 "00101100" // LDA r3, [sp, #-4]; MOVX r4, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3233 "00110010" // /* MW 5 */ + 3234 "00010000" // /* MW 4 */ + 3235 "00100000" // /* MW 3 */ + 3236 "10001110" // /* MW 2 */ + 3237 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 3238 "10011000" // EQ r4, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3239 "01000111" // /* MW 3 */ + 3240 "10001000" // /* MW 2 */ + 3241 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3242 "10000100" // JNZ r4, #3408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3408 delay_slots=5 */ + 3243 "00000001" // /* MW 5 */ + 3244 "01000000" // /* MW 4 */ + 3245 "10101000" // /* MW 3 */ + 3246 "00000110" // /* MW 2 */ + 3247 "00100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 298 44 +.src_ref 2 "reduce_base_c8.h" 303 40 +.src_ref 2 "reduce_base_c8.h" 310 44 +.src_ref 2 "reduce_base_c8.h" 311 38 +.delay_slot + 3248 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "01000001" // /* MW 3 */ + 3250 "00000010" // /* MW 2 */ + 3251 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3259 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3260 "00011000" // MOVX r3, #7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3261 "00011101" // /* MW 3 */ + 3262 "00000110" // /* MW 2 */ + 3263 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3264 "10011000" // NE r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3265 "00101000" // /* MW 3 */ + 3266 "11000100" // /* MW 2 */ + 3267 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3268 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */ + 3269 "00000001" // /* MW 5 */ + 3270 "01000000" // /* MW 4 */ + 3271 "11110000" // /* MW 3 */ + 3272 "00000110" // /* MW 2 */ + 3273 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3275 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3283 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 310 44 first +.src_ref 2 "reduce_base_c8.h" 312 41 first +.src_ref 2 "reduce_base_c8.h" 315 40 + 3284 "10111010" // ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3285 "01011000" // /* MW 9 */ + 3286 "11101100" // /* MW 8 */ + 3287 "00000111" // /* MW 7 */ + 3288 "11111000" // /* MW 6 */ + 3289 "00101111" // /* MW 5 */ + 3290 "00100110" // /* MW 4 */ + 3291 "11100000" // /* MW 3 */ + 3292 "10000110" // /* MW 2 */ + 3293 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 313 38 +.src_ref 2 "reduce_base_c8.h" 317 97 + 3294 "10111010" // MOVA r3, #-6; MOVXM dj0, #65536 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3295 "00010000" // /* MW 9 */ + 3296 "00000000" // /* MW 8 */ + 3297 "01000000" // /* MW 7 */ + 3298 "01000000" // /* MW 6 */ + 3299 "00000000" // /* MW 5 */ + 3300 "00000000" // /* MW 4 */ + 3301 "00000000" // /* MW 3 */ + 3302 "01000011" // /* MW 2 */ + 3303 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 315 40 +.src_ref 2 "reduce_base_c8.h" 317 97 first + 3304 "01100100" // LSHL r3, r28, r3; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3305 "00000001" // /* MW 5 */ + 3306 "00100000" // /* MW 4 */ + 3307 "10111100" // /* MW 3 */ + 3308 "11000111" // /* MW 2 */ + 3309 "11100000" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ + 3316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3317 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 311 38 first + 3318 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3319 "00110111" // /* MW 3 */ + 3320 "00011100" // /* MW 2 */ + 3321 "00000100" // /* MW 1 */ + 3322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3323 "00000000" // /* MW 1 */ + 3324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3325 "00000000" // /* MW 1 */ + 3326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3327 "00000000" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 39 first + 3334 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3335 "01010111" // /* MW 3 */ + 3336 "00011100" // /* MW 2 */ + 3337 "00000100" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ + 3342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3343 "00000000" // /* MW 1 */ + 3344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3345 "00000000" // /* MW 1 */ + 3346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3347 "00000000" // /* MW 1 */ + 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3349 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 313 38 first + 3350 "10011000" // ST dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "01000001" // /* MW 3 */ + 3352 "00011100" // /* MW 2 */ + 3353 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 315 40 first + 3354 "00011000" // ST.s16 r24, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00010111" // /* MW 3 */ + 3356 "00001011" // /* MW 2 */ + 3357 "00000100" // /* MW 1 */ + 3358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3359 "00000000" // /* MW 1 */ + 3360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3361 "00000000" // /* MW 1 */ + 3362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3363 "00000000" // /* MW 1 */ + 3364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3365 "00000000" // /* MW 1 */ + 3366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3367 "00000000" // /* MW 1 */ + 3368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3369 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 316 38 first + 3370 "10011000" // ST r26, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3371 "01010001" // /* MW 3 */ + 3372 "00000111" // /* MW 2 */ + 3373 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 317 38 first + 3374 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3375 "01110001" // /* MW 3 */ + 3376 "00010100" // /* MW 2 */ + 3377 "00001100" // /* MW 1 */ + 3378 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3379 "00000000" // /* MW 5 */ + 3380 "00000000" // /* MW 4 */ + 3381 "11101000" // /* MW 3 */ + 3382 "00000110" // /* MW 2 */ + 3383 "00000000" // /* MW 1 */ +.delay_slot + 3384 "11111000" // MOV r30, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3385 "10100000" // /* MW 3 */ + 3386 "10011111" // /* MW 2 */ + 3387 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3394 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3395 "00011100" // /* MW 13 */ + 3396 "00000000" // /* MW 12 */ + 3397 "00000000" // /* MW 11 */ + 3398 "01010111" // /* MW 10 */ + 3399 "00011010" // /* MW 9 */ + 3400 "01000000" // /* MW 8 */ + 3401 "00000000" // /* MW 7 */ + 3402 "00000000" // /* MW 6 */ + 3403 "10110110" // /* MW 5 */ + 3404 "00000010" // /* MW 4 */ + 3405 "11110000" // /* MW 3 */ + 3406 "00101100" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832 +.src_ref 2 "reduce_base_c8.h" 298 44 first +.src_ref 2 "reduce_base_c8.h" 301 40 +.src_ref 2 "reduce_base_c8.h" 301 40 first + 3408 "10111010" // ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3409 "01011000" // /* MW 9 */ + 3410 "00010000" // /* MW 8 */ + 3411 "01001000" // /* MW 7 */ + 3412 "01110000" // /* MW 6 */ + 3413 "00101011" // /* MW 5 */ + 3414 "00000110" // /* MW 4 */ + 3415 "11100000" // /* MW 3 */ + 3416 "10000110" // /* MW 2 */ + 3417 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 76 +.src_ref 2 "reduce_base_c8.h" 303 40 +.src_ref 2 "reduce_base_c8.h" 306 62 + 3418 "10111010" // MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3419 "01111000" // /* MW 9 */ + 3420 "00001110" // /* MW 8 */ + 3421 "11010000" // /* MW 7 */ + 3422 "10101000" // /* MW 6 */ + 3423 "01000111" // /* MW 5 */ + 3424 "00111110" // /* MW 4 */ + 3425 "10000000" // /* MW 3 */ + 3426 "10000000" // /* MW 2 */ + 3427 "11111101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 76 first + 3428 "10011000" // LSHL r4, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3429 "01001101" // /* MW 3 */ + 3430 "11001000" // /* MW 2 */ + 3431 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 41 +.src_ref 2 "reduce_base_c8.h" 306 62 first + 3432 "00100100" // MUL r30, r30, r6; ADD.NC r3, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3433 "11111111" // /* MW 5 */ + 3434 "10100100" // /* MW 4 */ + 3435 "11110001" // /* MW 3 */ + 3436 "10001101" // /* MW 2 */ + 3437 "11110111" // /* MW 1 */ + 3438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3439 "00000000" // /* MW 1 */ + 3440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3441 "00000000" // /* MW 1 */ + 3442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3443 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 299 38 first + 3444 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3445 "01110111" // /* MW 3 */ + 3446 "00011111" // /* MW 2 */ + 3447 "00000100" // /* MW 1 */ + 3448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3449 "00000000" // /* MW 1 */ + 3450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3451 "00000000" // /* MW 1 */ + 3452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3453 "00000000" // /* MW 1 */ + 3454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3455 "00000000" // /* MW 1 */ + 3456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3457 "00000000" // /* MW 1 */ + 3458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3459 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 300 39 first + 3460 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3461 "10110111" // /* MW 3 */ + 3462 "00011100" // /* MW 2 */ + 3463 "00000100" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ + 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3473 "00000000" // /* MW 1 */ + 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3475 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 301 38 first + 3476 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "01010111" // /* MW 3 */ + 3478 "00011100" // /* MW 2 */ + 3479 "00000100" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 39 first + 3492 "00011000" // ST.s16 r3, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "01110111" // /* MW 3 */ + 3494 "00011100" // /* MW 2 */ + 3495 "00000100" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ + 3506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3507 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 303 40 first + 3508 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3509 "00110111" // /* MW 3 */ + 3510 "00001000" // /* MW 2 */ + 3511 "00000100" // /* MW 1 */ + 3512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3513 "00000000" // /* MW 1 */ + 3514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3515 "00000000" // /* MW 1 */ + 3516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3517 "00000000" // /* MW 1 */ + 3518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3519 "00000000" // /* MW 1 */ + 3520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3521 "00000000" // /* MW 1 */ + 3522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3523 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 304 38 first + 3524 "10011000" // ST r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3525 "00110001" // /* MW 3 */ + 3526 "00000110" // /* MW 2 */ + 3527 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 305 38 first + 3528 "00000010" // ST r20, [p4, #4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3529 "01110000" // /* MW 7 */ + 3530 "10100101" // /* MW 6 */ + 3531 "00000001" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "00110000" // /* MW 3 */ + 3534 "11010010" // /* MW 2 */ + 3535 "10000010" // /* MW 1 */ +.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 3536 "10111000" // MOV dj0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "01000000" // /* MW 3 */ + 3538 "10000000" // /* MW 2 */ + 3539 "00011000" // /* MW 1 */ + 3540 "00110110" // ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3541 "10000001" // /* MW 11 */ + 3542 "10101101" // /* MW 10 */ + 3543 "00000000" // /* MW 9 */ + 3544 "00000000" // /* MW 8 */ + 3545 "00000000" // /* MW 7 */ + 3546 "00000000" // /* MW 6 */ + 3547 "00100000" // /* MW 5 */ + 3548 "00000000" // /* MW 4 */ + 3549 "11100000" // /* MW 3 */ + 3550 "01111010" // /* MW 2 */ + 3551 "01100000" // /* MW 1 */ +.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.src_ref 2 "reduce_base_c8.h" 326 79 first +.src_ref 2 "reduce_base_c8.h" 329 51 + 3552 "00010100" // MOVA m2, #24; ADD.NC p0, r0, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3553 "00011110" // /* MW 5 */ + 3554 "11000000" // /* MW 4 */ + 3555 "10000000" // /* MW 3 */ + 3556 "00001000" // /* MW 2 */ + 3557 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.src_ref 2 "reduce_base_c8.h" 330 26 +.src_ref 3 "reduce_mean_c8_impl.h" 139 51 first + 3558 "10111010" // LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "01011000" // /* MW 9 */ + 3560 "11100010" // /* MW 8 */ + 3561 "00000111" // /* MW 7 */ + 3562 "00001000" // /* MW 6 */ + 3563 "00000010" // /* MW 5 */ + 3564 "00000000" // /* MW 4 */ + 3565 "11010000" // /* MW 3 */ + 3566 "10001010" // /* MW 2 */ + 3567 "01000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 331 24 +.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first + 3568 "01010100" // LDA.s16 r3, [p2]; MOV m1, #38 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3569 "10011001" // /* MW 5 */ + 3570 "00000000" // /* MW 4 */ + 3571 "01010010" // /* MW 3 */ + 3572 "10001110" // /* MW 2 */ + 3573 "01000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 141 49 first + 3574 "10011000" // LDA r1, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3575 "00110110" // /* MW 3 */ + 3576 "00010100" // /* MW 2 */ + 3577 "00000010" // /* MW 1 */ + 3578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3579 "00000000" // /* MW 1 */ + 3580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3581 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 326 28 first + 3582 "00011000" // ST.s16 r31, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3583 "11110111" // /* MW 3 */ + 3584 "00101111" // /* MW 2 */ + 3585 "00000000" // /* MW 1 */ + 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3587 "00000000" // /* MW 1 */ + 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3589 "00000000" // /* MW 1 */ + 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3591 "00000000" // /* MW 1 */ + 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3593 "00000000" // /* MW 1 */ + 3594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3595 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3597 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 327 31 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3598 "00011000" // ST.s16 r24, [p0], #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3599 "00010111" // /* MW 3 */ + 3600 "01011111" // /* MW 2 */ + 3601 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3603 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3609 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 327 31 +.src_ref 2 "reduce_base_c8.h" 328 23 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3610 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00000001" // /* MW 3 */ + 3612 "00110000" // /* MW 2 */ + 3613 "00010000" // /* MW 1 */ + 3614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3615 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 328 23 first + 3616 "00011000" // ST.s16 r24, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00010111" // /* MW 3 */ + 3618 "11001111" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 51 first + 3620 "10011000" // LDA.u16 r4, [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "10011010" // /* MW 3 */ + 3622 "01001000" // /* MW 2 */ + 3623 "00000000" // /* MW 1 */ + 3624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3625 "00000000" // /* MW 1 */ + 3626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3627 "00000000" // /* MW 1 */ + 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3629 "00000000" // /* MW 1 */ + 3630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3633 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 28 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3634 "00011000" // ST.s16 r0, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3635 "00010111" // /* MW 3 */ + 3636 "11111100" // /* MW 2 */ + 3637 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.src_ref 2 "reduce_base_c8.h" 330 28 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3638 "00100100" // LSHL r4, r4, r26; ADD.NC r5, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3639 "11111111" // /* MW 5 */ + 3640 "10100100" // /* MW 4 */ + 3641 "10110010" // /* MW 3 */ + 3642 "00110101" // /* MW 2 */ + 3643 "00100001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3644 "10011000" // SUB r0, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3645 "01000001" // /* MW 3 */ + 3646 "00000000" // /* MW 2 */ + 3647 "00010000" // /* MW 1 */ + 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3649 "00000000" // /* MW 1 */ + 3650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3651 "00000000" // /* MW 1 */ + 3652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3653 "00000000" // /* MW 1 */ + 3654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3655 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 330 26 first + 3656 "00011000" // ST.s16 r5, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3657 "10110111" // /* MW 3 */ + 3658 "00001000" // /* MW 2 */ + 3659 "00000000" // /* MW 1 */ + 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3661 "00000000" // /* MW 1 */ + 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3663 "00000000" // /* MW 1 */ + 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3665 "00000000" // /* MW 1 */ + 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3667 "00000000" // /* MW 1 */ + 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3669 "00000000" // /* MW 1 */ + 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3671 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 331 24 first + 3672 "00011000" // ST.s16 r19, [p0], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3673 "01110111" // /* MW 3 */ + 3674 "00101010" // /* MW 2 */ + 3675 "00000000" // /* MW 1 */ + 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3677 "00000000" // /* MW 1 */ + 3678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3679 "00000000" // /* MW 1 */ + 3680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3681 "00000000" // /* MW 1 */ + 3682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3683 "00000000" // /* MW 1 */ + 3684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3685 "00000000" // /* MW 1 */ + 3686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3687 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 139 40 first + 3688 "00011000" // ST.s8 r2, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3689 "01000111" // /* MW 3 */ + 3690 "11101100" // /* MW 2 */ + 3691 "00000000" // /* MW 1 */ + 3692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3693 "00000000" // /* MW 1 */ + 3694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3695 "00000000" // /* MW 1 */ + 3696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3697 "00000000" // /* MW 1 */ + 3698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3699 "00000000" // /* MW 1 */ + 3700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3701 "00000000" // /* MW 1 */ + 3702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3703 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first + 3704 "00011000" // ST.s16 r3, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3705 "01110111" // /* MW 3 */ + 3706 "00000100" // /* MW 2 */ + 3707 "00000000" // /* MW 1 */ + 3708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3709 "00000000" // /* MW 1 */ + 3710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3711 "00000000" // /* MW 1 */ + 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3713 "00000000" // /* MW 1 */ + 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3715 "00000000" // /* MW 1 */ + 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3717 "00000000" // /* MW 1 */ + 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3719 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 141 38 first + 3720 "00011000" // ST.s8 r1, [p0, #-2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3721 "00100111" // /* MW 3 */ + 3722 "11100100" // /* MW 2 */ + 3723 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 349 4 first + 3724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3725 "00000000" // /* MW 3 */ + 3726 "00101000" // /* MW 2 */ + 3727 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 349 4 +.delay_slot + 3728 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3729 "00000001" // /* MW 5 */ + 3730 "00000000" // /* MW 4 */ + 3731 "00000000" // /* MW 3 */ + 3732 "11111000" // /* MW 2 */ + 3733 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3735 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3737 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3740 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3741 "01100111" // /* MW 3 */ + 3742 "00000001" // /* MW 2 */ + 3743 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168 +.src_ref 2 "reduce_base_c8.h" 262 44 first +.src_ref 2 "reduce_base_c8.h" 263 77 + 3744 "10111010" // ST.s16 r21, [p4], #2; MOVXM r5, #65512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3745 "00010000" // /* MW 9 */ + 3746 "11110100" // /* MW 8 */ + 3747 "10101111" // /* MW 7 */ + 3748 "00111100" // /* MW 6 */ + 3749 "00000000" // /* MW 5 */ + 3750 "00000000" // /* MW 4 */ + 3751 "11100000" // /* MW 3 */ + 3752 "11010110" // /* MW 2 */ + 3753 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 56 +.src_ref 2 "reduce_base_c8.h" 263 77 first +.src_ref 2 "reduce_base_c8.h" 267 40 + 3754 "10111010" // LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3755 "01011000" // /* MW 9 */ + 3756 "11101100" // /* MW 8 */ + 3757 "00000111" // /* MW 7 */ + 3758 "00000100" // /* MW 6 */ + 3759 "01111101" // /* MW 5 */ + 3760 "00001010" // /* MW 4 */ + 3761 "00100000" // /* MW 3 */ + 3762 "10001010" // /* MW 2 */ + 3763 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 118 +.src_ref 2 "reduce_base_c8.h" 329 30 + 3764 "10111010" // MOVA r26, #4; MOVXM r6, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3765 "10010000" // /* MW 9 */ + 3766 "11111111" // /* MW 8 */ + 3767 "11001111" // /* MW 7 */ + 3768 "00111100" // /* MW 6 */ + 3769 "00000000" // /* MW 5 */ + 3770 "00000000" // /* MW 4 */ + 3771 "00000000" // /* MW 3 */ + 3772 "10011010" // /* MW 2 */ + 3773 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 118 first + 3774 "10011000" // ADD r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3775 "01100000" // /* MW 3 */ + 3776 "11100010" // /* MW 2 */ + 3777 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 98 +.src_ref 2 "reduce_base_c8.h" 267 116 first + 3778 "00011000" // MAC r29, r29, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3779 "01000110" // /* MW 3 */ + 3780 "01111010" // /* MW 2 */ + 3781 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 60 first +.src_ref 2 "reduce_base_c8.h" 265 98 first + 3782 "00011000" // MSC r21, r21, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3783 "01001110" // /* MW 3 */ + 3784 "01101010" // /* MW 2 */ + 3785 "00010100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3787 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 38 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3788 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3789 "01010111" // /* MW 3 */ + 3790 "00011100" // /* MW 2 */ + 3791 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 56 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3792 "10011000" // MUL r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3793 "00101111" // /* MW 3 */ + 3794 "11000100" // /* MW 2 */ + 3795 "00010001" // /* MW 1 */ + 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3797 "00000000" // /* MW 1 */ + 3798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3799 "00000000" // /* MW 1 */ + 3800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3801 "00000000" // /* MW 1 */ + 3802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3803 "00000000" // /* MW 1 */ + 3804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3805 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 264 39 first + 3806 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3807 "11010111" // /* MW 3 */ + 3808 "00011110" // /* MW 2 */ + 3809 "00000100" // /* MW 1 */ + 3810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3811 "00000000" // /* MW 1 */ + 3812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3813 "00000000" // /* MW 1 */ + 3814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3815 "00000000" // /* MW 1 */ + 3816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3817 "00000000" // /* MW 1 */ + 3818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 38 first + 3822 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3823 "10110111" // /* MW 3 */ + 3824 "00011110" // /* MW 2 */ + 3825 "00000100" // /* MW 1 */ + 3826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3827 "00000000" // /* MW 1 */ + 3828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3829 "00000000" // /* MW 1 */ + 3830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3831 "00000000" // /* MW 1 */ + 3832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3833 "00000000" // /* MW 1 */ + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ + 3836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3837 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 266 39 first + 3838 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3839 "00110111" // /* MW 3 */ + 3840 "00011100" // /* MW 2 */ + 3841 "00000100" // /* MW 1 */ + 3842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3843 "00000000" // /* MW 1 */ + 3844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3845 "00000000" // /* MW 1 */ + 3846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3847 "00000000" // /* MW 1 */ + 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3849 "00000000" // /* MW 1 */ + 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3851 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3853 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 40 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3854 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3855 "01010111" // /* MW 3 */ + 3856 "00001000" // /* MW 2 */ + 3857 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3859 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3862 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3863 "00000000" // /* MW 5 */ + 3864 "00000000" // /* MW 4 */ + 3865 "11101000" // /* MW 3 */ + 3866 "00000110" // /* MW 2 */ + 3867 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 42 +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3868 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3869 "01000001" // /* MW 3 */ + 3870 "00001010" // /* MW 2 */ + 3871 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 42 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3872 "10011000" // SUB r2, r5, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3873 "11010001" // /* MW 3 */ + 3874 "01000101" // /* MW 2 */ + 3875 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 270 64 +.delay_slot + 3876 "11111000" // MOV r6, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3877 "00011100" // /* MW 3 */ + 3878 "10100001" // /* MW 2 */ + 3879 "00011001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 268 38 first +.delay_slot + 3880 "00000010" // ST r3, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3881 "01110000" // /* MW 7 */ + 3882 "10100101" // /* MW 6 */ + 3883 "00000001" // /* MW 5 */ + 3884 "00000000" // /* MW 4 */ + 3885 "00110000" // /* MW 3 */ + 3886 "10001110" // /* MW 2 */ + 3887 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 269 38 first +.src_ref 2 "reduce_base_c8.h" 270 64 first +.delay_slot + 3888 "11100001" // NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3889 "00000000" // /* MW 15 */ + 3890 "00000000" // /* MW 14 */ + 3891 "01111000" // /* MW 13 */ + 3892 "10100101" // /* MW 12 */ + 3893 "00000001" // /* MW 11 */ + 3894 "01111100" // /* MW 10 */ + 3895 "11100011" // /* MW 9 */ + 3896 "10111101" // /* MW 8 */ + 3897 "00010001" // /* MW 7 */ + 3898 "00010110" // /* MW 6 */ + 3899 "00100100" // /* MW 5 */ + 3900 "00000000" // /* MW 4 */ + 3901 "11110000" // /* MW 3 */ + 3902 "00101100" // /* MW 2 */ + 3903 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328 +.src_ref 2 "reduce_base_c8.h" 250 44 +.src_ref 2 "reduce_base_c8.h" 250 44 first +.src_ref 2 "reduce_base_c8.h" 255 40 + 3904 "10111010" // ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "01011000" // /* MW 9 */ + 3906 "11101100" // /* MW 8 */ + 3907 "00000111" // /* MW 7 */ + 3908 "00001000" // /* MW 6 */ + 3909 "01000010" // /* MW 5 */ + 3910 "00000000" // /* MW 4 */ + 3911 "11100000" // /* MW 3 */ + 3912 "10010010" // /* MW 2 */ + 3913 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 255 113 first + 3914 "10111010" // LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "01011000" // /* MW 9 */ + 3916 "00001000" // /* MW 8 */ + 3917 "01001000" // /* MW 7 */ + 3918 "01110000" // /* MW 6 */ + 3919 "00101101" // /* MW 5 */ + 3920 "00000110" // /* MW 4 */ + 3921 "00100000" // /* MW 3 */ + 3922 "10000110" // /* MW 2 */ + 3923 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 329 30 + 3924 "01100100" // MOVX r3, #16; MOV r26, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3925 "00010001" // /* MW 5 */ + 3926 "00100000" // /* MW 4 */ + 3927 "00101101" // /* MW 3 */ + 3928 "11001000" // /* MW 2 */ + 3929 "00000000" // /* MW 1 */ + 3930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3931 "00000000" // /* MW 1 */ + 3932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3933 "00000000" // /* MW 1 */ + 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3935 "00000000" // /* MW 1 */ + 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3937 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 251 38 first + 3938 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3939 "01110111" // /* MW 3 */ + 3940 "00011111" // /* MW 2 */ + 3941 "00000100" // /* MW 1 */ + 3942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3943 "00000000" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ + 3948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3949 "00000000" // /* MW 1 */ + 3950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3951 "00000000" // /* MW 1 */ + 3952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 252 39 first + 3954 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10110111" // /* MW 3 */ + 3956 "00011100" // /* MW 2 */ + 3957 "00000100" // /* MW 1 */ + 3958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3959 "00000000" // /* MW 1 */ + 3960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3961 "00000000" // /* MW 1 */ + 3962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3963 "00000000" // /* MW 1 */ + 3964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3965 "00000000" // /* MW 1 */ + 3966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3967 "00000000" // /* MW 1 */ + 3968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3969 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 253 38 first + 3970 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3971 "01110111" // /* MW 3 */ + 3972 "00011111" // /* MW 2 */ + 3973 "00000100" // /* MW 1 */ + 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3975 "00000000" // /* MW 1 */ + 3976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3977 "00000000" // /* MW 1 */ + 3978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3979 "00000000" // /* MW 1 */ + 3980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3981 "00000000" // /* MW 1 */ + 3982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3983 "00000000" // /* MW 1 */ + 3984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 254 39 first + 3986 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3987 "11010111" // /* MW 3 */ + 3988 "00011110" // /* MW 2 */ + 3989 "00000100" // /* MW 1 */ + 3990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3991 "00000000" // /* MW 1 */ + 3992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3993 "00000000" // /* MW 1 */ + 3994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3995 "00000000" // /* MW 1 */ + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ + 3998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3999 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 4000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4001 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 40 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 4002 "00011000" // ST.s16 r3, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4003 "01110111" // /* MW 3 */ + 4004 "00001000" // /* MW 2 */ + 4005 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4009 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4010 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 4011 "00000000" // /* MW 5 */ + 4012 "00000000" // /* MW 4 */ + 4013 "11101000" // /* MW 3 */ + 4014 "00000110" // /* MW 2 */ + 4015 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.delay_slot +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4016 "00011000" // MAC r3, r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4017 "00100110" // /* MW 3 */ + 4018 "01000110" // /* MW 2 */ + 4019 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4023 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 256 38 first +.delay_slot + 4024 "10011000" // ST r6, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4025 "11010001" // /* MW 3 */ + 4026 "00000100" // /* MW 2 */ + 4027 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 257 38 first +.delay_slot + 4028 "10011000" // ST r18, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4029 "01010001" // /* MW 3 */ + 4030 "00010110" // /* MW 2 */ + 4031 "00001100" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456 +.src_ref 2 "reduce_base_c8.h" 238 44 first + 4032 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4033 "10110111" // /* MW 3 */ + 4034 "00011110" // /* MW 2 */ + 4035 "00000100" // /* MW 1 */ + 4036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4037 "00000000" // /* MW 1 */ + 4038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4039 "00000000" // /* MW 1 */ + 4040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4041 "00000000" // /* MW 1 */ + 4042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4043 "00000000" // /* MW 1 */ + 4044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4045 "00000000" // /* MW 1 */ + 4046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4047 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 239 38 first + 4048 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4049 "11110111" // /* MW 3 */ + 4050 "00011100" // /* MW 2 */ + 4051 "00000100" // /* MW 1 */ + 4052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4053 "00000000" // /* MW 1 */ + 4054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4055 "00000000" // /* MW 1 */ + 4056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4057 "00000000" // /* MW 1 */ + 4058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4059 "00000000" // /* MW 1 */ + 4060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4061 "00000000" // /* MW 1 */ + 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4063 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 240 39 first + 4064 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4065 "11110111" // /* MW 3 */ + 4066 "00011110" // /* MW 2 */ + 4067 "00000100" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4073 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 94 + 4074 "00011000" // LDA r3, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4075 "01110001" // /* MW 3 */ + 4076 "11111100" // /* MW 2 */ + 4077 "00000111" // /* MW 1 */ + 4078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4079 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 4080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4081 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 38 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 4082 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4083 "00110111" // /* MW 3 */ + 4084 "00011100" // /* MW 2 */ + 4085 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4087 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4088 "01000100" // MOVXM r1, #65504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4089 "11000000" // /* MW 5 */ + 4090 "10111111" // /* MW 4 */ + 4091 "11110000" // /* MW 3 */ + 4092 "00000000" // /* MW 2 */ + 4093 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4094 "10011000" // ADD r2, r1, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4095 "10100000" // /* MW 3 */ + 4096 "01000101" // /* MW 2 */ + 4097 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 94 +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4098 "01100100" // MAC r1, r1, r3, r2; MOV r1, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4099 "01000001" // /* MW 5 */ + 4100 "10100000" // /* MW 4 */ + 4101 "11000000" // /* MW 3 */ + 4102 "01000100" // /* MW 2 */ + 4103 "00011000" // /* MW 1 */ + 4104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4105 "00000000" // /* MW 1 */ + 4106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4107 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 242 39 first + 4108 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4109 "11010111" // /* MW 3 */ + 4110 "00011110" // /* MW 2 */ + 4111 "00000100" // /* MW 1 */ + 4112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4113 "00000000" // /* MW 1 */ + 4114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4115 "00000000" // /* MW 1 */ + 4116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4117 "00000000" // /* MW 1 */ + 4118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4119 "00000000" // /* MW 1 */ + 4120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4121 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 40 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 4122 "10111000" // MOV m0, #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4123 "11011000" // /* MW 3 */ + 4124 "00001111" // /* MW 2 */ + 4125 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 40 first +.aggressive_scheduled_block_id 8 +.noswbrkpt + 4126 "00011000" // ST.s16 r5, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4127 "10110111" // /* MW 3 */ + 4128 "00001000" // /* MW 2 */ + 4129 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4131 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4133 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4134 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 4135 "00000000" // /* MW 5 */ + 4136 "00000000" // /* MW 4 */ + 4137 "11101000" // /* MW 3 */ + 4138 "00000110" // /* MW 2 */ + 4139 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 42 +.src_ref 2 "reduce_base_c8.h" 243 42 +.src_ref 2 "reduce_base_c8.h" 243 91 +.src_ref 2 "reduce_base_c8.h" 243 91 +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4140 "01100100" // MSC r5, r5, r22, r4; MOV r5, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4141 "01000001" // /* MW 5 */ + 4142 "10100000" // /* MW 4 */ + 4143 "11000010" // /* MW 3 */ + 4144 "01001001" // /* MW 2 */ + 4145 "10110001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4149 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 244 38 first +.delay_slot + 4150 "10011000" // ST r20, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "10010001" // /* MW 3 */ + 4152 "00000110" // /* MW 2 */ + 4153 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 245 38 first +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 4154 "00111010" // ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4155 "01111001" // /* MW 9 */ + 4156 "10001110" // /* MW 8 */ + 4157 "11010000" // /* MW 7 */ + 4158 "10001011" // /* MW 6 */ + 4159 "10100000" // /* MW 5 */ + 4160 "00000001" // /* MW 4 */ + 4161 "00110000" // /* MW 3 */ + 4162 "11000110" // /* MW 2 */ +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0 + 4163 "10000010" // /* MW 1 */ +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0 +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.function pad_3d<(pad_3d_mode)0, bfloat16, 1> _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.src_ref 3 "pad_3d.h" 266 first +.src_ref 3 "pad_3d.h" 465 37 first +.src_ref 3 "pad_3d.h" 468 21 first +.src_ref 3 "pad_3d.h" 471 29 +.src_ref 3 "pad_3d.h" 479 21 +.function_start + 4176 "10111010" // LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4177 "01011000" // /* MW 9 */ + 4178 "11101000" // /* MW 8 */ + 4179 "10000111" // /* MW 7 */ + 4180 "11001000" // /* MW 6 */ + 4181 "01000111" // /* MW 5 */ + 4182 "00111110" // /* MW 4 */ + 4183 "11010000" // /* MW 3 */ + 4184 "10000010" // /* MW 2 */ + 4185 "01000010" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 469 21 first +.src_ref 3 "pad_3d.h" 478 21 +.src_ref 3 "pad_3d.h" 499 52 +.src_ref 3 "pad_3d.h" 511 25 + 4186 "10111010" // LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4187 "01011000" // /* MW 9 */ + 4188 "00000110" // /* MW 8 */ + 4189 "00001000" // /* MW 7 */ + 4190 "10101010" // /* MW 6 */ + 4191 "00100111" // /* MW 5 */ + 4192 "00111110" // /* MW 4 */ + 4193 "11010000" // /* MW 3 */ + 4194 "10000110" // /* MW 2 */ + 4195 "01000101" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 470 21 first +.src_ref 3 "pad_3d.h" 486 26 +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 26 +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 517 22 + 4196 "10111010" // LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4197 "01111000" // /* MW 9 */ + 4198 "01100000" // /* MW 8 */ + 4199 "01101000" // /* MW 7 */ + 4200 "00001000" // /* MW 6 */ + 4201 "10000000" // /* MW 5 */ + 4202 "00000001" // /* MW 4 */ + 4203 "11010000" // /* MW 3 */ + 4204 "10010110" // /* MW 2 */ + 4205 "01001111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 471 29 first + 4206 "10011000" // LDA.s16 r18, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4207 "01010010" // /* MW 3 */ + 4208 "00101010" // /* MW 2 */ + 4209 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 472 25 first + 4210 "10011000" // LDA r6, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "11010110" // /* MW 3 */ + 4212 "00011100" // /* MW 2 */ + 4213 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 473 26 first + 4214 "10011000" // LDA r7, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "11110110" // /* MW 3 */ + 4216 "00101100" // /* MW 2 */ + 4217 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 475 24 first + 4218 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4219 "00110110" // /* MW 3 */ + 4220 "00000110" // /* MW 2 */ + 4221 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 479 21 first + 4222 "10011000" // ASHL r19, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4223 "01001110" // /* MW 3 */ + 4224 "00100110" // /* MW 2 */ + 4225 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 477 23 first + 4226 "10011000" // LDA r4, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4227 "10010110" // /* MW 3 */ + 4228 "00100100" // /* MW 2 */ + 4229 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 478 21 first + 4230 "10011000" // ASHL r20, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4231 "00101110" // /* MW 3 */ + 4232 "01101000" // /* MW 2 */ + 4233 "00010001" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 56 25 first + 4234 "11111000" // VBCST.16 x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4235 "01110010" // /* MW 3 */ + 4236 "01001001" // /* MW 2 */ + 4237 "00011000" // /* MW 1 */ + 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4239 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 485 45 first + 4240 "10011000" // MUL r18, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4241 "01001111" // /* MW 3 */ + 4242 "11100101" // /* MW 2 */ + 4243 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 485 34 + 4244 "10011000" // SUB r19, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4245 "00010001" // /* MW 3 */ + 4246 "01100111" // /* MW 2 */ + 4247 "00010000" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 998 25 first + 4248 "10011000" // MUL r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4249 "00101111" // /* MW 3 */ + 4250 "11100111" // /* MW 2 */ + 4251 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 43 first + 4252 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4253 "00101111" // /* MW 3 */ + 4254 "01100011" // /* MW 2 */ + 4255 "00010100" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 first + 4256 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4257 "00001101" // /* MW 3 */ + 4258 "11100001" // /* MW 2 */ + 4259 "00010100" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 486 26 first + 4260 "10100100" // GE r16, r24, r17; ADD.NC p2, r3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4261 "10000010" // /* MW 5 */ + 4262 "11000011" // /* MW 4 */ + 4263 "00110100" // /* MW 3 */ + 4264 "00100011" // /* MW 2 */ + 4265 "11000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4266 "10000100" // JNZ r16, #4416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4416 delay_slots=5 */ + 4267 "00000001" // /* MW 5 */ + 4268 "01000000" // /* MW 4 */ + 4269 "10100000" // /* MW 3 */ + 4270 "00001000" // /* MW 2 */ + 4271 "10000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 487 22 +.delay_slot + 4272 "11111000" // VMOV bmll0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "10010010" // /* MW 3 */ + 4274 "00000000" // /* MW 2 */ + 4275 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4283 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4284 "01000100" // MOVXM ls, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4285 "01100000" // /* MW 5 */ + 4286 "11100010" // /* MW 4 */ + 4287 "00010001" // /* MW 3 */ + 4288 "00000000" // /* MW 2 */ + 4289 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4290 "01000100" // MOVXM le, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4291 "01100000" // /* MW 5 */ + 4292 "11100010" // /* MW 4 */ + 4293 "00010110" // /* MW 3 */ + 4294 "00000000" // /* MW 2 */ + 4295 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4296 "00000010" // NOPS; MOV lc, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4297 "01110000" // /* MW 7 */ + 4298 "01010000" // /* MW 6 */ + 4299 "10111100" // /* MW 5 */ + 4300 "00000010" // /* MW 4 */ + 4301 "01100000" // /* MW 3 */ + 4302 "00101011" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ + 4304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "10100101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00100000" // /* MW 5 */ + 4316 "00000000" // /* MW 4 */ + 4317 "11110000" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "10100101" // /* MW 12 */ + 4325 "00000001" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ + 4336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00100000" // /* MW 5 */ + 4348 "00000000" // /* MW 4 */ + 4349 "11110000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ + 4352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "01011011" // /* MW 7 */ + 4362 "00000001" // /* MW 6 */ + 4363 "00100000" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ + 4368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "10100101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "01011011" // /* MW 7 */ + 4378 "00000001" // /* MW 6 */ + 4379 "00100000" // /* MW 5 */ + 4380 "00000000" // /* MW 4 */ + 4381 "11110000" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ + 4384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "10100101" // /* MW 12 */ + 4389 "00000001" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "01011011" // /* MW 7 */ + 4394 "00000001" // /* MW 6 */ + 4395 "00100000" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224 +.src_ref 3 "pad_3d.h" 487 22 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4400 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4401 "00000000" // /* MW 15 */ + 4402 "00000000" // /* MW 14 */ + 4403 "01111000" // /* MW 13 */ + 4404 "10100101" // /* MW 12 */ + 4405 "00000001" // /* MW 11 */ + 4406 "00000000" // /* MW 10 */ + 4407 "00000000" // /* MW 9 */ + 4408 "10000000" // /* MW 8 */ + 4409 "00000110" // /* MW 7 */ + 4410 "00011100" // /* MW 6 */ + 4411 "00100010" // /* MW 5 */ + 4412 "00000000" // /* MW 4 */ + 4413 "11110000" // /* MW 3 */ + 4414 "00101100" // /* MW 2 */ + 4415 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240 +.src_ref 3 "pad_3d.h" 495 21 +.src_ref 3 "pad_3d.h" 495 40 first +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 38 first +.loop_nesting 0 + 4416 "10111010" // MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4417 "10101000" // /* MW 9 */ + 4418 "11001100" // /* MW 8 */ + 4419 "00101001" // /* MW 7 */ + 4420 "11111110" // /* MW 6 */ + 4421 "00000000" // /* MW 5 */ + 4422 "00001011" // /* MW 4 */ + 4423 "00000000" // /* MW 3 */ + 4424 "10000110" // /* MW 2 */ + 4425 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 40 +.src_ref 3 "pad_3d.h" 496 29 first + 4426 "00100100" // SUB r17, r0, r17; ADD.NC dn1, r7, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4427 "11111111" // /* MW 5 */ + 4428 "10000111" // /* MW 4 */ + 4429 "00110010" // /* MW 3 */ + 4430 "01100010" // /* MW 2 */ + 4431 "00000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 21 first + 4432 "10011000" // LSHL r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4433 "01101101" // /* MW 3 */ + 4434 "01100010" // /* MW 2 */ + 4435 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 58 +.src_ref 3 "pad_3d.h" 498 23 first + 4436 "00100100" // SUB r17, r0, r7; ADD.NC m1, r17, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4437 "00010000" // /* MW 5 */ + 4438 "00010001" // /* MW 4 */ + 4439 "00110010" // /* MW 3 */ + 4440 "01001110" // /* MW 2 */ + 4441 "00000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 45 first + 4442 "10011000" // MUL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4443 "00001111" // /* MW 3 */ + 4444 "11100001" // /* MW 2 */ + 4445 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 498 10 first + 4446 "10011000" // LSHL r6, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4447 "01101101" // /* MW 3 */ + 4448 "01001100" // /* MW 2 */ + 4449 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 52 first + 4450 "10100100" // ASHL r6, r16, r2; ADD.NC p2, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4451 "00110010" // /* MW 5 */ + 4452 "11000011" // /* MW 4 */ + 4453 "11010100" // /* MW 3 */ + 4454 "10000101" // /* MW 2 */ + 4455 "10000001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 26 + 4456 "10011000" // GE r7, r24, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4457 "01101001" // /* MW 3 */ + 4458 "00001110" // /* MW 2 */ + 4459 "00010110" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 4 + 4460 "10000100" // JNZ r7, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */ + 4461 "00000001" // /* MW 5 */ + 4462 "01000000" // /* MW 4 */ + 4463 "00001000" // /* MW 3 */ + 4464 "00001001" // /* MW 2 */ + 4465 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4475 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 499 4 + 4476 "10111010" // MOVA dc1, #0; MOVXM ls, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4477 "00010000" // /* MW 9 */ + 4478 "00000000" // /* MW 8 */ + 4479 "01111001" // /* MW 7 */ + 4480 "00000100" // /* MW 6 */ + 4481 "00000000" // /* MW 5 */ + 4482 "00000000" // /* MW 4 */ + 4483 "10000000" // /* MW 3 */ + 4484 "00000111" // /* MW 2 */ + 4485 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 499 4 + 4486 "10111010" // MOVA dj1, #16; MOVXM le, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4487 "00010000" // /* MW 9 */ + 4488 "00000000" // /* MW 8 */ + 4489 "10111001" // /* MW 7 */ + 4490 "00000101" // /* MW 6 */ + 4491 "00000000" // /* MW 5 */ + 4492 "00000000" // /* MW 4 */ + 4493 "10000000" // /* MW 3 */ + 4494 "00000110" // /* MW 2 */ + 4495 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 4 + 4496 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4497 "00000000" // /* MW 15 */ + 4498 "00000000" // /* MW 14 */ + 4499 "01111000" // /* MW 13 */ + 4500 "10010000" // /* MW 12 */ + 4501 "10111001" // /* MW 11 */ + 4502 "00000010" // /* MW 10 */ + 4503 "00000000" // /* MW 9 */ + 4504 "00000000" // /* MW 8 */ + 4505 "01011011" // /* MW 7 */ + 4506 "00000001" // /* MW 6 */ + 4507 "00100000" // /* MW 5 */ + 4508 "00000000" // /* MW 4 */ + 4509 "11110000" // /* MW 3 */ + 4510 "00101100" // /* MW 2 */ + 4511 "00000000" // /* MW 1 */ + 4512 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4513 "00000000" // /* MW 15 */ + 4514 "00000000" // /* MW 14 */ + 4515 "01111000" // /* MW 13 */ + 4516 "10100101" // /* MW 12 */ + 4517 "00000001" // /* MW 11 */ + 4518 "00000000" // /* MW 10 */ + 4519 "00000000" // /* MW 9 */ + 4520 "00000000" // /* MW 8 */ + 4521 "01011011" // /* MW 7 */ + 4522 "00000001" // /* MW 6 */ + 4523 "00100000" // /* MW 5 */ + 4524 "00000000" // /* MW 4 */ + 4525 "11110000" // /* MW 3 */ + 4526 "00101100" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ + 4528 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4529 "00000000" // /* MW 15 */ + 4530 "00000000" // /* MW 14 */ + 4531 "01111000" // /* MW 13 */ + 4532 "10100101" // /* MW 12 */ + 4533 "00000001" // /* MW 11 */ + 4534 "00000000" // /* MW 10 */ + 4535 "00000000" // /* MW 9 */ + 4536 "00000000" // /* MW 8 */ + 4537 "01011011" // /* MW 7 */ + 4538 "00000001" // /* MW 6 */ + 4539 "00100000" // /* MW 5 */ + 4540 "00000000" // /* MW 4 */ + 4541 "11110000" // /* MW 3 */ + 4542 "00101100" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ + 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4545 "00000000" // /* MW 15 */ + 4546 "00000000" // /* MW 14 */ + 4547 "01111000" // /* MW 13 */ + 4548 "10100101" // /* MW 12 */ + 4549 "00000001" // /* MW 11 */ + 4550 "00000000" // /* MW 10 */ + 4551 "00000000" // /* MW 9 */ + 4552 "00000000" // /* MW 8 */ + 4553 "01011011" // /* MW 7 */ + 4554 "00000001" // /* MW 6 */ + 4555 "00100000" // /* MW 5 */ + 4556 "00000000" // /* MW 4 */ + 4557 "11110000" // /* MW 3 */ + 4558 "00101100" // /* MW 2 */ + 4559 "00000000" // /* MW 1 */ + 4560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4561 "00000000" // /* MW 15 */ + 4562 "00000000" // /* MW 14 */ + 4563 "01111000" // /* MW 13 */ + 4564 "10100101" // /* MW 12 */ + 4565 "00000001" // /* MW 11 */ + 4566 "00000000" // /* MW 10 */ + 4567 "00000000" // /* MW 9 */ + 4568 "00000000" // /* MW 8 */ + 4569 "01011011" // /* MW 7 */ + 4570 "00000001" // /* MW 6 */ + 4571 "00100000" // /* MW 5 */ + 4572 "00000000" // /* MW 4 */ + 4573 "11110000" // /* MW 3 */ + 4574 "00101100" // /* MW 2 */ + 4575 "00000000" // /* MW 1 */ + 4576 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4577 "00000000" // /* MW 15 */ + 4578 "00000000" // /* MW 14 */ + 4579 "01111000" // /* MW 13 */ + 4580 "10100101" // /* MW 12 */ + 4581 "00000001" // /* MW 11 */ + 4582 "00000000" // /* MW 10 */ + 4583 "00000000" // /* MW 9 */ + 4584 "00000000" // /* MW 8 */ + 4585 "01011011" // /* MW 7 */ + 4586 "00000001" // /* MW 6 */ + 4587 "00100000" // /* MW 5 */ + 4588 "00000000" // /* MW 4 */ + 4589 "11110000" // /* MW 3 */ + 4590 "00101100" // /* MW 2 */ + 4591 "00000000" // /* MW 1 */ + 4592 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4593 "00000000" // /* MW 15 */ + 4594 "00000000" // /* MW 14 */ + 4595 "01111000" // /* MW 13 */ + 4596 "10100101" // /* MW 12 */ + 4597 "00000001" // /* MW 11 */ + 4598 "00000000" // /* MW 10 */ + 4599 "00000000" // /* MW 9 */ + 4600 "00000000" // /* MW 8 */ + 4601 "01011011" // /* MW 7 */ + 4602 "00000001" // /* MW 6 */ + 4603 "00100000" // /* MW 5 */ + 4604 "00000000" // /* MW 4 */ + 4605 "11110000" // /* MW 3 */ + 4606 "00101100" // /* MW 2 */ + 4607 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4608 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4609 "00000000" // /* MW 15 */ + 4610 "00000000" // /* MW 14 */ + 4611 "01111000" // /* MW 13 */ + 4612 "10100101" // /* MW 12 */ + 4613 "00000001" // /* MW 11 */ + 4614 "00000000" // /* MW 10 */ + 4615 "00000000" // /* MW 9 */ + 4616 "00000000" // /* MW 8 */ + 4617 "00101110" // /* MW 7 */ + 4618 "00110000" // /* MW 6 */ + 4619 "00100010" // /* MW 5 */ + 4620 "00000000" // /* MW 4 */ + 4621 "11110000" // /* MW 3 */ + 4622 "00101100" // /* MW 2 */ + 4623 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448 +.src_ref 3 "pad_3d.h" 514 39 +.loop_nesting 0 + 4624 "01000100" // MOVXM r7, #2147483640 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "11110000" // /* MW 5 */ + 4626 "10111111" // /* MW 4 */ + 4627 "11110011" // /* MW 3 */ + 4628 "11111111" // /* MW 2 */ + 4629 "01111111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 39 first + 4630 "10011000" // AND r7, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "01000100" // /* MW 3 */ + 4632 "11001110" // /* MW 2 */ + 4633 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 35 + 4634 "10011000" // SUB r7, r5, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "01110001" // /* MW 3 */ + 4636 "01001110" // /* MW 2 */ + 4637 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 + 4638 "10011000" // MUL r7, r7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4639 "00001111" // /* MW 3 */ + 4640 "11001110" // /* MW 2 */ + 4641 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 511 25 first + 4642 "10011000" // ASHL r2, r4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4643 "00101110" // /* MW 3 */ + 4644 "00000100" // /* MW 2 */ + 4645 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 36 first + 4646 "10011000" // SUB r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4647 "01000001" // /* MW 3 */ + 4648 "01001000" // /* MW 2 */ + 4649 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 515 30 first + 4650 "10011000" // MUL r2, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4651 "00001111" // /* MW 3 */ + 4652 "10000100" // /* MW 2 */ + 4653 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 28 first + 4654 "10011000" // MUL r0, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4655 "00001111" // /* MW 3 */ + 4656 "00000000" // /* MW 2 */ + 4657 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 514 55 +.src_ref 3 "pad_3d.h" 517 39 first + 4658 "01100100" // MUL r1, r1, r2; MOV r6, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4659 "00000101" // /* MW 5 */ + 4660 "00100000" // /* MW 4 */ + 4661 "11110011" // /* MW 3 */ + 4662 "01000101" // /* MW 2 */ + 4663 "00001000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 first + 4664 "10011000" // LSHL r0, r0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4665 "01101101" // /* MW 3 */ + 4666 "00000000" // /* MW 2 */ + 4667 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 517 22 first + 4668 "10100100" // GE r0, r24, r1; ADD.NC p2, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4669 "00000010" // /* MW 5 */ + 4670 "11000011" // /* MW 4 */ + 4671 "00110100" // /* MW 3 */ + 4672 "00000011" // /* MW 2 */ + 4673 "11000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 517 4 + 4674 "10000100" // JNZ r0, #4832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4675 "00000001" // /* MW 5 */ + 4676 "01000000" // /* MW 4 */ + 4677 "01110000" // /* MW 3 */ + 4678 "00001001" // /* MW 2 */ + 4679 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4683 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 first +.delay_slot + 4684 "10011000" // LSHL r4, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4685 "01101101" // /* MW 3 */ + 4686 "11001000" // /* MW 2 */ + 4687 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 +.delay_slot + 4688 "00011000" // ADD.NC m0, r4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4689 "00001000" // /* MW 3 */ + 4690 "00000010" // /* MW 2 */ + 4691 "00011000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 515 37 first +.delay_slot + 4692 "10011000" // ADD.NC dn0, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "01111111" // /* MW 3 */ + 4694 "01000001" // /* MW 2 */ + 4695 "00011000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 517 4 first + 4696 "10111010" // MOVA dc0, #0; MOVXM ls, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4697 "00010000" // /* MW 9 */ + 4698 "01101000" // /* MW 8 */ + 4699 "01111001" // /* MW 7 */ + 4700 "00000100" // /* MW 6 */ + 4701 "00000000" // /* MW 5 */ + 4702 "00000000" // /* MW 4 */ + 4703 "10000000" // /* MW 3 */ + 4704 "00000011" // /* MW 2 */ + 4705 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 517 4 + 4706 "10111010" // MOVA dj0, #16; MOVXM le, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4707 "00010000" // /* MW 9 */ + 4708 "01101000" // /* MW 8 */ + 4709 "10111001" // /* MW 7 */ + 4710 "00000101" // /* MW 6 */ + 4711 "00000000" // /* MW 5 */ + 4712 "00000000" // /* MW 4 */ + 4713 "10000000" // /* MW 3 */ + 4714 "00000010" // /* MW 2 */ + 4715 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 517 4 + 4716 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4717 "10100000" // /* MW 3 */ + 4718 "01110000" // /* MW 2 */ + 4719 "00011101" // /* MW 1 */ + 4720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4721 "00000000" // /* MW 15 */ + 4722 "00000000" // /* MW 14 */ + 4723 "01111000" // /* MW 13 */ + 4724 "10100101" // /* MW 12 */ + 4725 "00000001" // /* MW 11 */ + 4726 "00000000" // /* MW 10 */ + 4727 "00000000" // /* MW 9 */ + 4728 "00000000" // /* MW 8 */ + 4729 "01011011" // /* MW 7 */ + 4730 "00000001" // /* MW 6 */ + 4731 "00100000" // /* MW 5 */ + 4732 "00000000" // /* MW 4 */ + 4733 "11110000" // /* MW 3 */ + 4734 "00101100" // /* MW 2 */ + 4735 "00000000" // /* MW 1 */ + 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4737 "00000000" // /* MW 15 */ + 4738 "00000000" // /* MW 14 */ + 4739 "01111000" // /* MW 13 */ + 4740 "10100101" // /* MW 12 */ + 4741 "00000001" // /* MW 11 */ + 4742 "00000000" // /* MW 10 */ + 4743 "00000000" // /* MW 9 */ + 4744 "00000000" // /* MW 8 */ + 4745 "01011011" // /* MW 7 */ + 4746 "00000001" // /* MW 6 */ + 4747 "00100000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "11110000" // /* MW 3 */ + 4750 "00101100" // /* MW 2 */ + 4751 "00000000" // /* MW 1 */ + 4752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "00000000" // /* MW 15 */ + 4754 "00000000" // /* MW 14 */ + 4755 "01111000" // /* MW 13 */ + 4756 "10100101" // /* MW 12 */ + 4757 "00000001" // /* MW 11 */ + 4758 "00000000" // /* MW 10 */ + 4759 "00000000" // /* MW 9 */ + 4760 "00000000" // /* MW 8 */ + 4761 "01011011" // /* MW 7 */ + 4762 "00000001" // /* MW 6 */ + 4763 "00100000" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ + 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00000000" // /* MW 15 */ + 4770 "00000000" // /* MW 14 */ + 4771 "01111000" // /* MW 13 */ + 4772 "10100101" // /* MW 12 */ + 4773 "00000001" // /* MW 11 */ + 4774 "00000000" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ + 4784 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4785 "00000000" // /* MW 15 */ + 4786 "00000000" // /* MW 14 */ + 4787 "01111000" // /* MW 13 */ + 4788 "10100101" // /* MW 12 */ + 4789 "00000001" // /* MW 11 */ + 4790 "00000000" // /* MW 10 */ + 4791 "00000000" // /* MW 9 */ + 4792 "00000000" // /* MW 8 */ + 4793 "01011011" // /* MW 7 */ + 4794 "00000001" // /* MW 6 */ + 4795 "00100000" // /* MW 5 */ + 4796 "00000000" // /* MW 4 */ + 4797 "11110000" // /* MW 3 */ + 4798 "00101100" // /* MW 2 */ + 4799 "00000000" // /* MW 1 */ + 4800 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4801 "00000000" // /* MW 15 */ + 4802 "00000000" // /* MW 14 */ + 4803 "01111000" // /* MW 13 */ + 4804 "10100101" // /* MW 12 */ + 4805 "00000001" // /* MW 11 */ + 4806 "00000000" // /* MW 10 */ + 4807 "00000000" // /* MW 9 */ + 4808 "00000000" // /* MW 8 */ + 4809 "01011011" // /* MW 7 */ + 4810 "00000001" // /* MW 6 */ + 4811 "00100000" // /* MW 5 */ + 4812 "00000000" // /* MW 4 */ + 4813 "11110000" // /* MW 3 */ + 4814 "00101100" // /* MW 2 */ + 4815 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4816 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4817 "00000000" // /* MW 15 */ + 4818 "00000000" // /* MW 14 */ + 4819 "01111000" // /* MW 13 */ + 4820 "10100101" // /* MW 12 */ + 4821 "00000001" // /* MW 11 */ + 4822 "00000000" // /* MW 10 */ + 4823 "00000000" // /* MW 9 */ + 4824 "00000000" // /* MW 8 */ + 4825 "00101110" // /* MW 7 */ + 4826 "00010000" // /* MW 6 */ + 4827 "00100010" // /* MW 5 */ + 4828 "00000000" // /* MW 4 */ + 4829 "11110000" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656 +.src_ref 3 "pad_3d.h" 282 first +.loop_nesting 0 + 4832 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4833 "00000000" // /* MW 3 */ + 4834 "00101000" // /* MW 2 */ + 4835 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4837 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4839 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4841 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4843 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0 + 4845 "00000000" // /* MW 1 */ +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.function run _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 2 "reduce_base_c8.h" 352 30 +.src_ref 2 "reduce_base_c8.h" 362 first +.src_ref 2 "reduce_base_c8.h" 365 18 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 +.function_start + 4848 "11111000" // MOV r3, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "11000000" // /* MW 3 */ + 4850 "11010100" // /* MW 2 */ + 4851 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 365 18 first + 4852 "00000010" // MOVS dn3, p7; ADD.NC p7, r3, #44 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4853 "00000000" // /* MW 7 */ + 4854 "11001011" // /* MW 6 */ + 4855 "10110000" // /* MW 5 */ + 4856 "00000011" // /* MW 4 */ + 4857 "01100000" // /* MW 3 */ + 4858 "10010001" // /* MW 2 */ + 4859 "01101011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 367 19 first + 4860 "10011000" // LDA.u16 r0, [p7], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4861 "00011010" // /* MW 3 */ + 4862 "10001100" // /* MW 2 */ + 4863 "00000111" // /* MW 1 */ + 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4865 "00000000" // /* MW 1 */ + 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4867 "00000000" // /* MW 1 */ + 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4869 "00000000" // /* MW 1 */ + 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4871 "00000000" // /* MW 1 */ + 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4873 "00000000" // /* MW 1 */ + 4874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4875 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 367 12 +.src_ref 2 "reduce_base_c8.h" 367 19 + 4876 "10000100" // JNZ r0, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */ + 4877 "00000001" // /* MW 5 */ + 4878 "01000000" // /* MW 4 */ + 4879 "11110000" // /* MW 3 */ + 4880 "00001001" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 101 18 +.src_ref 5 "broadcast.hpp" 80 25 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 2 "reduce_base_c8.h" 372 34 +.delay_slot + 4882 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4883 "00000001" // /* MW 3 */ + 4884 "00100000" // /* MW 2 */ + 4885 "00010000" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first +.delay_slot + 4886 "11111000" // VBCST.32 x1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4887 "01110010" // /* MW 3 */ + 4888 "11000010" // /* MW 2 */ + 4889 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 362 +.delay_slot + 4894 "11000100" // PADDXM [sp], #256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4895 "00000001" // /* MW 5 */ + 4896 "00000000" // /* MW 4 */ + 4897 "00000000" // /* MW 3 */ + 4898 "00100000" // /* MW 2 */ + 4899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 43 + 4900 "10111000" // MOV dj2, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "01001000" // /* MW 3 */ + 4902 "10000000" // /* MW 2 */ + 4903 "00011010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 43 first + 4904 "10011000" // LDA r1, [p2, dj2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00110110" // /* MW 3 */ + 4906 "01000000" // /* MW 2 */ + 4907 "00000010" // /* MW 1 */ + 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4909 "00000000" // /* MW 1 */ + 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4911 "00000000" // /* MW 1 */ + 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4913 "00000000" // /* MW 1 */ + 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4915 "00000000" // /* MW 1 */ + 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4917 "00000000" // /* MW 1 */ + 4918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4919 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 34 + 4920 "10011000" // GE r2, r16, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4921 "00011001" // /* MW 3 */ + 4922 "00000100" // /* MW 2 */ + 4923 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4924 "10000100" // JNZ r2, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */ + 4925 "00000001" // /* MW 5 */ + 4926 "01000000" // /* MW 4 */ + 4927 "11110000" // /* MW 3 */ + 4928 "00001001" // /* MW 2 */ + 4929 "00010000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base_c8.h" 374 29 +.delay_slot + 4930 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4931 "10010010" // /* MW 3 */ + 4932 "00000010" // /* MW 2 */ + 4933 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4941 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base_c8.h" 372 12 +.src_ref 2 "reduce_base_c8.h" 374 29 + 4942 "01110110" // NOPA; MOVS p3, p1; MOVXM ls, #5072 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4943 "00010000" // /* MW 11 */ + 4944 "11101000" // /* MW 10 */ + 4945 "01111001" // /* MW 9 */ + 4946 "00000100" // /* MW 8 */ + 4947 "00000000" // /* MW 7 */ + 4948 "00000000" // /* MW 6 */ + 4949 "10001011" // /* MW 5 */ + 4950 "10000100" // /* MW 4 */ + 4951 "11110011" // /* MW 3 */ + 4952 "00101100" // /* MW 2 */ + 4953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4954 "01000100" // MOVXM le, #5072 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4955 "10100000" // /* MW 5 */ + 4956 "11100111" // /* MW 4 */ + 4957 "00010110" // /* MW 3 */ + 4958 "00000000" // /* MW 2 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4960 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4961 "00000000" // /* MW 15 */ + 4962 "00000000" // /* MW 14 */ + 4963 "01111000" // /* MW 13 */ + 4964 "01010000" // /* MW 12 */ + 4965 "10111000" // /* MW 11 */ + 4966 "00000010" // /* MW 10 */ + 4967 "00000000" // /* MW 9 */ + 4968 "00000000" // /* MW 8 */ + 4969 "01011011" // /* MW 7 */ + 4970 "00000001" // /* MW 6 */ + 4971 "00100000" // /* MW 5 */ + 4972 "00000000" // /* MW 4 */ + 4973 "11110000" // /* MW 3 */ + 4974 "00101100" // /* MW 2 */ + 4975 "00000000" // /* MW 1 */ + 4976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4977 "00000000" // /* MW 15 */ + 4978 "00000000" // /* MW 14 */ + 4979 "01111000" // /* MW 13 */ + 4980 "10100101" // /* MW 12 */ + 4981 "00000001" // /* MW 11 */ + 4982 "00000000" // /* MW 10 */ + 4983 "00000000" // /* MW 9 */ + 4984 "00000000" // /* MW 8 */ + 4985 "01011011" // /* MW 7 */ + 4986 "00000001" // /* MW 6 */ + 4987 "00100000" // /* MW 5 */ + 4988 "00000000" // /* MW 4 */ + 4989 "11110000" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ + 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4993 "00000000" // /* MW 15 */ + 4994 "00000000" // /* MW 14 */ + 4995 "01111000" // /* MW 13 */ + 4996 "10100101" // /* MW 12 */ + 4997 "00000001" // /* MW 11 */ + 4998 "00000000" // /* MW 10 */ + 4999 "00000000" // /* MW 9 */ + 5000 "00000000" // /* MW 8 */ + 5001 "01011011" // /* MW 7 */ + 5002 "00000001" // /* MW 6 */ + 5003 "00100000" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11110000" // /* MW 3 */ + 5006 "00101100" // /* MW 2 */ + 5007 "00000000" // /* MW 1 */ + 5008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5009 "00000000" // /* MW 15 */ + 5010 "00000000" // /* MW 14 */ + 5011 "01111000" // /* MW 13 */ + 5012 "10100101" // /* MW 12 */ + 5013 "00000001" // /* MW 11 */ + 5014 "00000000" // /* MW 10 */ + 5015 "00000000" // /* MW 9 */ + 5016 "00000000" // /* MW 8 */ + 5017 "01011011" // /* MW 7 */ + 5018 "00000001" // /* MW 6 */ + 5019 "00100000" // /* MW 5 */ + 5020 "00000000" // /* MW 4 */ + 5021 "11110000" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ + 5024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5025 "00000000" // /* MW 15 */ + 5026 "00000000" // /* MW 14 */ + 5027 "01111000" // /* MW 13 */ + 5028 "10100101" // /* MW 12 */ + 5029 "00000001" // /* MW 11 */ + 5030 "00000000" // /* MW 10 */ + 5031 "00000000" // /* MW 9 */ + 5032 "00000000" // /* MW 8 */ + 5033 "01011011" // /* MW 7 */ + 5034 "00000001" // /* MW 6 */ + 5035 "00100000" // /* MW 5 */ + 5036 "00000000" // /* MW 4 */ + 5037 "11110000" // /* MW 3 */ + 5038 "00101100" // /* MW 2 */ + 5039 "00000000" // /* MW 1 */ + 5040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5041 "00000000" // /* MW 15 */ + 5042 "00000000" // /* MW 14 */ + 5043 "01111000" // /* MW 13 */ + 5044 "10100101" // /* MW 12 */ + 5045 "00000001" // /* MW 11 */ + 5046 "00000000" // /* MW 10 */ + 5047 "00000000" // /* MW 9 */ + 5048 "00000000" // /* MW 8 */ + 5049 "01011011" // /* MW 7 */ + 5050 "00000001" // /* MW 6 */ + 5051 "00100000" // /* MW 5 */ + 5052 "00000000" // /* MW 4 */ + 5053 "11110000" // /* MW 3 */ + 5054 "00101100" // /* MW 2 */ + 5055 "00000000" // /* MW 1 */ + 5056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5057 "00000000" // /* MW 15 */ + 5058 "00000000" // /* MW 14 */ + 5059 "01111000" // /* MW 13 */ + 5060 "10100101" // /* MW 12 */ + 5061 "00000001" // /* MW 11 */ + 5062 "00000000" // /* MW 10 */ + 5063 "00000000" // /* MW 9 */ + 5064 "00000000" // /* MW 8 */ + 5065 "01011011" // /* MW 7 */ + 5066 "00000001" // /* MW 6 */ + 5067 "00100000" // /* MW 5 */ + 5068 "00000000" // /* MW 4 */ + 5069 "11110000" // /* MW 3 */ + 5070 "00101100" // /* MW 2 */ + 5071 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224 +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base_c8.h" 374 29 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 5072 "11100001" // NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5073 "00000000" // /* MW 15 */ + 5074 "00000000" // /* MW 14 */ + 5075 "01111000" // /* MW 13 */ + 5076 "10100101" // /* MW 12 */ + 5077 "00000001" // /* MW 11 */ + 5078 "00000000" // /* MW 10 */ + 5079 "00000000" // /* MW 9 */ + 5080 "10000000" // /* MW 8 */ + 5081 "00000110" // /* MW 7 */ + 5082 "00011101" // /* MW 6 */ + 5083 "00100011" // /* MW 5 */ + 5084 "00000000" // /* MW 4 */ + 5085 "11110000" // /* MW 3 */ + 5086 "00101100" // /* MW 2 */ + 5087 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 412 41 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.loop_nesting 0 + 5088 "10111000" // MOV m4, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5089 "01000000" // /* MW 3 */ + 5090 "00000000" // /* MW 2 */ + 5091 "00011100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 first + 5092 "10011000" // LDA.u16 r17, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5093 "00111010" // /* MW 3 */ + 5094 "10001010" // /* MW 2 */ + 5095 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 388 28 + 5096 "01010100" // LDA.s16 r22, [p7], #-2; MOV m5, #-58 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5097 "00011001" // /* MW 5 */ + 5098 "00011111" // /* MW 4 */ + 5099 "01011010" // /* MW 3 */ + 5100 "11011010" // /* MW 2 */ + 5101 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 570 33 + 5102 "01010100" // LDA.u16 r26, [p7], m5; MOV dj0, #46 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5103 "10111001" // /* MW 5 */ + 5104 "00000000" // /* MW 4 */ + 5105 "01010001" // /* MW 3 */ + 5106 "01101011" // /* MW 2 */ + 5107 "11110101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 570 33 first +.src_ref 2 "reduce_base_c8.h" 594 43 + 5108 "11010100" // LDA.s16 r20, [p7, dj0]; MOV r19, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5109 "10000001" // /* MW 5 */ + 5110 "10111101" // /* MW 4 */ + 5111 "01011001" // /* MW 3 */ + 5112 "01010010" // /* MW 2 */ + 5113 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 594 43 first + 5114 "00010100" // LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5115 "00111000" // /* MW 5 */ + 5116 "11010011" // /* MW 4 */ + 5117 "01010110" // /* MW 3 */ + 5118 "01001110" // /* MW 2 */ + 5119 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 594 43 + 5120 "10011000" // LDA.s16 r21, [p3], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5121 "10110010" // /* MW 3 */ + 5122 "11011110" // /* MW 2 */ + 5123 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 594 64 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5124 "10011000" // LDA.u16 r28, [p3], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5125 "10011010" // /* MW 3 */ + 5126 "11111111" // /* MW 2 */ + 5127 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 595 56 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5128 "00101100" // LDA.s16 r17, [p3], #6; MOVX r7, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5129 "00010010" // /* MW 5 */ + 5130 "00011100" // /* MW 4 */ + 5131 "01010000" // /* MW 3 */ + 5132 "11000110" // /* MW 2 */ + 5133 "01100111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 596 56 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5134 "10111010" // LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5135 "01011000" // /* MW 9 */ + 5136 "00000000" // /* MW 8 */ + 5137 "01100000" // /* MW 7 */ + 5138 "11001010" // /* MW 6 */ + 5139 "00100111" // /* MW 5 */ + 5140 "00111111" // /* MW 4 */ + 5141 "01010000" // /* MW 3 */ + 5142 "11001010" // /* MW 2 */ + 5143 "01111110" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 2 "reduce_base_c8.h" 388 28 first +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 33 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5144 "01110110" // LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5145 "01011000" // /* MW 11 */ + 5146 "00000001" // /* MW 10 */ + 5147 "11001000" // /* MW 9 */ + 5148 "01101100" // /* MW 8 */ + 5149 "00101001" // /* MW 7 */ + 5150 "00100011" // /* MW 6 */ + 5151 "01001011" // /* MW 5 */ + 5152 "00010000" // /* MW 4 */ + 5153 "01010010" // /* MW 3 */ + 5154 "00011110" // /* MW 2 */ + 5155 "11100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 595 75 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5156 "01110110" // LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5157 "01011000" // /* MW 11 */ + 5158 "00111100" // /* MW 10 */ + 5159 "01001000" // /* MW 9 */ + 5160 "11101100" // /* MW 8 */ + 5161 "01110011" // /* MW 7 */ + 5162 "00101100" // /* MW 6 */ + 5163 "00001011" // /* MW 5 */ + 5164 "01011010" // /* MW 4 */ + 5165 "01010010" // /* MW 3 */ + 5166 "11101111" // /* MW 2 */ + 5167 "01100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5168 "01110110" // MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5169 "01111000" // /* MW 11 */ + 5170 "11010000" // /* MW 10 */ + 5171 "00000001" // /* MW 9 */ + 5172 "01101101" // /* MW 8 */ + 5173 "01000011" // /* MW 7 */ + 5174 "00101001" // /* MW 6 */ + 5175 "10001011" // /* MW 5 */ + 5176 "10000100" // /* MW 4 */ + 5177 "10000011" // /* MW 3 */ + 5178 "00001010" // /* MW 2 */ + 5179 "00001000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5180 "10111010" // VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5181 "01111000" // /* MW 9 */ + 5182 "00010000" // /* MW 8 */ + 5183 "10000101" // /* MW 7 */ + 5184 "01101110" // /* MW 6 */ + 5185 "00110011" // /* MW 5 */ + 5186 "00100111" // /* MW 4 */ + 5187 "10110000" // /* MW 3 */ + 5188 "00010010" // /* MW 2 */ + 5189 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5190 "10111010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5191 "01111000" // /* MW 9 */ + 5192 "11010000" // /* MW 8 */ + 5193 "00000100" // /* MW 7 */ + 5194 "01101111" // /* MW 6 */ + 5195 "00110011" // /* MW 5 */ + 5196 "00101011" // /* MW 4 */ + 5197 "00110000" // /* MW 3 */ + 5198 "01000001" // /* MW 2 */ + 5199 "00010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 2 "reduce_base_c8.h" 391 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5200 "00100100" // LSHL r17, r17, r6; ADD.NC lc, r18, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5201 "11111110" // /* MW 5 */ + 5202 "11110010" // /* MW 4 */ + 5203 "10111010" // /* MW 3 */ + 5204 "01001101" // /* MW 2 */ + 5205 "10001100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 + 5206 "11100100" // LSHL r17, r18, r6; MOV dj0, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5207 "01000001" // /* MW 5 */ + 5208 "00010001" // /* MW 4 */ + 5209 "10110001" // /* MW 3 */ + 5210 "01001101" // /* MW 2 */ + 5211 "10010100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.src_ref 2 "reduce_base_c8.h" 570 24 first + 5212 "01110110" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5213 "01111000" // /* MW 11 */ + 5214 "11010000" // /* MW 10 */ + 5215 "00000100" // /* MW 9 */ + 5216 "01101100" // /* MW 8 */ + 5217 "01100011" // /* MW 7 */ + 5218 "00001110" // /* MW 6 */ + 5219 "01001011" // /* MW 5 */ + 5220 "00010000" // /* MW 4 */ + 5221 "00110000" // /* MW 3 */ + 5222 "00000001" // /* MW 2 */ + 5223 "00011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 570 24 + 5224 "01001010" // MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5225 "00111101" // /* MW 9 */ + 5226 "00110000" // /* MW 8 */ + 5227 "00010100" // /* MW 7 */ + 5228 "11100100" // /* MW 6 */ + 5229 "00100000" // /* MW 5 */ + 5230 "00000011" // /* MW 4 */ + 5231 "01100111" // /* MW 3 */ + 5232 "10000001" // /* MW 2 */ + 5233 "00001011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first + 5234 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5235 "01110010" // /* MW 9 */ + 5236 "01010000" // /* MW 8 */ + 5237 "01000100" // /* MW 7 */ + 5238 "00000010" // /* MW 6 */ + 5239 "00001011" // /* MW 5 */ + 5240 "01011011" // /* MW 4 */ + 5241 "00110100" // /* MW 3 */ + 5242 "00100001" // /* MW 2 */ + 5243 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 + 5244 "11010100" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5245 "00000001" // /* MW 5 */ + 5246 "10010011" // /* MW 4 */ + 5247 "00110011" // /* MW 3 */ + 5248 "00110001" // /* MW 2 */ + 5249 "00000011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5250 "01100010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5251 "00111101" // /* MW 7 */ + 5252 "10000000" // /* MW 6 */ + 5253 "00010001" // /* MW 5 */ + 5254 "00000100" // /* MW 4 */ + 5255 "00110000" // /* MW 3 */ + 5256 "01000001" // /* MW 2 */ + 5257 "00010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5258 "10011000" // VLDA.2D bmll1, [p3], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5259 "10010101" // /* MW 3 */ + 5260 "01010000" // /* MW 2 */ + 5261 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 391 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5264 "01011010" // MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5265 "00111101" // /* MW 9 */ + 5266 "00101000" // /* MW 8 */ + 5267 "00010000" // /* MW 7 */ + 5268 "00000010" // /* MW 6 */ + 5269 "01001100" // /* MW 5 */ + 5270 "10001111" // /* MW 4 */ + 5271 "00000000" // /* MW 3 */ + 5272 "00000000" // /* MW 2 */ + 5273 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 412 41 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5274 "11010100" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5275 "00000001" // /* MW 5 */ + 5276 "00010000" // /* MW 4 */ + 5277 "00110111" // /* MW 3 */ + 5278 "00000001" // /* MW 2 */ + 5279 "00011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5280 "11101011" // MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5281 "10000001" // /* MW 15 */ + 5282 "10100001" // /* MW 14 */ + 5283 "01111000" // /* MW 13 */ + 5284 "00000000" // /* MW 12 */ + 5285 "10000010" // /* MW 11 */ + 5286 "00001000" // /* MW 10 */ + 5287 "01000100" // /* MW 9 */ + 5288 "00000000" // /* MW 8 */ + 5289 "10001011" // /* MW 7 */ + 5290 "10000100" // /* MW 6 */ + 5291 "00100100" // /* MW 5 */ + 5292 "00000000" // /* MW 4 */ + 5293 "10000000" // /* MW 3 */ + 5294 "00000110" // /* MW 2 */ + 5295 "00001000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 391 8 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5296 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5297 "01100001" // /* MW 15 */ + 5298 "10010000" // /* MW 14 */ + 5299 "00010000" // /* MW 13 */ + 5300 "10010000" // /* MW 12 */ + 5301 "10111010" // /* MW 11 */ + 5302 "00000101" // /* MW 10 */ + 5303 "00000000" // /* MW 9 */ + 5304 "00000000" // /* MW 8 */ + 5305 "00001011" // /* MW 7 */ + 5306 "01011010" // /* MW 6 */ + 5307 "00100001" // /* MW 5 */ + 5308 "00000000" // /* MW 4 */ + 5309 "00110000" // /* MW 3 */ + 5310 "00100001" // /* MW 2 */ + 5311 "00011101" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5312 "10011000" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "10001001" // /* MW 3 */ + 5314 "00011001" // /* MW 2 */ + 5315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5316 "01100110" // VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5317 "00111101" // /* MW 11 */ + 5318 "10000000" // /* MW 10 */ + 5319 "00010001" // /* MW 9 */ + 5320 "10001110" // /* MW 8 */ + 5321 "10101101" // /* MW 7 */ + 5322 "00000000" // /* MW 6 */ + 5323 "00100000" // /* MW 5 */ + 5324 "00000000" // /* MW 4 */ + 5325 "10110000" // /* MW 3 */ + 5326 "00010010" // /* MW 2 */ + 5327 "01101010" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5328 "11100001" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5329 "00000000" // /* MW 15 */ + 5330 "00000000" // /* MW 14 */ + 5331 "01111000" // /* MW 13 */ + 5332 "10100101" // /* MW 12 */ + 5333 "00000001" // /* MW 11 */ + 5334 "00000000" // /* MW 10 */ + 5335 "00000000" // /* MW 9 */ + 5336 "00000000" // /* MW 8 */ + 5337 "01011011" // /* MW 7 */ + 5338 "00000001" // /* MW 6 */ + 5339 "00100000" // /* MW 5 */ + 5340 "00000000" // /* MW 4 */ + 5341 "00110000" // /* MW 3 */ + 5342 "01000001" // /* MW 2 */ + 5343 "00010101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5345 "00000000" // /* MW 15 */ + 5346 "00000000" // /* MW 14 */ + 5347 "01111000" // /* MW 13 */ + 5348 "10100101" // /* MW 12 */ + 5349 "00000001" // /* MW 11 */ + 5350 "00000000" // /* MW 10 */ + 5351 "00000000" // /* MW 9 */ + 5352 "00000000" // /* MW 8 */ + 5353 "01011011" // /* MW 7 */ + 5354 "00000001" // /* MW 6 */ + 5355 "00100000" // /* MW 5 */ + 5356 "00000000" // /* MW 4 */ + 5357 "11110000" // /* MW 3 */ + 5358 "00101100" // /* MW 2 */ + 5359 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5360 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5361 "01000001" // /* MW 15 */ + 5362 "10000001" // /* MW 14 */ + 5363 "01111000" // /* MW 13 */ + 5364 "10100101" // /* MW 12 */ + 5365 "00000001" // /* MW 11 */ + 5366 "00000000" // /* MW 10 */ + 5367 "00000000" // /* MW 9 */ + 5368 "00000000" // /* MW 8 */ + 5369 "01011011" // /* MW 7 */ + 5370 "00000001" // /* MW 6 */ + 5371 "00100000" // /* MW 5 */ + 5372 "00000000" // /* MW 4 */ + 5373 "11110000" // /* MW 3 */ + 5374 "00101100" // /* MW 2 */ + 5375 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5376 "11100001" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5377 "00000000" // /* MW 15 */ + 5378 "00000000" // /* MW 14 */ + 5379 "01111000" // /* MW 13 */ + 5380 "10100101" // /* MW 12 */ + 5381 "00000001" // /* MW 11 */ + 5382 "00000000" // /* MW 10 */ + 5383 "00000000" // /* MW 9 */ + 5384 "10000000" // /* MW 8 */ + 5385 "00000110" // /* MW 7 */ + 5386 "00110001" // /* MW 6 */ + 5387 "00100100" // /* MW 5 */ + 5388 "00000000" // /* MW 4 */ + 5389 "00110000" // /* MW 3 */ + 5390 "00000001" // /* MW 2 */ + 5391 "00011001" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5392 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5393 "10000001" // /* MW 15 */ + 5394 "10100001" // /* MW 14 */ + 5395 "01111000" // /* MW 13 */ + 5396 "10100101" // /* MW 12 */ + 5397 "00000001" // /* MW 11 */ + 5398 "00000000" // /* MW 10 */ + 5399 "00000000" // /* MW 9 */ + 5400 "00000000" // /* MW 8 */ + 5401 "01011011" // /* MW 7 */ + 5402 "00000001" // /* MW 6 */ + 5403 "00100000" // /* MW 5 */ + 5404 "00000000" // /* MW 4 */ + 5405 "11110000" // /* MW 3 */ + 5406 "00101100" // /* MW 2 */ + 5407 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5408 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5409 "01100001" // /* MW 15 */ + 5410 "10010000" // /* MW 14 */ + 5411 "01111000" // /* MW 13 */ + 5412 "10100101" // /* MW 12 */ + 5413 "00000001" // /* MW 11 */ + 5414 "00000000" // /* MW 10 */ + 5415 "00000000" // /* MW 9 */ + 5416 "00000000" // /* MW 8 */ + 5417 "01011011" // /* MW 7 */ + 5418 "00000001" // /* MW 6 */ + 5419 "00100000" // /* MW 5 */ + 5420 "00000000" // /* MW 4 */ + 5421 "00110000" // /* MW 3 */ + 5422 "00100001" // /* MW 2 */ + 5423 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 107 23 +.src_ref 2 "reduce_base_c8.h" 412 41 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5424 "10111010" // LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5425 "00010000" // /* MW 9 */ + 5426 "11000000" // /* MW 8 */ + 5427 "10101111" // /* MW 7 */ + 5428 "00001100" // /* MW 6 */ + 5429 "00000000" // /* MW 5 */ + 5430 "00000000" // /* MW 4 */ + 5431 "01010000" // /* MW 3 */ + 5432 "00000111" // /* MW 2 */ + 5433 "11101100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 6 "me_vmult_float_emulated.h" 107 23 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5434 "01001010" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5435 "00111101" // /* MW 9 */ + 5436 "10000000" // /* MW 8 */ + 5437 "00010001" // /* MW 7 */ + 5438 "11100010" // /* MW 6 */ + 5439 "01110010" // /* MW 5 */ + 5440 "00010101" // /* MW 4 */ + 5441 "00110010" // /* MW 3 */ + 5442 "00110001" // /* MW 2 */ + 5443 "00000011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 101 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5444 "11111000" // VBCST.16 x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5445 "01110010" // /* MW 3 */ + 5446 "01000001" // /* MW 2 */ + 5447 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5449 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5450 "01001000" // VADD.f dm0, dm1, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5451 "00111101" // /* MW 3 */ + 5452 "00101000" // /* MW 2 */ + 5453 "00010000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5454 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5455 "00000110" // /* MW 3 */ + 5456 "00110001" // /* MW 2 */ + 5457 "00001100" // /* MW 1 */ + 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5459 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 412 52 first + 5460 "01100010" // ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5461 "00111101" // /* MW 7 */ + 5462 "00001100" // /* MW 6 */ + 5463 "00010010" // /* MW 5 */ + 5464 "11111001" // /* MW 4 */ + 5465 "01011111" // /* MW 3 */ + 5466 "00000010" // /* MW 2 */ + 5467 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 412 31 + 5468 "10011000" // NE r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5469 "00001000" // /* MW 3 */ + 5470 "01000000" // /* MW 2 */ + 5471 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 412 16 + 5472 "10000100" // JNZ r0, #6368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6368 delay_slots=5 */ + 5473 "00000001" // /* MW 5 */ + 5474 "01000000" // /* MW 4 */ + 5475 "01110000" // /* MW 3 */ + 5476 "00001100" // /* MW 2 */ + 5477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5483 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.delay_slot + 5484 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5485 "00000110" // /* MW 3 */ + 5486 "00110001" // /* MW 2 */ + 5487 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5489 "00000000" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first +.src_ref 3 "reduce_mean_c8_impl.h" 184 15 first +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5490 "00101100" // LDA r6, [p2, #12]; MOVX r5, #3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5491 "00011010" // /* MW 5 */ + 5492 "00010100" // /* MW 4 */ + 5493 "11010000" // /* MW 3 */ + 5494 "10011010" // /* MW 2 */ + 5495 "01000110" // /* MW 1 */ + 5496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5497 "00000000" // /* MW 1 */ + 5498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5499 "00000000" // /* MW 1 */ + 5500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5501 "00000000" // /* MW 1 */ + 5502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5503 "00000000" // /* MW 1 */ + 5504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5505 "00000000" // /* MW 1 */ + 5506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5507 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5508 "10011000" // GE r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5509 "01101001" // /* MW 3 */ + 5510 "01001110" // /* MW 2 */ + 5511 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5512 "10000100" // JNZ r7, #7296 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7296 delay_slots=5 */ + 5513 "00000001" // /* MW 5 */ + 5514 "01000000" // /* MW 4 */ + 5515 "01000000" // /* MW 3 */ + 5516 "00001110" // /* MW 2 */ + 5517 "00111000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 +.delay_slot + 5518 "00011000" // MOVX r0, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5519 "00010001" // /* MW 3 */ + 5520 "00000000" // /* MW 2 */ + 5521 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5529 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5530 "10011000" // NE r5, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5531 "00001000" // /* MW 3 */ + 5532 "10001010" // /* MW 2 */ + 5533 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5534 "10000100" // JNZ r5, #6512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6512 delay_slots=5 */ + 5535 "00000001" // /* MW 5 */ + 5536 "01000000" // /* MW 4 */ + 5537 "10111000" // /* MW 3 */ + 5538 "00001100" // /* MW 2 */ + 5539 "00101000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5549 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 5550 "11100100" // MOVX r17, #257; MOV dc4, lr /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5551 "11000001" // /* MW 5 */ + 5552 "10000011" // /* MW 4 */ + 5553 "10101001" // /* MW 3 */ + 5554 "01000000" // /* MW 2 */ + 5555 "00100100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 5556 "01000100" // MOVXM r21, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5557 "11111110" // /* MW 5 */ + 5558 "10111111" // /* MW 4 */ + 5559 "11111010" // /* MW 3 */ + 5560 "00000000" // /* MW 2 */ + 5561 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 + 5562 "00101100" // NOPA; MOVX r20, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5563 "00000010" // /* MW 5 */ + 5564 "01010000" // /* MW 4 */ + 5565 "11110000" // /* MW 3 */ + 5566 "00101100" // /* MW 2 */ + 5567 "00000000" // /* MW 1 */ +.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 + 5568 "01110110" // MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5569 "01011000" // /* MW 11 */ + 5570 "00111100" // /* MW 10 */ + 5571 "01001000" // /* MW 9 */ + 5572 "00001000" // /* MW 8 */ + 5573 "01010010" // /* MW 7 */ + 5574 "00000000" // /* MW 6 */ + 5575 "00001011" // /* MW 5 */ + 5576 "10000011" // /* MW 4 */ + 5577 "10000010" // /* MW 3 */ + 5578 "00001010" // /* MW 2 */ + 5579 "00001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first + 5580 "00101100" // LDA.s16 r6, [p2, dj2]; MOVX r4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5581 "00000010" // /* MW 5 */ + 5582 "00010001" // /* MW 4 */ + 5583 "01010000" // /* MW 3 */ + 5584 "00011010" // /* MW 2 */ + 5585 "01001000" // /* MW 1 */ + 5586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5587 "00000000" // /* MW 1 */ + 5588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5589 "00000000" // /* MW 1 */ + 5590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5591 "00000000" // /* MW 1 */ + 5592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5593 "00000000" // /* MW 1 */ + 5594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5595 "00000000" // /* MW 1 */ + 5596 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5597 "01100111" // /* MW 3 */ + 5598 "00000001" // /* MW 2 */ + 5599 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 + 5600 "11100001" // NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5601 "00000000" // /* MW 15 */ + 5602 "00000000" // /* MW 14 */ + 5603 "01111000" // /* MW 13 */ + 5604 "10100101" // /* MW 12 */ + 5605 "00000001" // /* MW 11 */ + 5606 "11110100" // /* MW 10 */ + 5607 "01010010" // /* MW 9 */ + 5608 "00001100" // /* MW 8 */ + 5609 "01011011" // /* MW 7 */ + 5610 "00000001" // /* MW 6 */ + 5611 "00100000" // /* MW 5 */ + 5612 "00000000" // /* MW 4 */ + 5613 "11110000" // /* MW 3 */ + 5614 "00101100" // /* MW 2 */ + 5615 "00000000" // /* MW 1 */ +.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 + 5616 "01110110" // MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5617 "00010000" // /* MW 11 */ + 5618 "01111000" // /* MW 10 */ + 5619 "10110010" // /* MW 9 */ + 5620 "11110011" // /* MW 8 */ + 5621 "00000001" // /* MW 7 */ + 5622 "10000000" // /* MW 6 */ + 5623 "10100101" // /* MW 5 */ + 5624 "11111101" // /* MW 4 */ + 5625 "10000111" // /* MW 3 */ + 5626 "10001010" // /* MW 2 */ + 5627 "00000100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1289 16 + 5628 "01110110" // LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5629 "01111000" // /* MW 11 */ + 5630 "00111001" // /* MW 10 */ + 5631 "10001011" // /* MW 9 */ + 5632 "00001000" // /* MW 8 */ + 5633 "01010000" // /* MW 7 */ + 5634 "10000000" // /* MW 6 */ + 5635 "01100101" // /* MW 5 */ + 5636 "11111010" // /* MW 4 */ + 5637 "01010111" // /* MW 3 */ + 5638 "11011100" // /* MW 2 */ + 5639 "11100000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 1280 49 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1287 41 +.src_ref 5 "vector.hpp" 1288 16 +.src_ref 5 "vector.hpp" 1289 16 +.src_ref 5 "vector.hpp" 1292 26 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 first +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 + 5640 "01110110" // LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5641 "01111000" // /* MW 11 */ + 5642 "01001001" // /* MW 10 */ + 5643 "00000010" // /* MW 9 */ + 5644 "11101000" // /* MW 8 */ + 5645 "01100111" // /* MW 7 */ + 5646 "00111111" // /* MW 6 */ + 5647 "10001011" // /* MW 5 */ + 5648 "10000100" // /* MW 4 */ + 5649 "11010111" // /* MW 3 */ + 5650 "00011010" // /* MW 2 */ + 5651 "01001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "vector.hpp" 1280 49 + 5652 "10111010" // MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5653 "01111000" // /* MW 9 */ + 5654 "01001001" // /* MW 8 */ + 5655 "00000010" // /* MW 7 */ + 5656 "00000001" // /* MW 6 */ + 5657 "11010010" // /* MW 5 */ + 5658 "00000010" // /* MW 4 */ + 5659 "00000000" // /* MW 3 */ + 5660 "11111000" // /* MW 2 */ + 5661 "00000011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 first + 5662 "10111010" // MOVA r25, #16; MOVXM ls, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5663 "00010000" // /* MW 9 */ + 5664 "01000000" // /* MW 8 */ + 5665 "01111011" // /* MW 7 */ + 5666 "00000100" // /* MW 6 */ + 5667 "00000000" // /* MW 5 */ + 5668 "00000000" // /* MW 4 */ + 5669 "00000000" // /* MW 3 */ + 5670 "00011001" // /* MW 2 */ + 5671 "00000010" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 + 5672 "10111010" // VLDA wl2, [sp, #-32]; MOVXM le, #6336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00010000" // /* MW 9 */ + 5674 "01100000" // /* MW 8 */ + 5675 "10111100" // /* MW 7 */ + 5676 "00000101" // /* MW 6 */ + 5677 "00000000" // /* MW 5 */ + 5678 "00000000" // /* MW 4 */ + 5679 "10110000" // /* MW 3 */ + 5680 "10010100" // /* MW 2 */ + 5681 "11111111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 98 + 5682 "00011000" // MOVX r26, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5683 "00000001" // /* MW 3 */ + 5684 "01110100" // /* MW 2 */ + 5685 "00010000" // /* MW 1 */ + 5686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5687 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "vector.hpp" 1286 72 +.src_ref 7 "accum.hpp" 1108 103 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5688 "00011000" // MOVX crRnd, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5689 "10000000" // /* MW 3 */ + 5690 "11111010" // /* MW 2 */ + 5691 "00010101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 first +.src_ref 6 "me_vmult_float_emulated.h" 112 19 first +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5692 "00000010" // VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5693 "00000000" // /* MW 7 */ + 5694 "10000000" // /* MW 6 */ + 5695 "10111001" // /* MW 5 */ + 5696 "00000010" // /* MW 4 */ + 5697 "11000000" // /* MW 3 */ + 5698 "00000010" // /* MW 2 */ + 5699 "00001000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5700 "11111000" // VMOV x3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5701 "10010010" // /* MW 3 */ + 5702 "10100000" // /* MW 2 */ + 5703 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 5704 "01100010" // VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5705 "10000011" // /* MW 7 */ + 5706 "01000000" // /* MW 6 */ + 5707 "00010000" // /* MW 5 */ + 5708 "11100110" // /* MW 4 */ + 5709 "10010010" // /* MW 3 */ + 5710 "10100110" // /* MW 2 */ + 5711 "00000010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 5712 "11111000" // VMOV x6, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5713 "10010010" // /* MW 3 */ + 5714 "00101010" // /* MW 2 */ + 5715 "00011011" // /* MW 1 */ + 5716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5717 "00000000" // /* MW 1 */ + 5718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5719 "00000000" // /* MW 1 */ + 5720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5721 "00000000" // /* MW 1 */ + 5722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5723 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 first +.src_ref 6 "me_vmult_float_emulated.h" 114 19 first + 5724 "00011000" // VCONV.bf16.fp32 wl3, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5725 "00010110" // /* MW 3 */ + 5726 "11000000" // /* MW 2 */ + 5727 "00001001" // /* MW 1 */ + 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5729 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 5730 "01001000" // VMSC.f dm0, dm0, x3, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5731 "10000011" // /* MW 3 */ + 5732 "00000110" // /* MW 2 */ + 5733 "00010000" // /* MW 1 */ + 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5735 "00000000" // /* MW 1 */ + 5736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5737 "00000000" // /* MW 1 */ + 5738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5739 "00000000" // /* MW 1 */ + 5740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5741 "00000000" // /* MW 1 */ + 5742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5743 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 + 5744 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00000000" // /* MW 15 */ + 5746 "00000000" // /* MW 14 */ + 5747 "01111000" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00001000" // /* MW 10 */ + 5751 "01110001" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "00010110" // /* MW 7 */ + 5754 "11000000" // /* MW 6 */ + 5755 "00100010" // /* MW 5 */ + 5756 "00000000" // /* MW 4 */ + 5757 "11110000" // /* MW 3 */ + 5758 "00101100" // /* MW 2 */ + 5759 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 3 "reduce_mean_c8_impl.h" 226 22 first +.begin_of_loop +.loop_nesting 1 + 5760 "11110100" // VLDB x7, [p1], #64; VMOV bmhh4, x9 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5761 "00100101" // /* MW 5 */ + 5762 "10100101" // /* MW 4 */ + 5763 "10001001" // /* MW 3 */ + 5764 "10111110" // /* MW 2 */ + 5765 "00100011" // /* MW 1 */ + 5766 "11111000" // VMOV bmhh3, x11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5767 "10010010" // /* MW 3 */ + 5768 "11010110" // /* MW 2 */ + 5769 "00011011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1280 49 + 5770 "11111000" // MOV r28, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5771 "11000000" // /* MW 3 */ + 5772 "00011110" // /* MW 2 */ + 5773 "00011111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1280 49 first + 5774 "10011000" // AND r29, r28, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5775 "10000100" // /* MW 3 */ + 5776 "00111011" // /* MW 2 */ + 5777 "00010111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1285 72 first + 5778 "00100100" // LT r27, r29, r4; ADD.NC r28, r29, #-32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5779 "11100000" // /* MW 5 */ + 5780 "00111101" // /* MW 4 */ + 5781 "01011110" // /* MW 3 */ + 5782 "11001001" // /* MW 2 */ + 5783 "11101110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 + 5784 "10011000" // LSHL r30, r22, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5785 "11011101" // /* MW 3 */ + 5786 "10111101" // /* MW 2 */ + 5787 "00010101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 98 first + 5788 "10011000" // SUB r31, r26, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5789 "11010001" // /* MW 3 */ + 5790 "10111111" // /* MW 2 */ + 5791 "00010110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "add_reduce.hpp" 322 47 first + 5792 "10100100" // SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5793 "11001101" // /* MW 5 */ + 5794 "01110000" // /* MW 4 */ + 5795 "01001000" // /* MW 3 */ + 5796 "10111100" // /* MW 2 */ + 5797 "00101111" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first + 5798 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5799 "10010010" // /* MW 3 */ + 5800 "00010000" // /* MW 2 */ + 5801 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 7 "accum.hpp" 198 120 + 5802 "11111000" // VMOV wl8, wh7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5803 "00100010" // /* MW 3 */ + 5804 "01001110" // /* MW 2 */ + 5805 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 7 "accum.hpp" 198 120 first + 5806 "11111000" // VMOV wl10, wl7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5807 "00100010" // /* MW 3 */ + 5808 "01001111" // /* MW 2 */ + 5809 "00011101" // /* MW 1 */ + 5810 "11111000" // VMOV bmhl4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5811 "10010010" // /* MW 3 */ + 5812 "10010000" // /* MW 2 */ + 5813 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5814 "11111000" // VMOV bmhl3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5815 "10010010" // /* MW 3 */ + 5816 "10010100" // /* MW 2 */ + 5817 "00011011" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 5 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5818 "01100010" // VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5819 "00111101" // /* MW 7 */ + 5820 "00101000" // /* MW 6 */ + 5821 "00010011" // /* MW 5 */ + 5822 "11100110" // /* MW 4 */ + 5823 "10001010" // /* MW 3 */ + 5824 "00010010" // /* MW 2 */ + 5825 "00000010" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5826 "11111000" // VMOV cml1, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5827 "10001010" // /* MW 3 */ + 5828 "00001110" // /* MW 2 */ + 5829 "00011001" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 5 "vector.hpp" 243 115 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5830 "01100010" // VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5831 "00111101" // /* MW 7 */ + 5832 "01010000" // /* MW 6 */ + 5833 "00010010" // /* MW 5 */ + 5834 "11100110" // /* MW 4 */ + 5835 "00100010" // /* MW 3 */ + 5836 "01001110" // /* MW 2 */ + 5837 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5838 "11111000" // VMOV bmll2, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5839 "10010010" // /* MW 3 */ + 5840 "00001110" // /* MW 2 */ + 5841 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5842 "11011000" // VSHIFT x9, x8, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5843 "01100110" // /* MW 3 */ + 5844 "11000000" // /* MW 2 */ + 5845 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5846 "01100010" // VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5847 "00111101" // /* MW 7 */ + 5848 "00110000" // /* MW 6 */ + 5849 "00010100" // /* MW 5 */ + 5850 "11100110" // /* MW 4 */ + 5851 "10010010" // /* MW 3 */ + 5852 "00010000" // /* MW 2 */ + 5853 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5854 "11111000" // VMOV bmll4, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5855 "10010010" // /* MW 3 */ + 5856 "00010010" // /* MW 2 */ + 5857 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 151 136 first + 5858 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5859 "00010010" // /* MW 3 */ + 5860 "00101100" // /* MW 2 */ + 5861 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 243 115 first +.src_ref 7 "accum.hpp" 151 115 + 5862 "11111000" // VMOV wl9, wl8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5863 "00100010" // /* MW 3 */ + 5864 "11010001" // /* MW 2 */ + 5865 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5866 "11011000" // VSHIFT x8, x9, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5867 "01100110" // /* MW 3 */ + 5868 "01001000" // /* MW 2 */ + 5869 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5870 "01100010" // VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5871 "00111101" // /* MW 7 */ + 5872 "01100100" // /* MW 6 */ + 5873 "00010001" // /* MW 5 */ + 5874 "11100110" // /* MW 4 */ + 5875 "10010010" // /* MW 3 */ + 5876 "00010000" // /* MW 2 */ + 5877 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5878 "11111000" // VMOV bmll3, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5879 "10010010" // /* MW 3 */ + 5880 "00010010" // /* MW 2 */ + 5881 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 + 5882 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5883 "00010010" // /* MW 3 */ + 5884 "00101000" // /* MW 2 */ + 5885 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 5886 "11011000" // VSHIFT x10, x8, x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5887 "00011110" // /* MW 3 */ + 5888 "01000000" // /* MW 2 */ + 5889 "00011101" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5890 "01100010" // VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5891 "00111101" // /* MW 7 */ + 5892 "01001100" // /* MW 6 */ + 5893 "00010010" // /* MW 5 */ + 5894 "11100110" // /* MW 4 */ + 5895 "00010010" // /* MW 3 */ + 5896 "00110000" // /* MW 2 */ + 5897 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 5898 "11111000" // VMOV bmll3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5899 "10010010" // /* MW 3 */ + 5900 "00010100" // /* MW 2 */ + 5901 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5902 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5903 "00111101" // /* MW 7 */ + 5904 "10001100" // /* MW 6 */ + 5905 "00010011" // /* MW 5 */ + 5906 "11000110" // /* MW 4 */ + 5907 "00011110" // /* MW 3 */ + 5908 "01000000" // /* MW 2 */ + 5909 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5910 "11111000" // VMOV bmll3, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5911 "10010010" // /* MW 3 */ + 5912 "00010000" // /* MW 2 */ + 5913 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 5914 "11111000" // VMOV x8, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5915 "00010010" // /* MW 3 */ + 5916 "00100100" // /* MW 2 */ + 5917 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 5918 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5919 "00111101" // /* MW 7 */ + 5920 "00110000" // /* MW 6 */ + 5921 "00010001" // /* MW 5 */ + 5922 "11000110" // /* MW 4 */ + 5923 "00011110" // /* MW 3 */ + 5924 "01000000" // /* MW 2 */ + 5925 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5926 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5927 "10010010" // /* MW 3 */ + 5928 "00010000" // /* MW 2 */ + 5929 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 5930 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5931 "00010010" // /* MW 3 */ + 5932 "00101000" // /* MW 2 */ + 5933 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 5934 "01100010" // VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5935 "00111101" // /* MW 7 */ + 5936 "01010000" // /* MW 6 */ + 5937 "00010010" // /* MW 5 */ + 5938 "11000110" // /* MW 4 */ + 5939 "00000010" // /* MW 3 */ + 5940 "01000000" // /* MW 2 */ + 5941 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5942 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "10010010" // /* MW 3 */ + 5944 "00010000" // /* MW 2 */ + 5945 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 5946 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5947 "00010010" // /* MW 3 */ + 5948 "00101100" // /* MW 2 */ + 5949 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 5950 "11011000" // VSHIFT x8, x8, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5951 "00000010" // /* MW 3 */ + 5952 "01000000" // /* MW 2 */ + 5953 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5954 "01100010" // VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5955 "00111101" // /* MW 7 */ + 5956 "01110000" // /* MW 6 */ + 5957 "00010011" // /* MW 5 */ + 5958 "11100110" // /* MW 4 */ + 5959 "00010010" // /* MW 3 */ + 5960 "00100100" // /* MW 2 */ + 5961 "00000101" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.nohwbrkpt +.noswbrkpt + 5962 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5963 "10010010" // /* MW 3 */ + 5964 "00010000" // /* MW 2 */ + 5965 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5966 "01100010" // VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5967 "00111101" // /* MW 7 */ + 5968 "00110000" // /* MW 6 */ + 5969 "00010000" // /* MW 5 */ + 5970 "11000110" // /* MW 4 */ + 5971 "00000010" // /* MW 3 */ + 5972 "01010000" // /* MW 2 */ + 5973 "00000101" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5974 "11111000" // VMOV bmll4, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5975 "10010010" // /* MW 3 */ + 5976 "00010100" // /* MW 2 */ + 5977 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 5978 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5979 "00010010" // /* MW 3 */ + 5980 "00101000" // /* MW 2 */ + 5981 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first + 5982 "10111000" // VEXTRACT.32 r23, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5983 "00000001" // /* MW 3 */ + 5984 "11100010" // /* MW 2 */ + 5985 "00011101" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 5986 "11111000" // VMOV x10, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5987 "00010010" // /* MW 3 */ + 5988 "00101100" // /* MW 2 */ + 5989 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first +.src_ref 5 "vector.hpp" 1288 16 first +.aggressive_scheduled_block_id 11 +.noswbrkpt + 5990 "01110100" // VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5991 "00000011" // /* MW 5 */ + 5992 "01010100" // /* MW 4 */ + 5993 "10000011" // /* MW 3 */ + 5994 "11010000" // /* MW 2 */ + 5995 "11100010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 5996 "11111000" // VMOV x11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "00010010" // /* MW 3 */ + 5998 "10100000" // /* MW 2 */ + 5999 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first +.src_ref 5 "vector.hpp" 1287 41 first +.src_ref 5 "broadcast.hpp" 80 25 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6000 "10110100" // VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6001 "00000110" // /* MW 5 */ + 6002 "10110100" // /* MW 4 */ + 6003 "10001010" // /* MW 3 */ + 6004 "11010100" // /* MW 2 */ + 6005 "11100000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6006 "00111000" // VSEL.32 x9, x10, x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6007 "10100000" // /* MW 3 */ + 6008 "11010100" // /* MW 2 */ + 6009 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 853 46 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6010 "01111000" // VINSERT.32 x10, x2, #0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6011 "11010001" // /* MW 3 */ + 6012 "00010000" // /* MW 2 */ + 6013 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 853 46 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6014 "01111000" // VINSERT.32 x8, x2, #0, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6015 "11110001" // /* MW 3 */ + 6016 "00010010" // /* MW 2 */ + 6017 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 5 "vector.hpp" 1413 19 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6018 "11111000" // VMOV wl11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "00100010" // /* MW 3 */ + 6020 "11010011" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 5 "vector.hpp" 1413 19 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6022 "11111000" // VMOV wh11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "00100010" // /* MW 3 */ + 6024 "10010011" // /* MW 2 */ + 6025 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 5 "vector.hpp" 1413 19 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6026 "11111000" // VMOV wh8, wl10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6027 "00100010" // /* MW 3 */ + 6028 "00010101" // /* MW 2 */ + 6029 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 first + 6030 "00111000" // VSEL.32 x8, x11, x8, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6031 "00000000" // /* MW 3 */ + 6032 "01011100" // /* MW 2 */ + 6033 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 6034 "00111000" // VSEL.32 x8, x1, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6035 "00001000" // /* MW 3 */ + 6036 "00001100" // /* MW 2 */ + 6037 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 6038 "00111000" // VSEL.32 x7, x8, x7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6039 "10101000" // /* MW 3 */ + 6040 "11000011" // /* MW 2 */ + 6041 "00011011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 + 6042 "11111000" // VMOV bmll0, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6043 "10010010" // /* MW 3 */ + 6044 "00001110" // /* MW 2 */ + 6045 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6046 "11111000" // VMOV x9, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6047 "10010010" // /* MW 3 */ + 6048 "10101100" // /* MW 2 */ + 6049 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 first +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first + 6050 "00000010" // VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6051 "01110000" // /* MW 7 */ + 6052 "01001001" // /* MW 6 */ + 6053 "00000111" // /* MW 5 */ + 6054 "00000001" // /* MW 4 */ + 6055 "11000000" // /* MW 3 */ + 6056 "00000010" // /* MW 2 */ + 6057 "01101000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6058 "11111000" // VMOV x8, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6059 "10010010" // /* MW 3 */ + 6060 "00110010" // /* MW 2 */ + 6061 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1289 16 + 6062 "01011010" // LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6063 "10000011" // /* MW 9 */ + 6064 "01001100" // /* MW 8 */ + 6065 "00010010" // /* MW 7 */ + 6066 "00001111" // /* MW 6 */ + 6067 "11101010" // /* MW 5 */ + 6068 "11101101" // /* MW 4 */ + 6069 "11001101" // /* MW 3 */ + 6070 "10111011" // /* MW 2 */ + 6071 "00000101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 9 first +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1289 16 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 6072 "01100010" // SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "10100001" // /* MW 7 */ + 6074 "11101100" // /* MW 6 */ + 6075 "00010001" // /* MW 5 */ + 6076 "10010001" // /* MW 4 */ + 6077 "00111110" // /* MW 3 */ + 6078 "00001011" // /* MW 2 */ + 6079 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 first +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1289 16 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 6080 "01011010" // SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6081 "01100001" // /* MW 9 */ + 6082 "11101100" // /* MW 8 */ + 6083 "00010000" // /* MW 7 */ + 6084 "00101111" // /* MW 6 */ + 6085 "00001001" // /* MW 5 */ + 6086 "00110011" // /* MW 4 */ + 6087 "11100010" // /* MW 3 */ + 6088 "10100101" // /* MW 2 */ + 6089 "00000101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 9 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6090 "01001000" // VMUL.f dm3, x6, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6091 "00000001" // /* MW 3 */ + 6092 "11101100" // /* MW 2 */ + 6093 "00010011" // /* MW 1 */ + 6094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6095 "00000000" // /* MW 1 */ + 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6097 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first +.src_ref 6 "me_vmult_float_emulated.h" 110 6 first +.src_ref 6 "me_vmult_float_emulated.h" 110 19 first + 6098 "00011000" // VCONV.bf16.fp32 wl9, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6099 "00010110" // /* MW 3 */ + 6100 "11000001" // /* MW 2 */ + 6101 "00001100" // /* MW 1 */ + 6102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6103 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 6104 "01001000" // VMSC.f dm2, dm2, x9, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "10000011" // /* MW 3 */ + 6106 "01010010" // /* MW 2 */ + 6107 "00010010" // /* MW 1 */ + 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6109 "00000000" // /* MW 1 */ + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ + 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6113 "00000000" // /* MW 1 */ + 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6115 "00000000" // /* MW 1 */ + 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6118 "00011000" // VCONV.bf16.fp32 wl8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6119 "00010110" // /* MW 3 */ + 6120 "01000001" // /* MW 2 */ + 6121 "00001100" // /* MW 1 */ + 6122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6123 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first + 6124 "01001000" // VMUL.f dm4, x8, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6125 "10100001" // /* MW 3 */ + 6126 "11110000" // /* MW 2 */ + 6127 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 9 first + 6128 "01001000" // VMUL.f dm2, x8, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6129 "01100001" // /* MW 3 */ + 6130 "11110000" // /* MW 2 */ + 6131 "00010010" // /* MW 1 */ + 6132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6133 "00000000" // /* MW 1 */ + 6134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6135 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6137 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 9 first +.aggressive_scheduled_block_id 13 +.noswbrkpt + 6138 "01001000" // VMUL.f dm2, x9, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6139 "10100001" // /* MW 3 */ + 6140 "11110010" // /* MW 2 */ + 6141 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6142 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6143 "00010010" // /* MW 3 */ + 6144 "01110000" // /* MW 2 */ + 6145 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6146 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6147 "00111101" // /* MW 3 */ + 6148 "10001000" // /* MW 2 */ + 6149 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6150 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6151 "10010010" // /* MW 3 */ + 6152 "00000101" // /* MW 2 */ + 6153 "00011100" // /* MW 1 */ + 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6155 "00000000" // /* MW 1 */ + 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6157 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id first + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 9 first +.aggressive_scheduled_block_id 14 +.noswbrkpt + 6160 "01001000" // VMUL.f dm2, x0, x8, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6161 "00000001" // /* MW 3 */ + 6162 "11100001" // /* MW 2 */ + 6163 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6164 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6165 "00010010" // /* MW 3 */ + 6166 "01110000" // /* MW 2 */ + 6167 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6168 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6169 "00111101" // /* MW 3 */ + 6170 "10001000" // /* MW 2 */ + 6171 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6172 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6173 "10010010" // /* MW 3 */ + 6174 "00000001" // /* MW 2 */ + 6175 "00011100" // /* MW 1 */ + 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6177 "00000000" // /* MW 1 */ + 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6179 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id first + 6180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6181 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 9 first +.aggressive_scheduled_block_id 15 +.noswbrkpt + 6182 "01001000" // VMUL.f dm1, x9, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6183 "01100001" // /* MW 3 */ + 6184 "11110010" // /* MW 2 */ + 6185 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6186 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6187 "00010010" // /* MW 3 */ + 6188 "01110000" // /* MW 2 */ + 6189 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6190 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6191 "00111101" // /* MW 3 */ + 6192 "10000100" // /* MW 2 */ + 6193 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6194 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6195 "10010010" // /* MW 3 */ + 6196 "00000101" // /* MW 2 */ + 6197 "00011100" // /* MW 1 */ + 6198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6199 "00000000" // /* MW 1 */ + 6200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6201 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id first + 6202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6203 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 9 first +.aggressive_scheduled_block_id 16 +.noswbrkpt + 6204 "01001000" // VMUL.f dm1, x9, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6205 "00000001" // /* MW 3 */ + 6206 "11110010" // /* MW 2 */ + 6207 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6208 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6209 "00010010" // /* MW 3 */ + 6210 "01110000" // /* MW 2 */ + 6211 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6212 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6213 "00111101" // /* MW 3 */ + 6214 "10000100" // /* MW 2 */ + 6215 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6216 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6217 "10010010" // /* MW 3 */ + 6218 "00000001" // /* MW 2 */ + 6219 "00011100" // /* MW 1 */ + 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6221 "00000000" // /* MW 1 */ + 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6223 "00000000" // /* MW 1 */ + 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6225 "00000000" // /* MW 1 */ + 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6227 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id first + 6228 "11111000" // VMOV lfl1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6229 "00010010" // /* MW 3 */ + 6230 "01110000" // /* MW 2 */ + 6231 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 17 +.noswbrkpt + 6232 "01001000" // VADD.f dm2, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6233 "00111101" // /* MW 3 */ + 6234 "10001000" // /* MW 2 */ + 6235 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6236 "11111000" // VMOV bmll4, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6237 "10010010" // /* MW 3 */ + 6238 "00010101" // /* MW 2 */ + 6239 "00011100" // /* MW 1 */ + 6240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6241 "00000000" // /* MW 1 */ + 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6243 "00000000" // /* MW 1 */ + 6244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6245 "00000000" // /* MW 1 */ + 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6247 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id first + 6248 "11111000" // VMOV lfh1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6249 "00010010" // /* MW 3 */ + 6250 "01101000" // /* MW 2 */ + 6251 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 first +.aggressive_scheduled_block_id 18 +.noswbrkpt + 6252 "01001000" // VADD.f dm2, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6253 "00111101" // /* MW 3 */ + 6254 "01000100" // /* MW 2 */ + 6255 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 "11111000" // VMOV bmll2, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "10010010" // /* MW 3 */ + 6258 "00010001" // /* MW 2 */ + 6259 "00011010" // /* MW 1 */ + 6260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6261 "00000000" // /* MW 1 */ + 6262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6263 "00000000" // /* MW 1 */ + 6264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6265 "00000000" // /* MW 1 */ + 6266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6267 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id first + 6268 "11111000" // VMOV lfl1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "00010010" // /* MW 3 */ + 6270 "01101000" // /* MW 2 */ + 6271 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 19 +.noswbrkpt + 6272 "01001000" // VADD.f dm0, dm1, dm0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "00111101" // /* MW 3 */ + 6274 "00100000" // /* MW 2 */ + 6275 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6276 "11111000" // VMOV bmll1, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "10010010" // /* MW 3 */ + 6278 "00010101" // /* MW 2 */ + 6279 "00011001" // /* MW 1 */ + 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6281 "00000000" // /* MW 1 */ + 6282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6283 "00000000" // /* MW 1 */ + 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6285 "00000000" // /* MW 1 */ + 6286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6287 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id first + 6288 "11111000" // VMOV lfh1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6289 "00010010" // /* MW 3 */ + 6290 "01100000" // /* MW 2 */ + 6291 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 first +.aggressive_scheduled_block_id 20 +.noswbrkpt + 6292 "01001000" // VADD.f dm0, dm0, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6293 "00111101" // /* MW 3 */ + 6294 "00001100" // /* MW 2 */ + 6295 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6296 "11111000" // VMOV bmll0, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6297 "10010010" // /* MW 3 */ + 6298 "00010001" // /* MW 2 */ + 6299 "00011000" // /* MW 1 */ + 6300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6301 "00000000" // /* MW 1 */ + 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 72 first +.src_ref 7 "accum.hpp" 1108 103 first + 6308 "00011000" // VCONV.bf16.fp32 wl11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6309 "00010110" // /* MW 3 */ + 6310 "11000000" // /* MW 2 */ + 6311 "00001101" // /* MW 1 */ + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 41 + 6314 "11011000" // VSHIFT x11, x0, x11, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "11111110" // /* MW 3 */ + 6316 "10000101" // /* MW 2 */ + 6317 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1289 16 first + 6318 "00111000" // VSEL.8 x11, x10, x11, r19:r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6319 "11001100" // /* MW 3 */ + 6320 "11010101" // /* MW 2 */ + 6321 "00011101" // /* MW 1 */ + 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6323 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 first +.src_ref 5 "vector.hpp" 1292 26 first + 6324 "00110110" // NOPA; NOPB; VST wh11, [p7, #32]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6325 "01000001" // /* MW 11 */ + 6326 "01100101" // /* MW 10 */ + 6327 "10001011" // /* MW 9 */ + 6328 "00000011" // /* MW 8 */ + 6329 "00000000" // /* MW 7 */ + 6330 "00000000" // /* MW 6 */ + 6331 "00100000" // /* MW 5 */ + 6332 "00000000" // /* MW 4 */ + 6333 "11110000" // /* MW 3 */ + 6334 "00101100" // /* MW 2 */ + 6335 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 first +.end_of_loop + 6336 "11100001" // NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6337 "00000000" // /* MW 15 */ + 6338 "00000000" // /* MW 14 */ + 6339 "01111000" // /* MW 13 */ + 6340 "10100101" // /* MW 12 */ + 6341 "00000001" // /* MW 11 */ + 6342 "00000000" // /* MW 10 */ + 6343 "00000000" // /* MW 9 */ + 6344 "10000000" // /* MW 8 */ + 6345 "11101010" // /* MW 7 */ + 6346 "10001010" // /* MW 6 */ + 6347 "00100111" // /* MW 5 */ + 6348 "00000000" // /* MW 4 */ + 6349 "11110000" // /* MW 3 */ + 6350 "00101100" // /* MW 2 */ + 6351 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6352 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */ + 6353 "00000000" // /* MW 5 */ + 6354 "00000000" // /* MW 4 */ + 6355 "01111000" // /* MW 3 */ + 6356 "00001100" // /* MW 2 */ + 6357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6367 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520 + 6368 "01011100" // ST dn3, [sp, #-4]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "10000000" // /* MW 5 */ + 6370 "10110100" // /* MW 4 */ + 6371 "10110000" // /* MW 3 */ + 6372 "10110100" // /* MW 2 */ + 6373 "11111111" // /* MW 1 */ + 6374 "01111010" // NOPA; ST lr, [sp, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6375 "00000000" // /* MW 9 */ + 6376 "00000000" // /* MW 8 */ + 6377 "00000000" // /* MW 7 */ + 6378 "10000000" // /* MW 6 */ + 6379 "00111101" // /* MW 5 */ + 6380 "11111000" // /* MW 4 */ + 6381 "11110111" // /* MW 3 */ + 6382 "00101100" // /* MW 2 */ + 6383 "00000000" // /* MW 1 */ +.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 2 "reduce_base_c8.h" 352 30 first + 6384 "00011000" // ADD.NC p7, r3, #34 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6385 "10010001" // /* MW 3 */ + 6386 "01100001" // /* MW 2 */ + 6387 "00011111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 + 6388 "11010100" // LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6389 "11000001" // /* MW 5 */ + 6390 "01100100" // /* MW 4 */ + 6391 "01011011" // /* MW 3 */ + 6392 "10001111" // /* MW 2 */ + 6393 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id first + 6394 "11111000" // MOV crSCDEn, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6395 "01100000" // /* MW 3 */ + 6396 "01111011" // /* MW 2 */ + 6397 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 +.aggressive_scheduled_block_id 21 +.noswbrkpt + 6398 "00011000" // ST.s16 r3, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6399 "01110111" // /* MW 3 */ + 6400 "00000100" // /* MW 2 */ + 6401 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 353 57 first +.aggressive_scheduled_block_id 21 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6402 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 6403 "00000001" // /* MW 5 */ + 6404 "00000000" // /* MW 4 */ + 6405 "11111000" // /* MW 3 */ + 6406 "00010011" // /* MW 2 */ + 6407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 first +.delay_slot +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6414 "00011000" // ADD r3, r3, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6415 "00000111" // /* MW 3 */ + 6416 "11000110" // /* MW 2 */ + 6417 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 +.delay_slot + 6418 "01111110" // NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6419 "01100000" // /* MW 13 */ + 6420 "00101011" // /* MW 12 */ + 6421 "00000000" // /* MW 11 */ + 6422 "10101111" // /* MW 10 */ + 6423 "00110100" // /* MW 9 */ + 6424 "00000000" // /* MW 8 */ + 6425 "10110000" // /* MW 7 */ + 6426 "11000000" // /* MW 6 */ + 6427 "00100000" // /* MW 5 */ + 6428 "00000000" // /* MW 4 */ + 6429 "11110000" // /* MW 3 */ + 6430 "00101100" // /* MW 2 */ + 6431 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 +.return_address + 6432 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00111001" // /* MW 3 */ + 6434 "11111000" // /* MW 2 */ + 6435 "00000111" // /* MW 1 */ + 6436 "00011000" // LDA p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "10011001" // /* MW 3 */ + 6438 "11111100" // /* MW 2 */ + 6439 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 353 23 first + 6440 "00011000" // ST.s16 r3, [p7, #10] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "01110111" // /* MW 3 */ + 6442 "01010100" // /* MW 2 */ + 6443 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 first + 6444 "11000100" // PADDXM [sp], #-256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6445 "00000001" // /* MW 5 */ + 6446 "00000000" // /* MW 4 */ + 6447 "00000000" // /* MW 3 */ + 6448 "11100000" // /* MW 2 */ + 6449 "11111111" // /* MW 1 */ + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ + 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6453 "00000000" // /* MW 1 */ + 6454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6455 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 + 6456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6457 "00000000" // /* MW 3 */ + 6458 "00101000" // /* MW 2 */ + 6459 "00010000" // /* MW 1 */ +.delay_slot + 6460 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6461 "11000000" // /* MW 3 */ + 6462 "01100010" // /* MW 2 */ + 6463 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6471 "01111110" // /* MW 9 */ + 6472 "10100101" // /* MW 8 */ + 6473 "00000001" // /* MW 7 */ + 6474 "00000000" // /* MW 6 */ + 6475 "00010000" // /* MW 5 */ + 6476 "00000000" // /* MW 4 */ + 6477 "11110000" // /* MW 3 */ + 6478 "00101100" // /* MW 2 */ + 6479 "00000000" // /* MW 1 */ +.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 5 "blend.hpp" 163 48 + 6480 "10111010" // MOVA r20, #255; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 6481 "00100000" // /* MW 9 */ + 6482 "00000000" // /* MW 8 */ + 6483 "00000000" // /* MW 7 */ + 6484 "10111000" // /* MW 6 */ + 6485 "00000010" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "00000000" // /* MW 3 */ + 6488 "11110100" // /* MW 2 */ + 6489 "00011111" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 6490 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6491 "00000001" // /* MW 3 */ + 6492 "00101010" // /* MW 2 */ + 6493 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6500 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6501 "10000001" // /* MW 11 */ + 6502 "10101101" // /* MW 10 */ + 6503 "00000000" // /* MW 9 */ + 6504 "00000000" // /* MW 8 */ + 6505 "00000000" // /* MW 7 */ + 6506 "00000000" // /* MW 6 */ + 6507 "00100000" // /* MW 5 */ + 6508 "00000000" // /* MW 4 */ + 6509 "11110000" // /* MW 3 */ + 6510 "00101100" // /* MW 2 */ + 6511 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6512 "00011000" // MOVX r5, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6513 "00010101" // /* MW 3 */ + 6514 "00001010" // /* MW 2 */ + 6515 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first + 6516 "10011000" // EQ r5, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "01100111" // /* MW 3 */ + 6518 "01001010" // /* MW 2 */ + 6519 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6520 "10000100" // JNZ r5, #7264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7264 delay_slots=5 */ + 6521 "00000001" // /* MW 5 */ + 6522 "01000000" // /* MW 4 */ + 6523 "00110000" // /* MW 3 */ + 6524 "00001110" // /* MW 2 */ + 6525 "00101000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6535 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6536 "00011000" // MOVX r7, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00011001" // /* MW 3 */ + 6538 "00001110" // /* MW 2 */ + 6539 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6540 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "01100111" // /* MW 3 */ + 6542 "11001110" // /* MW 2 */ + 6543 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6544 "10000100" // JNZ r7, #7504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7504 delay_slots=5 */ + 6545 "00000001" // /* MW 5 */ + 6546 "01000000" // /* MW 4 */ + 6547 "10101000" // /* MW 3 */ + 6548 "00001110" // /* MW 2 */ + 6549 "00111000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.delay_slot + 6550 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6551 "01000001" // /* MW 3 */ + 6552 "00001010" // /* MW 2 */ + 6553 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6555 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6557 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6559 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6561 "00000000" // /* MW 15 */ + 6562 "00000000" // /* MW 14 */ + 6563 "01111000" // /* MW 13 */ + 6564 "10100101" // /* MW 12 */ + 6565 "00000001" // /* MW 11 */ + 6566 "00000000" // /* MW 10 */ + 6567 "00000000" // /* MW 9 */ + 6568 "00000000" // /* MW 8 */ + 6569 "01011011" // /* MW 7 */ + 6570 "00000001" // /* MW 6 */ + 6571 "00100000" // /* MW 5 */ + 6572 "00000000" // /* MW 4 */ + 6573 "11110000" // /* MW 3 */ + 6574 "00101100" // /* MW 2 */ + 6575 "00000000" // /* MW 1 */ +.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first +.src_ref 3 "reduce_mean_c8_impl.h" 202 30 + 6576 "10111010" // LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6577 "01111000" // /* MW 9 */ + 6578 "11110000" // /* MW 8 */ + 6579 "01100000" // /* MW 7 */ + 6580 "11101010" // /* MW 6 */ + 6581 "00010000" // /* MW 5 */ + 6582 "00000001" // /* MW 4 */ + 6583 "01010000" // /* MW 3 */ + 6584 "00011110" // /* MW 2 */ + 6585 "01001000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 202 30 first + 6586 "01100100" // NE r6, r17, r6; MOV r17, #257 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6587 "00000101" // /* MW 5 */ + 6588 "10100100" // /* MW 4 */ + 6589 "00011000" // /* MW 3 */ + 6590 "10001101" // /* MW 2 */ + 6591 "10001001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 202 12 + 6592 "10000100" // JNZ r6, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */ + 6593 "00000001" // /* MW 5 */ + 6594 "01000000" // /* MW 4 */ + 6595 "00100000" // /* MW 3 */ + 6596 "00001110" // /* MW 2 */ + 6597 "00110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 first +.delay_slot + 6606 "10011000" // ASHL r5, r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6607 "01011110" // /* MW 3 */ + 6608 "11001010" // /* MW 2 */ + 6609 "00010001" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 199 120 +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first +.src_ref 3 "reduce_mean_c8_impl.h" 206 35 +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 + 6610 "01110110" // MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6611 "00010000" // /* MW 11 */ + 6612 "00001000" // /* MW 10 */ + 6613 "01111101" // /* MW 9 */ + 6614 "00000100" // /* MW 8 */ + 6615 "00000000" // /* MW 7 */ + 6616 "00000000" // /* MW 6 */ + 6617 "10001011" // /* MW 5 */ + 6618 "10000100" // /* MW 4 */ + 6619 "10000000" // /* MW 3 */ + 6620 "10001010" // /* MW 2 */ + 6621 "00000100" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 +.src_ref 3 "reduce_mean_c8_impl.h" 206 35 + 6622 "01110110" // LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6623 "00010000" // /* MW 11 */ + 6624 "00111000" // /* MW 10 */ + 6625 "10111101" // /* MW 9 */ + 6626 "00000101" // /* MW 8 */ + 6627 "00000000" // /* MW 7 */ + 6628 "10000000" // /* MW 6 */ + 6629 "10100101" // /* MW 5 */ + 6630 "11111101" // /* MW 4 */ + 6631 "11010111" // /* MW 3 */ + 6632 "00011110" // /* MW 2 */ + 6633 "01001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first + 6634 "10011000" // VLDA bmll2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6635 "00010101" // /* MW 3 */ + 6636 "00011101" // /* MW 2 */ + 6637 "00000000" // /* MW 1 */ + 6638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6639 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 + 6640 "11111000" // VMOV bmhh4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6641 "10010010" // /* MW 3 */ + 6642 "11000010" // /* MW 2 */ + 6643 "00011100" // /* MW 1 */ + 6644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6645 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 +.src_ref 5 "add.hpp" 28 49 first + 6646 "01100010" // VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6647 "00111101" // /* MW 7 */ + 6648 "01101000" // /* MW 6 */ + 6649 "00010001" // /* MW 5 */ + 6650 "11100110" // /* MW 4 */ + 6651 "00010010" // /* MW 3 */ + 6652 "00010011" // /* MW 2 */ + 6653 "00000011" // /* MW 1 */ + 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6655 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first + 6656 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6657 "00000000" // /* MW 15 */ + 6658 "00000000" // /* MW 14 */ + 6659 "11001000" // /* MW 13 */ + 6660 "11111111" // /* MW 12 */ + 6661 "10111001" // /* MW 11 */ + 6662 "00000010" // /* MW 10 */ + 6663 "00000000" // /* MW 9 */ + 6664 "00000000" // /* MW 8 */ + 6665 "01011011" // /* MW 7 */ + 6666 "00000001" // /* MW 6 */ + 6667 "00100000" // /* MW 5 */ + 6668 "00000000" // /* MW 4 */ + 6669 "11110000" // /* MW 3 */ + 6670 "00101100" // /* MW 2 */ + 6671 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first +.begin_of_loop +.loop_nesting 1 + 6672 "11100001" // VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6673 "00000000" // /* MW 15 */ + 6674 "00000000" // /* MW 14 */ + 6675 "01111000" // /* MW 13 */ + 6676 "10100101" // /* MW 12 */ + 6677 "00000001" // /* MW 11 */ + 6678 "00000000" // /* MW 10 */ + 6679 "00000000" // /* MW 9 */ + 6680 "00000000" // /* MW 8 */ + 6681 "01011011" // /* MW 7 */ + 6682 "00000001" // /* MW 6 */ + 6683 "00100000" // /* MW 5 */ + 6684 "00000000" // /* MW 4 */ + 6685 "10110000" // /* MW 3 */ + 6686 "10100010" // /* MW 2 */ + 6687 "00000011" // /* MW 1 */ + 6688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6689 "00000000" // /* MW 15 */ + 6690 "00000000" // /* MW 14 */ + 6691 "01111000" // /* MW 13 */ + 6692 "10100101" // /* MW 12 */ + 6693 "00000001" // /* MW 11 */ + 6694 "00000000" // /* MW 10 */ + 6695 "00000000" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "01011011" // /* MW 7 */ + 6698 "00000001" // /* MW 6 */ + 6699 "00100000" // /* MW 5 */ + 6700 "00000000" // /* MW 4 */ + 6701 "11110000" // /* MW 3 */ + 6702 "00101100" // /* MW 2 */ + 6703 "00000000" // /* MW 1 */ + 6704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6705 "00000000" // /* MW 15 */ + 6706 "00000000" // /* MW 14 */ + 6707 "01111000" // /* MW 13 */ + 6708 "10100101" // /* MW 12 */ + 6709 "00000001" // /* MW 11 */ + 6710 "00000000" // /* MW 10 */ + 6711 "00000000" // /* MW 9 */ + 6712 "00000000" // /* MW 8 */ + 6713 "01011011" // /* MW 7 */ + 6714 "00000001" // /* MW 6 */ + 6715 "00100000" // /* MW 5 */ + 6716 "00000000" // /* MW 4 */ + 6717 "11110000" // /* MW 3 */ + 6718 "00101100" // /* MW 2 */ + 6719 "00000000" // /* MW 1 */ + 6720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6721 "00000000" // /* MW 15 */ + 6722 "00000000" // /* MW 14 */ + 6723 "01111000" // /* MW 13 */ + 6724 "10100101" // /* MW 12 */ + 6725 "00000001" // /* MW 11 */ + 6726 "00000000" // /* MW 10 */ + 6727 "00000000" // /* MW 9 */ + 6728 "00000000" // /* MW 8 */ + 6729 "01011011" // /* MW 7 */ + 6730 "00000001" // /* MW 6 */ + 6731 "00100000" // /* MW 5 */ + 6732 "00000000" // /* MW 4 */ + 6733 "11110000" // /* MW 3 */ + 6734 "00101100" // /* MW 2 */ + 6735 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 150 115 first +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id first + 6736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6737 "00000000" // /* MW 15 */ + 6738 "00000000" // /* MW 14 */ + 6739 "01111000" // /* MW 13 */ + 6740 "00001001" // /* MW 12 */ + 6741 "01100010" // /* MW 11 */ + 6742 "00000010" // /* MW 10 */ + 6743 "00000000" // /* MW 9 */ + 6744 "00000000" // /* MW 8 */ + 6745 "01011011" // /* MW 7 */ + 6746 "00000001" // /* MW 6 */ + 6747 "00100000" // /* MW 5 */ + 6748 "00000000" // /* MW 4 */ + 6749 "11110000" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 5 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 22 +.noswbrkpt + 6752 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "01000001" // /* MW 15 */ + 6754 "10001011" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "01011011" // /* MW 7 */ + 6762 "00000001" // /* MW 6 */ + 6763 "00100000" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920 +.src_ref 7 "accum.hpp" 199 120 first +.end_of_loop +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6768 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6769 "00000000" // /* MW 15 */ + 6770 "00000000" // /* MW 14 */ + 6771 "01111000" // /* MW 13 */ + 6772 "10001001" // /* MW 12 */ + 6773 "10001001" // /* MW 11 */ + 6774 "00000001" // /* MW 10 */ + 6775 "00000000" // /* MW 9 */ + 6776 "00000000" // /* MW 8 */ + 6777 "01011011" // /* MW 7 */ + 6778 "00000001" // /* MW 6 */ + 6779 "00100000" // /* MW 5 */ + 6780 "00000000" // /* MW 4 */ + 6781 "11110000" // /* MW 3 */ + 6782 "00101100" // /* MW 2 */ + 6783 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id first +.loop_nesting 0 + 6784 "10111010" // MOVA r16, #16; MOVXM p7, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6785 "00010000" // /* MW 9 */ + 6786 "01111000" // /* MW 8 */ + 6787 "10110010" // /* MW 7 */ + 6788 "11110011" // /* MW 6 */ + 6789 "00000001" // /* MW 5 */ + 6790 "00000000" // /* MW 4 */ + 6791 "00000000" // /* MW 3 */ + 6792 "00010000" // /* MW 2 */ + 6793 "00000010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6794 "10111010" // LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6795 "01011000" // /* MW 9 */ + 6796 "00000001" // /* MW 8 */ + 6797 "10011000" // /* MW 7 */ + 6798 "00001000" // /* MW 6 */ + 6799 "01100001" // /* MW 5 */ + 6800 "00000000" // /* MW 4 */ + 6801 "01010000" // /* MW 3 */ + 6802 "10010000" // /* MW 2 */ + 6803 "11100000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6804 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6805 "00000101" // /* MW 3 */ + 6806 "00100010" // /* MW 2 */ + 6807 "00010000" // /* MW 1 */ +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6809 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 150 115 first +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6810 "11111000" // VMOV bmhh4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6811 "00010010" // /* MW 3 */ + 6812 "11000100" // /* MW 2 */ + 6813 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6816 "11111000" // VMOV x2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6817 "00010010" // /* MW 3 */ + 6818 "00110011" // /* MW 2 */ + 6819 "00011001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6820 "11011000" // VSHIFT x2, x2, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6821 "00010010" // /* MW 3 */ + 6822 "00010000" // /* MW 2 */ + 6823 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 1108 103 +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6824 "01011010" // MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6825 "00111101" // /* MW 9 */ + 6826 "01000000" // /* MW 8 */ + 6827 "00010000" // /* MW 7 */ + 6828 "00101111" // /* MW 6 */ + 6829 "01001001" // /* MW 5 */ + 6830 "00000000" // /* MW 4 */ + 6831 "10000000" // /* MW 3 */ + 6832 "00111010" // /* MW 2 */ + 6833 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6834 "11111000" // VMOV bmll2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6835 "00010010" // /* MW 3 */ + 6836 "00010011" // /* MW 2 */ + 6837 "00011010" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first + 6838 "11111000" // VBCST.32 x2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6839 "01110010" // /* MW 3 */ + 6840 "00010110" // /* MW 2 */ + 6841 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 + 6842 "11111000" // VMOV bmll1, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6843 "10010010" // /* MW 3 */ + 6844 "00000100" // /* MW 2 */ + 6845 "00011001" // /* MW 1 */ + 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6847 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first + 6848 "11111000" // VMOV bmll2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "00010010" // /* MW 3 */ + 6850 "00000100" // /* MW 2 */ + 6851 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id first + 6852 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "00010010" // /* MW 3 */ + 6854 "00100000" // /* MW 2 */ + 6855 "00011001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 24 +.noswbrkpt + 6856 "01100010" // VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6857 "00111101" // /* MW 7 */ + 6858 "00001100" // /* MW 6 */ + 6859 "00010000" // /* MW 5 */ + 6860 "11000110" // /* MW 4 */ + 6861 "01000010" // /* MW 3 */ + 6862 "00010000" // /* MW 2 */ + 6863 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6864 "11111000" // VMOV bmll3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6865 "10010010" // /* MW 3 */ + 6866 "00000100" // /* MW 2 */ + 6867 "00011011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 6868 "11111000" // VMOV x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6869 "10010010" // /* MW 3 */ + 6870 "00100000" // /* MW 2 */ + 6871 "00011001" // /* MW 1 */ + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6873 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 first +.src_ref 6 "me_vmult_float_emulated.h" 112 19 first + 6874 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6875 "10010110" // /* MW 3 */ + 6876 "01000000" // /* MW 2 */ + 6877 "00001000" // /* MW 1 */ + 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6879 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id first + 6880 "01100010" // VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6881 "10000011" // /* MW 7 */ + 6882 "01000000" // /* MW 6 */ + 6883 "00010100" // /* MW 5 */ + 6884 "11100110" // /* MW 4 */ + 6885 "00010010" // /* MW 3 */ + 6886 "10100000" // /* MW 2 */ + 6887 "00000001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 25 +.noswbrkpt + 6888 "01100010" // VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6889 "00111101" // /* MW 7 */ + 6890 "00001000" // /* MW 6 */ + 6891 "00010000" // /* MW 5 */ + 6892 "11000110" // /* MW 4 */ + 6893 "00011010" // /* MW 3 */ + 6894 "10011000" // /* MW 2 */ + 6895 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6896 "11111000" // VMOV bmll2, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6897 "10010010" // /* MW 3 */ + 6898 "00000110" // /* MW 2 */ + 6899 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 6900 "11111000" // VMOV x3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6901 "10010010" // /* MW 3 */ + 6902 "10100100" // /* MW 2 */ + 6903 "00011001" // /* MW 1 */ + 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6905 "00000000" // /* MW 1 */ + 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6907 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 6 "me_vmult_float_emulated.h" 114 6 first +.src_ref 6 "me_vmult_float_emulated.h" 114 19 first + 6908 "00011000" // VCONV.bf16.fp32 wl2, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6909 "00010110" // /* MW 3 */ + 6910 "01000010" // /* MW 2 */ + 6911 "00001001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id first + 6912 "11111000" // VMOV x5, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6913 "00010010" // /* MW 3 */ + 6914 "10100000" // /* MW 2 */ + 6915 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 26 +.noswbrkpt + 6916 "01100010" // VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6917 "00111101" // /* MW 7 */ + 6918 "00001000" // /* MW 6 */ + 6919 "00010000" // /* MW 5 */ + 6920 "11000110" // /* MW 4 */ + 6921 "00000010" // /* MW 3 */ + 6922 "00101000" // /* MW 2 */ + 6923 "00000011" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6924 "11111000" // VMOV bmll2, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10010010" // /* MW 3 */ + 6926 "00001100" // /* MW 2 */ + 6927 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6928 "11111000" // VMOV x5, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "10010010" // /* MW 3 */ + 6930 "10100110" // /* MW 2 */ + 6931 "00011010" // /* MW 1 */ + 6932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6933 "00000000" // /* MW 1 */ + 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6935 "00000000" // /* MW 1 */ + 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6937 "00000000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 6938 "11111000" // VMOV x6, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6939 "00010010" // /* MW 3 */ + 6940 "00100000" // /* MW 2 */ + 6941 "00011011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first + 6942 "10111000" // VEXTRACT.32 r0, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6943 "00000001" // /* MW 3 */ + 6944 "00011010" // /* MW 2 */ + 6945 "00011000" // /* MW 1 */ + 6946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6947 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 856 23 first + 6948 "01111000" // VINSERT.32 x6, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6949 "00010001" // /* MW 3 */ + 6950 "00000000" // /* MW 2 */ + 6951 "00011011" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 first + 6952 "00111000" // VSEL.32 x1, x1, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6953 "00001000" // /* MW 3 */ + 6954 "10001011" // /* MW 2 */ + 6955 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 + 6956 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6957 "10010010" // /* MW 3 */ + 6958 "00000010" // /* MW 2 */ + 6959 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6960 "11111000" // VMOV x1, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6961 "10010010" // /* MW 3 */ + 6962 "10101010" // /* MW 2 */ + 6963 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 first +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6964 "00000010" // VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6965 "01110000" // /* MW 7 */ + 6966 "01001001" // /* MW 6 */ + 6967 "10010001" // /* MW 5 */ + 6968 "00000001" // /* MW 4 */ + 6969 "11000000" // /* MW 3 */ + 6970 "00100010" // /* MW 2 */ + 6971 "01011000" // /* MW 1 */ + 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6973 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first + 6974 "01001000" // VMSC.f dm1, dm2, x5, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6975 "10000011" // /* MW 3 */ + 6976 "01001010" // /* MW 2 */ + 6977 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 9 first + 6978 "01001000" // VMUL.f dm0, x5, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6979 "01000001" // /* MW 3 */ + 6980 "11101010" // /* MW 2 */ + 6981 "00010000" // /* MW 1 */ + 6982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6983 "00000000" // /* MW 1 */ + 6984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6985 "00000000" // /* MW 1 */ + 6986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6987 "00000000" // /* MW 1 */ + 6988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6989 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first +.src_ref 6 "me_vmult_float_emulated.h" 110 6 first +.src_ref 6 "me_vmult_float_emulated.h" 110 19 first + 6990 "00011000" // VCONV.bf16.fp32 wl1, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6991 "10010110" // /* MW 3 */ + 6992 "11000000" // /* MW 2 */ + 6993 "00001000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 6994 "01001000" // VMSC.f dm4, dm4, x2, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6995 "10000011" // /* MW 3 */ + 6996 "10000100" // /* MW 2 */ + 6997 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 6998 "01001000" // VMSC.f dm3, dm1, x1, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6999 "10000011" // /* MW 3 */ + 7000 "00100010" // /* MW 2 */ + 7001 "00010011" // /* MW 1 */ + 7002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7003 "00000000" // /* MW 1 */ + 7004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7005 "00000000" // /* MW 1 */ + 7006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7007 "00000000" // /* MW 1 */ + 7008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7009 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 6 first +.src_ref 6 "me_vmult_float_emulated.h" 115 19 first +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 7010 "00011000" // VCONV.bf16.fp32 wl3, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7011 "00010110" // /* MW 3 */ + 7012 "11000010" // /* MW 2 */ + 7013 "00001001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 6 first +.src_ref 6 "me_vmult_float_emulated.h" 111 19 first +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 7014 "00011000" // VCONV.bf16.fp32 wl6, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7015 "10010110" // /* MW 3 */ + 7016 "01000001" // /* MW 2 */ + 7017 "00001011" // /* MW 1 */ + 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7019 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first + 7020 "01001000" // VMUL.f dm2, x6, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7021 "01100001" // /* MW 3 */ + 7022 "11101100" // /* MW 2 */ + 7023 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 9 first + 7024 "01001000" // VMUL.f dm3, x6, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7025 "01000001" // /* MW 3 */ + 7026 "11101100" // /* MW 2 */ + 7027 "00010011" // /* MW 1 */ + 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7029 "00000000" // /* MW 1 */ + 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7031 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id first + 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7033 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 9 first +.aggressive_scheduled_block_id 27 +.noswbrkpt + 7034 "01001000" // VMUL.f dm3, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7035 "01100001" // /* MW 3 */ + 7036 "11100010" // /* MW 2 */ + 7037 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7038 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "00010010" // /* MW 3 */ + 7040 "01101000" // /* MW 2 */ + 7041 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7042 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7043 "00111101" // /* MW 3 */ + 7044 "01001100" // /* MW 2 */ + 7045 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7046 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7047 "10010010" // /* MW 3 */ + 7048 "00000101" // /* MW 2 */ + 7049 "00011010" // /* MW 1 */ + 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7051 "00000000" // /* MW 1 */ + 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7053 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id first + 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7055 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 9 first +.aggressive_scheduled_block_id 28 +.noswbrkpt + 7056 "01001000" // VMUL.f dm3, x5, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7057 "01100001" // /* MW 3 */ + 7058 "11101010" // /* MW 2 */ + 7059 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7060 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00010010" // /* MW 3 */ + 7062 "01101000" // /* MW 2 */ + 7063 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7064 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7065 "00111101" // /* MW 3 */ + 7066 "01001100" // /* MW 2 */ + 7067 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7068 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7069 "10010010" // /* MW 3 */ + 7070 "00000001" // /* MW 2 */ + 7071 "00011010" // /* MW 1 */ + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ + 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7075 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id first + 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7077 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 9 first +.aggressive_scheduled_block_id 29 +.noswbrkpt + 7078 "01001000" // VMUL.f dm3, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7079 "01000001" // /* MW 3 */ + 7080 "11100010" // /* MW 2 */ + 7081 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7082 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7083 "00010010" // /* MW 3 */ + 7084 "01101000" // /* MW 2 */ + 7085 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7086 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7087 "00111101" // /* MW 3 */ + 7088 "01001100" // /* MW 2 */ + 7089 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7090 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7091 "10010010" // /* MW 3 */ + 7092 "00000101" // /* MW 2 */ + 7093 "00011010" // /* MW 1 */ + 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7095 "00000000" // /* MW 1 */ + 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7097 "00000000" // /* MW 1 */ + 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7099 "00000000" // /* MW 1 */ + 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7101 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id first + 7102 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7103 "00010010" // /* MW 3 */ + 7104 "01101000" // /* MW 2 */ + 7105 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 30 +.noswbrkpt + 7106 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7107 "00111101" // /* MW 3 */ + 7108 "01001100" // /* MW 2 */ + 7109 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7110 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7111 "10010010" // /* MW 3 */ + 7112 "00000001" // /* MW 2 */ + 7113 "00011010" // /* MW 1 */ + 7114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7115 "00000000" // /* MW 1 */ + 7116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7117 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 9 first + 7118 "01001000" // VMUL.f dm3, x0, x6, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "11000001" // /* MW 3 */ + 7120 "11100000" // /* MW 2 */ + 7121 "00010011" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id first + 7124 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7125 "00010010" // /* MW 3 */ + 7126 "01101000" // /* MW 2 */ + 7127 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 31 +.noswbrkpt + 7128 "01001000" // VADD.f dm3, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7129 "00111101" // /* MW 3 */ + 7130 "01001100" // /* MW 2 */ + 7131 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7132 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7133 "10010010" // /* MW 3 */ + 7134 "00000101" // /* MW 2 */ + 7135 "00011010" // /* MW 1 */ + 7136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7137 "00000000" // /* MW 1 */ + 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7139 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 9 first + 7140 "01001000" // VMUL.f dm1, x1, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00000001" // /* MW 3 */ + 7142 "11100010" // /* MW 2 */ + 7143 "00010001" // /* MW 1 */ + 7144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7145 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id first + 7146 "11111000" // VMOV lfh0, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00010010" // /* MW 3 */ + 7148 "01101100" // /* MW 2 */ + 7149 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 first +.aggressive_scheduled_block_id 32 +.noswbrkpt + 7150 "01001000" // VADD.f dm1, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "00111101" // /* MW 3 */ + 7152 "01000100" // /* MW 2 */ + 7153 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7154 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "10010010" // /* MW 3 */ + 7156 "00000001" // /* MW 2 */ + 7157 "00011010" // /* MW 1 */ + 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7159 "00000000" // /* MW 1 */ + 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7161 "00000000" // /* MW 1 */ + 7162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7163 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id first + 7164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7165 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 33 +.noswbrkpt + 7166 "01100010" // VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7167 "00111101" // /* MW 7 */ + 7168 "01000000" // /* MW 6 */ + 7169 "00010000" // /* MW 5 */ + 7170 "11100110" // /* MW 4 */ + 7171 "00010010" // /* MW 3 */ + 7172 "00100100" // /* MW 2 */ + 7173 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 first +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7174 "01100010" // VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7175 "00000001" // /* MW 7 */ + 7176 "11101010" // /* MW 6 */ + 7177 "00010100" // /* MW 5 */ + 7178 "11100110" // /* MW 4 */ + 7179 "10010010" // /* MW 3 */ + 7180 "00000000" // /* MW 2 */ + 7181 "00000010" // /* MW 1 */ + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id first + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.aggressive_scheduled_block_id 34 +.noswbrkpt + 7188 "01001000" // VADD.f dm0, dm2, dm4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00111101" // /* MW 3 */ + 7190 "01010000" // /* MW 2 */ + 7191 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010010" // /* MW 3 */ + 7194 "00000000" // /* MW 2 */ + 7195 "00011010" // /* MW 1 */ + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ + 7198 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */ + 7199 "00000000" // /* MW 5 */ + 7200 "00000000" // /* MW 4 */ + 7201 "01111000" // /* MW 3 */ + 7202 "00001100" // /* MW 2 */ + 7203 "00000000" // /* MW 1 */ +.delay_slot + 7204 "10011000" // ST dc4, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7205 "01100101" // /* MW 3 */ + 7206 "11111010" // /* MW 2 */ + 7207 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 first +.delay_slot + 7210 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7211 "00010010" // /* MW 3 */ + 7212 "00000000" // /* MW 2 */ + 7213 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7215 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 1108 103 first +.delay_slot + 7216 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7217 "00000000" // /* MW 15 */ + 7218 "00000000" // /* MW 14 */ + 7219 "01111000" // /* MW 13 */ + 7220 "10100101" // /* MW 12 */ + 7221 "00000001" // /* MW 11 */ + 7222 "00000000" // /* MW 10 */ + 7223 "00000000" // /* MW 9 */ + 7224 "10000000" // /* MW 8 */ + 7225 "00010010" // /* MW 7 */ + 7226 "00000101" // /* MW 6 */ + 7227 "00100001" // /* MW 5 */ + 7228 "00000000" // /* MW 4 */ + 7229 "11110000" // /* MW 3 */ + 7230 "00101100" // /* MW 2 */ + 7231 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384 +.src_ref 5 "blend.hpp" 163 48 + 7232 "10111010" // MOVA r20, #0; J #5616 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5616 delay_slots=5 */ + 7233 "00100000" // /* MW 9 */ + 7234 "00000000" // /* MW 8 */ + 7235 "00000000" // /* MW 7 */ + 7236 "10111110" // /* MW 6 */ + 7237 "00000010" // /* MW 5 */ + 7238 "00000000" // /* MW 4 */ + 7239 "00000000" // /* MW 3 */ + 7240 "00010100" // /* MW 2 */ + 7241 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7242 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7243 "00000001" // /* MW 3 */ + 7244 "00101010" // /* MW 2 */ + 7245 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7247 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7252 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7253 "10000001" // /* MW 11 */ + 7254 "10101101" // /* MW 10 */ + 7255 "00000000" // /* MW 9 */ + 7256 "00000000" // /* MW 8 */ + 7257 "00000000" // /* MW 7 */ + 7258 "00000000" // /* MW 6 */ + 7259 "00100000" // /* MW 5 */ + 7260 "00000000" // /* MW 4 */ + 7261 "11110000" // /* MW 3 */ + 7262 "00101100" // /* MW 2 */ + 7263 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416 + 7264 "10000100" // J #7456 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7265 "00000000" // /* MW 5 */ + 7266 "00000000" // /* MW 4 */ + 7267 "10010000" // /* MW 3 */ + 7268 "00001110" // /* MW 2 */ + 7269 "00000000" // /* MW 1 */ +.delay_slot + 7270 "00000010" // ST p1, [sp, #-4]; MOV dc4, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7271 "01110000" // /* MW 7 */ + 7272 "11110000" // /* MW 6 */ + 7273 "01100000" // /* MW 5 */ + 7274 "00000010" // /* MW 4 */ + 7275 "10110000" // /* MW 3 */ + 7276 "10010011" // /* MW 2 */ + 7277 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7279 "00000000" // /* MW 1 */ +.delay_slot + 7280 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7281 "00110011" // /* MW 3 */ + 7282 "11110000" // /* MW 2 */ + 7283 "00001111" // /* MW 1 */ +.delay_slot + 7284 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7285 "00110011" // /* MW 3 */ + 7286 "11110101" // /* MW 2 */ + 7287 "00001111" // /* MW 1 */ +.delay_slot + 7288 "00000010" // VST x1, [sp, #-128]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7289 "01110000" // /* MW 7 */ + 7290 "10100101" // /* MW 6 */ + 7291 "00000001" // /* MW 5 */ + 7292 "00000000" // /* MW 4 */ + 7293 "01100000" // /* MW 3 */ + 7294 "00001110" // /* MW 2 */ + 7295 "11111111" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7296 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7297 "00000101" // /* MW 3 */ + 7298 "00100010" // /* MW 2 */ + 7299 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first + 7300 "10011000" // EQ r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7301 "01100111" // /* MW 3 */ + 7302 "01100010" // /* MW 2 */ + 7303 "00010100" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7304 "10000100" // JNZ r17, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */ + 7305 "00000001" // /* MW 5 */ + 7306 "01000000" // /* MW 4 */ + 7307 "10010000" // /* MW 3 */ + 7308 "00001110" // /* MW 2 */ + 7309 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ +.delay_slot + 7312 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7313 "00110011" // /* MW 3 */ + 7314 "11110000" // /* MW 2 */ + 7315 "00001111" // /* MW 1 */ +.delay_slot + 7316 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7317 "00110011" // /* MW 3 */ + 7318 "11110101" // /* MW 2 */ + 7319 "00001111" // /* MW 1 */ +.delay_slot + 7320 "00011000" // VST x1, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "01110011" // /* MW 3 */ + 7322 "11111000" // /* MW 2 */ + 7323 "00001111" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 +.delay_slot + 7324 "00111010" // ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7325 "01111001" // /* MW 9 */ + 7326 "11110000" // /* MW 8 */ + 7327 "01100000" // /* MW 7 */ + 7328 "01001010" // /* MW 6 */ + 7329 "01110000" // /* MW 5 */ + 7330 "00000000" // /* MW 4 */ + 7331 "10110000" // /* MW 3 */ + 7332 "10010011" // /* MW 2 */ + 7333 "11111111" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7334 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7335 "01100111" // /* MW 3 */ + 7336 "11001110" // /* MW 2 */ + 7337 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7338 "10000100" // JNZ r7, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7339 "00000001" // /* MW 5 */ + 7340 "01000000" // /* MW 4 */ + 7341 "10000000" // /* MW 3 */ + 7342 "00001110" // /* MW 2 */ + 7343 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7345 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7353 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7354 "10011000" // EQ r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7355 "01100111" // /* MW 3 */ + 7356 "01001110" // /* MW 2 */ + 7357 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7358 "10000100" // JNZ r7, #7392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7392 delay_slots=5 */ + 7359 "00000001" // /* MW 5 */ + 7360 "01000000" // /* MW 4 */ + 7361 "01110000" // /* MW 3 */ + 7362 "00001110" // /* MW 2 */ + 7363 "00111000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.delay_slot + 7364 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7365 "01000001" // /* MW 3 */ + 7366 "00001010" // /* MW 2 */ + 7367 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7375 "00000000" // /* MW 1 */ + 7376 "10000100" // J #6576 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6576 delay_slots=5 */ + 7377 "00000000" // /* MW 5 */ + 7378 "00000000" // /* MW 4 */ + 7379 "11011000" // /* MW 3 */ + 7380 "00001100" // /* MW 2 */ + 7381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7391 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544 +.src_ref 5 "blend.hpp" 170 36 + 7392 "10111010" // MOVA r17, #257; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 7393 "00100000" // /* MW 9 */ + 7394 "00000000" // /* MW 8 */ + 7395 "00000000" // /* MW 7 */ + 7396 "10111000" // /* MW 6 */ + 7397 "00000010" // /* MW 5 */ + 7398 "00000000" // /* MW 4 */ + 7399 "00000000" // /* MW 3 */ + 7400 "00110001" // /* MW 2 */ + 7401 "00100000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7402 "01100100" // MOVX r21, #0; MOV m4, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7403 "01000001" // /* MW 5 */ + 7404 "00000000" // /* MW 4 */ + 7405 "00101000" // /* MW 3 */ + 7406 "01000000" // /* MW 2 */ + 7407 "00000101" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 +.delay_slot + 7408 "00011000" // MOVX r20, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00000001" // /* MW 3 */ + 7410 "00101000" // /* MW 2 */ + 7411 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7416 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7417 "00011100" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00000000" // /* MW 5 */ + 7420 "00000100" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576 + 7424 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 7425 "00000000" // /* MW 5 */ + 7426 "00000000" // /* MW 4 */ + 7427 "10101000" // /* MW 3 */ + 7428 "00001100" // /* MW 2 */ + 7429 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7430 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7431 "11111110" // /* MW 5 */ + 7432 "10111111" // /* MW 4 */ + 7433 "11111000" // /* MW 3 */ + 7434 "00000000" // /* MW 2 */ + 7435 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7436 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7437 "00100000" // /* MW 3 */ + 7438 "00000000" // /* MW 2 */ + 7439 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7444 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7445 "10000001" // /* MW 11 */ + 7446 "10101101" // /* MW 10 */ + 7447 "00000000" // /* MW 9 */ + 7448 "00000000" // /* MW 8 */ + 7449 "00000000" // /* MW 7 */ + 7450 "00000000" // /* MW 6 */ + 7451 "00100000" // /* MW 5 */ + 7452 "00000000" // /* MW 4 */ + 7453 "11110000" // /* MW 3 */ + 7454 "00101100" // /* MW 2 */ + 7455 "00000000" // /* MW 1 */ +.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 7456 "10111010" // VLDA x0, [sp, #-256]; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 7457 "00100000" // /* MW 9 */ + 7458 "00000000" // /* MW 8 */ + 7459 "00000000" // /* MW 7 */ + 7460 "10111000" // /* MW 6 */ + 7461 "00000010" // /* MW 5 */ + 7462 "00000000" // /* MW 4 */ + 7463 "01110000" // /* MW 3 */ + 7464 "00000111" // /* MW 2 */ + 7465 "11111110" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "blend.hpp" 163 48 +.delay_slot + 7466 "10111010" // VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7467 "01011000" // /* MW 9 */ + 7468 "00000000" // /* MW 8 */ + 7469 "10001000" // /* MW 7 */ + 7470 "10001010" // /* MW 6 */ + 7471 "00000000" // /* MW 5 */ + 7472 "00000000" // /* MW 4 */ + 7473 "01110000" // /* MW 3 */ + 7474 "10100111" // /* MW 2 */ + 7475 "11111110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1280 49 +.src_ref 5 "vector.hpp" 1287 41 +.src_ref 5 "vector.hpp" 1288 16 +.src_ref 5 "vector.hpp" 1292 26 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 226 22 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7476 "10111010" // LDA p1, [sp, #-4]; MOVXM r16, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7477 "10010000" // /* MW 9 */ + 7478 "11111111" // /* MW 8 */ + 7479 "00001111" // /* MW 7 */ + 7480 "00111110" // /* MW 6 */ + 7481 "00000000" // /* MW 5 */ + 7482 "00000000" // /* MW 4 */ + 7483 "00100000" // /* MW 3 */ + 7484 "10010011" // /* MW 2 */ + 7485 "11111111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7486 "01100100" // MOVX r21, #0; MOV m4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7487 "10000001" // /* MW 5 */ + 7488 "00000000" // /* MW 4 */ + 7489 "00101000" // /* MW 3 */ + 7490 "01000000" // /* MW 2 */ + 7491 "00000101" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7492 "00011000" // MOVX r17, #257 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7493 "00000101" // /* MW 3 */ + 7494 "00100010" // /* MW 2 */ + 7495 "00010001" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7496 "00100010" // VLDA x1, [sp, #-128]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7497 "00011100" // /* MW 7 */ + 7498 "00000000" // /* MW 6 */ + 7499 "00000000" // /* MW 5 */ + 7500 "00000100" // /* MW 4 */ + 7501 "01110000" // /* MW 3 */ + 7502 "00001111" // /* MW 2 */ + 7503 "11111111" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656 + 7504 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 7505 "00000000" // /* MW 5 */ + 7506 "00000000" // /* MW 4 */ + 7507 "10101000" // /* MW 3 */ + 7508 "00001100" // /* MW 2 */ + 7509 "00000000" // /* MW 1 */ +.delay_slot + 7510 "11111000" // MOV dc4, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7511 "11100000" // /* MW 3 */ + 7512 "11000001" // /* MW 2 */ + 7513 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7514 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7515 "11111110" // /* MW 5 */ + 7516 "10111111" // /* MW 4 */ + 7517 "11111000" // /* MW 3 */ + 7518 "00000000" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7520 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7521 "00100000" // /* MW 3 */ + 7522 "00000000" // /* MW 2 */ + 7523 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0 + 7527 "00000000" // /* MW 1 */ +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_reduce_mean_c8 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 8 "superkernels.cpp" 472 +.src_ref 8 "superkernels.cpp" 472 first +.function_start + 7536 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7537 "00000001" // /* MW 5 */ + 7538 "00000000" // /* MW 4 */ + 7539 "00000000" // /* MW 3 */ + 7540 "00010000" // /* MW 2 */ + 7541 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7542 "00111010" // ST p7, [sp, #-20]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7543 "00010001" // /* MW 9 */ + 7544 "01100000" // /* MW 8 */ + 7545 "10110010" // /* MW 7 */ + 7546 "11110011" // /* MW 6 */ + 7547 "00000001" // /* MW 5 */ + 7548 "00000000" // /* MW 4 */ + 7549 "10110000" // /* MW 3 */ + 7550 "11110011" // /* MW 2 */ + 7551 "11111101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7552 "10111010" // LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7553 "01110010" // /* MW 9 */ + 7554 "01110000" // /* MW 8 */ + 7555 "00001101" // /* MW 7 */ + 7556 "10000010" // /* MW 6 */ + 7557 "00011101" // /* MW 5 */ + 7558 "11100111" // /* MW 4 */ + 7559 "11010111" // /* MW 3 */ + 7560 "11000010" // /* MW 2 */ + 7561 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 22 first +.src_ref 8 "superkernels.cpp" 569 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 "00111010" // ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7563 "01111001" // /* MW 9 */ + 7564 "11110000" // /* MW 8 */ + 7565 "01101000" // /* MW 7 */ + 7566 "10000001" // /* MW 6 */ + 7567 "00000100" // /* MW 5 */ + 7568 "00100001" // /* MW 4 */ + 7569 "10110000" // /* MW 3 */ + 7570 "00101110" // /* MW 2 */ + 7571 "11111111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 30 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7572 "01011100" // ST r15, [sp, #-16]; ADD r17, r16, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7573 "11110110" // /* MW 5 */ + 7574 "01000111" // /* MW 4 */ + 7575 "10111000" // /* MW 3 */ + 7576 "00111110" // /* MW 2 */ + 7577 "11111110" // /* MW 1 */ + 7578 "10011000" // ST r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7579 "10110101" // /* MW 3 */ + 7580 "11101001" // /* MW 2 */ + 7581 "00001111" // /* MW 1 */ + 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7583 "00000000" // /* MW 1 */ + 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7585 "00000000" // /* MW 1 */ + 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7587 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 first +.src_ref 8 "superkernels.cpp" 477 16 first + 7588 "10000100" // JNZ r16, #8160 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8160 delay_slots=5 */ + 7589 "00000001" // /* MW 5 */ + 7590 "01000000" // /* MW 4 */ + 7591 "11110000" // /* MW 3 */ + 7592 "00001111" // /* MW 2 */ + 7593 "10000000" // /* MW 1 */ +.delay_slot + 7594 "10011000" // ST r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7595 "10010101" // /* MW 3 */ + 7596 "11111101" // /* MW 2 */ + 7597 "00001111" // /* MW 1 */ +.delay_slot + 7598 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7599 "11010101" // /* MW 3 */ + 7600 "11110101" // /* MW 2 */ + 7601 "00001111" // /* MW 1 */ +.delay_slot + 7602 "10011000" // ST p0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7603 "00011101" // /* MW 3 */ + 7604 "11100000" // /* MW 2 */ + 7605 "00001111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 11 +.delay_slot + 7606 "01000100" // MOVXM p6, #509128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7607 "10010000" // /* MW 5 */ + 7608 "11001001" // /* MW 4 */ + 7609 "11001100" // /* MW 3 */ + 7610 "00000111" // /* MW 2 */ + 7611 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 11 first +.delay_slot + 7612 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7613 "00110001" // /* MW 3 */ + 7614 "00000110" // /* MW 2 */ + 7615 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 5 "tile.hpp" 74 8 +.src_ref 5 "tile.hpp" 74 8 + 7616 "01110110" // MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7617 "00010000" // /* MW 11 */ + 7618 "01110110" // /* MW 10 */ + 7619 "00110010" // /* MW 9 */ + 7620 "11110001" // /* MW 8 */ + 7621 "00000001" // /* MW 7 */ + 7622 "00000000" // /* MW 6 */ + 7623 "10001011" // /* MW 5 */ + 7624 "10001000" // /* MW 4 */ + 7625 "00000111" // /* MW 3 */ + 7626 "00110001" // /* MW 2 */ + 7627 "00000000" // /* MW 1 */ +.src_ref 5 "tile.hpp" 74 8 first +.src_ref 5 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7628 "00111010" // ST r17, [p2]; MOVXM p2, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7629 "00010001" // /* MW 9 */ + 7630 "01111000" // /* MW 8 */ + 7631 "00110010" // /* MW 7 */ + 7632 "11110001" // /* MW 6 */ + 7633 "00000001" // /* MW 5 */ + 7634 "00000000" // /* MW 4 */ + 7635 "00110000" // /* MW 3 */ + 7636 "11000110" // /* MW 2 */ + 7637 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 44 +.src_ref 5 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7638 "11010100" // ST.s8 r16, [p2]; MOV p6, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7639 "10000001" // /* MW 5 */ + 7640 "11000101" // /* MW 4 */ + 7641 "11101100" // /* MW 3 */ + 7642 "11000000" // /* MW 2 */ + 7643 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 480 4 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7644 "00000100" // JL #2576 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2576 delay_slots=5 */ + 7645 "00000001" // /* MW 5 */ + 7646 "00000000" // /* MW 4 */ + 7647 "00001000" // /* MW 3 */ + 7648 "00000101" // /* MW 2 */ + 7649 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 480 4 +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7650 "01000100" // MOVXM p0, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7651 "10000000" // /* MW 5 */ + 7652 "11001000" // /* MW 4 */ + 7653 "11000000" // /* MW 3 */ + 7654 "00000111" // /* MW 2 */ + 7655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7659 "00000000" // /* MW 1 */ +.src_ref 5 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7660 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7661 "00110001" // /* MW 3 */ + 7662 "00100000" // /* MW 2 */ + 7663 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7665 "00000000" // /* MW 15 */ + 7666 "00000000" // /* MW 14 */ + 7667 "01111000" // /* MW 13 */ + 7668 "10100101" // /* MW 12 */ + 7669 "00000001" // /* MW 11 */ + 7670 "00000000" // /* MW 10 */ + 7671 "00000000" // /* MW 9 */ + 7672 "00000000" // /* MW 8 */ + 7673 "01011011" // /* MW 7 */ + 7674 "00000001" // /* MW 6 */ + 7675 "00100000" // /* MW 5 */ + 7676 "00000000" // /* MW 4 */ + 7677 "11110000" // /* MW 3 */ + 7678 "00101100" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 51 +.src_ref 8 "superkernels.cpp" 487 47 +.return_address + 7680 "10111010" // MOVA r17, #0; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7681 "00010000" // /* MW 9 */ + 7682 "00100000" // /* MW 8 */ + 7683 "00110010" // /* MW 7 */ + 7684 "11110001" // /* MW 6 */ + 7685 "00000001" // /* MW 5 */ + 7686 "00000000" // /* MW 4 */ + 7687 "00000000" // /* MW 3 */ + 7688 "00010001" // /* MW 2 */ + 7689 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 18 +.src_ref 8 "superkernels.cpp" 481 51 first + 7690 "10111010" // LDA r14, [p2]; MOVXM p2, #509128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7691 "00010000" // /* MW 9 */ + 7692 "01100100" // /* MW 8 */ + 7693 "00110010" // /* MW 7 */ + 7694 "11110001" // /* MW 6 */ + 7695 "00000001" // /* MW 5 */ + 7696 "00000000" // /* MW 4 */ + 7697 "11010000" // /* MW 3 */ + 7698 "10111010" // /* MW 2 */ + 7699 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 18 +.src_ref 8 "superkernels.cpp" 481 85 + 7700 "10111010" // LDA r18, [p2]; MOVXM p2, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7701 "00010000" // /* MW 9 */ + 7702 "00100010" // /* MW 8 */ + 7703 "00110010" // /* MW 7 */ + 7704 "11110001" // /* MW 6 */ + 7705 "00000001" // /* MW 5 */ + 7706 "00000000" // /* MW 4 */ + 7707 "11010000" // /* MW 3 */ + 7708 "11001010" // /* MW 2 */ + 7709 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 85 +.src_ref 8 "superkernels.cpp" 482 16 + 7710 "10111010" // LDA r13, [p2], #4; MOVXM p3, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7711 "00010000" // /* MW 9 */ + 7712 "01101000" // /* MW 8 */ + 7713 "10110010" // /* MW 7 */ + 7714 "11110001" // /* MW 6 */ + 7715 "00000001" // /* MW 5 */ + 7716 "00000000" // /* MW 4 */ + 7717 "11010000" // /* MW 3 */ + 7718 "10110110" // /* MW 2 */ + 7719 "01000011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 16 +.src_ref 8 "superkernels.cpp" 482 40 first + 7720 "10111010" // LDA el0, [p2, #4]; MOVXM p1, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7721 "00010000" // /* MW 9 */ + 7722 "01100110" // /* MW 8 */ + 7723 "10110010" // /* MW 7 */ + 7724 "11110000" // /* MW 6 */ + 7725 "00000001" // /* MW 5 */ + 7726 "00000000" // /* MW 4 */ + 7727 "11010000" // /* MW 3 */ + 7728 "10000101" // /* MW 2 */ + 7729 "01000010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 120 first +.src_ref 8 "superkernels.cpp" 483 44 + 7730 "11010100" // LDA r15, [p2]; MOV r16, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7731 "10000001" // /* MW 5 */ + 7732 "00111001" // /* MW 4 */ + 7733 "11011000" // /* MW 3 */ + 7734 "10111110" // /* MW 2 */ + 7735 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 44 + 7736 "00011000" // ADD.NC p2, r16, #40 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7737 "00010100" // /* MW 3 */ + 7738 "01101000" // /* MW 2 */ + 7739 "00011010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7740 "01000100" // MOVXM p6, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7741 "00000000" // /* MW 5 */ + 7742 "11001010" // /* MW 4 */ + 7743 "11001100" // /* MW 3 */ + 7744 "00000111" // /* MW 2 */ + 7745 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 13 + 7746 "01000100" // MOVXM p0, #509160 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7747 "11010000" // /* MW 5 */ + 7748 "11001001" // /* MW 4 */ + 7749 "11000000" // /* MW 3 */ + 7750 "00000111" // /* MW 2 */ + 7751 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 27 + 7752 "10011000" // MUL r18, r14, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7753 "00101111" // /* MW 3 */ + 7754 "10100101" // /* MW 2 */ + 7755 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7756 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7757 "00000000" // /* MW 5 */ + 7758 "00100000" // /* MW 4 */ + 7759 "00001000" // /* MW 3 */ + 7760 "00000000" // /* MW 2 */ + 7761 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 61 +.src_ref 8 "superkernels.cpp" 482 16 first + 7762 "01011100" // ST el0, [p3]; MUL r18, r13, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7763 "01011111" // /* MW 5 */ + 7764 "11001010" // /* MW 4 */ + 7765 "00110110" // /* MW 3 */ + 7766 "10000101" // /* MW 2 */ + 7767 "01100000" // /* MW 1 */ + 7768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7769 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 96 first + 7770 "10011000" // MUL r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7771 "00101111" // /* MW 3 */ + 7772 "11100101" // /* MW 2 */ + 7773 "00010011" // /* MW 1 */ + 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7775 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 16 + 7776 "10011000" // ST r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7777 "01010001" // /* MW 3 */ + 7778 "00000110" // /* MW 2 */ + 7779 "00001001" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 15 first + 7780 "10011000" // LDA el0, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7781 "00101110" // /* MW 3 */ + 7782 "01001100" // /* MW 2 */ + 7783 "00000010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 first + 7784 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7785 "00110001" // /* MW 3 */ + 7786 "00011110" // /* MW 2 */ + 7787 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7788 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7789 "00110001" // /* MW 3 */ + 7790 "00011110" // /* MW 2 */ + 7791 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7792 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7793 "00110001" // /* MW 3 */ + 7794 "00011110" // /* MW 2 */ + 7795 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7796 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7797 "00110001" // /* MW 3 */ + 7798 "00011110" // /* MW 2 */ + 7799 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7800 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7801 "00110001" // /* MW 3 */ + 7802 "00011110" // /* MW 2 */ + 7803 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7804 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7805 "00110001" // /* MW 3 */ + 7806 "00011110" // /* MW 2 */ + 7807 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 13 first + 7808 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7809 "00101001" // /* MW 3 */ + 7810 "00000100" // /* MW 2 */ + 7811 "00001000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 first + 7812 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7813 "00110001" // /* MW 3 */ + 7814 "00011110" // /* MW 2 */ + 7815 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7816 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7817 "00110001" // /* MW 3 */ + 7818 "00011110" // /* MW 2 */ + 7819 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7820 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7821 "00110001" // /* MW 3 */ + 7822 "00011110" // /* MW 2 */ + 7823 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 first + 7824 "10011000" // LDA r1, [p2], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7825 "00110110" // /* MW 3 */ + 7826 "11011100" // /* MW 2 */ + 7827 "00000010" // /* MW 1 */ + 7828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7829 "00000000" // /* MW 1 */ + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ + 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7837 "00000000" // /* MW 1 */ + 7838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7839 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7840 "10011000" // GEU r17, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7841 "00001011" // /* MW 3 */ + 7842 "01100011" // /* MW 2 */ + 7843 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7844 "10000100" // JNZ r17, #7920 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7920 delay_slots=5 */ + 7845 "00000001" // /* MW 5 */ + 7846 "01000000" // /* MW 4 */ + 7847 "01111000" // /* MW 3 */ + 7848 "00001111" // /* MW 2 */ + 7849 "10001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 +.delay_slot + 7850 "11111000" // MOV r12, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7851 "11000000" // /* MW 3 */ + 7852 "00011110" // /* MW 2 */ + 7853 "00011011" // /* MW 1 */ +.delay_slot + 7854 "10011000" // ST p2, [sp, #-40] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7855 "00011101" // /* MW 3 */ + 7856 "11011001" // /* MW 2 */ + 7857 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7863 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.no_stack_arguments + 7864 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */ + 7865 "00000001" // /* MW 5 */ + 7866 "00000000" // /* MW 4 */ + 7867 "01010000" // /* MW 3 */ + 7868 "00010101" // /* MW 2 */ + 7869 "00000000" // /* MW 1 */ +.delay_slot + 7870 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7871 "10010101" // /* MW 3 */ + 7872 "11011101" // /* MW 2 */ + 7873 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7880 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7881 "00011100" // /* MW 7 */ + 7882 "00000000" // /* MW 6 */ + 7883 "00000000" // /* MW 5 */ + 7884 "00000100" // /* MW 4 */ + 7885 "11110000" // /* MW 3 */ + 7886 "00101100" // /* MW 2 */ + 7887 "00000000" // /* MW 1 */ +.return_address + 7888 "10000100" // J #7984 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7984 delay_slots=5 */ + 7889 "00000000" // /* MW 5 */ + 7890 "00000000" // /* MW 4 */ + 7891 "10011000" // /* MW 3 */ + 7892 "00001111" // /* MW 2 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7894 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7895 "11100000" // /* MW 5 */ + 7896 "11001001" // /* MW 4 */ + 7897 "11001110" // /* MW 3 */ + 7898 "00000111" // /* MW 2 */ + 7899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7903 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7905 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7906 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7907 "00011100" // /* MW 13 */ + 7908 "00000000" // /* MW 12 */ + 7909 "00000000" // /* MW 11 */ + 7910 "01010111" // /* MW 10 */ + 7911 "00011010" // /* MW 9 */ + 7912 "01000000" // /* MW 8 */ + 7913 "00000000" // /* MW 7 */ + 7914 "00000000" // /* MW 6 */ + 7915 "10110110" // /* MW 5 */ + 7916 "00000010" // /* MW 4 */ + 7917 "11110000" // /* MW 3 */ + 7918 "00101100" // /* MW 2 */ + 7919 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384 +.src_ref 8 "superkernels.cpp" 491 40 +.no_stack_arguments + 7920 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */ + 7921 "00000001" // /* MW 5 */ + 7922 "00000000" // /* MW 4 */ + 7923 "01010000" // /* MW 3 */ + 7924 "00010101" // /* MW 2 */ + 7925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7932 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7933 "01100111" // /* MW 3 */ + 7934 "00000001" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7936 "11100001" // NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7937 "00000000" // /* MW 15 */ + 7938 "00000000" // /* MW 14 */ + 7939 "01111000" // /* MW 13 */ + 7940 "10100101" // /* MW 12 */ + 7941 "00000001" // /* MW 11 */ + 7942 "00001100" // /* MW 10 */ + 7943 "00011000" // /* MW 9 */ + 7944 "00000010" // /* MW 8 */ + 7945 "01011011" // /* MW 7 */ + 7946 "00000001" // /* MW 6 */ + 7947 "00100000" // /* MW 5 */ + 7948 "00000000" // /* MW 4 */ + 7949 "11110000" // /* MW 3 */ + 7950 "00101100" // /* MW 2 */ + 7951 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.return_address +.no_stack_arguments + 7952 "00000100" // JL #12416 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12416 delay_slots=5 */ + 7953 "00000001" // /* MW 5 */ + 7954 "00000000" // /* MW 4 */ + 7955 "01000000" // /* MW 3 */ + 7956 "00011000" // /* MW 2 */ + 7957 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7958 "11111000" // MOV r1, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7959 "00100000" // /* MW 3 */ + 7960 "01010000" // /* MW 2 */ + 7961 "00011000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7962 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7963 "11100000" // /* MW 5 */ + 7964 "11001001" // /* MW 4 */ + 7965 "11001110" // /* MW 3 */ + 7966 "00000111" // /* MW 2 */ + 7967 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7968 "01000100" // MOVXM r2, #1325400064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7969 "00000000" // /* MW 5 */ + 7970 "00100000" // /* MW 4 */ + 7971 "00000001" // /* MW 3 */ + 7972 "00000000" // /* MW 2 */ + 7973 "01001111" // /* MW 1 */ +.delay_slot + 7974 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "10010101" // /* MW 3 */ + 7976 "11011101" // /* MW 2 */ + 7977 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7978 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7979 "00100000" // /* MW 5 */ + 7980 "00000000" // /* MW 4 */ + 7981 "11110000" // /* MW 3 */ + 7982 "00101100" // /* MW 2 */ + 7983 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 491 40 +.return_address + 7984 "10111010" // LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7985 "10111000" // /* MW 9 */ + 7986 "00001000" // /* MW 8 */ + 7987 "00000000" // /* MW 7 */ + 7988 "00000000" // /* MW 6 */ + 7989 "11010010" // /* MW 5 */ + 7990 "00000010" // /* MW 4 */ + 7991 "01010000" // /* MW 3 */ + 7992 "11000000" // /* MW 2 */ + 7993 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 492 38 +.src_ref 8 "superkernels.cpp" 492 38 + 7994 "10111010" // MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7995 "01111000" // /* MW 9 */ + 7996 "01001001" // /* MW 8 */ + 7997 "00000000" // /* MW 7 */ + 7998 "00001000" // /* MW 6 */ + 7999 "10000000" // /* MW 5 */ + 8000 "00000001" // /* MW 4 */ + 8001 "10000000" // /* MW 3 */ + 8002 "01000000" // /* MW 2 */ + 8003 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 13 +.src_ref 8 "superkernels.cpp" 498 15 + 8004 "10111010" // LDA p2, [sp, #-40]; MOVXM p3, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8005 "00010000" // /* MW 9 */ + 8006 "01101010" // /* MW 8 */ + 8007 "10110010" // /* MW 7 */ + 8008 "11110001" // /* MW 6 */ + 8009 "00000001" // /* MW 5 */ + 8010 "00000000" // /* MW 4 */ + 8011 "00100000" // /* MW 3 */ + 8012 "00100011" // /* MW 2 */ + 8013 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 14 + 8014 "01000100" // MOVXM p1, #509144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8015 "10110000" // /* MW 5 */ + 8016 "11001001" // /* MW 4 */ + 8017 "11000010" // /* MW 3 */ + 8018 "00000111" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 7 +.src_ref 8 "superkernels.cpp" 508 7 +.src_ref 8 "superkernels.cpp" 511 7 + 8020 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8021 "10100000" // /* MW 5 */ + 8022 "11001001" // /* MW 4 */ + 8023 "11001110" // /* MW 3 */ + 8024 "00000111" // /* MW 2 */ + 8025 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8027 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 38 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 8028 "00011000" // ST.s16 r16, [p6], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "00010111" // /* MW 3 */ + 8030 "00011110" // /* MW 2 */ + 8031 "00000110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8032 "00011000" // MOVX crRnd, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "10000000" // /* MW 3 */ + 8034 "00111010" // /* MW 2 */ + 8035 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8036 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "00010110" // /* MW 3 */ + 8038 "01000000" // /* MW 2 */ + 8039 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8041 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8042 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8043 "00000001" // /* MW 3 */ + 8044 "00000001" // /* MW 2 */ + 8045 "00011100" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ + 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8049 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 492 38 first + 8050 "00011000" // ST.s8 r24, [p6], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8051 "00000111" // /* MW 3 */ + 8052 "00001011" // /* MW 2 */ + 8053 "00000110" // /* MW 1 */ + 8054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8055 "00000000" // /* MW 1 */ + 8056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8057 "00000000" // /* MW 1 */ + 8058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8059 "00000000" // /* MW 1 */ + 8060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8061 "00000000" // /* MW 1 */ + 8062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8063 "00000000" // /* MW 1 */ + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 494 25 first + 8066 "10011000" // ST r14, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8067 "11010001" // /* MW 3 */ + 8068 "00011101" // /* MW 2 */ + 8069 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 495 24 first + 8070 "10011000" // ST r15, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8071 "11110001" // /* MW 3 */ + 8072 "00000101" // /* MW 2 */ + 8073 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 496 24 first + 8074 "10011000" // ST r13, [p6, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8075 "10110001" // /* MW 3 */ + 8076 "00010101" // /* MW 2 */ + 8077 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 15 first + 8078 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8079 "00101110" // /* MW 3 */ + 8080 "00011100" // /* MW 2 */ + 8081 "00000010" // /* MW 1 */ + 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8083 "00000000" // /* MW 1 */ + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ + 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8087 "00000000" // /* MW 1 */ + 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8089 "00000000" // /* MW 1 */ + 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8091 "00000000" // /* MW 1 */ + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 13 + 8094 "10011000" // ST el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "00101001" // /* MW 3 */ + 8096 "00000100" // /* MW 2 */ + 8097 "00001011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 16 first + 8098 "10011000" // LDA el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8099 "00101110" // /* MW 3 */ + 8100 "00000100" // /* MW 2 */ + 8101 "00000010" // /* MW 1 */ + 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8103 "00000000" // /* MW 1 */ + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8105 "00000000" // /* MW 1 */ + 8106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8107 "00000000" // /* MW 1 */ + 8108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8109 "00000000" // /* MW 1 */ + 8110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8111 "00000000" // /* MW 1 */ + 8112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8113 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 14 + 8114 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8115 "00101001" // /* MW 3 */ + 8116 "00000100" // /* MW 2 */ + 8117 "00001001" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 15 first + 8118 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8119 "00101110" // /* MW 3 */ + 8120 "00010100" // /* MW 2 */ + 8121 "00000010" // /* MW 1 */ + 8122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8123 "00000000" // /* MW 1 */ + 8124 "10000100" // J #8176 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8176 delay_slots=5 */ + 8125 "00000000" // /* MW 5 */ + 8126 "00000000" // /* MW 4 */ + 8127 "11111000" // /* MW 3 */ + 8128 "00001111" // /* MW 2 */ + 8129 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 13 +.delay_slot + 8130 "01000100" // MOVXM p0, #509148 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8131 "10111000" // /* MW 5 */ + 8132 "11001001" // /* MW 4 */ + 8133 "11000000" // /* MW 3 */ + 8134 "00000111" // /* MW 2 */ + 8135 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8140 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8141 "01100111" // /* MW 3 */ + 8142 "00000001" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 13 +.delay_slot + 8144 "11100001" // NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "10000000" // /* MW 8 */ + 8153 "00101001" // /* MW 7 */ + 8154 "00000100" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624 +.src_ref 8 "superkernels.cpp" 505 7 +.src_ref 8 "superkernels.cpp" 508 7 +.src_ref 8 "superkernels.cpp" 511 7 + 8160 "00111010" // ST p2, [sp, #-36]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8161 "00010001" // /* MW 9 */ + 8162 "01101000" // /* MW 8 */ + 8163 "10110010" // /* MW 7 */ + 8164 "11110011" // /* MW 6 */ + 8165 "00000001" // /* MW 5 */ + 8166 "00000000" // /* MW 4 */ + 8167 "10110000" // /* MW 3 */ + 8168 "10100011" // /* MW 2 */ + 8169 "11111011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 8170 "11010100" // NOPA; MOV r12, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8171 "10000001" // /* MW 5 */ + 8172 "00101001" // /* MW 4 */ + 8173 "11110110" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640 +.src_ref 8 "superkernels.cpp" 505 7 first +.src_ref 8 "superkernels.cpp" 505 19 + 8176 "00101100" // LDA r16, [p7]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8177 "00001010" // /* MW 5 */ + 8178 "01000100" // /* MW 4 */ + 8179 "11010000" // /* MW 3 */ + 8180 "11000010" // /* MW 2 */ + 8181 "11100000" // /* MW 1 */ + 8182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8183 "00000000" // /* MW 1 */ + 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8185 "00000000" // /* MW 1 */ + 8186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8187 "00000000" // /* MW 1 */ + 8188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8189 "00000000" // /* MW 1 */ + 8190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8191 "00000000" // /* MW 1 */ + 8192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8193 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 19 + 8194 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8195 "00001000" // /* MW 3 */ + 8196 "01100011" // /* MW 2 */ + 8197 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 25 + 8198 "10000100" // JNZ r17, #8368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8368 delay_slots=5 */ + 8199 "00000001" // /* MW 5 */ + 8200 "01000000" // /* MW 4 */ + 8201 "01011000" // /* MW 3 */ + 8202 "00010000" // /* MW 2 */ + 8203 "10001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first +.delay_slot + 8204 "00011000" // ADD.NC p6, r12, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8205 "00000110" // /* MW 3 */ + 8206 "01100110" // /* MW 2 */ + 8207 "00011110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8213 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8215 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 29 + 8216 "01000100" // MOVXM p2, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8217 "10001000" // /* MW 5 */ + 8218 "11001001" // /* MW 4 */ + 8219 "11000100" // /* MW 3 */ + 8220 "00000111" // /* MW 2 */ + 8221 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 29 first +.src_ref 8 "superkernels.cpp" 505 65 + 8222 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8223 "00010000" // /* MW 9 */ + 8224 "00110000" // /* MW 8 */ + 8225 "00110010" // /* MW 7 */ + 8226 "11110001" // /* MW 6 */ + 8227 "00000001" // /* MW 5 */ + 8228 "00000000" // /* MW 4 */ + 8229 "11010000" // /* MW 3 */ + 8230 "11000010" // /* MW 2 */ + 8231 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 65 + 8232 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8233 "00111010" // /* MW 3 */ + 8234 "00000100" // /* MW 2 */ + 8235 "00000010" // /* MW 1 */ + 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8237 "00000000" // /* MW 1 */ + 8238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8239 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.no_stack_arguments + 8240 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8241 "00000001" // /* MW 5 */ + 8242 "00000000" // /* MW 4 */ + 8243 "11111000" // /* MW 3 */ + 8244 "00010011" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8246 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8247 "00000001" // /* MW 3 */ + 8248 "00011010" // /* MW 2 */ + 8249 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8251 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8252 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8253 "11011010" // /* MW 3 */ + 8254 "00110110" // /* MW 2 */ + 8255 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8256 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8257 "01000001" // /* MW 5 */ + 8258 "10111011" // /* MW 4 */ + 8259 "00110111" // /* MW 3 */ + 8260 "01100000" // /* MW 2 */ + 8261 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8262 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8263 "00010010" // /* MW 9 */ + 8264 "00000001" // /* MW 8 */ + 8265 "00000100" // /* MW 7 */ + 8266 "00000000" // /* MW 6 */ + 8267 "01011011" // /* MW 5 */ + 8268 "00000001" // /* MW 4 */ + 8269 "11110000" // /* MW 3 */ + 8270 "00101100" // /* MW 2 */ + 8271 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.src_ref 8 "superkernels.cpp" 505 41 +.return_address + 8272 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8273 "01000001" // /* MW 5 */ + 8274 "10101111" // /* MW 4 */ + 8275 "00111101" // /* MW 3 */ + 8276 "00000110" // /* MW 2 */ + 8277 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 + 8278 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8279 "00000010" // /* MW 3 */ + 8280 "11100001" // /* MW 2 */ + 8281 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 6 +.src_ref 8 "superkernels.cpp" 505 76 + 8282 "10000100" // JNZ r16, #8352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8352 delay_slots=5 */ + 8283 "00000001" // /* MW 5 */ + 8284 "01000000" // /* MW 4 */ + 8285 "01010000" // /* MW 3 */ + 8286 "00010000" // /* MW 2 */ + 8287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 395 8 + 8298 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8299 "10000001" // /* MW 5 */ + 8300 "11011001" // /* MW 4 */ + 8301 "10100100" // /* MW 3 */ + 8302 "00011111" // /* MW 2 */ + 8303 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 8304 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8305 "01110110" // /* MW 3 */ + 8306 "11111111" // /* MW 2 */ + 8307 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8308 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8309 "00110110" // /* MW 3 */ + 8310 "11111110" // /* MW 2 */ + 8311 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8312 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8313 "01010110" // /* MW 3 */ + 8314 "11111110" // /* MW 2 */ + 8315 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8317 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 8318 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8319 "00110110" // /* MW 3 */ + 8320 "01000110" // /* MW 2 */ + 8321 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8325 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8327 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8329 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8330 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8331 "00010010" // /* MW 3 */ + 8332 "10100011" // /* MW 2 */ + 8333 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8334 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8335 "00110001" // /* MW 3 */ + 8336 "00000110" // /* MW 2 */ + 8337 "00001010" // /* MW 1 */ + 8338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8339 "00000000" // /* MW 1 */ + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ + 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8345 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8346 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8347 "00010000" // /* MW 5 */ + 8348 "10100110" // /* MW 4 */ + 8349 "11111000" // /* MW 3 */ + 8350 "00101100" // /* MW 2 */ + 8351 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816 + 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8353 "00000000" // /* MW 1 */ + 8354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8355 "00000000" // /* MW 1 */ + 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8357 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 7 first + 8358 "10111010" // LDA r16, [p7]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8359 "01111110" // /* MW 9 */ + 8360 "10100101" // /* MW 8 */ + 8361 "00000001" // /* MW 7 */ + 8362 "00000000" // /* MW 6 */ + 8363 "00010000" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11010000" // /* MW 3 */ + 8366 "11000010" // /* MW 2 */ + 8367 "11100000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832 +.src_ref 8 "superkernels.cpp" 508 19 +.src_ref 8 "superkernels.cpp" 522 6 +.src_ref 8 "superkernels.cpp" 558 19 + 8368 "00011000" // MOVX r14, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8369 "00001001" // /* MW 3 */ + 8370 "00011100" // /* MW 2 */ + 8371 "00010000" // /* MW 1 */ + 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8373 "00000000" // /* MW 1 */ + 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8375 "00000000" // /* MW 1 */ + 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8377 "00000000" // /* MW 1 */ + 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8379 "00000000" // /* MW 1 */ + 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8381 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 19 + 8382 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8383 "00001000" // /* MW 3 */ + 8384 "10100001" // /* MW 2 */ + 8385 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 25 + 8386 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8387 "00000001" // /* MW 5 */ + 8388 "01000000" // /* MW 4 */ + 8389 "10110000" // /* MW 3 */ + 8390 "00010000" // /* MW 2 */ + 8391 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8401 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 29 + 8402 "01000100" // MOVXM p2, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8403 "11000000" // /* MW 5 */ + 8404 "11001001" // /* MW 4 */ + 8405 "11000100" // /* MW 3 */ + 8406 "00000111" // /* MW 2 */ + 8407 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 29 +.src_ref 8 "superkernels.cpp" 508 65 + 8408 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8409 "00010000" // /* MW 9 */ + 8410 "00110000" // /* MW 8 */ + 8411 "00110010" // /* MW 7 */ + 8412 "11110001" // /* MW 6 */ + 8413 "00000001" // /* MW 5 */ + 8414 "00000000" // /* MW 4 */ + 8415 "11010000" // /* MW 3 */ + 8416 "11000010" // /* MW 2 */ + 8417 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 65 + 8418 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00111010" // /* MW 3 */ + 8420 "00000100" // /* MW 2 */ + 8421 "00000010" // /* MW 1 */ + 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8423 "00000000" // /* MW 1 */ + 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8425 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.no_stack_arguments + 8426 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8427 "00000001" // /* MW 5 */ + 8428 "00000000" // /* MW 4 */ + 8429 "11111000" // /* MW 3 */ + 8430 "00010011" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8432 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8433 "00000001" // /* MW 3 */ + 8434 "00011010" // /* MW 2 */ + 8435 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8437 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8438 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8439 "11011010" // /* MW 3 */ + 8440 "00110110" // /* MW 2 */ + 8441 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8442 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8443 "01000001" // /* MW 5 */ + 8444 "10111011" // /* MW 4 */ + 8445 "00110111" // /* MW 3 */ + 8446 "01100000" // /* MW 2 */ + 8447 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8448 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "01111000" // /* MW 13 */ + 8452 "10100101" // /* MW 12 */ + 8453 "00000001" // /* MW 11 */ + 8454 "10010000" // /* MW 10 */ + 8455 "00001000" // /* MW 9 */ + 8456 "00100000" // /* MW 8 */ + 8457 "01011011" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00100000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.src_ref 8 "superkernels.cpp" 508 41 +.return_address + 8464 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8465 "01000001" // /* MW 5 */ + 8466 "10101111" // /* MW 4 */ + 8467 "00111101" // /* MW 3 */ + 8468 "00000110" // /* MW 2 */ + 8469 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 + 8470 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8471 "00000010" // /* MW 3 */ + 8472 "11100001" // /* MW 2 */ + 8473 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 6 +.src_ref 8 "superkernels.cpp" 508 76 + 8474 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8475 "00000001" // /* MW 5 */ + 8476 "01000000" // /* MW 4 */ + 8477 "10110000" // /* MW 3 */ + 8478 "00010000" // /* MW 2 */ + 8479 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8489 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 395 8 + 8490 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8491 "10000001" // /* MW 5 */ + 8492 "11011001" // /* MW 4 */ + 8493 "10100100" // /* MW 3 */ + 8494 "00011111" // /* MW 2 */ + 8495 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 8496 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8497 "01110110" // /* MW 3 */ + 8498 "11111111" // /* MW 2 */ + 8499 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8500 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8501 "00110110" // /* MW 3 */ + 8502 "11111110" // /* MW 2 */ + 8503 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8504 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "01010110" // /* MW 3 */ + 8506 "11111110" // /* MW 2 */ + 8507 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 8508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 8510 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8511 "00110110" // /* MW 3 */ + 8512 "01000110" // /* MW 2 */ + 8513 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8522 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00010010" // /* MW 3 */ + 8524 "10100011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8526 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8527 "00110001" // /* MW 3 */ + 8528 "00000110" // /* MW 2 */ + 8529 "00001010" // /* MW 1 */ + 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8531 "00000000" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ + 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8537 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8538 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8539 "00010000" // /* MW 5 */ + 8540 "10100110" // /* MW 4 */ + 8541 "11111000" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008 + 8544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8545 "00000000" // /* MW 1 */ + 8546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8547 "00000000" // /* MW 1 */ + 8548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8549 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 7 first +.src_ref 8 "superkernels.cpp" 511 29 + 8550 "10111010" // LDA r16, [p7]; MOVXM p7, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8551 "00010000" // /* MW 9 */ + 8552 "01110010" // /* MW 8 */ + 8553 "10110010" // /* MW 7 */ + 8554 "11110011" // /* MW 6 */ + 8555 "00000001" // /* MW 5 */ + 8556 "00000000" // /* MW 4 */ + 8557 "11010000" // /* MW 3 */ + 8558 "11000010" // /* MW 2 */ + 8559 "11100000" // /* MW 1 */ + 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8561 "00000000" // /* MW 1 */ + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ + 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8567 "00000000" // /* MW 1 */ + 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8569 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 19 + 8570 "00011000" // MOVX r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8571 "00010001" // /* MW 3 */ + 8572 "00100100" // /* MW 2 */ + 8573 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 19 + 8574 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8575 "00001000" // /* MW 3 */ + 8576 "10100001" // /* MW 2 */ + 8577 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 25 + 8578 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */ + 8579 "00000001" // /* MW 5 */ + 8580 "01000000" // /* MW 4 */ + 8581 "00100000" // /* MW 3 */ + 8582 "00010001" // /* MW 2 */ + 8583 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 66 +.delay_slot + 8584 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8585 "11000000" // /* MW 5 */ + 8586 "11001000" // /* MW 4 */ + 8587 "11000100" // /* MW 3 */ + 8588 "00000111" // /* MW 2 */ + 8589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8591 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8593 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8595 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8596 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8597 "00000001" // /* MW 3 */ + 8598 "00100010" // /* MW 2 */ + 8599 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 29 +.src_ref 8 "superkernels.cpp" 511 42 + 8600 "00101100" // LDA r16, [p7]; MOVX r13, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8601 "00000010" // /* MW 5 */ + 8602 "00110100" // /* MW 4 */ + 8603 "11010000" // /* MW 3 */ + 8604 "11000010" // /* MW 2 */ + 8605 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 66 + 8606 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8607 "00111010" // /* MW 3 */ + 8608 "00000100" // /* MW 2 */ + 8609 "00000010" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.no_stack_arguments + 8614 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8615 "00000001" // /* MW 5 */ + 8616 "00000000" // /* MW 4 */ + 8617 "11111000" // /* MW 3 */ + 8618 "00010011" // /* MW 2 */ + 8619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8621 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8623 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8624 "10011000" // LT r27, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8625 "00011010" // /* MW 3 */ + 8626 "00110111" // /* MW 2 */ + 8627 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8628 "11100100" // SUB r17, r17, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8629 "01000001" // /* MW 5 */ + 8630 "10111011" // /* MW 4 */ + 8631 "00110111" // /* MW 3 */ + 8632 "01100000" // /* MW 2 */ + 8633 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8634 "00101100" // NOPA; SEL.EQZ r0, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8635 "00100100" // /* MW 5 */ + 8636 "00000010" // /* MW 4 */ + 8637 "11111000" // /* MW 3 */ + 8638 "00101100" // /* MW 2 */ + 8639 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.src_ref 8 "superkernels.cpp" 511 42 +.return_address + 8640 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8641 "01000001" // /* MW 5 */ + 8642 "10101111" // /* MW 4 */ + 8643 "00111101" // /* MW 3 */ + 8644 "00000110" // /* MW 2 */ + 8645 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 + 8646 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00000010" // /* MW 3 */ + 8648 "11100001" // /* MW 2 */ + 8649 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 6 +.src_ref 8 "superkernels.cpp" 511 77 + 8650 "10000100" // JNZ r16, #8736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8736 delay_slots=5 */ + 8651 "00000001" // /* MW 5 */ + 8652 "01000000" // /* MW 4 */ + 8653 "00010000" // /* MW 3 */ + 8654 "00010001" // /* MW 2 */ + 8655 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 +.src_ref 1 "io_buffer_main.h" 218 49 first +.src_ref 1 "io_buffer_main.h" 395 8 + 8666 "10111010" // LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8667 "01011000" // /* MW 9 */ + 8668 "00000001" // /* MW 8 */ + 8669 "00001000" // /* MW 7 */ + 8670 "11101010" // /* MW 6 */ + 8671 "00010111" // /* MW 5 */ + 8672 "00111111" // /* MW 4 */ + 8673 "11010000" // /* MW 3 */ + 8674 "11101110" // /* MW 2 */ + 8675 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8676 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8677 "01010110" // /* MW 3 */ + 8678 "11111110" // /* MW 2 */ + 8679 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8680 "10011000" // LDA r19, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8681 "01110110" // /* MW 3 */ + 8682 "11111110" // /* MW 2 */ + 8683 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 8684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8685 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 8686 "10011000" // LDA r18, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8687 "01010110" // /* MW 3 */ + 8688 "01000110" // /* MW 2 */ + 8689 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8691 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8695 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8698 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8699 "00100010" // /* MW 3 */ + 8700 "11100101" // /* MW 2 */ + 8701 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8702 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "01010001" // /* MW 3 */ + 8704 "00000110" // /* MW 2 */ + 8705 "00001110" // /* MW 1 */ + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ + 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8709 "00000000" // /* MW 1 */ + 8710 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8711 "00000000" // /* MW 5 */ + 8712 "00000000" // /* MW 4 */ + 8713 "00101000" // /* MW 3 */ + 8714 "00010001" // /* MW 2 */ + 8715 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first +.delay_slot + 8718 "00011000" // ACQ r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8719 "00011000" // /* MW 3 */ + 8720 "10010011" // /* MW 2 */ + 8721 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8727 "01111110" // /* MW 9 */ + 8728 "10100101" // /* MW 8 */ + 8729 "00000001" // /* MW 7 */ + 8730 "00000000" // /* MW 6 */ + 8731 "00010000" // /* MW 5 */ + 8732 "00000000" // /* MW 4 */ + 8733 "11110000" // /* MW 3 */ + 8734 "00101100" // /* MW 2 */ + 8735 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200 + 8736 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8737 "00000000" // /* MW 5 */ + 8738 "00000000" // /* MW 4 */ + 8739 "00101000" // /* MW 3 */ + 8740 "00010001" // /* MW 2 */ + 8741 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 +.delay_slot + 8742 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8743 "00000101" // /* MW 3 */ + 8744 "00100000" // /* MW 2 */ + 8745 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8753 "00000000" // /* MW 15 */ + 8754 "00000000" // /* MW 14 */ + 8755 "01111000" // /* MW 13 */ + 8756 "10100101" // /* MW 12 */ + 8757 "00000001" // /* MW 11 */ + 8758 "00000000" // /* MW 10 */ + 8759 "00000000" // /* MW 9 */ + 8760 "00000000" // /* MW 8 */ + 8761 "01011011" // /* MW 7 */ + 8762 "00000001" // /* MW 6 */ + 8763 "00100000" // /* MW 5 */ + 8764 "00000000" // /* MW 4 */ + 8765 "11110000" // /* MW 3 */ + 8766 "00101100" // /* MW 2 */ + 8767 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 + 8768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "00000000" // /* MW 15 */ + 8770 "00000000" // /* MW 14 */ + 8771 "01111000" // /* MW 13 */ + 8772 "10100101" // /* MW 12 */ + 8773 "00000001" // /* MW 11 */ + 8774 "00101000" // /* MW 10 */ + 8775 "00000000" // /* MW 9 */ + 8776 "00000001" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 +.src_ref 8 "superkernels.cpp" 516 47 +.src_ref 1 "io_buffer_main.h" 125 25 + 8784 "10111010" // LDA p7, [sp, #-32]; MOVXM p6, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8785 "00010000" // /* MW 9 */ + 8786 "01100110" // /* MW 8 */ + 8787 "00110010" // /* MW 7 */ + 8788 "11110011" // /* MW 6 */ + 8789 "00000001" // /* MW 5 */ + 8790 "00000000" // /* MW 4 */ + 8791 "00100000" // /* MW 3 */ + 8792 "01110011" // /* MW 2 */ + 8793 "11111100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 47 first +.src_ref 8 "superkernels.cpp" 522 6 + 8794 "10111010" // LDA r21, [p6]; MOVXM p2, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8795 "00010000" // /* MW 9 */ + 8796 "01101000" // /* MW 8 */ + 8797 "00110010" // /* MW 7 */ + 8798 "11110001" // /* MW 6 */ + 8799 "00000001" // /* MW 5 */ + 8800 "00000000" // /* MW 4 */ + 8801 "11010000" // /* MW 3 */ + 8802 "11010110" // /* MW 2 */ + 8803 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 +.src_ref 8 "superkernels.cpp" 522 6 first + 8804 "10111010" // LDA r17, [p2]; MOVXM p6, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8805 "00010000" // /* MW 9 */ + 8806 "01100000" // /* MW 8 */ + 8807 "00110010" // /* MW 7 */ + 8808 "11110011" // /* MW 6 */ + 8809 "00000001" // /* MW 5 */ + 8810 "00000000" // /* MW 4 */ + 8811 "11010000" // /* MW 3 */ + 8812 "11000110" // /* MW 2 */ + 8813 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 first + 8814 "10011000" // LDA r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "10010110" // /* MW 3 */ + 8816 "00000110" // /* MW 2 */ + 8817 "00000110" // /* MW 1 */ + 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8819 "00000000" // /* MW 1 */ + 8820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8821 "00000000" // /* MW 1 */ + 8822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8823 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8824 "10011000" // LDA r19, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8825 "01110110" // /* MW 3 */ + 8826 "00000110" // /* MW 2 */ + 8827 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 first + 8828 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8829 "00001101" // /* MW 3 */ + 8830 "01101011" // /* MW 2 */ + 8831 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 first + 8832 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8833 "00000111" // /* MW 3 */ + 8834 "01100001" // /* MW 2 */ + 8835 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8836 "10000100" // JNZ r16, #9232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9232 delay_slots=5 */ + 8837 "00000001" // /* MW 5 */ + 8838 "01000000" // /* MW 4 */ + 8839 "00001000" // /* MW 3 */ + 8840 "00010010" // /* MW 2 */ + 8841 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 first +.delay_slot + 8842 "00011000" // ADD r20, r20, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8843 "00000111" // /* MW 3 */ + 8844 "00101000" // /* MW 2 */ + 8845 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 +.delay_slot + 8846 "10011000" // ST r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8847 "10010001" // /* MW 3 */ + 8848 "00000110" // /* MW 2 */ + 8849 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8851 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 first +.delay_slot + 8852 "01011000" // ADD.NC p0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8853 "11010101" // /* MW 3 */ + 8854 "01101001" // /* MW 2 */ + 8855 "00011000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 12 +.src_ref 8 "superkernels.cpp" 522 6 +.delay_slot + 8856 "01011100" // ST p0, [sp, #-68]; MOVX r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8857 "00100010" // /* MW 5 */ + 8858 "01001000" // /* MW 4 */ + 8859 "10110000" // /* MW 3 */ + 8860 "10000011" // /* MW 2 */ + 8861 "11110111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 first + 8862 "10011000" // EQ r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8863 "00100111" // /* MW 3 */ + 8864 "01100001" // /* MW 2 */ + 8865 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8866 "10000100" // JNZ r16, #9088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9088 delay_slots=5 */ + 8867 "00000001" // /* MW 5 */ + 8868 "01000000" // /* MW 4 */ + 8869 "11000000" // /* MW 3 */ + 8870 "00010001" // /* MW 2 */ + 8871 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8873 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8881 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8882 "10011000" // NE r16, r17, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8883 "11101000" // /* MW 3 */ + 8884 "01100000" // /* MW 2 */ + 8885 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8886 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */ + 8887 "00000001" // /* MW 5 */ + 8888 "01000000" // /* MW 4 */ + 8889 "10101000" // /* MW 3 */ + 8890 "00010001" // /* MW 2 */ + 8891 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 26 +.delay_slot + 8892 "01000100" // MOVXM p6, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8893 "11000000" // /* MW 5 */ + 8894 "11001001" // /* MW 4 */ + 8895 "11001100" // /* MW 3 */ + 8896 "00000111" // /* MW 2 */ + 8897 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8903 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8905 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 26 first +.src_ref 8 "superkernels.cpp" 523 61 + 8906 "10111010" // LDA r18, [p6]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8907 "00010000" // /* MW 9 */ + 8908 "00100100" // /* MW 8 */ + 8909 "00110010" // /* MW 7 */ + 8910 "11110011" // /* MW 6 */ + 8911 "00000001" // /* MW 5 */ + 8912 "00000000" // /* MW 4 */ + 8913 "11010000" // /* MW 3 */ + 8914 "11001010" // /* MW 2 */ + 8915 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 61 +.src_ref 8 "superkernels.cpp" 524 44 + 8916 "10111010" // LDA r16, [p6]; MOVXM p6, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8917 "00010000" // /* MW 9 */ + 8918 "01101010" // /* MW 8 */ + 8919 "00110010" // /* MW 7 */ + 8920 "11110011" // /* MW 6 */ + 8921 "00000001" // /* MW 5 */ + 8922 "00000000" // /* MW 4 */ + 8923 "11010000" // /* MW 3 */ + 8924 "11000010" // /* MW 2 */ + 8925 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 +.src_ref 8 "superkernels.cpp" 524 44 first + 8926 "00101100" // LDA r17, [p6]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8927 "00000010" // /* MW 5 */ + 8928 "01100000" // /* MW 4 */ + 8929 "11010000" // /* MW 3 */ + 8930 "11000110" // /* MW 2 */ + 8931 "11000000" // /* MW 1 */ + 8932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8933 "00000000" // /* MW 1 */ + 8934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8935 "00000000" // /* MW 1 */ + 8936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8937 "00000000" // /* MW 1 */ + 8938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8939 "00000000" // /* MW 1 */ + 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8941 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 37 first + 8942 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8943 "00001111" // /* MW 3 */ + 8944 "10100101" // /* MW 2 */ + 8945 "00010100" // /* MW 1 */ + 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8947 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 first +.src_ref 8 "superkernels.cpp" 524 30 first + 8948 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8949 "10000010" // /* MW 5 */ + 8950 "00110010" // /* MW 4 */ + 8951 "00111010" // /* MW 3 */ + 8952 "11100100" // /* MW 2 */ + 8953 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8954 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8955 "00011100" // /* MW 3 */ + 8956 "00110111" // /* MW 2 */ + 8957 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8958 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8959 "00000010" // /* MW 3 */ + 8960 "11100111" // /* MW 2 */ + 8961 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 42 + 8962 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8963 "00011100" // /* MW 3 */ + 8964 "10110111" // /* MW 2 */ + 8965 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8966 "00011000" // SEL.EQZ r17, r24, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8967 "00110010" // /* MW 3 */ + 8968 "00100011" // /* MW 2 */ + 8969 "00010110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 65 first + 8970 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8971 "00010001" // /* MW 3 */ + 8972 "00100101" // /* MW 2 */ + 8973 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 526 36 first + 8974 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8975 "00001000" // /* MW 3 */ + 8976 "01100001" // /* MW 2 */ + 8977 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 8978 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */ + 8979 "00000001" // /* MW 5 */ + 8980 "01000000" // /* MW 4 */ + 8981 "01000000" // /* MW 3 */ + 8982 "00010010" // /* MW 2 */ + 8983 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 32 +.delay_slot + 8984 "01000100" // MOVXM p6, #509200 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8985 "00100000" // /* MW 5 */ + 8986 "11001010" // /* MW 4 */ + 8987 "11001100" // /* MW 3 */ + 8988 "00000111" // /* MW 2 */ + 8989 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 32 first +.delay_slot + 8990 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8991 "01010001" // /* MW 3 */ + 8992 "00000110" // /* MW 2 */ + 8993 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8999 "00000000" // /* MW 1 */ + 9000 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */ + 9001 "00000000" // /* MW 5 */ + 9002 "00000000" // /* MW 4 */ + 9003 "11111000" // /* MW 3 */ + 9004 "00010001" // /* MW 2 */ + 9005 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.src_ref 8 "superkernels.cpp" 558 19 +.delay_slot + 9006 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9007 "00010000" // /* MW 9 */ + 9008 "01101000" // /* MW 8 */ + 9009 "10110010" // /* MW 7 */ + 9010 "11110011" // /* MW 6 */ + 9011 "00000001" // /* MW 5 */ + 9012 "00000000" // /* MW 4 */ + 9013 "00000000" // /* MW 3 */ + 9014 "01001110" // /* MW 2 */ + 9015 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9016 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9017 "00010000" // /* MW 9 */ + 9018 "00100000" // /* MW 8 */ + 9019 "00110010" // /* MW 7 */ + 9020 "11110001" // /* MW 6 */ + 9021 "00000001" // /* MW 5 */ + 9022 "00000000" // /* MW 4 */ + 9023 "00000000" // /* MW 3 */ + 9024 "00101111" // /* MW 2 */ + 9025 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9026 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9027 "00000001" // /* MW 3 */ + 9028 "00011010" // /* MW 2 */ + 9029 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9032 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9033 "00011100" // /* MW 7 */ + 9034 "00000000" // /* MW 6 */ + 9035 "00000000" // /* MW 5 */ + 9036 "00000100" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504 + 9040 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */ + 9041 "00000000" // /* MW 5 */ + 9042 "00000000" // /* MW 4 */ + 9043 "11111000" // /* MW 3 */ + 9044 "00010001" // /* MW 2 */ + 9045 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.src_ref 8 "superkernels.cpp" 558 19 +.delay_slot + 9046 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9047 "00010000" // /* MW 9 */ + 9048 "01101000" // /* MW 8 */ + 9049 "10110010" // /* MW 7 */ + 9050 "11110011" // /* MW 6 */ + 9051 "00000001" // /* MW 5 */ + 9052 "00000000" // /* MW 4 */ + 9053 "00000000" // /* MW 3 */ + 9054 "01001110" // /* MW 2 */ + 9055 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9056 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9057 "00010000" // /* MW 9 */ + 9058 "00100000" // /* MW 8 */ + 9059 "00110010" // /* MW 7 */ + 9060 "11110001" // /* MW 6 */ + 9061 "00000001" // /* MW 5 */ + 9062 "00000000" // /* MW 4 */ + 9063 "00000000" // /* MW 3 */ + 9064 "00101111" // /* MW 2 */ + 9065 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9066 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9067 "00000001" // /* MW 3 */ + 9068 "00011010" // /* MW 2 */ + 9069 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9073 "00000000" // /* MW 15 */ + 9074 "00000000" // /* MW 14 */ + 9075 "01111000" // /* MW 13 */ + 9076 "10100101" // /* MW 12 */ + 9077 "00000001" // /* MW 11 */ + 9078 "00000000" // /* MW 10 */ + 9079 "00000000" // /* MW 9 */ + 9080 "00000000" // /* MW 8 */ + 9081 "01011011" // /* MW 7 */ + 9082 "00000001" // /* MW 6 */ + 9083 "00100000" // /* MW 5 */ + 9084 "00000000" // /* MW 4 */ + 9085 "11110000" // /* MW 3 */ + 9086 "00101100" // /* MW 2 */ + 9087 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552 +.src_ref 8 "superkernels.cpp" 532 27 +.src_ref 8 "superkernels.cpp" 533 31 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 + 9088 "10111010" // MOVA r13, #0; MOVXM p6, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9089 "00010000" // /* MW 9 */ + 9090 "01110010" // /* MW 8 */ + 9091 "00110010" // /* MW 7 */ + 9092 "11110011" // /* MW 6 */ + 9093 "00000001" // /* MW 5 */ + 9094 "00000000" // /* MW 4 */ + 9095 "00000000" // /* MW 3 */ + 9096 "00001101" // /* MW 2 */ + 9097 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 27 first +.src_ref 8 "superkernels.cpp" 532 63 +.src_ref 8 "superkernels.cpp" 552 2 + 9098 "10111010" // LDA r18, [p6]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9099 "00010000" // /* MW 9 */ + 9100 "00100000" // /* MW 8 */ + 9101 "00110010" // /* MW 7 */ + 9102 "11110001" // /* MW 6 */ + 9103 "00000001" // /* MW 5 */ + 9104 "00000000" // /* MW 4 */ + 9105 "11010000" // /* MW 3 */ + 9106 "11001010" // /* MW 2 */ + 9107 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 63 +.src_ref 8 "superkernels.cpp" 533 46 + 9108 "10111010" // LDA r16, [p2]; MOVXM p6, #509144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9109 "00010000" // /* MW 9 */ + 9110 "01101100" // /* MW 8 */ + 9111 "00110010" // /* MW 7 */ + 9112 "11110011" // /* MW 6 */ + 9113 "00000001" // /* MW 5 */ + 9114 "00000000" // /* MW 4 */ + 9115 "11010000" // /* MW 3 */ + 9116 "11000010" // /* MW 2 */ + 9117 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 46 first +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 + 9118 "00101100" // LDA r17, [p6]; MOVX r15, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9119 "00001010" // /* MW 5 */ + 9120 "00111100" // /* MW 4 */ + 9121 "11010000" // /* MW 3 */ + 9122 "11000110" // /* MW 2 */ + 9123 "11000000" // /* MW 1 */ + 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9125 "00000000" // /* MW 1 */ + 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9127 "00000000" // /* MW 1 */ + 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9129 "00000000" // /* MW 1 */ + 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9131 "00000000" // /* MW 1 */ + 9132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9133 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 39 first + 9134 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9135 "00001111" // /* MW 3 */ + 9136 "10100101" // /* MW 2 */ + 9137 "00010100" // /* MW 1 */ + 9138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9139 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 first +.src_ref 8 "superkernels.cpp" 533 31 first + 9140 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9141 "10000010" // /* MW 5 */ + 9142 "00110010" // /* MW 4 */ + 9143 "00111010" // /* MW 3 */ + 9144 "11100100" // /* MW 2 */ + 9145 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9146 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9147 "00011100" // /* MW 3 */ + 9148 "00110111" // /* MW 2 */ + 9149 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9150 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9151 "00000010" // /* MW 3 */ + 9152 "11100111" // /* MW 2 */ + 9153 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 44 + 9154 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "00011100" // /* MW 3 */ + 9156 "10110111" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9158 "00011000" // SEL.EQZ r17, r13, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "00110010" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 67 first + 9162 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00010001" // /* MW 3 */ + 9164 "00100101" // /* MW 2 */ + 9165 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 535 37 first + 9166 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9167 "00001000" // /* MW 3 */ + 9168 "01100001" // /* MW 2 */ + 9169 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 9170 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */ + 9171 "00000001" // /* MW 5 */ + 9172 "01000000" // /* MW 4 */ + 9173 "01000000" // /* MW 3 */ + 9174 "00010010" // /* MW 2 */ + 9175 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 33 +.delay_slot + 9176 "01000100" // MOVXM p6, #509208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9177 "00110000" // /* MW 5 */ + 9178 "11001010" // /* MW 4 */ + 9179 "11001100" // /* MW 3 */ + 9180 "00000111" // /* MW 2 */ + 9181 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 33 first +.delay_slot + 9182 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "01010001" // /* MW 3 */ + 9184 "00000110" // /* MW 2 */ + 9185 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9189 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.delay_slot + 9190 "10111010" // NOPA; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9191 "00010000" // /* MW 9 */ + 9192 "01101000" // /* MW 8 */ + 9193 "10110010" // /* MW 7 */ + 9194 "11110011" // /* MW 6 */ + 9195 "00000001" // /* MW 5 */ + 9196 "00000000" // /* MW 4 */ + 9197 "11110000" // /* MW 3 */ + 9198 "00101100" // /* MW 2 */ + 9199 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 9200 "00111010" // MOVS p6, r12; J #9408 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */ + 9201 "00100001" // /* MW 9 */ + 9202 "00000000" // /* MW 8 */ + 9203 "00000000" // /* MW 7 */ + 9204 "10011000" // /* MW 6 */ + 9205 "00000100" // /* MW 5 */ + 9206 "00000000" // /* MW 4 */ + 9207 "01100000" // /* MW 3 */ + 9208 "10000001" // /* MW 2 */ + 9209 "11010001" // /* MW 1 */ +.delay_slot + 9210 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9211 "10010001" // /* MW 3 */ + 9212 "11100101" // /* MW 2 */ + 9213 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9217 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9220 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9221 "10000001" // /* MW 11 */ + 9222 "10101101" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "00000000" // /* MW 8 */ + 9225 "00000000" // /* MW 7 */ + 9226 "00000000" // /* MW 6 */ + 9227 "00100000" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696 +.src_ref 8 "superkernels.cpp" 541 26 + 9232 "01000100" // MOVXM p6, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10001000" // /* MW 5 */ + 9234 "11001001" // /* MW 4 */ + 9235 "11001100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 26 first +.src_ref 8 "superkernels.cpp" 541 61 + 9238 "10111010" // LDA r19, [p6]; MOVXM p6, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "00100010" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110011" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11001110" // /* MW 2 */ + 9247 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 61 +.src_ref 8 "superkernels.cpp" 542 44 + 9248 "10111010" // LDA r16, [p6]; MOVXM p6, #509148 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "01101110" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110011" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000010" // /* MW 2 */ + 9257 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 44 first + 9258 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9259 "01010110" // /* MW 3 */ + 9260 "00000110" // /* MW 2 */ + 9261 "00000110" // /* MW 1 */ + 9262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9263 "00000000" // /* MW 1 */ + 9264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9265 "00000000" // /* MW 1 */ + 9266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9267 "00000000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 37 first + 9272 "10011000" // MUL r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9273 "00001111" // /* MW 3 */ + 9274 "11100111" // /* MW 2 */ + 9275 "00010100" // /* MW 1 */ + 9276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 first +.src_ref 8 "superkernels.cpp" 542 30 first + 9278 "10100100" // SUB r20, r18, r19; ADD.NC r21, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "10000010" // /* MW 5 */ + 9280 "10110011" // /* MW 4 */ + 9281 "00111010" // /* MW 3 */ + 9282 "00100110" // /* MW 2 */ + 9283 "10010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9284 "10011000" // LTU r27, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9285 "00101100" // /* MW 3 */ + 9286 "01110111" // /* MW 2 */ + 9287 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9288 "00011000" // SEL.EQZ r20, r20, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9289 "00000010" // /* MW 3 */ + 9290 "00101001" // /* MW 2 */ + 9291 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 +.src_ref 8 "superkernels.cpp" 542 42 + 9292 "01100100" // LTU r27, r19, r18; MOV r17, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9293 "00000001" // /* MW 5 */ + 9294 "10100000" // /* MW 4 */ + 9295 "10011000" // /* MW 3 */ + 9296 "11100101" // /* MW 2 */ + 9297 "10011110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9298 "00011000" // SEL.EQZ r17, r17, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "01000010" // /* MW 3 */ + 9300 "01100011" // /* MW 2 */ + 9301 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 69 first + 9302 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9303 "00010001" // /* MW 3 */ + 9304 "00100101" // /* MW 2 */ + 9305 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 544 38 first + 9306 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9307 "00000111" // /* MW 3 */ + 9308 "01100001" // /* MW 2 */ + 9309 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 9310 "10000100" // JNZ r16, #10176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10176 delay_slots=5 */ + 9311 "00000001" // /* MW 5 */ + 9312 "01000000" // /* MW 4 */ + 9313 "11100000" // /* MW 3 */ + 9314 "00010011" // /* MW 2 */ + 9315 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 34 +.delay_slot + 9316 "01000100" // MOVXM p6, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9317 "01000000" // /* MW 5 */ + 9318 "11001010" // /* MW 4 */ + 9319 "11001100" // /* MW 3 */ + 9320 "00000111" // /* MW 2 */ + 9321 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 34 first +.delay_slot + 9322 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9323 "01010001" // /* MW 3 */ + 9324 "00000110" // /* MW 2 */ + 9325 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9327 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 9330 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9331 "00011100" // /* MW 13 */ + 9332 "00000000" // /* MW 12 */ + 9333 "00000000" // /* MW 11 */ + 9334 "01010111" // /* MW 10 */ + 9335 "00011010" // /* MW 9 */ + 9336 "01000000" // /* MW 8 */ + 9337 "00000000" // /* MW 7 */ + 9338 "00000000" // /* MW 6 */ + 9339 "10110110" // /* MW 5 */ + 9340 "00000010" // /* MW 4 */ + 9341 "11110000" // /* MW 3 */ + 9342 "00101100" // /* MW 2 */ + 9343 "00000000" // /* MW 1 */ +.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 8 "superkernels.cpp" 558 19 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 9344 "01110110" // LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9345 "01011000" // /* MW 11 */ + 9346 "00000001" // /* MW 10 */ + 9347 "11101000" // /* MW 9 */ + 9348 "01001001" // /* MW 8 */ + 9349 "11100000" // /* MW 7 */ + 9350 "00000000" // /* MW 6 */ + 9351 "00001011" // /* MW 5 */ + 9352 "10001100" // /* MW 4 */ + 9353 "00100110" // /* MW 3 */ + 9354 "10000011" // /* MW 2 */ + 9355 "11110111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9356 "00011000" // LDA p1, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9357 "10011001" // /* MW 3 */ + 9358 "10111100" // /* MW 2 */ + 9359 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9360 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9361 "10010001" // /* MW 3 */ + 9362 "11100101" // /* MW 2 */ + 9363 "00000111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 287 11 first +.aggressive_scheduled_block_id 7 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9364 "00000100" // JL #4176 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4176 delay_slots=5 */ + 9365 "00000001" // /* MW 5 */ + 9366 "00000000" // /* MW 4 */ + 9367 "00101000" // /* MW 3 */ + 9368 "00001000" // /* MW 2 */ + 9369 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9370 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9371 "11000000" // /* MW 3 */ + 9372 "01100000" // /* MW 2 */ + 9373 "00011111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9374 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9375 "00000001" // /* MW 3 */ + 9376 "00011010" // /* MW 2 */ + 9377 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 287 11 +.delay_slot + 9382 "10111010" // NOPA; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9383 "00010000" // /* MW 9 */ + 9384 "10000000" // /* MW 8 */ + 9385 "00110010" // /* MW 7 */ + 9386 "11110001" // /* MW 6 */ + 9387 "00000001" // /* MW 5 */ + 9388 "00000000" // /* MW 4 */ + 9389 "11110000" // /* MW 3 */ + 9390 "00101100" // /* MW 2 */ + 9391 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 552 2 +.return_address + 9392 "00111010" // MOVS p0, p7; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9393 "00010001" // /* MW 9 */ + 9394 "00100000" // /* MW 8 */ + 9395 "00110010" // /* MW 7 */ + 9396 "11110001" // /* MW 6 */ + 9397 "00000001" // /* MW 5 */ + 9398 "00000000" // /* MW 4 */ + 9399 "01100000" // /* MW 3 */ + 9400 "10010001" // /* MW 2 */ + 9401 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 + 9402 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9403 "10100000" // /* MW 5 */ + 9404 "11001001" // /* MW 4 */ + 9405 "11001110" // /* MW 3 */ + 9406 "00000111" // /* MW 2 */ + 9407 "00000000" // /* MW 1 */ +.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9408 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9409 "10011110" // /* MW 3 */ + 9410 "01011100" // /* MW 2 */ + 9411 "00000110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 first +.no_stack_arguments + 9412 "00000100" // JL #4848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4848 delay_slots=5 */ + 9413 "00000001" // /* MW 5 */ + 9414 "00000000" // /* MW 4 */ + 9415 "01111000" // /* MW 3 */ + 9416 "00001001" // /* MW 2 */ + 9417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9427 "00011100" // /* MW 13 */ + 9428 "00000000" // /* MW 12 */ + 9429 "00000000" // /* MW 11 */ + 9430 "01010111" // /* MW 10 */ + 9431 "00011010" // /* MW 9 */ + 9432 "01000000" // /* MW 8 */ + 9433 "00000000" // /* MW 7 */ + 9434 "00000000" // /* MW 6 */ + 9435 "10110110" // /* MW 5 */ + 9436 "00000010" // /* MW 4 */ + 9437 "11110000" // /* MW 3 */ + 9438 "00101100" // /* MW 2 */ + 9439 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 first +.return_address + 9440 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9441 "00010110" // /* MW 3 */ + 9442 "00000110" // /* MW 2 */ + 9443 "00000111" // /* MW 1 */ + 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9445 "00000000" // /* MW 1 */ + 9446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9447 "00000000" // /* MW 1 */ + 9448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9449 "00000000" // /* MW 1 */ + 9450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9451 "00000000" // /* MW 1 */ + 9452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9453 "00000000" // /* MW 1 */ + 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 19 + 9456 "10011000" // NE r17, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9457 "00001000" // /* MW 3 */ + 9458 "11100011" // /* MW 2 */ + 9459 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 25 + 9460 "10000100" // JNZ r17, #9664 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9664 delay_slots=5 */ + 9461 "00000001" // /* MW 5 */ + 9462 "01000000" // /* MW 4 */ + 9463 "11100000" // /* MW 3 */ + 9464 "00010010" // /* MW 2 */ + 9465 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 555 15 + 9476 "01000100" // MOVXM p7, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9477 "10001000" // /* MW 5 */ + 9478 "11001001" // /* MW 4 */ + 9479 "11001110" // /* MW 3 */ + 9480 "00000111" // /* MW 2 */ + 9481 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 554 67 + 9482 "10111010" // LDA r16, [p7]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9483 "00010000" // /* MW 9 */ + 9484 "00110000" // /* MW 8 */ + 9485 "00110010" // /* MW 7 */ + 9486 "11110001" // /* MW 6 */ + 9487 "00000001" // /* MW 5 */ + 9488 "00000000" // /* MW 4 */ + 9489 "11010000" // /* MW 3 */ + 9490 "11000010" // /* MW 2 */ + 9491 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 67 + 9492 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9493 "00111010" // /* MW 3 */ + 9494 "00000100" // /* MW 2 */ + 9495 "00000010" // /* MW 1 */ + 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9497 "00000000" // /* MW 1 */ + 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9499 "00000000" // /* MW 1 */ + 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9501 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.no_stack_arguments + 9502 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9503 "00000001" // /* MW 5 */ + 9504 "00000000" // /* MW 4 */ + 9505 "11111000" // /* MW 3 */ + 9506 "00010011" // /* MW 2 */ + 9507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.delay_slot + 9510 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00000111" // /* MW 3 */ + 9512 "00100000" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9514 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9515 "10110101" // /* MW 5 */ + 9516 "01101101" // /* MW 4 */ + 9517 "00111000" // /* MW 3 */ + 9518 "11000010" // /* MW 2 */ + 9519 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9520 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9521 "01000001" // /* MW 5 */ + 9522 "10111011" // /* MW 4 */ + 9523 "00110111" // /* MW 3 */ + 9524 "01100000" // /* MW 2 */ + 9525 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9526 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9527 "00010010" // /* MW 9 */ + 9528 "00000001" // /* MW 8 */ + 9529 "00000100" // /* MW 7 */ + 9530 "00000000" // /* MW 6 */ + 9531 "01011011" // /* MW 5 */ + 9532 "00000001" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 9536 "10111010" // LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9537 "01111000" // /* MW 9 */ + 9538 "11010000" // /* MW 8 */ + 9539 "01101011" // /* MW 7 */ + 9540 "10001111" // /* MW 6 */ + 9541 "00000001" // /* MW 5 */ + 9542 "00011011" // /* MW 4 */ + 9543 "00100000" // /* MW 3 */ + 9544 "10100011" // /* MW 2 */ + 9545 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 + 9546 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9547 "00000010" // /* MW 3 */ + 9548 "11100001" // /* MW 2 */ + 9549 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 6 +.src_ref 8 "superkernels.cpp" 554 78 + 9550 "10000100" // JNZ r16, #9632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9632 delay_slots=5 */ + 9551 "00000001" // /* MW 5 */ + 9552 "01000000" // /* MW 4 */ + 9553 "11010000" // /* MW 3 */ + 9554 "00010010" // /* MW 2 */ + 9555 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9556 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00000101" // /* MW 3 */ + 9558 "00011110" // /* MW 2 */ + 9559 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9567 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 555 15 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9568 "00001100" // LDA r16, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9569 "01100011" // /* MW 5 */ + 9570 "00001011" // /* MW 4 */ + 9571 "11011110" // /* MW 3 */ + 9572 "11000010" // /* MW 2 */ + 9573 "01001010" // /* MW 1 */ + 9574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9575 "00000000" // /* MW 1 */ + 9576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9577 "00000000" // /* MW 1 */ + 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9579 "00000000" // /* MW 1 */ + 9580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9581 "00000000" // /* MW 1 */ + 9582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9583 "00000000" // /* MW 1 */ + 9584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9585 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9586 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9587 "11111000" // /* MW 3 */ + 9588 "00010000" // /* MW 2 */ + 9589 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 7 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9590 "10111010" // LDA r16, [p6, #-8]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9591 "00010000" // /* MW 9 */ + 9592 "01101000" // /* MW 8 */ + 9593 "10110010" // /* MW 7 */ + 9594 "11110011" // /* MW 6 */ + 9595 "00000001" // /* MW 5 */ + 9596 "00000000" // /* MW 4 */ + 9597 "11010000" // /* MW 3 */ + 9598 "11000010" // /* MW 2 */ + 9599 "11011100" // /* MW 1 */ + 9600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9601 "00000000" // /* MW 1 */ + 9602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9603 "00000000" // /* MW 1 */ + 9604 "10000100" // J #9648 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9648 delay_slots=5 */ + 9605 "00000000" // /* MW 5 */ + 9606 "00000000" // /* MW 4 */ + 9607 "11011000" // /* MW 3 */ + 9608 "00010010" // /* MW 2 */ + 9609 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.delay_slot + 9616 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9617 "00000001" // /* MW 3 */ + 9618 "11100001" // /* MW 2 */ + 9619 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.delay_slot + 9620 "00110110" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9621 "11000001" // /* MW 11 */ + 9622 "00001000" // /* MW 10 */ + 9623 "01110011" // /* MW 9 */ + 9624 "00000011" // /* MW 8 */ + 9625 "00000000" // /* MW 7 */ + 9626 "00000000" // /* MW 6 */ + 9627 "00100000" // /* MW 5 */ + 9628 "00000000" // /* MW 4 */ + 9629 "11110000" // /* MW 3 */ + 9630 "00101100" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096 +.src_ref 8 "superkernels.cpp" 558 7 + 9632 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "00010000" // /* MW 13 */ + 9636 "01101000" // /* MW 12 */ + 9637 "10110010" // /* MW 11 */ + 9638 "11110011" // /* MW 10 */ + 9639 "00000001" // /* MW 9 */ + 9640 "00000000" // /* MW 8 */ + 9641 "01011011" // /* MW 7 */ + 9642 "00000001" // /* MW 6 */ + 9643 "00100000" // /* MW 5 */ + 9644 "00000000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112 +.src_ref 8 "superkernels.cpp" 558 7 first + 9648 "11100001" // LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "10100101" // /* MW 12 */ + 9653 "00000001" // /* MW 11 */ + 9654 "00000000" // /* MW 10 */ + 9655 "00000000" // /* MW 9 */ + 9656 "00000000" // /* MW 8 */ + 9657 "01011011" // /* MW 7 */ + 9658 "00000001" // /* MW 6 */ + 9659 "00100000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11010000" // /* MW 3 */ + 9662 "11000010" // /* MW 2 */ + 9663 "11100000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128 +.src_ref 8 "superkernels.cpp" 558 43 + 9664 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00000001" // /* MW 3 */ + 9666 "00100010" // /* MW 2 */ + 9667 "00010000" // /* MW 1 */ + 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9669 "00000000" // /* MW 1 */ + 9670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9671 "00000000" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 19 + 9678 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9679 "00001000" // /* MW 3 */ + 9680 "10100001" // /* MW 2 */ + 9681 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 25 + 9682 "10000100" // JNZ r16, #9872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9872 delay_slots=5 */ + 9683 "00000001" // /* MW 5 */ + 9684 "01000000" // /* MW 4 */ + 9685 "01001000" // /* MW 3 */ + 9686 "00010011" // /* MW 2 */ + 9687 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.src_ref 8 "superkernels.cpp" 559 15 +.delay_slot + 9688 "01000100" // MOVXM p7, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9689 "11000000" // /* MW 5 */ + 9690 "11001001" // /* MW 4 */ + 9691 "11001110" // /* MW 3 */ + 9692 "00000111" // /* MW 2 */ + 9693 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 67 +.delay_slot + 9694 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9695 "11000000" // /* MW 5 */ + 9696 "11001000" // /* MW 4 */ + 9697 "11000100" // /* MW 3 */ + 9698 "00000111" // /* MW 2 */ + 9699 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9703 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9705 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 + 9706 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00010110" // /* MW 3 */ + 9708 "00000110" // /* MW 2 */ + 9709 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 67 + 9710 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "00111010" // /* MW 3 */ + 9712 "00000100" // /* MW 2 */ + 9713 "00000010" // /* MW 1 */ + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9715 "00000000" // /* MW 1 */ + 9716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9717 "00000000" // /* MW 1 */ + 9718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9719 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.no_stack_arguments + 9720 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9721 "00000001" // /* MW 5 */ + 9722 "00000000" // /* MW 4 */ + 9723 "11111000" // /* MW 3 */ + 9724 "00010011" // /* MW 2 */ + 9725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9727 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.delay_slot + 9728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9729 "00000111" // /* MW 3 */ + 9730 "00100000" // /* MW 2 */ + 9731 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9732 "01011100" // ST r16, [p7]; LT r27, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9733 "00110101" // /* MW 5 */ + 9734 "01101110" // /* MW 4 */ + 9735 "00111000" // /* MW 3 */ + 9736 "11000010" // /* MW 2 */ + 9737 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9738 "11100100" // SUB r17, r17, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9739 "01000001" // /* MW 5 */ + 9740 "00111011" // /* MW 4 */ + 9741 "00110111" // /* MW 3 */ + 9742 "01100000" // /* MW 2 */ + 9743 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9744 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9745 "00000000" // /* MW 15 */ + 9746 "00000000" // /* MW 14 */ + 9747 "01111000" // /* MW 13 */ + 9748 "10100101" // /* MW 12 */ + 9749 "00000001" // /* MW 11 */ + 9750 "10010000" // /* MW 10 */ + 9751 "00001000" // /* MW 9 */ + 9752 "00100000" // /* MW 8 */ + 9753 "01011011" // /* MW 7 */ + 9754 "00000001" // /* MW 6 */ + 9755 "00100000" // /* MW 5 */ + 9756 "00000000" // /* MW 4 */ + 9757 "11110000" // /* MW 3 */ + 9758 "00101100" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 9760 "10111010" // LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9761 "01111000" // /* MW 9 */ + 9762 "10010000" // /* MW 8 */ + 9763 "01101011" // /* MW 7 */ + 9764 "10001111" // /* MW 6 */ + 9765 "00000001" // /* MW 5 */ + 9766 "00011011" // /* MW 4 */ + 9767 "00100000" // /* MW 3 */ + 9768 "10010011" // /* MW 2 */ + 9769 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 + 9770 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9771 "00000010" // /* MW 3 */ + 9772 "11100001" // /* MW 2 */ + 9773 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 6 +.src_ref 8 "superkernels.cpp" 558 78 + 9774 "10000100" // JNZ r16, #9840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9840 delay_slots=5 */ + 9775 "00000001" // /* MW 5 */ + 9776 "01000000" // /* MW 4 */ + 9777 "00111000" // /* MW 3 */ + 9778 "00010011" // /* MW 2 */ + 9779 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 7 +.delay_slot + 9780 "01000100" // MOVXM p2, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9781 "10100000" // /* MW 5 */ + 9782 "11001001" // /* MW 4 */ + 9783 "11000100" // /* MW 3 */ + 9784 "00000111" // /* MW 2 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9789 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9791 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9793 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 559 15 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9794 "00001100" // LDA r16, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9795 "01100011" // /* MW 5 */ + 9796 "00001011" // /* MW 4 */ + 9797 "11011110" // /* MW 3 */ + 9798 "11000010" // /* MW 2 */ + 9799 "00101010" // /* MW 1 */ + 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9801 "00000000" // /* MW 1 */ + 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9803 "00000000" // /* MW 1 */ + 9804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9805 "00000000" // /* MW 1 */ + 9806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9807 "00000000" // /* MW 1 */ + 9808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9809 "00000000" // /* MW 1 */ + 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9812 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "11111000" // /* MW 3 */ + 9814 "00010000" // /* MW 2 */ + 9815 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 9816 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "00010110" // /* MW 3 */ + 9818 "11100110" // /* MW 2 */ + 9819 "00000110" // /* MW 1 */ + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ + 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9823 "00000000" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 9832 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00000001" // /* MW 3 */ + 9834 "11100001" // /* MW 2 */ + 9835 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9836 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9837 "00010001" // /* MW 3 */ + 9838 "11100110" // /* MW 2 */ + 9839 "00001110" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304 + 9840 "10000100" // J #9888 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */ + 9841 "00000000" // /* MW 5 */ + 9842 "00000000" // /* MW 4 */ + 9843 "01010000" // /* MW 3 */ + 9844 "00010011" // /* MW 2 */ + 9845 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.delay_slot + 9846 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9847 "11000000" // /* MW 3 */ + 9848 "01100010" // /* MW 2 */ + 9849 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9856 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9857 "00000000" // /* MW 15 */ + 9858 "00000000" // /* MW 14 */ + 9859 "01111000" // /* MW 13 */ + 9860 "10100101" // /* MW 12 */ + 9861 "00000001" // /* MW 11 */ + 9862 "00000000" // /* MW 10 */ + 9863 "00000000" // /* MW 9 */ + 9864 "00000000" // /* MW 8 */ + 9865 "01011011" // /* MW 7 */ + 9866 "00000001" // /* MW 6 */ + 9867 "00100000" // /* MW 5 */ + 9868 "00000000" // /* MW 4 */ + 9869 "11110000" // /* MW 3 */ + 9870 "00101100" // /* MW 2 */ + 9871 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336 +.src_ref 8 "superkernels.cpp" 562 7 +.src_ref 1 "io_buffer_main.h" 324 51 + 9872 "11100001" // LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9873 "00000000" // /* MW 15 */ + 9874 "00000000" // /* MW 14 */ + 9875 "00010000" // /* MW 13 */ + 9876 "01101000" // /* MW 12 */ + 9877 "00110010" // /* MW 11 */ + 9878 "11110001" // /* MW 10 */ + 9879 "00000001" // /* MW 9 */ + 9880 "00000000" // /* MW 8 */ + 9881 "01011011" // /* MW 7 */ + 9882 "00000001" // /* MW 6 */ + 9883 "00100000" // /* MW 5 */ + 9884 "00000000" // /* MW 4 */ + 9885 "00100000" // /* MW 3 */ + 9886 "11110011" // /* MW 2 */ + 9887 "11111011" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352 +.src_ref 8 "superkernels.cpp" 562 7 first +.src_ref 8 "superkernels.cpp" 562 19 + 9888 "00101100" // LDA r16, [p2]; MOVX r17, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00100010" // /* MW 5 */ + 9890 "01000100" // /* MW 4 */ + 9891 "11010000" // /* MW 3 */ + 9892 "11000010" // /* MW 2 */ + 9893 "01000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 19 + 9906 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9907 "00001000" // /* MW 3 */ + 9908 "01100001" // /* MW 2 */ + 9909 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 25 + 9910 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */ + 9911 "00000001" // /* MW 5 */ + 9912 "01000000" // /* MW 4 */ + 9913 "10101000" // /* MW 3 */ + 9914 "00010011" // /* MW 2 */ + 9915 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.delay_slot + 9916 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11001000" // /* MW 5 */ + 9918 "11001001" // /* MW 4 */ + 9919 "11000100" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9929 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.src_ref 8 "superkernels.cpp" 562 68 + 9930 "10111010" // LDA r16, [p2]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9931 "00010000" // /* MW 9 */ + 9932 "00110000" // /* MW 8 */ + 9933 "10110010" // /* MW 7 */ + 9934 "11110000" // /* MW 6 */ + 9935 "00000001" // /* MW 5 */ + 9936 "00000000" // /* MW 4 */ + 9937 "11010000" // /* MW 3 */ + 9938 "11000010" // /* MW 2 */ + 9939 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 68 + 9940 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9941 "00111010" // /* MW 3 */ + 9942 "00000100" // /* MW 2 */ + 9943 "00000001" // /* MW 1 */ + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ + 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9947 "00000000" // /* MW 1 */ + 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9949 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.no_stack_arguments + 9950 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9951 "00000001" // /* MW 5 */ + 9952 "00000000" // /* MW 4 */ + 9953 "11111000" // /* MW 3 */ + 9954 "00010011" // /* MW 2 */ + 9955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9957 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.delay_slot + 9958 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9959 "00000111" // /* MW 3 */ + 9960 "00100000" // /* MW 2 */ + 9961 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9962 "01011100" // ST r16, [p2]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9963 "10110101" // /* MW 5 */ + 9964 "01101101" // /* MW 4 */ + 9965 "00111000" // /* MW 3 */ + 9966 "11000010" // /* MW 2 */ + 9967 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9968 "11100100" // SUB r17, r13, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9969 "01000001" // /* MW 5 */ + 9970 "00111011" // /* MW 4 */ + 9971 "00110111" // /* MW 3 */ + 9972 "01100000" // /* MW 2 */ + 9973 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9974 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9975 "00010010" // /* MW 9 */ + 9976 "00000001" // /* MW 8 */ + 9977 "00000100" // /* MW 7 */ + 9978 "00000000" // /* MW 6 */ + 9979 "01011011" // /* MW 5 */ + 9980 "00000001" // /* MW 4 */ + 9981 "11110000" // /* MW 3 */ + 9982 "00101100" // /* MW 2 */ + 9983 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 562 44 +.return_address + 9984 "11100100" // SUB r16, r13, r3; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9985 "01000001" // /* MW 5 */ + 9986 "10101110" // /* MW 4 */ + 9987 "00111101" // /* MW 3 */ + 9988 "00000110" // /* MW 2 */ + 9989 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 + 9990 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9991 "00000010" // /* MW 3 */ + 9992 "11100001" // /* MW 2 */ + 9993 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 6 +.src_ref 8 "superkernels.cpp" 562 79 + 9994 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */ + 9995 "00000001" // /* MW 5 */ + 9996 "01000000" // /* MW 4 */ + 9997 "10101000" // /* MW 3 */ + 9998 "00010011" // /* MW 2 */ + 9999 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 563 16 +.delay_slot + 10000 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10001 "11001000" // /* MW 5 */ + 10002 "11001001" // /* MW 4 */ + 10003 "11000100" // /* MW 3 */ + 10004 "00000111" // /* MW 2 */ + 10005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10013 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 563 16 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 10014 "00001100" // LDA r16, [p7, #20]; ST r13, [p2] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "01100011" // /* MW 5 */ + 10016 "00001011" // /* MW 4 */ + 10017 "11010100" // /* MW 3 */ + 10018 "11000010" // /* MW 2 */ + 10019 "11101010" // /* MW 1 */ + 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10021 "00000000" // /* MW 1 */ + 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10023 "00000000" // /* MW 1 */ + 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10025 "00000000" // /* MW 1 */ + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10032 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10033 "11111000" // /* MW 3 */ + 10034 "00010000" // /* MW 2 */ + 10035 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 10036 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10037 "00010110" // /* MW 3 */ + 10038 "11100110" // /* MW 2 */ + 10039 "00000110" // /* MW 1 */ + 10040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10041 "00000000" // /* MW 1 */ + 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10043 "00000000" // /* MW 1 */ + 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10045 "00000000" // /* MW 1 */ + 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10047 "00000000" // /* MW 1 */ + 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10049 "00000000" // /* MW 1 */ + 10050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10051 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 10052 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10053 "00000001" // /* MW 3 */ + 10054 "11100001" // /* MW 2 */ + 10055 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10056 "00000010" // ST r16, [p6, #-8]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10057 "01110000" // /* MW 7 */ + 10058 "10100101" // /* MW 6 */ + 10059 "00000001" // /* MW 5 */ + 10060 "00000000" // /* MW 4 */ + 10061 "00110000" // /* MW 3 */ + 10062 "11000010" // /* MW 2 */ + 10063 "11011100" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528 +.src_ref 8 "superkernels.cpp" 566 6 +.src_ref 8 "superkernels.cpp" 567 14 + 10064 "01000100" // MOVXM p6, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10065 "10000000" // /* MW 5 */ + 10066 "11001001" // /* MW 4 */ + 10067 "11001100" // /* MW 3 */ + 10068 "00000111" // /* MW 2 */ + 10069 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 6 first +.src_ref 8 "superkernels.cpp" 566 19 + 10070 "10111010" // LDA r16, [p6]; MOVXM p2, #509160 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10071 "00010000" // /* MW 9 */ + 10072 "01110100" // /* MW 8 */ + 10073 "00110010" // /* MW 7 */ + 10074 "11110001" // /* MW 6 */ + 10075 "00000001" // /* MW 5 */ + 10076 "00000000" // /* MW 4 */ + 10077 "11010000" // /* MW 3 */ + 10078 "11000010" // /* MW 2 */ + 10079 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 19 + 10080 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10081 "00110110" // /* MW 3 */ + 10082 "00000110" // /* MW 2 */ + 10083 "00000010" // /* MW 1 */ + 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10085 "00000000" // /* MW 1 */ + 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10087 "00000000" // /* MW 1 */ + 10088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10089 "00000000" // /* MW 1 */ + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ + 10092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10093 "00000000" // /* MW 1 */ + 10094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10095 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 16 + 10096 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10097 "00001000" // /* MW 3 */ + 10098 "01100001" // /* MW 2 */ + 10099 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 6 + 10100 "10000100" // JNZ r16, #10128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10128 delay_slots=5 */ + 10101 "00000001" // /* MW 5 */ + 10102 "01000000" // /* MW 4 */ + 10103 "11001000" // /* MW 3 */ + 10104 "00010011" // /* MW 2 */ + 10105 "10000000" // /* MW 1 */ +.delay_slot + 10106 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10107 "10011001" // /* MW 3 */ + 10108 "11101111" // /* MW 2 */ + 10109 "00000111" // /* MW 1 */ +.delay_slot + 10110 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10111 "11110001" // /* MW 3 */ + 10112 "11110001" // /* MW 2 */ + 10113 "00000111" // /* MW 1 */ +.delay_slot + 10114 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10115 "11010001" // /* MW 3 */ + 10116 "11110101" // /* MW 2 */ + 10117 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 567 14 first + 10122 "00001100" // NOPA; ST r13, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100011" // /* MW 5 */ + 10124 "00001011" // /* MW 4 */ + 10125 "11111100" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592 +.src_ref 8 "superkernels.cpp" 569 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 10128 "11010100" // LDA r11, [sp, #-8]; MOV lr, r11 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10129 "01000001" // /* MW 5 */ + 10130 "11101011" // /* MW 4 */ + 10131 "00101110" // /* MW 3 */ + 10132 "00101110" // /* MW 2 */ + 10133 "11111111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.noswbrkpt + 10134 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10135 "10010001" // /* MW 3 */ + 10136 "11111101" // /* MW 2 */ + 10137 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10138 "00011000" // LDA r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10139 "10110001" // /* MW 3 */ + 10140 "11101001" // /* MW 2 */ + 10141 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 569 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10143 "00000000" // /* MW 3 */ + 10144 "00101000" // /* MW 2 */ + 10145 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10146 "11111000" // MOV p6, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10147 "00100000" // /* MW 3 */ + 10148 "01100110" // /* MW 2 */ + 10149 "00011110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 569 +.delay_slot + 10150 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10151 "00000001" // /* MW 5 */ + 10152 "00000000" // /* MW 4 */ + 10153 "00000000" // /* MW 3 */ + 10154 "11110000" // /* MW 2 */ + 10155 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "00100000" // /* MW 5 */ + 10172 "00000000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 10176 "01110110" // MOVA r13, #0; MOVS p6, r12; J #9408 /* MW 12 */ /* control_operation: words=12 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */ + 10177 "00100000" // /* MW 11 */ + 10178 "00000000" // /* MW 10 */ + 10179 "00000000" // /* MW 9 */ + 10180 "10011000" // /* MW 8 */ + 10181 "00000100" // /* MW 7 */ + 10182 "00000000" // /* MW 6 */ + 10183 "00001011" // /* MW 5 */ + 10184 "10001100" // /* MW 4 */ + 10185 "00000110" // /* MW 3 */ + 10186 "00001101" // /* MW 2 */ + 10187 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 8 "superkernels.cpp" 558 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 10188 "01100100" // MOVX r15, #1; MOV r14, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10189 "00001001" // /* MW 5 */ + 10190 "00100000" // /* MW 4 */ + 10191 "10100111" // /* MW 3 */ + 10192 "11000000" // /* MW 2 */ + 10193 "00000011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.delay_slot + 10194 "01000100" // MOVXM p2, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10195 "10000000" // /* MW 5 */ + 10196 "11001000" // /* MW 4 */ + 10197 "11000100" // /* MW 3 */ + 10198 "00000111" // /* MW 2 */ + 10199 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.delay_slot + 10200 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10201 "10100000" // /* MW 5 */ + 10202 "11001001" // /* MW 4 */ + 10203 "11001110" // /* MW 3 */ + 10204 "00000111" // /* MW 2 */ + 10205 "00000000" // /* MW 1 */ +.delay_slot + 10206 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10207 "10010001" // /* MW 3 */ + 10208 "11100101" // /* MW 2 */ + 10209 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10211 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 9 "me_div.c" 108 19 +.src_ref 9 "me_div.c" 108 19 +.src_ref 9 "me_div.c" 115 4 first +.function_start + 10224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10225 "01000001" // /* MW 5 */ + 10226 "10100000" // /* MW 4 */ + 10227 "00101111" // /* MW 3 */ + 10228 "11000000" // /* MW 2 */ + 10229 "00000000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10231 "00011100" // /* MW 3 */ + 10232 "11000110" // /* MW 2 */ + 10233 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10235 "00011100" // /* MW 3 */ + 10236 "11000110" // /* MW 2 */ + 10237 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10239 "00011100" // /* MW 3 */ + 10240 "11000110" // /* MW 2 */ + 10241 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10243 "00011100" // /* MW 3 */ + 10244 "11000110" // /* MW 2 */ + 10245 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10247 "00011100" // /* MW 3 */ + 10248 "11000110" // /* MW 2 */ + 10249 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10251 "00011100" // /* MW 3 */ + 10252 "11000110" // /* MW 2 */ + 10253 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10255 "00011100" // /* MW 3 */ + 10256 "11000110" // /* MW 2 */ + 10257 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10259 "00011100" // /* MW 3 */ + 10260 "11000110" // /* MW 2 */ + 10261 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10263 "00011100" // /* MW 3 */ + 10264 "11000110" // /* MW 2 */ + 10265 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10267 "00011100" // /* MW 3 */ + 10268 "11000110" // /* MW 2 */ + 10269 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10271 "00011100" // /* MW 3 */ + 10272 "11000110" // /* MW 2 */ + 10273 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10275 "00011100" // /* MW 3 */ + 10276 "11000110" // /* MW 2 */ + 10277 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10279 "00011100" // /* MW 3 */ + 10280 "11000110" // /* MW 2 */ + 10281 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10283 "00011100" // /* MW 3 */ + 10284 "11000110" // /* MW 2 */ + 10285 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10287 "00011100" // /* MW 3 */ + 10288 "11000110" // /* MW 2 */ + 10289 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10291 "00011100" // /* MW 3 */ + 10292 "11000110" // /* MW 2 */ + 10293 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10295 "00011100" // /* MW 3 */ + 10296 "11000110" // /* MW 2 */ + 10297 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10299 "00011100" // /* MW 3 */ + 10300 "11000110" // /* MW 2 */ + 10301 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10303 "00011100" // /* MW 3 */ + 10304 "11000110" // /* MW 2 */ + 10305 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10307 "00011100" // /* MW 3 */ + 10308 "11000110" // /* MW 2 */ + 10309 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "00011100" // /* MW 3 */ + 10312 "11000110" // /* MW 2 */ + 10313 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10315 "00011100" // /* MW 3 */ + 10316 "11000110" // /* MW 2 */ + 10317 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10319 "00011100" // /* MW 3 */ + 10320 "11000110" // /* MW 2 */ + 10321 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10323 "00011100" // /* MW 3 */ + 10324 "11000110" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "00011100" // /* MW 3 */ + 10328 "11000110" // /* MW 2 */ + 10329 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "00011100" // /* MW 3 */ + 10332 "11000110" // /* MW 2 */ + 10333 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10335 "00011100" // /* MW 3 */ + 10336 "11000110" // /* MW 2 */ + 10337 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10339 "00011100" // /* MW 3 */ + 10340 "11000110" // /* MW 2 */ + 10341 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 119 first + 10342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10343 "00000000" // /* MW 3 */ + 10344 "00101000" // /* MW 2 */ + 10345 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 first +.delay_slot + 10346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10347 "00011100" // /* MW 3 */ + 10348 "11000110" // /* MW 2 */ + 10349 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10351 "00011100" // /* MW 3 */ + 10352 "11000110" // /* MW 2 */ + 10353 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10355 "00011100" // /* MW 3 */ + 10356 "11000110" // /* MW 2 */ + 10357 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10359 "00011100" // /* MW 3 */ + 10360 "11000110" // /* MW 2 */ + 10361 "00010000" // /* MW 1 */ +.delay_slot + 10362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10363 "10100000" // /* MW 3 */ + 10364 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 10365 "00011000" // /* MW 1 */ +.label _ZL19propagateFloat32NaNjj +.function propagateFloat32NaN _ZL19propagateFloat32NaNjj +.src_ref 10 "softfloat-specialize" 78 24 +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 143 4 first +.function_start + 10368 "10111010" // MOVA r3, #-22; MOVXM r18, #-16777216 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10369 "00010000" // /* MW 9 */ + 10370 "00000000" // /* MW 8 */ + 10371 "01001000" // /* MW 7 */ + 10372 "00000010" // /* MW 6 */ + 10373 "11000000" // /* MW 5 */ + 10374 "00111111" // /* MW 4 */ + 10375 "00000000" // /* MW 3 */ + 10376 "01000011" // /* MW 2 */ + 10377 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 140 6 +.src_ref 10 "softfloat-specialize" 141 6 + 10378 "10111010" // MOVA r7, #511; MOVXM r0, #4194304 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10379 "00010000" // /* MW 9 */ + 10380 "00000000" // /* MW 8 */ + 10381 "00001000" // /* MW 7 */ + 10382 "00000000" // /* MW 6 */ + 10383 "00010000" // /* MW 5 */ + 10384 "00000000" // /* MW 4 */ + 10385 "00000000" // /* MW 3 */ + 10386 "11100111" // /* MW 2 */ + 10387 "00111111" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 38 +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 140 6 first + 10388 "10111010" // MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10389 "01011000" // /* MW 9 */ + 10390 "11111110" // /* MW 8 */ + 10391 "10101001" // /* MW 7 */ + 10392 "00101100" // /* MW 6 */ + 10393 "01000000" // /* MW 5 */ + 10394 "00000010" // /* MW 4 */ + 10395 "00000000" // /* MW 3 */ + 10396 "00110000" // /* MW 2 */ + 10397 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 141 6 first + 10398 "10011000" // OR r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10399 "00000101" // /* MW 3 */ + 10400 "10000000" // /* MW 2 */ + 10401 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10402 "10011000" // LSHL r6, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10403 "00111101" // /* MW 3 */ + 10404 "01001100" // /* MW 2 */ + 10405 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first + 10406 "10011000" // LSHL r3, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10407 "00111101" // /* MW 3 */ + 10408 "10000110" // /* MW 2 */ + 10409 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 + 10410 "10011000" // AND r3, r7, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10411 "00110100" // /* MW 3 */ + 10412 "11000110" // /* MW 2 */ + 10413 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10414 "10011000" // AND r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10415 "01100100" // /* MW 3 */ + 10416 "11001100" // /* MW 2 */ + 10417 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 + 10418 "10011000" // EQ r6, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10419 "01100111" // /* MW 3 */ + 10420 "01001100" // /* MW 2 */ + 10421 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 38 first + 10422 "10011000" // LSHL r17, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10423 "00001101" // /* MW 3 */ + 10424 "10100011" // /* MW 2 */ + 10425 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 24 + 10426 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10427 "00011100" // /* MW 3 */ + 10428 "10110111" // /* MW 2 */ + 10429 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 62 first + 10430 "00011000" // SEL.EQZ r17, r4, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10431 "00000010" // /* MW 3 */ + 10432 "00100010" // /* MW 2 */ + 10433 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 + 10434 "01000100" // MOVXM r16, #4194303 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10435 "11111110" // /* MW 5 */ + 10436 "00111111" // /* MW 4 */ + 10437 "11111000" // /* MW 3 */ + 10438 "00111111" // /* MW 2 */ + 10439 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first + 10440 "10011000" // AND r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10441 "00000100" // /* MW 3 */ + 10442 "10000101" // /* MW 2 */ + 10443 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 + 10444 "00011000" // NEZ r2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10445 "11110000" // /* MW 3 */ + 10446 "10000100" // /* MW 2 */ + 10447 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10448 "10011000" // AND r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10449 "00000100" // /* MW 3 */ + 10450 "01000011" // /* MW 2 */ + 10451 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 + 10452 "00011000" // NEZ r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10453 "11110000" // /* MW 3 */ + 10454 "01000010" // /* MW 2 */ + 10455 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 4 first + 10456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10457 "00000000" // /* MW 3 */ + 10458 "00101000" // /* MW 2 */ + 10459 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first +.delay_slot + 10460 "10011000" // AND r27, r1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10461 "01100100" // /* MW 3 */ + 10462 "01110110" // /* MW 2 */ + 10463 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first +.delay_slot + 10464 "10011000" // EQ r1, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10465 "01010111" // /* MW 3 */ + 10466 "11000010" // /* MW 2 */ + 10467 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 49 first +.delay_slot + 10468 "00011000" // SEL.EQZ r3, r17, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10469 "01000010" // /* MW 3 */ + 10470 "01000110" // /* MW 2 */ + 10471 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first +.delay_slot + 10472 "10011000" // AND r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10473 "00100100" // /* MW 3 */ + 10474 "01110110" // /* MW 2 */ + 10475 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 27 first +.delay_slot + 10476 "00011000" // SEL.EQZ r0, r3, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10477 "00000010" // /* MW 3 */ + 10478 "11000000" // /* MW 2 */ +.label _ZL19propagateFloat32NaNjj__end + 10479 "00010000" // /* MW 1 */ +.label _ZL19roundAndPackFloat32iij +.function roundAndPackFloat32 _ZL19roundAndPackFloat32iij +.src_ref 10 "softfloat.c" 154 first +.src_ref 10 "softfloat.c" 161 19 +.src_ref 10 "softfloat.c" 203 30 +.function_start + 10480 "10111010" // MOVA r0, #64; MOVXM p0, #509172 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10481 "00010000" // /* MW 9 */ + 10482 "01111010" // /* MW 8 */ + 10483 "00110010" // /* MW 7 */ + 10484 "11110000" // /* MW 6 */ + 10485 "00000001" // /* MW 5 */ + 10486 "00000000" // /* MW 4 */ + 10487 "00000000" // /* MW 3 */ + 10488 "00000000" // /* MW 2 */ + 10489 "00001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 161 19 first +.src_ref 10 "softfloat.c" 171 16 +.src_ref 10 "softfloat.c" 174 16 +.src_ref 10 "softfloat.c" 178 21 +.src_ref 10 "softfloat.c" 194 29 + 10490 "00101100" // LDA r4, [p0]; MOVX r6, #127 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10491 "11111010" // /* MW 5 */ + 10492 "10011001" // /* MW 4 */ + 10493 "11010000" // /* MW 3 */ + 10494 "10010010" // /* MW 2 */ + 10495 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10501 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10503 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10505 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10507 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 162 36 first +.src_ref 10 "softfloat.c" 164 4 first + 10508 "10000100" // JZ r4, #10576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10576 delay_slots=5 */ + 10509 "00000001" // /* MW 5 */ + 10510 "00000000" // /* MW 4 */ + 10511 "10101000" // /* MW 3 */ + 10512 "00010100" // /* MW 2 */ + 10513 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 40 +.src_ref 10 "softfloat.c" 185 68 +.src_ref 10 "softfloat.c" 202 18 +.delay_slot + 10514 "00011000" // MOVX r5, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10515 "00000001" // /* MW 3 */ + 10516 "01001010" // /* MW 2 */ + 10517 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10525 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 8 +.src_ref 10 "softfloat.c" 171 16 +.src_ref 10 "softfloat.c" 171 34 +.src_ref 10 "softfloat.c" 174 16 +.src_ref 10 "softfloat.c" 174 34 + 10526 "10111010" // MOVA r16, #3; MOVX r7, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10527 "01011000" // /* MW 9 */ + 10528 "00000000" // /* MW 8 */ + 10529 "00001000" // /* MW 7 */ + 10530 "01001011" // /* MW 6 */ + 10531 "01110000" // /* MW 5 */ + 10532 "00000000" // /* MW 4 */ + 10533 "00000000" // /* MW 3 */ + 10534 "01110000" // /* MW 2 */ + 10535 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 26 +.src_ref 10 "softfloat.c" 171 34 first + 10536 "01100100" // EQ r27, r7, r4; MOV r5, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10537 "00000101" // /* MW 5 */ + 10538 "10100000" // /* MW 4 */ + 10539 "11110010" // /* MW 3 */ + 10540 "11001000" // /* MW 2 */ + 10541 "00111110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 171 16 + 10542 "00011000" // SEL.EQZ r7, r6, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10543 "10000010" // /* MW 3 */ + 10544 "10001111" // /* MW 2 */ + 10545 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 174 34 first + 10546 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10547 "00000111" // /* MW 3 */ + 10548 "00110111" // /* MW 2 */ + 10549 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 170 12 +.src_ref 10 "softfloat.c" 174 16 + 10550 "11100100" // SEL.EQZ r16, r6, r24, r27; MOV r27, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10551 "01000001" // /* MW 5 */ + 10552 "10100001" // /* MW 4 */ + 10553 "01001101" // /* MW 3 */ + 10554 "00110000" // /* MW 2 */ + 10555 "00110100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 170 12 first +.src_ref 10 "softfloat.c" 170 12 first + 10556 "00011000" // SEL.EQZ r7, r16, r7, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10557 "01110010" // /* MW 3 */ + 10558 "00001110" // /* MW 2 */ + 10559 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 26 first + 10560 "10011000" // EQ r27, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10561 "01000111" // /* MW 3 */ + 10562 "01110110" // /* MW 2 */ + 10563 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 8 + 10564 "00110110" // NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10565 "10000001" // /* MW 11 */ + 10566 "10101101" // /* MW 10 */ + 10567 "00000000" // /* MW 9 */ + 10568 "00010000" // /* MW 8 */ + 10569 "01011100" // /* MW 7 */ + 10570 "00001110" // /* MW 6 */ + 10571 "00100000" // /* MW 5 */ + 10572 "00000000" // /* MW 4 */ + 10573 "11110000" // /* MW 3 */ + 10574 "00101100" // /* MW 2 */ + 10575 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_96 +.src_ref 10 "softfloat.c" 179 14 +.src_ref 10 "softfloat.c" 179 17 first +.src_ref 10 "softfloat.c" 180 23 +.src_ref 10 "softfloat.c" 181 28 + 10576 "01100100" // EXTEND.u16 r18, r2; MOV r16, #253 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10577 "11110101" // /* MW 5 */ + 10578 "00100011" // /* MW 4 */ + 10579 "00001000" // /* MW 3 */ + 10580 "10010110" // /* MW 2 */ + 10581 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 179 14 + 10582 "10011000" // LT r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10583 "00001010" // /* MW 3 */ + 10584 "10100101" // /* MW 2 */ + 10585 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 179 4 + 10586 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */ + 10587 "00000001" // /* MW 5 */ + 10588 "01000000" // /* MW 4 */ + 10589 "00001000" // /* MW 3 */ + 10590 "00010101" // /* MW 2 */ + 10591 "10010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 178 21 first +.delay_slot + 10592 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10593 "01100100" // /* MW 3 */ + 10594 "11100010" // /* MW 2 */ + 10595 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 +.src_ref 10 "softfloat.c" 128 31 +.delay_slot + 10596 "00011000" // MOVX r7, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10597 "01111101" // /* MW 3 */ + 10598 "00001110" // /* MW 2 */ + 10599 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 10600 "10011000" // LSHL r1, r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10601 "01111101" // /* MW 3 */ + 10602 "01000010" // /* MW 2 */ + 10603 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10607 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 181 28 first +.src_ref 10 "softfloat.c" 182 40 first +.src_ref 10 "softfloat.c" 182 59 + 10608 "10111010" // MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10609 "10101000" // /* MW 9 */ + 10610 "11001010" // /* MW 8 */ + 10611 "10001000" // /* MW 7 */ + 10612 "00111110" // /* MW 6 */ + 10613 "00111000" // /* MW 5 */ + 10614 "00000101" // /* MW 4 */ + 10615 "00000000" // /* MW 3 */ + 10616 "00010010" // /* MW 2 */ + 10617 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 59 + 10618 "10011000" // LT r20, r20, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10619 "00101010" // /* MW 3 */ + 10620 "00101001" // /* MW 2 */ + 10621 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 180 23 first + 10622 "10011000" // LT r16, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10623 "00101010" // /* MW 3 */ + 10624 "00100000" // /* MW 2 */ + 10625 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 18 first + 10626 "10011000" // AND r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10627 "01000100" // /* MW 3 */ + 10628 "11100111" // /* MW 2 */ + 10629 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 181 13 first + 10630 "10011000" // OR r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10631 "00000101" // /* MW 3 */ + 10632 "11100111" // /* MW 2 */ + 10633 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 180 8 first + 10634 "10000100" // JNZ r19, #10848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10848 delay_slots=5 */ + 10635 "00000001" // /* MW 5 */ + 10636 "01000000" // /* MW 4 */ + 10637 "00110000" // /* MW 3 */ + 10638 "00010101" // /* MW 2 */ + 10639 "10011000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 187 18 +.src_ref 10 "softfloat.c" 192 39 +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 +.delay_slot + 10640 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10641 "00000001" // /* MW 3 */ + 10642 "00100000" // /* MW 2 */ + 10643 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10651 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 187 18 first + 10652 "10011000" // GE r19, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10653 "00001001" // /* MW 3 */ + 10654 "10100111" // /* MW 2 */ + 10655 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 187 8 + 10656 "10000100" // JNZ r19, #10784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10784 delay_slots=5 */ + 10657 "00000001" // /* MW 5 */ + 10658 "01000000" // /* MW 4 */ + 10659 "00010000" // /* MW 3 */ + 10660 "00010101" // /* MW 2 */ + 10661 "10011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10671 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 192 39 first + 10672 "10011000" // SUB r2, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10673 "00100001" // /* MW 3 */ + 10674 "00000100" // /* MW 2 */ + 10675 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 10676 "10000100" // JZ r2, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */ + 10677 "00000001" // /* MW 5 */ + 10678 "00000000" // /* MW 4 */ + 10679 "11111000" // /* MW 3 */ + 10680 "00010100" // /* MW 2 */ + 10681 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10683 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10691 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 10692 "01100100" // SUB r17, r16, r2; MOV r19, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10693 "10000001" // /* MW 5 */ + 10694 "10100000" // /* MW 4 */ + 10695 "00111001" // /* MW 3 */ + 10696 "01000100" // /* MW 2 */ + 10697 "10000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 10698 "10011000" // AND r7, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10699 "00010100" // /* MW 3 */ + 10700 "11001111" // /* MW 2 */ + 10701 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 10702 "10011000" // LSHL r7, r3, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10703 "01111101" // /* MW 3 */ + 10704 "11001110" // /* MW 2 */ + 10705 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 10706 "10011000" // LSHL r17, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10707 "00011101" // /* MW 3 */ + 10708 "11100011" // /* MW 2 */ + 10709 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 10710 "10011000" // LT r27, r2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10711 "00111010" // /* MW 3 */ + 10712 "10110111" // /* MW 2 */ + 10713 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 10714 "00011000" // NEZ r7, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10715 "11110000" // /* MW 3 */ + 10716 "11001110" // /* MW 2 */ + 10717 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 10718 "00011000" // NEZ r3, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10719 "11110000" // /* MW 3 */ + 10720 "11000110" // /* MW 2 */ + 10721 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 10722 "10011000" // OR r2, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10723 "00010101" // /* MW 3 */ + 10724 "11000101" // /* MW 2 */ + 10725 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 10726 "01111010" // NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10727 "00100010" // /* MW 9 */ + 10728 "11000110" // /* MW 8 */ + 10729 "00000000" // /* MW 7 */ + 10730 "00000000" // /* MW 6 */ + 10731 "01011011" // /* MW 5 */ + 10732 "00000001" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_256 + 10736 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */ + 10737 "00000000" // /* MW 5 */ + 10738 "00000000" // /* MW 4 */ + 10739 "00010000" // /* MW 3 */ + 10740 "00010101" // /* MW 2 */ + 10741 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 194 29 first +.delay_slot + 10742 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10743 "01100100" // /* MW 3 */ + 10744 "11100010" // /* MW 2 */ + 10745 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 +.delay_slot + 10746 "00011000" // MOVX r2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10747 "00000001" // /* MW 3 */ + 10748 "00000100" // /* MW 2 */ + 10749 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10754 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10755 "00011100" // /* MW 13 */ + 10756 "00000000" // /* MW 12 */ + 10757 "00000000" // /* MW 11 */ + 10758 "01010111" // /* MW 10 */ + 10759 "00011010" // /* MW 9 */ + 10760 "01000000" // /* MW 8 */ + 10761 "00000000" // /* MW 7 */ + 10762 "00000000" // /* MW 6 */ + 10763 "10110110" // /* MW 5 */ + 10764 "00000010" // /* MW 4 */ + 10765 "11110000" // /* MW 3 */ + 10766 "00101100" // /* MW 2 */ + 10767 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_288 +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 + 10768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10769 "00000000" // /* MW 15 */ + 10770 "00000000" // /* MW 14 */ + 10771 "01111000" // /* MW 13 */ + 10772 "10100101" // /* MW 12 */ + 10773 "00000001" // /* MW 11 */ + 10774 "00001000" // /* MW 10 */ + 10775 "00000000" // /* MW 9 */ + 10776 "00000001" // /* MW 8 */ + 10777 "01011011" // /* MW 7 */ + 10778 "00000001" // /* MW 6 */ + 10779 "00100000" // /* MW 5 */ + 10780 "00000000" // /* MW 4 */ + 10781 "11110000" // /* MW 3 */ + 10782 "00101100" // /* MW 2 */ + 10783 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_304 +.src_ref 10 "softfloat.c" 202 18 first +.src_ref 10 "softfloat.c" 202 36 +.src_ref 10 "softfloat.c" 203 30 first + 10784 "10111010" // MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10785 "10101000" // /* MW 9 */ + 10786 "11001010" // /* MW 8 */ + 10787 "10101000" // /* MW 7 */ + 10788 "00110100" // /* MW 6 */ + 10789 "00110000" // /* MW 5 */ + 10790 "00100010" // /* MW 4 */ + 10791 "00000000" // /* MW 3 */ + 10792 "00100000" // /* MW 2 */ + 10793 "11111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 59 +.src_ref 10 "softfloat.c" 203 12 +.src_ref 10 "softfloat.c" 203 46 + 10794 "10111010" // MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10795 "01011000" // /* MW 9 */ + 10796 "11111111" // /* MW 8 */ + 10797 "10001111" // /* MW 7 */ + 10798 "00101100" // /* MW 6 */ + 10799 "01100010" // /* MW 5 */ + 10800 "00000110" // /* MW 4 */ + 10801 "00000000" // /* MW 3 */ + 10802 "11100011" // /* MW 2 */ + 10803 "00000010" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 46 + 10804 "00011000" // EQZ r6, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10805 "11010000" // /* MW 3 */ + 10806 "10001100" // /* MW 2 */ + 10807 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 202 36 + 10808 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00001101" // /* MW 3 */ + 10810 "01000000" // /* MW 2 */ + 10811 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 12 + 10812 "10011000" // XOR r4, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10813 "01000110" // /* MW 3 */ + 10814 "10001000" // /* MW 2 */ + 10815 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 205 4 first + 10816 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10817 "00000000" // /* MW 3 */ + 10818 "00101000" // /* MW 2 */ + 10819 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 9 first +.delay_slot + 10820 "10011000" // AND r27, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10821 "00000100" // /* MW 3 */ + 10822 "00110110" // /* MW 2 */ + 10823 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 204 4 first +.src_ref 10 "softfloat.c" 204 14 first +.delay_slot + 10824 "00011000" // SEL.EQZ r2, r16, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10825 "00100010" // /* MW 3 */ + 10826 "00000100" // /* MW 2 */ + 10827 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 59 first +.delay_slot + 10828 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10829 "00111101" // /* MW 3 */ + 10830 "10000100" // /* MW 2 */ + 10831 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 10832 "10011000" // ADD r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10833 "00100000" // /* MW 3 */ + 10834 "01000100" // /* MW 2 */ + 10835 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 66 +.delay_slot + 10836 "00110110" // NOPA; NOPB; NOPS; ADD r0, r27, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10837 "10000001" // /* MW 11 */ + 10838 "10101101" // /* MW 10 */ + 10839 "00000000" // /* MW 9 */ + 10840 "00000100" // /* MW 8 */ + 10841 "00000001" // /* MW 7 */ + 10842 "00110110" // /* MW 6 */ + 10843 "00100000" // /* MW 5 */ + 10844 "00000000" // /* MW 4 */ + 10845 "11110000" // /* MW 3 */ + 10846 "00101100" // /* MW 2 */ + 10847 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_368 +.src_ref 10 "softfloat.c" 185 12 first + 10848 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10849 "00000000" // /* MW 3 */ + 10850 "00101000" // /* MW 2 */ + 10851 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 10852 "01000100" // MOVXM r2, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10853 "00000000" // /* MW 5 */ + 10854 "00100000" // /* MW 4 */ + 10855 "00000001" // /* MW 3 */ + 10856 "10000000" // /* MW 2 */ + 10857 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 first +.delay_slot + 10858 "10011000" // ADD r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10859 "00100000" // /* MW 3 */ + 10860 "01000110" // /* MW 2 */ + 10861 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 185 68 first +.delay_slot + 10862 "00011000" // EQZ r2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10863 "11010000" // /* MW 3 */ + 10864 "01000100" // /* MW 2 */ + 10865 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 185 49 +.delay_slot + 10866 "10011000" // SUB r0, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10867 "00100001" // /* MW 3 */ + 10868 "11000000" // /* MW 2 */ + 10869 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19roundAndPackFloat32iij__end + 10871 "00000000" // /* MW 1 */ +.label _ZL28normalizeRoundAndPackFloat32iij +.function normalizeRoundAndPackFloat32 _ZL28normalizeRoundAndPackFloat32iij +.src_ref 10 "softfloat.c" 218 first +.src_ref 10 "softfloat.c" 224 11 first +.tail_call +.function_start + 10880 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10881 "00000000" // /* MW 5 */ + 10882 "00000000" // /* MW 4 */ + 10883 "01111000" // /* MW 3 */ + 10884 "00010100" // /* MW 2 */ + 10885 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 552 53 first +.delay_slot + 10886 "00011000" // CLZ r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10887 "00110000" // /* MW 3 */ + 10888 "11100000" // /* MW 2 */ + 10889 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 223 45 first +.delay_slot + 10890 "00011000" // ADD r16, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "11111111" // /* MW 3 */ + 10892 "00100001" // /* MW 2 */ + 10893 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 224 44 first +.delay_slot + 10894 "10011000" // SUB r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10895 "00000001" // /* MW 3 */ + 10896 "10000101" // /* MW 2 */ + 10897 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 224 62 +.delay_slot + 10898 "10011000" // LSHL r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10899 "00001101" // /* MW 3 */ + 10900 "11000111" // /* MW 2 */ + 10901 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL28normalizeRoundAndPackFloat32iij__end + 10903 "00000000" // /* MW 1 */ +.label int32_to_float32 +.function int32_to_float32 int32_to_float32 +.src_ref 10 "softfloat.c" 477 first +.src_ref 10 "softfloat.c" 481 4 +.src_ref 10 "softfloat.c" 481 11 first +.function_start + 10912 "10000100" // JZ r1, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */ + 10913 "00000001" // /* MW 5 */ + 10914 "00000000" // /* MW 4 */ + 10915 "01111000" // /* MW 3 */ + 10916 "00010101" // /* MW 2 */ + 10917 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 11 + 10928 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10929 "00000000" // /* MW 5 */ + 10930 "00100000" // /* MW 4 */ + 10931 "00001000" // /* MW 3 */ + 10932 "00000000" // /* MW 2 */ + 10933 "10000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 11 first + 10934 "10011000" // EQ r16, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10935 "00000111" // /* MW 3 */ + 10936 "01100001" // /* MW 2 */ + 10937 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 4 + 10938 "10000100" // JNZ r16, #11008 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11008 delay_slots=5 */ + 10939 "00000001" // /* MW 5 */ + 10940 "01000000" // /* MW 4 */ + 10941 "10000000" // /* MW 3 */ + 10942 "00010101" // /* MW 2 */ + 10943 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10949 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 484 11 +.src_ref 10 "softfloat.c" 484 11 first +.tail_call + 10954 "10111010" // MOVA r2, #156; J #10880 /* MW 10 */ /* control_operation: words=10 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */ + 10955 "00100000" // /* MW 9 */ + 10956 "00000000" // /* MW 8 */ + 10957 "00000000" // /* MW 7 */ + 10958 "01010000" // /* MW 6 */ + 10959 "00000101" // /* MW 5 */ + 10960 "00000000" // /* MW 4 */ + 10961 "00000000" // /* MW 3 */ + 10962 "10000010" // /* MW 2 */ + 10963 "00010011" // /* MW 1 */ +.src_ref 10 "softfloat.c" 484 60 +.src_ref 10 "softfloat.c" 484 62 +.delay_slot + 10964 "00011000" // ABS r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00010000" // /* MW 3 */ + 10966 "01000111" // /* MW 2 */ + 10967 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 483 16 +.delay_slot + 10968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10969 "00000001" // /* MW 3 */ + 10970 "00100000" // /* MW 2 */ + 10971 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 483 16 first +.delay_slot + 10972 "10011000" // LT r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10973 "00001010" // /* MW 3 */ + 10974 "01000011" // /* MW 2 */ + 10975 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10978 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10979 "00011100" // /* MW 13 */ + 10980 "00000000" // /* MW 12 */ + 10981 "00000000" // /* MW 11 */ + 10982 "01010111" // /* MW 10 */ + 10983 "00011010" // /* MW 9 */ + 10984 "01000000" // /* MW 8 */ + 10985 "00000000" // /* MW 7 */ + 10986 "00000000" // /* MW 6 */ + 10987 "10110110" // /* MW 5 */ + 10988 "00000010" // /* MW 4 */ + 10989 "11110000" // /* MW 3 */ + 10990 "00101100" // /* MW 2 */ + 10991 "00000000" // /* MW 1 */ +.label TGT_Fint32_to_float32_80 +.src_ref 10 "softfloat.c" 481 18 first +.return_address + 10992 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10993 "00000000" // /* MW 3 */ + 10994 "00101000" // /* MW 2 */ + 10995 "00010000" // /* MW 1 */ +.delay_slot + 10996 "00011000" // MOVX r0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10997 "00000001" // /* MW 3 */ + 10998 "00000000" // /* MW 2 */ + 10999 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11001 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11007 "00000000" // /* MW 1 */ +.label TGT_Fint32_to_float32_96 +.src_ref 10 "softfloat.c" 482 37 first + 11008 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11009 "00000000" // /* MW 3 */ + 11010 "00101000" // /* MW 2 */ + 11011 "00010000" // /* MW 1 */ +.delay_slot + 11012 "01000100" // MOVXM r0, #-822083584 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11013 "00000000" // /* MW 5 */ + 11014 "00100000" // /* MW 4 */ + 11015 "00000000" // /* MW 3 */ + 11016 "00000000" // /* MW 2 */ + 11017 "11001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label int32_to_float32__end + 11025 "00000000" // /* MW 1 */ +.label _ZL14addFloat32Sigsjji +.function addFloat32Sigs _ZL14addFloat32Sigsjji +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 81 14 +.src_ref 10 "softfloat.c" 734 first +.function_start + 11040 "10111010" // MOVA r18, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11041 "10010000" // /* MW 9 */ + 11042 "11111111" // /* MW 8 */ + 11043 "00001111" // /* MW 7 */ + 11044 "11111110" // /* MW 6 */ + 11045 "00011111" // /* MW 5 */ + 11046 "00000000" // /* MW 4 */ + 11047 "00000000" // /* MW 3 */ + 11048 "00110010" // /* MW 2 */ + 11049 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 first + 11050 "10011000" // LSHL r17, r1, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11051 "00101101" // /* MW 3 */ + 11052 "01100011" // /* MW 2 */ + 11053 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 + 11054 "10011000" // LSHL r4, r2, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11055 "00101101" // /* MW 3 */ + 11056 "10001001" // /* MW 2 */ + 11057 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11058 "00011000" // EXTEND.u8 r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11059 "10010000" // /* MW 3 */ + 11060 "01110110" // /* MW 2 */ + 11061 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11062 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11063 "10010000" // /* MW 3 */ + 11064 "00110010" // /* MW 2 */ + 11065 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 744 19 first +.src_ref 10 "softfloat.c" 747 11 +.src_ref 10 "softfloat.c" 761 22 +.src_ref 10 "softfloat.c" 772 35 +.src_ref 10 "softfloat.c" 788 24 + 11066 "01100100" // SUB r17, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11067 "00000001" // /* MW 5 */ + 11068 "00100000" // /* MW 4 */ + 11069 "00111100" // /* MW 3 */ + 11070 "01110010" // /* MW 2 */ + 11071 "11011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 747 11 first + 11072 "10011000" // LT r4, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11073 "00011010" // /* MW 3 */ + 11074 "00001001" // /* MW 2 */ + 11075 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 747 4 + 11076 "10000100" // JNZ r4, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */ + 11077 "00000001" // /* MW 5 */ + 11078 "01000000" // /* MW 4 */ + 11079 "11111000" // /* MW 3 */ + 11080 "00010101" // /* MW 2 */ + 11081 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first +.delay_slot + 11082 "10011000" // AND r19, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11083 "00000100" // /* MW 3 */ + 11084 "01100111" // /* MW 2 */ + 11085 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 745 9 +.src_ref 10 "softfloat.c" 746 9 +.delay_slot + 11086 "01100100" // AND r16, r2, r16; MOV r0, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11087 "00011001" // /* MW 5 */ + 11088 "00100000" // /* MW 4 */ + 11089 "10010000" // /* MW 3 */ + 11090 "00100000" // /* MW 2 */ + 11091 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 745 9 first +.delay_slot + 11092 "10011000" // LSHL r19, r19, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11093 "00001101" // /* MW 3 */ + 11094 "11100110" // /* MW 2 */ + 11095 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 746 9 first +.src_ref 10 "softfloat.c" 748 18 +.src_ref 10 "softfloat.c" 762 18 +.delay_slot + 11096 "01100100" // LSHL r16, r16, r0; MOV r20, #255 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11097 "11111101" // /* MW 5 */ + 11098 "00100011" // /* MW 4 */ + 11099 "10111010" // /* MW 3 */ + 11100 "00000001" // /* MW 2 */ + 11101 "10000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 +.src_ref 10 "softfloat.c" 128 31 +.src_ref 10 "softfloat.c" 748 18 first +.delay_slot + 11102 "01100100" // EQ r0, r27, r20; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11103 "01111101" // /* MW 5 */ + 11104 "00100000" // /* MW 4 */ + 11105 "11111001" // /* MW 3 */ + 11106 "00101000" // /* MW 2 */ + 11107 "11011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 761 22 first + 11108 "10011000" // GE r5, r17, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11109 "10001001" // /* MW 3 */ + 11110 "01001011" // /* MW 2 */ + 11111 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 761 9 + 11112 "10000100" // JNZ r5, #11440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11440 delay_slots=5 */ + 11113 "00000001" // /* MW 5 */ + 11114 "01000000" // /* MW 4 */ + 11115 "01011000" // /* MW 3 */ + 11116 "00010110" // /* MW 2 */ + 11117 "00101000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 11118 "10011000" // LSHL r4, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11119 "00101101" // /* MW 3 */ + 11120 "11001001" // /* MW 2 */ + 11121 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11129 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 762 18 first + 11130 "10011000" // EQ r20, r25, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11131 "01000111" // /* MW 3 */ + 11132 "01101001" // /* MW 2 */ + 11133 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 762 8 + 11134 "10000100" // JNZ r20, #11392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11392 delay_slots=5 */ + 11135 "00000001" // /* MW 5 */ + 11136 "01000000" // /* MW 4 */ + 11137 "01000000" // /* MW 3 */ + 11138 "00010110" // /* MW 2 */ + 11139 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11143 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11149 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11150 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11151 "10100000" // /* MW 3 */ + 11152 "01010001" // /* MW 2 */ + 11153 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 787 4 + 11154 "11111000" // MOV r2, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11155 "10100000" // /* MW 3 */ + 11156 "10011100" // /* MW 2 */ + 11157 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 767 12 first + 11158 "00011000" // ADD r0, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11159 "00000111" // /* MW 3 */ + 11160 "01000000" // /* MW 2 */ + 11161 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 766 8 first + 11162 "00011000" // SEL.EQZ r17, r0, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11163 "00010010" // /* MW 3 */ + 11164 "00100011" // /* MW 2 */ + 11165 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 772 35 first + 11166 "10011000" // SUB r17, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11167 "00010001" // /* MW 3 */ + 11168 "00100011" // /* MW 2 */ + 11169 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11170 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */ + 11171 "00000001" // /* MW 5 */ + 11172 "00000000" // /* MW 4 */ + 11173 "00101000" // /* MW 3 */ + 11174 "00010110" // /* MW 2 */ + 11175 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 770 17 +.src_ref 10 "softfloat.c" 785 9 +.delay_slot + 11176 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11177 "00000000" // /* MW 5 */ + 11178 "00100000" // /* MW 4 */ + 11179 "00001010" // /* MW 3 */ + 11180 "00000000" // /* MW 2 */ + 11181 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 770 17 first +.delay_slot + 11182 "10011000" // OR r3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "01000101" // /* MW 3 */ + 11184 "11000111" // /* MW 2 */ + 11185 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 766 8 first +.delay_slot + 11186 "00011000" // SEL.EQZ r19, r19, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11187 "00110010" // /* MW 3 */ + 11188 "11100110" // /* MW 2 */ + 11189 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11193 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 11194 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11195 "10000001" // /* MW 5 */ + 11196 "00100000" // /* MW 4 */ + 11197 "00110000" // /* MW 3 */ + 11198 "11100010" // /* MW 2 */ + 11199 "11000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11200 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11201 "00100100" // /* MW 3 */ + 11202 "11100101" // /* MW 2 */ + 11203 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11204 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11205 "00101101" // /* MW 3 */ + 11206 "11100101" // /* MW 2 */ + 11207 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11208 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11209 "00001010" // /* MW 3 */ + 11210 "01110110" // /* MW 2 */ + 11211 "00010100" // /* MW 1 */ + 11212 "10000100" // J #11344 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11344 delay_slots=5 */ + 11213 "00000000" // /* MW 5 */ + 11214 "00000000" // /* MW 4 */ + 11215 "00101000" // /* MW 3 */ + 11216 "00010110" // /* MW 2 */ + 11217 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 first +.delay_slot + 11218 "10011000" // LSHL r3, r19, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11219 "00111101" // /* MW 3 */ + 11220 "11000110" // /* MW 2 */ + 11221 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 +.delay_slot + 11222 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11223 "11110000" // /* MW 3 */ + 11224 "10100100" // /* MW 2 */ + 11225 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first +.delay_slot + 11226 "00011000" // NEZ r17, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11227 "11110000" // /* MW 3 */ + 11228 "11100010" // /* MW 2 */ + 11229 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first +.delay_slot + 11230 "10011000" // OR r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11231 "00100101" // /* MW 3 */ + 11232 "11100101" // /* MW 2 */ + 11233 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first +.delay_slot + 11234 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11235 "01100000" // /* MW 13 */ + 11236 "00101011" // /* MW 12 */ + 11237 "00000000" // /* MW 11 */ + 11238 "10101111" // /* MW 10 */ + 11239 "00110100" // /* MW 9 */ + 11240 "00000000" // /* MW 8 */ + 11241 "00100010" // /* MW 7 */ + 11242 "01100111" // /* MW 6 */ + 11243 "00100100" // /* MW 5 */ + 11244 "00000000" // /* MW 4 */ + 11245 "11110000" // /* MW 3 */ + 11246 "00101100" // /* MW 2 */ + 11247 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_208 +.src_ref 10 "softfloat.c" 748 8 first + 11248 "10000100" // JNZ r0, #11504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11504 delay_slots=5 */ + 11249 "00000001" // /* MW 5 */ + 11250 "01000000" // /* MW 4 */ + 11251 "01111000" // /* MW 3 */ + 11252 "00010110" // /* MW 2 */ + 11253 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 756 17 +.src_ref 10 "softfloat.c" 785 9 +.delay_slot + 11254 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11255 "00000000" // /* MW 5 */ + 11256 "00100000" // /* MW 4 */ + 11257 "00001010" // /* MW 3 */ + 11258 "00000000" // /* MW 2 */ + 11259 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11265 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11267 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11268 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11269 "10100000" // /* MW 3 */ + 11270 "01010001" // /* MW 2 */ + 11271 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 753 12 first +.src_ref 10 "softfloat.c" 787 4 + 11272 "11100100" // ADD r3, r17, #-1; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11273 "01000001" // /* MW 5 */ + 11274 "00111011" // /* MW 4 */ + 11275 "11100001" // /* MW 3 */ + 11276 "11111111" // /* MW 2 */ + 11277 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 +.src_ref 10 "softfloat.c" 752 18 + 11278 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "10100000" // /* MW 3 */ + 11280 "11011100" // /* MW 2 */ + 11281 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 first +.src_ref 10 "softfloat.c" 752 18 first + 11282 "00011000" // SEL.EQZ r17, r3, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11283 "00010010" // /* MW 3 */ + 11284 "11100011" // /* MW 2 */ + 11285 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11286 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */ + 11287 "00000001" // /* MW 5 */ + 11288 "00000000" // /* MW 4 */ + 11289 "00101000" // /* MW 3 */ + 11290 "00010110" // /* MW 2 */ + 11291 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 756 17 first +.delay_slot + 11292 "10011000" // OR r0, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "00000101" // /* MW 3 */ + 11294 "00000001" // /* MW 2 */ + 11295 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 first +.src_ref 10 "softfloat.c" 752 18 first +.delay_slot + 11296 "00011000" // SEL.EQZ r16, r16, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00000010" // /* MW 3 */ + 11298 "00100000" // /* MW 2 */ + 11299 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11305 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 11306 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11307 "10000001" // /* MW 5 */ + 11308 "00100000" // /* MW 4 */ + 11309 "00110000" // /* MW 3 */ + 11310 "11100010" // /* MW 2 */ + 11311 "11000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11312 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "00100100" // /* MW 3 */ + 11314 "11100101" // /* MW 2 */ + 11315 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11316 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00101101" // /* MW 3 */ + 11318 "00100101" // /* MW 2 */ + 11319 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 11320 "10011000" // LSHL r3, r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "00111101" // /* MW 3 */ + 11322 "00000110" // /* MW 2 */ + 11323 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11324 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11325 "00001010" // /* MW 3 */ + 11326 "01110110" // /* MW 2 */ + 11327 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 11328 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11329 "11110000" // /* MW 3 */ + 11330 "10100100" // /* MW 2 */ + 11331 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11332 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11333 "11110000" // /* MW 3 */ + 11334 "00100000" // /* MW 2 */ + 11335 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 11336 "10011000" // OR r17, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11337 "00100101" // /* MW 3 */ + 11338 "11100011" // /* MW 2 */ + 11339 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 11340 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11341 "00010010" // /* MW 3 */ + 11342 "00100001" // /* MW 2 */ + 11343 "00010100" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_304 +.src_ref 10 "softfloat.c" 785 9 first +.src_ref 10 "softfloat.c" 786 26 +.src_ref 10 "softfloat.c" 787 4 first + 11344 "10111010" // MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11345 "11001000" // /* MW 9 */ + 11346 "10111111" // /* MW 8 */ + 11347 "00101000" // /* MW 7 */ + 11348 "00101110" // /* MW 6 */ + 11349 "00111010" // /* MW 5 */ + 11350 "00100111" // /* MW 4 */ + 11351 "00000000" // /* MW 3 */ + 11352 "00110010" // /* MW 2 */ + 11353 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 786 18 first +.src_ref 10 "softfloat.c" 790 8 first + 11354 "00100100" // ADD r19, r19, r16; ADD.NC r16, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11355 "00000001" // /* MW 5 */ + 11356 "00110001" // /* MW 4 */ + 11357 "00011000" // /* MW 3 */ + 11358 "11100000" // /* MW 2 */ + 11359 "10011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 786 26 + 11360 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11361 "00101101" // /* MW 3 */ + 11362 "11100101" // /* MW 2 */ + 11363 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 24 first + 11364 "10011000" // LT r27, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11365 "10001010" // /* MW 3 */ + 11366 "10110111" // /* MW 2 */ + 11367 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 4 + 11368 "00011000" // SEL.EQZ r2, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11369 "00000010" // /* MW 3 */ + 11370 "01000101" // /* MW 2 */ + 11371 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 4 + 11372 "00011000" // SEL.EQZ r3, r18, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11373 "00110010" // /* MW 3 */ + 11374 "10000111" // /* MW 2 */ + 11375 "00010100" // /* MW 1 */ +.label __ll1__ZL14addFloat32Sigsjji +.src_ref 10 "softfloat.c" 793 11 first +.tail_call + 11376 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 11377 "00000000" // /* MW 5 */ + 11378 "00000000" // /* MW 4 */ + 11379 "01111000" // /* MW 3 */ + 11380 "00010100" // /* MW 2 */ + 11381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11391 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_352 +.src_ref 10 "softfloat.c" 763 12 first +.return_address + 11392 "10000100" // JNZ r16, #11536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11536 delay_slots=5 */ + 11393 "00000001" // /* MW 5 */ + 11394 "01000000" // /* MW 4 */ + 11395 "10001000" // /* MW 3 */ + 11396 "00010110" // /* MW 2 */ + 11397 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 764 12 first + 11408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11409 "00000000" // /* MW 3 */ + 11410 "00101000" // /* MW 2 */ + 11411 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 11412 "01000100" // MOVXM r16, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11413 "00000000" // /* MW 5 */ + 11414 "00100000" // /* MW 4 */ + 11415 "00001000" // /* MW 3 */ + 11416 "10000000" // /* MW 2 */ + 11417 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 first +.delay_slot + 11418 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11419 "00000000" // /* MW 3 */ + 11420 "00000001" // /* MW 2 */ + 11421 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11427 "00011100" // /* MW 13 */ + 11428 "00000000" // /* MW 12 */ + 11429 "00000000" // /* MW 11 */ + 11430 "01010111" // /* MW 10 */ + 11431 "00011010" // /* MW 9 */ + 11432 "01000000" // /* MW 8 */ + 11433 "00000000" // /* MW 7 */ + 11434 "00000000" // /* MW 6 */ + 11435 "10110110" // /* MW 5 */ + 11436 "00000010" // /* MW 4 */ + 11437 "11110000" // /* MW 3 */ + 11438 "00101100" // /* MW 2 */ + 11439 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_400 +.src_ref 10 "softfloat.c" 776 8 first + 11440 "10000100" // JNZ r0, #11552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11552 delay_slots=5 */ + 11441 "00000001" // /* MW 5 */ + 11442 "01000000" // /* MW 4 */ + 11443 "10010000" // /* MW 3 */ + 11444 "00010110" // /* MW 2 */ + 11445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 8 first + 11456 "10000100" // JZ r27, #11600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11600 delay_slots=5 */ + 11457 "00000001" // /* MW 5 */ + 11458 "00000000" // /* MW 4 */ + 11459 "10101000" // /* MW 3 */ + 11460 "00010110" // /* MW 2 */ + 11461 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11472 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11473 "10100000" // /* MW 3 */ + 11474 "01010001" // /* MW 2 */ + 11475 "00011000" // /* MW 1 */ + 11476 "10000100" // J #11376 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11376 delay_slots=5 */ + 11477 "00000000" // /* MW 5 */ + 11478 "00000000" // /* MW 4 */ + 11479 "00111000" // /* MW 3 */ + 11480 "00010110" // /* MW 2 */ + 11481 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 26 +.delay_slot + 11482 "01000100" // MOVXM r17, #1073741824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11483 "00000000" // /* MW 5 */ + 11484 "10100000" // /* MW 4 */ + 11485 "00001000" // /* MW 3 */ + 11486 "00000000" // /* MW 2 */ + 11487 "01000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 26 first +.src_ref 10 "softfloat.c" 793 11 +.delay_slot + 11488 "11100100" // ADD r17, r19, r17; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11489 "01000001" // /* MW 5 */ + 11490 "00111011" // /* MW 4 */ + 11491 "00010001" // /* MW 3 */ + 11492 "01100010" // /* MW 2 */ + 11493 "10011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 33 +.delay_slot + 11494 "10011000" // ADD r3, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11495 "00000000" // /* MW 3 */ + 11496 "01000111" // /* MW 2 */ + 11497 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11500 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11501 "01100111" // /* MW 3 */ + 11502 "00000001" // /* MW 2 */ + 11503 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_464 +.src_ref 10 "softfloat.c" 749 12 first + 11504 "10000100" // JNZ r19, #11632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11632 delay_slots=5 */ + 11505 "00000001" // /* MW 5 */ + 11506 "01000000" // /* MW 4 */ + 11507 "10111000" // /* MW 3 */ + 11508 "00010110" // /* MW 2 */ + 11509 "10011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 750 12 first + 11520 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11521 "00000000" // /* MW 3 */ + 11522 "00101000" // /* MW 2 */ + 11523 "00010000" // /* MW 1 */ +.delay_slot + 11524 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "10100000" // /* MW 3 */ + 11526 "00010000" // /* MW 2 */ + 11527 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11535 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_496 +.src_ref 10 "softfloat.c" 763 31 first +.tail_call + 11536 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11537 "00000000" // /* MW 5 */ + 11538 "00000000" // /* MW 4 */ + 11539 "01000000" // /* MW 3 */ + 11540 "00010100" // /* MW 2 */ + 11541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11551 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_512 +.src_ref 10 "softfloat.c" 777 22 first +.return_address + 11552 "10011000" // OR r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11553 "00000101" // /* MW 3 */ + 11554 "11100001" // /* MW 2 */ + 11555 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 777 12 + 11556 "10000100" // JNZ r16, #11648 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11648 delay_slots=5 */ + 11557 "00000001" // /* MW 5 */ + 11558 "01000000" // /* MW 4 */ + 11559 "11000000" // /* MW 3 */ + 11560 "00010110" // /* MW 2 */ + 11561 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11567 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11571 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 778 12 first + 11572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11573 "00000000" // /* MW 3 */ + 11574 "00101000" // /* MW 2 */ + 11575 "00010000" // /* MW 1 */ +.delay_slot + 11576 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11577 "10100000" // /* MW 3 */ + 11578 "00010000" // /* MW 2 */ + 11579 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11583 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11585 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11586 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11587 "00011100" // /* MW 13 */ + 11588 "00000000" // /* MW 12 */ + 11589 "00000000" // /* MW 11 */ + 11590 "01010111" // /* MW 10 */ + 11591 "00011010" // /* MW 9 */ + 11592 "01000000" // /* MW 8 */ + 11593 "00000000" // /* MW 7 */ + 11594 "00000000" // /* MW 6 */ + 11595 "10110110" // /* MW 5 */ + 11596 "00000010" // /* MW 4 */ + 11597 "11110000" // /* MW 3 */ + 11598 "00101100" // /* MW 2 */ + 11599 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_560 +.src_ref 10 "softfloat.c" 780 25 first +.src_ref 10 "softfloat.c" 780 62 first + 11600 "10100100" // RET lr; ADD.NC r16, r19, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11601 "10000010" // /* MW 5 */ + 11602 "00110011" // /* MW 4 */ + 11603 "00001000" // /* MW 3 */ + 11604 "00000000" // /* MW 2 */ + 11605 "00000101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 70 +.delay_slot + 11606 "00011000" // MOVX r17, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11607 "11101001" // /* MW 3 */ + 11608 "11100010" // /* MW 2 */ + 11609 "00010111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 70 +.delay_slot + 11610 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11611 "00011101" // /* MW 3 */ + 11612 "00100001" // /* MW 2 */ + 11613 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 66 first +.delay_slot + 11614 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11615 "00000000" // /* MW 3 */ + 11616 "00000001" // /* MW 2 */ + 11617 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11620 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11621 "10000001" // /* MW 11 */ + 11622 "10101101" // /* MW 10 */ + 11623 "00000000" // /* MW 9 */ + 11624 "00000000" // /* MW 8 */ + 11625 "00000000" // /* MW 7 */ + 11626 "00000000" // /* MW 6 */ + 11627 "00100000" // /* MW 5 */ + 11628 "00000000" // /* MW 4 */ + 11629 "11110000" // /* MW 3 */ + 11630 "00101100" // /* MW 2 */ + 11631 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_592 +.src_ref 10 "softfloat.c" 749 31 first +.tail_call + 11632 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11633 "00000000" // /* MW 5 */ + 11634 "00000000" // /* MW 4 */ + 11635 "01000000" // /* MW 3 */ + 11636 "00010100" // /* MW 2 */ + 11637 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11639 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11647 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_608 +.src_ref 10 "softfloat.c" 777 38 first +.tail_call +.return_address + 11648 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11649 "00000000" // /* MW 5 */ + 11650 "00000000" // /* MW 4 */ + 11651 "01000000" // /* MW 3 */ + 11652 "00010100" // /* MW 2 */ + 11653 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL14addFloat32Sigsjji__end + 11663 "00000000" // /* MW 1 */ +.label _ZL14subFloat32Sigsjji +.function subFloat32Sigs _ZL14subFloat32Sigsjji +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 81 14 +.src_ref 10 "softfloat.c" 805 first +.function_start + 11664 "10111010" // MOVA r17, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11665 "10010000" // /* MW 9 */ + 11666 "11111111" // /* MW 8 */ + 11667 "00001111" // /* MW 7 */ + 11668 "11111110" // /* MW 6 */ + 11669 "00011111" // /* MW 5 */ + 11670 "00000000" // /* MW 4 */ + 11671 "00000000" // /* MW 3 */ + 11672 "00110001" // /* MW 2 */ + 11673 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 first + 11674 "10011000" // LSHL r4, r2, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11675 "00011101" // /* MW 3 */ + 11676 "10001001" // /* MW 2 */ + 11677 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 + 11678 "10011000" // LSHL r18, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11679 "00011101" // /* MW 3 */ + 11680 "01100101" // /* MW 2 */ + 11681 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first + 11682 "10011000" // AND r20, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11683 "00000100" // /* MW 3 */ + 11684 "01101001" // /* MW 2 */ + 11685 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 first + 11686 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11687 "10010000" // /* MW 3 */ + 11688 "00110010" // /* MW 2 */ + 11689 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11690 "00011000" // EXTEND.u8 r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11691 "10010000" // /* MW 3 */ + 11692 "10110110" // /* MW 2 */ + 11693 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first +.src_ref 10 "softfloat.c" 816 9 +.src_ref 10 "softfloat.c" 817 9 + 11694 "01100100" // AND r16, r2, r16; MOV r19, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11695 "00011101" // /* MW 5 */ + 11696 "10100000" // /* MW 4 */ + 11697 "10011001" // /* MW 3 */ + 11698 "00100000" // /* MW 2 */ + 11699 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 816 9 first + 11700 "10011000" // LSHL r17, r20, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11701 "00111101" // /* MW 3 */ + 11702 "00100011" // /* MW 2 */ + 11703 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 815 19 first +.src_ref 10 "softfloat.c" 818 11 +.src_ref 10 "softfloat.c" 819 17 +.src_ref 10 "softfloat.c" 843 31 + 11704 "01100100" // SUB r18, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11705 "00000001" // /* MW 5 */ + 11706 "00100000" // /* MW 4 */ + 11707 "00111100" // /* MW 3 */ + 11708 "10110010" // /* MW 2 */ + 11709 "11011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 818 11 first + 11710 "10011000" // LT r5, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11711 "00101010" // /* MW 3 */ + 11712 "00001011" // /* MW 2 */ + 11713 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 818 4 + 11714 "10000100" // JNZ r5, #11904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11904 delay_slots=5 */ + 11715 "00000001" // /* MW 5 */ + 11716 "01000000" // /* MW 4 */ + 11717 "01000000" // /* MW 3 */ + 11718 "00010111" // /* MW 2 */ + 11719 "00101000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 817 9 first +.delay_slot + 11720 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11721 "00111101" // /* MW 3 */ + 11722 "00100001" // /* MW 2 */ + 11723 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 14 +.src_ref 10 "softfloat.c" 851 14 +.src_ref 10 "softfloat.c" 859 13 +.src_ref 10 "softfloat.c" 862 9 +.delay_slot + 11724 "10111010" // MOVA r0, #255; MOVXM r4, #1073741824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11725 "00010000" // /* MW 9 */ + 11726 "00000000" // /* MW 8 */ + 11727 "10001000" // /* MW 7 */ + 11728 "00000000" // /* MW 6 */ + 11729 "00000000" // /* MW 5 */ + 11730 "00010000" // /* MW 4 */ + 11731 "00000000" // /* MW 3 */ + 11732 "11100000" // /* MW 2 */ + 11733 "00011111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 851 14 first +.delay_slot + 11734 "10011000" // EQ r20, r27, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "00000111" // /* MW 3 */ + 11736 "11101000" // /* MW 2 */ + 11737 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 862 9 first +.delay_slot + 11738 "10011000" // OR r19, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11739 "01000101" // /* MW 3 */ + 11740 "01100110" // /* MW 2 */ + 11741 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 859 13 first +.delay_slot + 11742 "10011000" // OR r4, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11743 "00000101" // /* MW 3 */ + 11744 "00001001" // /* MW 2 */ + 11745 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 819 17 first + 11746 "10011000" // GE r6, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11747 "10001001" // /* MW 3 */ + 11748 "10001101" // /* MW 2 */ + 11749 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 819 4 + 11750 "10000100" // JNZ r6, #12064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12064 delay_slots=5 */ + 11751 "00000001" // /* MW 5 */ + 11752 "01000000" // /* MW 4 */ + 11753 "10010000" // /* MW 3 */ + 11754 "00010111" // /* MW 2 */ + 11755 "00110000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.src_ref 10 "softfloat.c" 835 34 +.delay_slot + 11756 "00011000" // MOVX r5, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11757 "00000101" // /* MW 3 */ + 11758 "00001010" // /* MW 2 */ + 11759 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 835 34 first +.delay_slot + 11760 "10011000" // XOR r7, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11761 "01010110" // /* MW 3 */ + 11762 "11001110" // /* MW 2 */ + 11763 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11767 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11769 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 14 first + 11770 "10011000" // EQ r20, r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11771 "00000111" // /* MW 3 */ + 11772 "01101000" // /* MW 2 */ + 11773 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 4 + 11774 "10000100" // JNZ r20, #12176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12176 delay_slots=5 */ + 11775 "00000001" // /* MW 5 */ + 11776 "01000000" // /* MW 4 */ + 11777 "11001000" // /* MW 3 */ + 11778 "00010111" // /* MW 2 */ + 11779 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 + 11790 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11791 "10100000" // /* MW 3 */ + 11792 "01010011" // /* MW 2 */ + 11793 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 838 8 first + 11794 "00011000" // ADD r16, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11795 "00000111" // /* MW 3 */ + 11796 "10100000" // /* MW 2 */ + 11797 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 837 4 first + 11798 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11799 "00100010" // /* MW 3 */ + 11800 "00100001" // /* MW 2 */ + 11801 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 843 31 first + 11802 "10011000" // SUB r16, r24, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11803 "00000001" // /* MW 3 */ + 11804 "00100001" // /* MW 2 */ + 11805 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11806 "10000100" // JZ r16, #11872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11872 delay_slots=5 */ + 11807 "00000001" // /* MW 5 */ + 11808 "00000000" // /* MW 4 */ + 11809 "00110000" // /* MW 3 */ + 11810 "00010111" // /* MW 2 */ + 11811 "10000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 837 4 first +.delay_slot + 11812 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00110010" // /* MW 3 */ + 11814 "01100011" // /* MW 2 */ + 11815 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11817 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11819 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11821 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11823 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first +.src_ref 10 "softfloat-macros" 50 48 + 11824 "10111010" // MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11825 "01011000" // /* MW 9 */ + 11826 "00011111" // /* MW 8 */ + 11827 "01001000" // /* MW 7 */ + 11828 "00001110" // /* MW 6 */ + 11829 "00111000" // /* MW 5 */ + 11830 "00110000" // /* MW 4 */ + 11831 "00000000" // /* MW 3 */ + 11832 "00010100" // /* MW 2 */ + 11833 "00000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11834 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11835 "00100100" // /* MW 3 */ + 11836 "11100101" // /* MW 2 */ + 11837 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11838 "10011000" // LSHL r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11839 "00101101" // /* MW 3 */ + 11840 "01100101" // /* MW 2 */ + 11841 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11842 "00011000" // NEZ r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11843 "11110000" // /* MW 3 */ + 11844 "01100110" // /* MW 2 */ + 11845 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11846 "10011000" // LT r27, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11847 "01001010" // /* MW 3 */ + 11848 "00110111" // /* MW 2 */ + 11849 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 first + 11850 "10011000" // LSHL r17, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11851 "00111101" // /* MW 3 */ + 11852 "01100010" // /* MW 2 */ + 11853 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 + 11854 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11855 "11110000" // /* MW 3 */ + 11856 "10100100" // /* MW 2 */ + 11857 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 + 11858 "10011000" // OR r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11859 "00100101" // /* MW 3 */ + 11860 "01100001" // /* MW 2 */ + 11861 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 11862 "01111010" // NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11863 "00000010" // /* MW 9 */ + 11864 "11100011" // /* MW 8 */ + 11865 "00000100" // /* MW 7 */ + 11866 "00000000" // /* MW 6 */ + 11867 "01011011" // /* MW 5 */ + 11868 "00000001" // /* MW 4 */ + 11869 "11110000" // /* MW 3 */ + 11870 "00101100" // /* MW 2 */ + 11871 "00000000" // /* MW 1 */ +.label __ll2__ZL14subFloat32Sigsjji + 11872 "10000100" // J #12032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */ + 11873 "00000000" // /* MW 5 */ + 11874 "00000000" // /* MW 4 */ + 11875 "10000000" // /* MW 3 */ + 11876 "00010111" // /* MW 2 */ + 11877 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 846 16 first +.delay_slot + 11878 "10011000" // SUB r3, r4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11879 "00010001" // /* MW 3 */ + 11880 "00000111" // /* MW 2 */ + 11881 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11889 "00000000" // /* MW 15 */ + 11890 "00000000" // /* MW 14 */ + 11891 "01111000" // /* MW 13 */ + 11892 "10100101" // /* MW 12 */ + 11893 "00000001" // /* MW 11 */ + 11894 "00000000" // /* MW 10 */ + 11895 "00000000" // /* MW 9 */ + 11896 "00000000" // /* MW 8 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00000001" // /* MW 6 */ + 11899 "00100000" // /* MW 5 */ + 11900 "00000000" // /* MW 4 */ + 11901 "11110000" // /* MW 3 */ + 11902 "00101100" // /* MW 2 */ + 11903 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_240 +.src_ref 10 "softfloat.c" 851 4 first + 11904 "10000100" // JNZ r20, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */ + 11905 "00000001" // /* MW 5 */ + 11906 "01000000" // /* MW 4 */ + 11907 "11100000" // /* MW 3 */ + 11908 "00010111" // /* MW 2 */ + 11909 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11911 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11913 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11919 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 + 11920 "11111000" // MOV r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11921 "10100000" // /* MW 3 */ + 11922 "00011101" // /* MW 2 */ + 11923 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 + 11924 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "10100000" // /* MW 3 */ + 11926 "01010001" // /* MW 2 */ + 11927 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 +.src_ref 10 "softfloat.c" 855 14 + 11928 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "10100000" // /* MW 3 */ + 11930 "11011100" // /* MW 2 */ + 11931 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 + 11932 "11111000" // MOV r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11933 "00100000" // /* MW 3 */ + 11934 "01010000" // /* MW 2 */ + 11935 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 856 8 first + 11936 "00011000" // ADD r17, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11937 "11111111" // /* MW 3 */ + 11938 "10100011" // /* MW 2 */ + 11939 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 first +.src_ref 10 "softfloat.c" 855 14 first + 11940 "00011000" // SEL.EQZ r17, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11941 "00100010" // /* MW 3 */ + 11942 "01100011" // /* MW 2 */ + 11943 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11944 "10000100" // JZ r17, #12016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12016 delay_slots=5 */ + 11945 "00000001" // /* MW 5 */ + 11946 "00000000" // /* MW 4 */ + 11947 "01111000" // /* MW 3 */ + 11948 "00010111" // /* MW 2 */ + 11949 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 first +.src_ref 10 "softfloat.c" 855 14 first +.delay_slot + 11950 "00011000" // SEL.EQZ r16, r16, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11951 "01000010" // /* MW 3 */ + 11952 "00100000" // /* MW 2 */ + 11953 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11959 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11961 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first +.src_ref 10 "softfloat-macros" 50 48 + 11962 "10111010" // MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11963 "01011000" // /* MW 9 */ + 11964 "00011111" // /* MW 8 */ + 11965 "10001000" // /* MW 7 */ + 11966 "10001110" // /* MW 6 */ + 11967 "00101000" // /* MW 5 */ + 11968 "00110001" // /* MW 4 */ + 11969 "00000000" // /* MW 3 */ + 11970 "00000011" // /* MW 2 */ + 11971 "00000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11972 "10011000" // AND r20, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11973 "01000100" // /* MW 3 */ + 11974 "10101001" // /* MW 2 */ + 11975 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11976 "10011000" // LSHL r20, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11977 "01001101" // /* MW 3 */ + 11978 "00101001" // /* MW 2 */ + 11979 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 11980 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11981 "00101101" // /* MW 3 */ + 11982 "00100101" // /* MW 2 */ + 11983 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11984 "10011000" // LT r27, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11985 "00111010" // /* MW 3 */ + 11986 "01110110" // /* MW 2 */ + 11987 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 11988 "00011000" // NEZ r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11989 "11110000" // /* MW 3 */ + 11990 "00101000" // /* MW 2 */ + 11991 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11992 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11993 "11110000" // /* MW 3 */ + 11994 "00100000" // /* MW 2 */ + 11995 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 11996 "10011000" // OR r17, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11997 "01000101" // /* MW 3 */ + 11998 "10100011" // /* MW 2 */ + 11999 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 12000 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "10010000" // /* MW 10 */ + 12007 "00001000" // /* MW 9 */ + 12008 "00100001" // /* MW 8 */ + 12009 "01011011" // /* MW 7 */ + 12010 "00000001" // /* MW 6 */ + 12011 "00100000" // /* MW 5 */ + 12012 "00000000" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00101100" // /* MW 2 */ + 12015 "00000000" // /* MW 1 */ +.label __ll1__ZL14subFloat32Sigsjji +.src_ref 10 "softfloat.c" 864 16 first + 12016 "11100001" // NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12017 "00000000" // /* MW 15 */ + 12018 "00000000" // /* MW 14 */ + 12019 "01111000" // /* MW 13 */ + 12020 "10100101" // /* MW 12 */ + 12021 "00000001" // /* MW 11 */ + 12022 "00001100" // /* MW 10 */ + 12023 "00111000" // /* MW 9 */ + 12024 "00100110" // /* MW 8 */ + 12025 "01011011" // /* MW 7 */ + 12026 "00000001" // /* MW 6 */ + 12027 "00100000" // /* MW 5 */ + 12028 "00000000" // /* MW 4 */ + 12029 "11110000" // /* MW 3 */ + 12030 "00101100" // /* MW 2 */ + 12031 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_368 +.src_ref 10 "softfloat.c" 868 11 first +.tail_call + 12032 "10000100" // J #10880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */ + 12033 "00000000" // /* MW 5 */ + 12034 "00000000" // /* MW 4 */ + 12035 "01000000" // /* MW 3 */ + 12036 "00010101" // /* MW 2 */ + 12037 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 first +.delay_slot + 12038 "00011000" // ADD r2, r25, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "11111111" // /* MW 3 */ + 12040 "01000101" // /* MW 2 */ + 12041 "00010110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12045 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12049 "00000000" // /* MW 15 */ + 12050 "00000000" // /* MW 14 */ + 12051 "01111000" // /* MW 13 */ + 12052 "10100101" // /* MW 12 */ + 12053 "00000001" // /* MW 11 */ + 12054 "00000000" // /* MW 10 */ + 12055 "00000000" // /* MW 9 */ + 12056 "00000000" // /* MW 8 */ + 12057 "01011011" // /* MW 7 */ + 12058 "00000001" // /* MW 6 */ + 12059 "00100000" // /* MW 5 */ + 12060 "00000000" // /* MW 4 */ + 12061 "11110000" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_400 +.src_ref 10 "softfloat.c" 820 4 first +.return_address + 12064 "10000100" // JNZ r20, #12256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12256 delay_slots=5 */ + 12065 "00000001" // /* MW 5 */ + 12066 "01000000" // /* MW 4 */ + 12067 "11110000" // /* MW 3 */ + 12068 "00010111" // /* MW 2 */ + 12069 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12079 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 829 14 first + 12080 "10011000" // LTU r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12081 "00011100" // /* MW 3 */ + 12082 "00100111" // /* MW 2 */ + 12083 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 829 4 + 12084 "10000100" // JNZ r19, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */ + 12085 "00000001" // /* MW 5 */ + 12086 "01000000" // /* MW 4 */ + 12087 "00001000" // /* MW 3 */ + 12088 "00011000" // /* MW 2 */ + 12089 "10011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 first +.delay_slot + 12090 "00011000" // SEL.EQZ r24, r5, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12091 "10010010" // /* MW 3 */ + 12092 "01110001" // /* MW 2 */ + 12093 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.delay_slot + 12094 "11111000" // MOV r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12095 "10100000" // /* MW 3 */ + 12096 "10011101" // /* MW 2 */ + 12097 "00011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.delay_slot + 12098 "00011000" // SEL.EQZ r25, r5, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12099 "00100010" // /* MW 3 */ + 12100 "01110011" // /* MW 2 */ + 12101 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12105 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 830 14 first + 12106 "10011000" // LTU r18, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00001100" // /* MW 3 */ + 12108 "01100101" // /* MW 2 */ + 12109 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 830 4 + 12110 "10000100" // JNZ r18, #12336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12336 delay_slots=5 */ + 12111 "00000001" // /* MW 5 */ + 12112 "01000000" // /* MW 4 */ + 12113 "00011000" // /* MW 3 */ + 12114 "00011000" // /* MW 2 */ + 12115 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12125 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 + 12126 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12127 "01111101" // /* MW 3 */ + 12128 "00100000" // /* MW 2 */ + 12129 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 24 + 12130 "01000100" // MOVXM p0, #509172 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12131 "11101000" // /* MW 5 */ + 12132 "11001001" // /* MW 4 */ + 12133 "11000000" // /* MW 3 */ + 12134 "00000111" // /* MW 2 */ + 12135 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 24 first + 12136 "10011000" // LDA r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "01010110" // /* MW 3 */ + 12138 "00000110" // /* MW 2 */ + 12139 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12141 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 12142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12143 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 4 + 12144 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12145 "00000000" // /* MW 3 */ + 12146 "00101000" // /* MW 2 */ + 12147 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 44 +.delay_slot + 12148 "00011000" // MOVX r17, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12149 "00001101" // /* MW 3 */ + 12150 "00100010" // /* MW 2 */ + 12151 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12153 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12155 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 44 +.delay_slot + 12156 "10011000" // EQ r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12157 "00100111" // /* MW 3 */ + 12158 "01100011" // /* MW 2 */ + 12159 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 12160 "11100001" // NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12161 "00000000" // /* MW 15 */ + 12162 "00000000" // /* MW 14 */ + 12163 "01111000" // /* MW 13 */ + 12164 "10100101" // /* MW 12 */ + 12165 "00000001" // /* MW 11 */ + 12166 "01101100" // /* MW 10 */ + 12167 "00001000" // /* MW 9 */ + 12168 "00100010" // /* MW 8 */ + 12169 "01011011" // /* MW 7 */ + 12170 "00000001" // /* MW 6 */ + 12171 "00100000" // /* MW 5 */ + 12172 "00000000" // /* MW 4 */ + 12173 "11110000" // /* MW 3 */ + 12174 "00101100" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_512 +.src_ref 10 "softfloat.c" 834 8 first + 12176 "10000100" // JNZ r16, #12368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12368 delay_slots=5 */ + 12177 "00000001" // /* MW 5 */ + 12178 "01000000" // /* MW 4 */ + 12179 "00101000" // /* MW 3 */ + 12180 "00011000" // /* MW 2 */ + 12181 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12191 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 + 12192 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12193 "01111101" // /* MW 3 */ + 12194 "00100000" // /* MW 2 */ + 12195 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 835 8 first + 12196 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12197 "00000000" // /* MW 3 */ + 12198 "00101000" // /* MW 2 */ + 12199 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 12200 "10011000" // LSHL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12201 "00001101" // /* MW 3 */ + 12202 "11100001" // /* MW 2 */ + 12203 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 12204 "01000100" // MOVXM r17, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12205 "00000000" // /* MW 5 */ + 12206 "10100000" // /* MW 4 */ + 12207 "00001000" // /* MW 3 */ + 12208 "10000000" // /* MW 2 */ + 12209 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 12210 "10011000" // ADD r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12211 "00000000" // /* MW 3 */ + 12212 "01000001" // /* MW 2 */ + 12213 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12216 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12217 "00011100" // /* MW 7 */ + 12218 "00000000" // /* MW 6 */ + 12219 "00000000" // /* MW 5 */ + 12220 "00000100" // /* MW 4 */ + 12221 "11110000" // /* MW 3 */ + 12222 "00101100" // /* MW 2 */ + 12223 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_560 +.src_ref 10 "softfloat.c" 852 8 first + 12224 "10000100" // JNZ r17, #12384 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12384 delay_slots=5 */ + 12225 "00000001" // /* MW 5 */ + 12226 "01000000" // /* MW 4 */ + 12227 "00110000" // /* MW 3 */ + 12228 "00011000" // /* MW 2 */ + 12229 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12239 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 853 8 first + 12240 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12241 "00000000" // /* MW 3 */ + 12242 "00101000" // /* MW 2 */ + 12243 "00010000" // /* MW 1 */ +.delay_slot + 12244 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12245 "10100000" // /* MW 3 */ + 12246 "00010000" // /* MW 2 */ + 12247 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_592 +.src_ref 10 "softfloat.c" 821 18 first + 12256 "10011000" // OR r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12257 "00000101" // /* MW 3 */ + 12258 "01100001" // /* MW 2 */ + 12259 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 821 8 + 12260 "10000100" // JNZ r16, #12400 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12400 delay_slots=5 */ + 12261 "00000001" // /* MW 5 */ + 12262 "01000000" // /* MW 4 */ + 12263 "00111000" // /* MW 3 */ + 12264 "00011000" // /* MW 2 */ + 12265 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12267 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12271 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12275 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 823 8 first + 12276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12277 "00000000" // /* MW 3 */ + 12278 "00101000" // /* MW 2 */ + 12279 "00010000" // /* MW 1 */ +.delay_slot + 12280 "01000100" // MOVXM r0, #2147483647 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12281 "11111110" // /* MW 5 */ + 12282 "00111111" // /* MW 4 */ + 12283 "11110000" // /* MW 3 */ + 12284 "11111111" // /* MW 2 */ + 12285 "01111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12292 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12293 "10000001" // /* MW 11 */ + 12294 "10101101" // /* MW 10 */ + 12295 "00000000" // /* MW 9 */ + 12296 "00000000" // /* MW 8 */ + 12297 "00000000" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00100000" // /* MW 5 */ + 12300 "00000000" // /* MW 4 */ + 12301 "11110000" // /* MW 3 */ + 12302 "00101100" // /* MW 2 */ + 12303 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_640 + 12304 "10000100" // J #12016 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12016 delay_slots=5 */ + 12305 "00000000" // /* MW 5 */ + 12306 "00000000" // /* MW 4 */ + 12307 "01111000" // /* MW 3 */ + 12308 "00010111" // /* MW 2 */ + 12309 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 +.delay_slot + 12310 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12311 "10100000" // /* MW 3 */ + 12312 "01010001" // /* MW 2 */ + 12313 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 864 16 +.delay_slot + 12314 "11111000" // MOV r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12315 "10100000" // /* MW 3 */ + 12316 "11011000" // /* MW 2 */ + 12317 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12322 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12323 "00011100" // /* MW 13 */ + 12324 "00000000" // /* MW 12 */ + 12325 "00000000" // /* MW 11 */ + 12326 "01010111" // /* MW 10 */ + 12327 "00011010" // /* MW 9 */ + 12328 "01000000" // /* MW 8 */ + 12329 "00000000" // /* MW 7 */ + 12330 "00000000" // /* MW 6 */ + 12331 "10110110" // /* MW 5 */ + 12332 "00000010" // /* MW 4 */ + 12333 "11110000" // /* MW 3 */ + 12334 "00101100" // /* MW 2 */ + 12335 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_672 + 12336 "10000100" // J #11872 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11872 delay_slots=5 */ + 12337 "00000000" // /* MW 5 */ + 12338 "00000000" // /* MW 4 */ + 12339 "00110000" // /* MW 3 */ + 12340 "00010111" // /* MW 2 */ + 12341 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 846 16 +.delay_slot + 12342 "11111000" // MOV r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12343 "00100000" // /* MW 3 */ + 12344 "00011000" // /* MW 2 */ + 12345 "00011001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 +.delay_slot + 12346 "11111000" // MOV r25, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12347 "00100000" // /* MW 3 */ + 12348 "01011100" // /* MW 2 */ + 12349 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 +.delay_slot + 12350 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12351 "10100000" // /* MW 3 */ + 12352 "01010011" // /* MW 2 */ + 12353 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12356 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12357 "10000001" // /* MW 11 */ + 12358 "10101101" // /* MW 10 */ + 12359 "00000000" // /* MW 9 */ + 12360 "00000000" // /* MW 8 */ + 12361 "00000000" // /* MW 7 */ + 12362 "00000000" // /* MW 6 */ + 12363 "00100000" // /* MW 5 */ + 12364 "00000000" // /* MW 4 */ + 12365 "11110000" // /* MW 3 */ + 12366 "00101100" // /* MW 2 */ + 12367 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_704 +.src_ref 10 "softfloat.c" 834 27 first +.tail_call + 12368 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12369 "00000000" // /* MW 5 */ + 12370 "00000000" // /* MW 4 */ + 12371 "01000000" // /* MW 3 */ + 12372 "00010100" // /* MW 2 */ + 12373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12383 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_720 +.src_ref 10 "softfloat.c" 852 27 first +.tail_call +.return_address + 12384 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12385 "00000000" // /* MW 5 */ + 12386 "00000000" // /* MW 4 */ + 12387 "01000000" // /* MW 3 */ + 12388 "00010100" // /* MW 2 */ + 12389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12399 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_736 +.src_ref 10 "softfloat.c" 821 34 first +.tail_call +.return_address + 12400 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12401 "00000000" // /* MW 5 */ + 12402 "00000000" // /* MW 4 */ + 12403 "01000000" // /* MW 3 */ + 12404 "00010100" // /* MW 2 */ + 12405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL14subFloat32Sigsjji__end + 12415 "00000000" // /* MW 1 */ +.label float32_add +.function float32_add float32_add +.src_ref 10 "softfloat.c" 92 12 +.src_ref 10 "softfloat.c" 878 first +.function_start + 12416 "00011000" // MOVX r16, #-31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12417 "10000101" // /* MW 3 */ + 12418 "11100000" // /* MW 2 */ + 12419 "00010111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 92 12 first + 12420 "10011000" // LSHL r3, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12421 "00001101" // /* MW 3 */ + 12422 "01000111" // /* MW 2 */ + 12423 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 92 12 + 12424 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12425 "00001101" // /* MW 3 */ + 12426 "10100001" // /* MW 2 */ + 12427 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 884 15 first + 12428 "10011000" // EQ r16, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12429 "00000111" // /* MW 3 */ + 12430 "11100001" // /* MW 2 */ + 12431 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 884 4 + 12432 "10000100" // JNZ r16, #12464 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12464 delay_slots=5 */ + 12433 "00000001" // /* MW 5 */ + 12434 "01000000" // /* MW 4 */ + 12435 "01011000" // /* MW 3 */ + 12436 "00011000" // /* MW 2 */ + 12437 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12447 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 888 15 first +.tail_call + 12448 "10000100" // J #11664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11664 delay_slots=5 */ + 12449 "00000000" // /* MW 5 */ + 12450 "00000000" // /* MW 4 */ + 12451 "11001000" // /* MW 3 */ + 12452 "00010110" // /* MW 2 */ + 12453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12463 "00000000" // /* MW 1 */ +.label TGT_Ffloat32_add_48 +.src_ref 10 "softfloat.c" 885 15 first +.tail_call +.return_address + 12464 "10000100" // J #11040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */ + 12465 "00000000" // /* MW 5 */ + 12466 "00000000" // /* MW 4 */ + 12467 "10010000" // /* MW 3 */ + 12468 "00010101" // /* MW 2 */ + 12469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label float32_add__end + 12479 "00000000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 8 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 9 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/softfloat" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.txt new file mode 100644 index 0000000000000000000000000000000000000000..2d1d5946a6747db932adeab9e7d141d4fd318d32 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/Release/0_2_reloadable6.txt @@ -0,0 +1,2975 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 218 0xa10 x +reduce_base_c8.h 220 0xa10 1 x +reduce_base_c8.h 290 0xa10 2 +reduce_base_c8.h 348 0xa10 3 +reduce_base_c8.h 287 0xa1c +reduce_base_c8.h 287 0xa1c 1 +reduce_base_c8.h 348 0xa1c 2 x +reduce_base_c8.h 236 0xa26 +reduce_base_c8.h 293 0xa26 1 +reduce_base_c8.h 298 0xa26 2 +reduce_base_c8.h 299 0xa26 3 +reduce_base_c8.h 300 0xa26 4 +reduce_base_c8.h 326 0xa26 5 +reduce_base_c8.h 276 0xa30 +reduce_base_c8.h 301 0xa30 1 +reduce_base_c8.h 305 0xa30 2 +reduce_base_c8.h 218 0xa3a +reduce_base_c8.h 280 0xa3a 1 +reduce_base_c8.h 312 0xa3a 2 +reduce_base_c8.h 298 0xa44 x +reduce_base_c8.h 220 0xa4a x +reduce_base_c8.h 221 0xa4e x +reduce_base_c8.h 221 0xa5e +reduce_base_c8.h 301 0xa5e 1 x +reduce_base_c8.h 222 0xa64 x +reduce_base_c8.h 293 0xa64 1 x +reduce_base_c8.h 301 0xa64 2 +reduce_base_c8.h 290 0xa6e x +reduce_base_c8.h 293 0xa72 x +reduce_base_c8.h 290 0xa76 x +reduce_base_c8.h 300 0xa76 1 x +reduce_base_c8.h 222 0xa82 x +reduce_base_c8.h 287 0xa82 1 x +reduce_base_c8.h 223 0xa88 x +reduce_base_c8.h 312 0xa88 1 x +reduce_base_c8.h 305 0xa92 x +reduce_base_c8.h 312 0xa96 x +reduce_base_c8.h 299 0xa9a x +reduce_base_c8.h 276 0xa9e x +reduce_base_c8.h 299 0xa9e 1 +reduce_base_c8.h 276 0xaa4 +reduce_base_c8.h 301 0xaa8 x +reduce_base_c8.h 223 0xaac x +reduce_base_c8.h 236 0xaac 1 x +reduce_base_c8.h 224 0xab2 x +reduce_base_c8.h 224 0xac2 +reduce_base_c8.h 318 0xac2 1 +reduce_base_c8.h 225 0xaca x +reduce_base_c8.h 225 0xada +reduce_base_c8.h 318 0xada 1 +reduce_base_c8.h 226 0xae2 x +reduce_base_c8.h 236 0xae8 x +reduce_base_c8.h 312 0xaee x +reduce_base_c8.h 318 0xaf2 x +reduce_base_c8.h 300 0xaf6 x +reduce_base_c8.h 305 0xaf6 1 x +reduce_base_c8.h 280 0xafc x +reduce_base_c8.h 226 0xb00 x +reduce_base_c8.h 318 0xb00 1 x +reduce_base_c8.h 236 0xb06 +reduce_base_c8.h 236 0xb0a x +reduce_base_c8.h 236 0xb0e +reduce_base_c8.h 242 0xb1c x +reduce_base_c8.h 236 0xb20 +reduce_base_c8.h 236 0xb24 x +reduce_base_c8.h 236 0xb28 +reduce_base_c8.h 236 0xb36 +reduce_base_c8.h 236 0xb3a +reduce_base_c8.h 236 0xb3e +reduce_base_c8.h 329 0xb54 +reduce_base_c8.h 236 0xb60 +reduce_base_c8.h 236 0xb64 +reduce_base_c8.h 236 0xb68 +reduce_base_c8.h 236 0xb76 +reduce_base_c8.h 316 0xb76 1 +reduce_base_c8.h 329 0xb76 2 +reduce_base_c8.h 236 0xb7a +reduce_base_c8.h 236 0xb7e +reduce_base_c8.h 236 0xb8e +reduce_base_c8.h 236 0xb92 +reduce_base_c8.h 286 0xba2 x +reduce_base_c8.h 289 0xba2 1 +reduce_base_c8.h 291 0xba2 2 +reduce_base_c8.h 291 0xba2 3 +reduce_base_c8.h 287 0xbba x +reduce_base_c8.h 288 0xbca x +reduce_base_c8.h 289 0xbda x +reduce_base_c8.h 290 0xbea x +reduce_base_c8.h 291 0xbfa x +reduce_base_c8.h 292 0xc0e x +reduce_base_c8.h 293 0xc12 x +reduce_base_c8.h 274 0xc20 x +reduce_base_c8.h 275 0xc20 1 +reduce_base_c8.h 275 0xc20 2 +reduce_base_c8.h 275 0xc2a x +reduce_base_c8.h 279 0xc2a 1 +reduce_base_c8.h 275 0xc3e +reduce_base_c8.h 276 0xc4e x +reduce_base_c8.h 275 0xc5e x +reduce_base_c8.h 277 0xc5e 1 x +reduce_base_c8.h 278 0xc6e x +reduce_base_c8.h 279 0xc7e x +reduce_base_c8.h 279 0xc8c +reduce_base_c8.h 281 0xc94 x +reduce_base_c8.h 280 0xc98 x +reduce_base_c8.h 236 0xca0 +reduce_base_c8.h 301 0xca0 1 +reduce_base_c8.h 302 0xca0 2 +reduce_base_c8.h 236 0xca6 x +reduce_base_c8.h 236 0xcaa +reduce_base_c8.h 298 0xcb0 +reduce_base_c8.h 303 0xcb0 1 +reduce_base_c8.h 310 0xcb0 2 +reduce_base_c8.h 311 0xcb0 3 +reduce_base_c8.h 236 0xcbc +reduce_base_c8.h 236 0xcc0 +reduce_base_c8.h 236 0xcc4 +reduce_base_c8.h 310 0xcd4 x +reduce_base_c8.h 312 0xcd4 1 x +reduce_base_c8.h 315 0xcd4 2 +reduce_base_c8.h 313 0xcde +reduce_base_c8.h 317 0xcde 1 +reduce_base_c8.h 315 0xce8 +reduce_base_c8.h 317 0xce8 1 x +reduce_base_c8.h 311 0xcf6 x +reduce_base_c8.h 312 0xd06 x +reduce_base_c8.h 313 0xd16 x +reduce_base_c8.h 315 0xd1a x +reduce_base_c8.h 316 0xd2a x +reduce_base_c8.h 317 0xd2e x +reduce_base_c8.h 298 0xd50 x +reduce_base_c8.h 301 0xd50 1 +reduce_base_c8.h 301 0xd50 2 x +reduce_base_c8.h 302 0xd5a +reduce_base_c8.h 303 0xd5a 1 +reduce_base_c8.h 306 0xd5a 2 +reduce_base_c8.h 302 0xd64 x +reduce_base_c8.h 302 0xd68 +reduce_base_c8.h 306 0xd68 1 x +reduce_base_c8.h 299 0xd74 x +reduce_base_c8.h 300 0xd84 x +reduce_base_c8.h 301 0xd94 x +reduce_base_c8.h 302 0xda4 x +reduce_base_c8.h 303 0xdb4 x +reduce_base_c8.h 304 0xdc4 x +reduce_base_c8.h 305 0xdc8 x +reduce_base_c8.h 326 0xde0 x +reduce_base_c8.h 329 0xde0 1 +reduce_base_c8.h 329 0xde6 +reduce_base_c8.h 330 0xde6 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 139 0xde6 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 331 0xdf0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 140 0xdf0 1 x +reduce_mean_c8_impl.h 141 0xdf6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 326 0xdfe x +reduce_base_c8.h 327 0xe0e x +reduce_base_c8.h 327 0xe1a +reduce_base_c8.h 328 0xe1a 1 +reduce_base_c8.h 328 0xe20 x +reduce_base_c8.h 329 0xe24 x +reduce_base_c8.h 329 0xe32 +reduce_base_c8.h 329 0xe36 +reduce_base_c8.h 330 0xe36 1 +reduce_base_c8.h 329 0xe3c +reduce_base_c8.h 330 0xe48 x +reduce_base_c8.h 331 0xe58 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 139 0xe68 x +reduce_mean_c8_impl.h 140 0xe78 x +reduce_mean_c8_impl.h 141 0xe88 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 349 0xe8c x +reduce_base_c8.h 349 0xe90 +reduce_base_c8.h 262 0xea0 x +reduce_base_c8.h 263 0xea0 1 +reduce_base_c8.h 263 0xeaa +reduce_base_c8.h 263 0xeaa 1 x +reduce_base_c8.h 267 0xeaa 2 +reduce_base_c8.h 265 0xeb4 +reduce_base_c8.h 329 0xeb4 1 +reduce_base_c8.h 265 0xebe x +reduce_base_c8.h 265 0xec2 +reduce_base_c8.h 267 0xec2 1 x +reduce_base_c8.h 265 0xec6 x +reduce_base_c8.h 265 0xec6 1 x +reduce_base_c8.h 263 0xecc x +reduce_base_c8.h 263 0xed0 +reduce_base_c8.h 264 0xede x +reduce_base_c8.h 265 0xeee x +reduce_base_c8.h 266 0xefe x +reduce_base_c8.h 267 0xf0e x +reduce_base_c8.h 267 0xf1c +reduce_base_c8.h 267 0xf20 +reduce_base_c8.h 270 0xf24 +reduce_base_c8.h 268 0xf28 x +reduce_base_c8.h 269 0xf30 x +reduce_base_c8.h 270 0xf30 1 x +reduce_base_c8.h 250 0xf40 +reduce_base_c8.h 250 0xf40 1 x +reduce_base_c8.h 255 0xf40 2 +reduce_base_c8.h 255 0xf4a +reduce_base_c8.h 255 0xf4a 1 +reduce_base_c8.h 255 0xf4a 2 +reduce_base_c8.h 255 0xf4a 3 x +reduce_base_c8.h 255 0xf54 +reduce_base_c8.h 255 0xf54 1 +reduce_base_c8.h 329 0xf54 2 +reduce_base_c8.h 251 0xf62 x +reduce_base_c8.h 252 0xf72 x +reduce_base_c8.h 253 0xf82 x +reduce_base_c8.h 254 0xf92 x +reduce_base_c8.h 255 0xfa2 x +reduce_base_c8.h 255 0xfb0 +reduce_base_c8.h 255 0xfb0 1 +reduce_base_c8.h 256 0xfb8 x +reduce_base_c8.h 257 0xfbc x +reduce_base_c8.h 238 0xfc0 x +reduce_base_c8.h 239 0xfd0 x +reduce_base_c8.h 240 0xfe0 x +reduce_base_c8.h 241 0xfea +reduce_base_c8.h 241 0xfea 1 +reduce_base_c8.h 241 0xff2 x +reduce_base_c8.h 241 0xff8 +reduce_base_c8.h 241 0xffe +reduce_base_c8.h 241 0x1002 +reduce_base_c8.h 241 0x1002 1 +reduce_base_c8.h 241 0x1002 2 +reduce_base_c8.h 241 0x1002 3 +reduce_base_c8.h 242 0x100c x +reduce_base_c8.h 243 0x101a +reduce_base_c8.h 243 0x101e x +reduce_base_c8.h 243 0x102c +reduce_base_c8.h 243 0x102c 1 +reduce_base_c8.h 243 0x102c 2 +reduce_base_c8.h 243 0x102c 3 +reduce_base_c8.h 244 0x1036 x +reduce_base_c8.h 245 0x103a x +reduce_base_c8.h 329 0x103a 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 266 0x1050 x +pad_3d.h 465 0x1050 1 x +pad_3d.h 468 0x1050 2 x +pad_3d.h 471 0x1050 3 +pad_3d.h 479 0x1050 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 950 0x105a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 469 0x105a 1 x +pad_3d.h 478 0x105a 2 +pad_3d.h 499 0x105a 3 +pad_3d.h 511 0x105a 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 950 0x1064 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 470 0x1064 1 x +pad_3d.h 486 0x1064 2 +pad_3d.h 498 0x1064 3 +pad_3d.h 499 0x1064 4 +pad_3d.h 509 0x1064 5 +pad_3d.h 517 0x1064 6 +pad_3d.h 471 0x106e x +pad_3d.h 472 0x1072 x +pad_3d.h 473 0x1076 x +pad_3d.h 475 0x107a x +pad_3d.h 479 0x107e x +pad_3d.h 477 0x1082 x +pad_3d.h 478 0x1086 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x108a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 485 0x1090 x +pad_3d.h 485 0x1094 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 998 0x1098 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 486 0x109c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 950 0x10a0 x +array_helpers.hpp 950 0x10a4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 486 0x10a4 1 x +pad_3d.h 486 0x10aa +pad_3d.h 487 0x10b0 +pad_3d.h 486 0x10bc +pad_3d.h 486 0x10c2 +pad_3d.h 486 0x10c8 +pad_3d.h 487 0x1130 x +pad_3d.h 495 0x1140 +pad_3d.h 495 0x1140 1 x +pad_3d.h 498 0x1140 2 +pad_3d.h 499 0x1140 3 x +pad_3d.h 495 0x114a +pad_3d.h 496 0x114a 1 x +pad_3d.h 495 0x1150 x +pad_3d.h 495 0x1154 +pad_3d.h 498 0x1154 1 x +pad_3d.h 499 0x115a x +pad_3d.h 498 0x115e x +pad_3d.h 498 0x1162 +pad_3d.h 499 0x1162 1 x +pad_3d.h 499 0x1168 +pad_3d.h 499 0x116c + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x117c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x117c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 499 0x117c 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1186 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1186 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 499 0x1186 2 +pad_3d.h 499 0x1190 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1200 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1200 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 514 0x1210 +pad_3d.h 514 0x1216 x +pad_3d.h 514 0x121a +pad_3d.h 514 0x121e +pad_3d.h 511 0x1222 x +pad_3d.h 509 0x1226 x +pad_3d.h 515 0x122a x +pad_3d.h 509 0x122e x +pad_3d.h 509 0x1232 +pad_3d.h 514 0x1232 1 +pad_3d.h 517 0x1232 2 x +pad_3d.h 509 0x1238 x +pad_3d.h 509 0x123c +pad_3d.h 517 0x123c 1 x +pad_3d.h 517 0x1242 +pad_3d.h 514 0x124c x +pad_3d.h 514 0x1250 +pad_3d.h 515 0x1254 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1258 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1258 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 517 0x1258 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1262 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1262 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 517 0x1262 2 +pad_3d.h 517 0x126c + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x12d0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12d0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 282 0x12e0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 352 0x12f0 +reduce_base_c8.h 362 0x12f0 1 x +reduce_base_c8.h 365 0x12f0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 200 0x12f0 3 +reduce_mean_c8_impl.h 223 0x12f0 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 365 0x12f4 x +reduce_base_c8.h 367 0x12fc x +reduce_base_c8.h 367 0x130c +reduce_base_c8.h 367 0x130c 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 101 0x1312 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1312 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1312 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 372 0x1312 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1316 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 362 0x131e +reduce_base_c8.h 372 0x1324 +reduce_base_c8.h 372 0x1328 x +reduce_base_c8.h 372 0x1338 +reduce_base_c8.h 372 0x133c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1342 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 374 0x1342 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x134e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 372 0x134e 1 +reduce_base_c8.h 374 0x134e 2 +reduce_base_c8.h 372 0x135a +reduce_base_c8.h 372 0x1360 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x13d0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 374 0x13d0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x13e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x13e0 1 +reduce_base_c8.h 412 0x13e0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x13e0 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x13e4 x +reduce_base_c8.h 388 0x13e8 +reduce_base_c8.h 388 0x13e8 1 +reduce_base_c8.h 388 0x13ee +reduce_base_c8.h 570 0x13ee 1 +reduce_base_c8.h 570 0x13ee 2 +reduce_base_c8.h 570 0x13ee 3 +reduce_base_c8.h 570 0x13f4 x +reduce_base_c8.h 594 0x13f4 1 +reduce_base_c8.h 570 0x13fa +reduce_base_c8.h 594 0x13fa 1 x +reduce_base_c8.h 594 0x1400 +reduce_base_c8.h 594 0x1404 +reduce_base_c8.h 388 0x1408 +reduce_base_c8.h 595 0x1408 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x140e +aie_core.h 73 0x140e 1 +aie_core.h 90 0x140e 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x140e 3 +vector.hpp 1139 0x140e 4 +vector.hpp 1159 0x140e 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x140e 6 +accum.hpp 198 0x140e 7 +accum.hpp 198 0x140e 8 +accum.hpp 943 0x140e 9 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x140e 10 +reduce_base_c8.h 596 0x140e 11 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x1418 +aie_core.h 90 0x1418 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1418 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1418 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x1418 4 x +reduce_base_c8.h 570 0x1418 5 +reduce_base_c8.h 570 0x1418 6 +reduce_base_c8.h 570 0x1418 7 +reduce_base_c8.h 570 0x1418 8 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x1424 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 109 0x1424 1 +me_vmult_float_emulated.h 109 0x1424 2 +me_vmult_float_emulated.h 111 0x1424 3 +me_vmult_float_emulated.h 111 0x1424 4 +me_vmult_float_emulated.h 113 0x1424 5 +me_vmult_float_emulated.h 113 0x1424 6 +me_vmult_float_emulated.h 115 0x1424 7 +me_vmult_float_emulated.h 115 0x1424 8 +me_vmult_float_emulated.h 117 0x1424 9 +me_vmult_float_emulated.h 117 0x1424 10 +me_vmult_float_emulated.h 118 0x1424 11 +me_vmult_float_emulated.h 118 0x1424 12 +me_vmult_float_emulated.h 118 0x1424 13 +me_vmult_float_emulated.h 118 0x1424 14 +me_vmult_float_emulated.h 119 0x1424 15 +me_vmult_float_emulated.h 119 0x1424 16 +me_vmult_float_emulated.h 119 0x1424 17 +me_vmult_float_emulated.h 119 0x1424 18 +me_vmult_float_emulated.h 120 0x1424 19 +me_vmult_float_emulated.h 120 0x1424 20 +me_vmult_float_emulated.h 120 0x1424 21 +me_vmult_float_emulated.h 120 0x1424 22 +me_vmult_float_emulated.h 121 0x1424 23 +me_vmult_float_emulated.h 121 0x1424 24 +me_vmult_float_emulated.h 121 0x1424 25 +me_vmult_float_emulated.h 121 0x1424 26 +me_vmult_float_emulated.h 122 0x1424 27 +me_vmult_float_emulated.h 122 0x1424 28 +me_vmult_float_emulated.h 122 0x1424 29 +me_vmult_float_emulated.h 122 0x1424 30 +me_vmult_float_emulated.h 123 0x1424 31 +me_vmult_float_emulated.h 123 0x1424 32 +me_vmult_float_emulated.h 123 0x1424 33 +me_vmult_float_emulated.h 123 0x1424 34 +me_vmult_float_emulated.h 124 0x1424 35 +me_vmult_float_emulated.h 124 0x1424 36 +me_vmult_float_emulated.h 124 0x1424 37 +me_vmult_float_emulated.h 124 0x1424 38 +me_vmult_float_emulated.h 125 0x1424 39 +me_vmult_float_emulated.h 125 0x1424 40 +me_vmult_float_emulated.h 125 0x1424 41 +me_vmult_float_emulated.h 125 0x1424 42 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1424 43 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1424 44 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x1424 45 +add.hpp 28 0x1424 46 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1424 47 +add_reduce.hpp 324 0x1424 48 +add_reduce.hpp 324 0x1424 49 +add_reduce.hpp 324 0x1424 50 +add_reduce.hpp 324 0x1424 51 +add_reduce.hpp 324 0x1424 52 +add_reduce.hpp 324 0x1424 53 +add_reduce.hpp 324 0x1424 54 +add_reduce.hpp 324 0x1424 55 +add_reduce.hpp 324 0x1424 56 +add_reduce.hpp 324 0x1424 57 +add_reduce.hpp 324 0x1424 58 +add_reduce.hpp 324 0x1424 59 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1424 60 +add_accum.hpp 19 0x1424 61 +add_accum.hpp 19 0x1424 62 +add_accum.hpp 19 0x1424 63 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x1424 64 +reduce_base_c8.h 595 0x1424 65 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x1430 +aie_core.h 73 0x1430 1 +aie_core.h 73 0x1430 2 +aie_core.h 73 0x1430 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1430 4 +vector.hpp 1139 0x1430 5 +vector.hpp 1139 0x1430 6 +vector.hpp 1159 0x1430 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x1430 8 +accum.hpp 198 0x1430 9 +accum.hpp 198 0x1430 10 +accum.hpp 198 0x1430 11 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1430 12 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 200 0x1430 13 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x143c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x143c 1 +vector.hpp 1139 0x143c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x143c 3 +accum.hpp 198 0x143c 4 x +accum.hpp 943 0x143c 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x143c 6 +reduce_base_c8.h 570 0x143c 7 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1446 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1446 1 +vector.hpp 1139 0x1446 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1446 3 +accum.hpp 198 0x1446 4 +accum.hpp 943 0x1446 5 +accum.hpp 943 0x1446 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1446 7 +reduce_base_c8.h 570 0x1446 8 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1450 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 391 0x1450 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1456 +aie_core.h 90 0x1456 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1456 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1456 3 +accum.hpp 943 0x1456 4 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x145c +aie_core.h 90 0x145c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x145c 2 +vector.hpp 1139 0x145c 3 +vector.hpp 1139 0x145c 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x145c 5 +accum.hpp 198 0x145c 6 +accum.hpp 198 0x145c 7 x +accum.hpp 943 0x145c 8 +accum.hpp 943 0x145c 9 +accum.hpp 943 0x145c 10 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x145c 11 x +reduce_base_c8.h 570 0x145c 12 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1468 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1468 1 +vector.hpp 1139 0x1468 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1468 3 +accum.hpp 198 0x1468 4 +accum.hpp 943 0x1468 5 +accum.hpp 943 0x1468 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1468 7 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1468 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1472 +aie_core.h 90 0x1472 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1472 2 +vector.hpp 1139 0x1472 3 +vector.hpp 1139 0x1472 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1472 5 +accum.hpp 198 0x1472 6 +accum.hpp 198 0x1472 7 x +accum.hpp 943 0x1472 8 +accum.hpp 943 0x1472 9 +accum.hpp 943 0x1472 10 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1472 11 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x147c +aie_core.h 90 0x147c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x147c 2 +vector.hpp 1159 0x147c 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x147c 4 +accum.hpp 198 0x147c 5 +accum.hpp 943 0x147c 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1482 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1482 1 x +accum.hpp 943 0x1482 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1482 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1482 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x148a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x148a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x148a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1490 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 391 0x1490 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x149a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x149a 1 x +accum.hpp 943 0x149a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 412 0x149a 3 +reduce_base_c8.h 570 0x149a 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x14a0 +aie_core.h 73 0x14a0 1 +aie_core.h 73 0x14a0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x14a0 3 +vector.hpp 1159 0x14a0 4 +vector.hpp 1159 0x14a0 5 +vector.hpp 1285 0x14a0 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x14a0 7 +accum.hpp 153 0x14a0 8 +accum.hpp 153 0x14a0 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x14a0 10 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x14a0 11 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x14b0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14b0 1 x +vector.hpp 1159 0x14b0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x14b0 3 +accum.hpp 198 0x14b0 4 x +accum.hpp 943 0x14b0 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x14b0 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 391 0x14b0 7 x +reduce_base_c8.h 570 0x14b0 8 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x14c0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14c0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x14c0 2 +accum.hpp 943 0x14c0 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x14c4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14c4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x14c4 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x14c4 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14d0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x14d0 1 +accum.hpp 943 0x14d0 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x14d0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x14f0 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x1500 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1500 1 x +vector.hpp 1159 0x1500 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x1500 3 x +accum.hpp 198 0x1500 4 x +accum.hpp 943 0x1500 5 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1500 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1510 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1520 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1520 1 x +accum.hpp 943 0x1520 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1520 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1520 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 107 0x1530 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 412 0x1530 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x153a x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 107 0x153a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x153a 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x153a 3 x +accum.hpp 943 0x153a 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x153a 5 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 101 0x1544 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x154a x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x154e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x154e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x154e 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1554 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 412 0x1554 1 x +reduce_base_c8.h 412 0x155c +reduce_base_c8.h 412 0x1560 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x156c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x156c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x156c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1572 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 184 0x1572 1 x +reduce_mean_c8_impl.h 184 0x1572 2 +reduce_mean_c8_impl.h 184 0x1584 +reduce_mean_c8_impl.h 184 0x1588 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x158e +add_reduce.hpp 322 0x158e 1 +add_reduce.hpp 322 0x158e 2 +add_reduce.hpp 322 0x158e 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 184 0x158e 4 +reduce_mean_c8_impl.h 184 0x159a +reduce_mean_c8_impl.h 184 0x159e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x15ae +blend.hpp 170 0x15b4 +blend.hpp 163 0x15ba + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 109 0x15c0 +me_vmult_float_emulated.h 111 0x15c0 1 +me_vmult_float_emulated.h 113 0x15c0 2 +me_vmult_float_emulated.h 115 0x15c0 3 +me_vmult_float_emulated.h 117 0x15c0 4 +me_vmult_float_emulated.h 118 0x15c0 5 +me_vmult_float_emulated.h 118 0x15c0 6 +me_vmult_float_emulated.h 119 0x15c0 7 +me_vmult_float_emulated.h 119 0x15c0 8 +me_vmult_float_emulated.h 120 0x15c0 9 +me_vmult_float_emulated.h 120 0x15c0 10 +me_vmult_float_emulated.h 121 0x15c0 11 +me_vmult_float_emulated.h 121 0x15c0 12 +me_vmult_float_emulated.h 122 0x15c0 13 +me_vmult_float_emulated.h 122 0x15c0 14 +me_vmult_float_emulated.h 123 0x15c0 15 +me_vmult_float_emulated.h 123 0x15c0 16 +me_vmult_float_emulated.h 124 0x15c0 17 +me_vmult_float_emulated.h 124 0x15c0 18 +me_vmult_float_emulated.h 125 0x15c0 19 +me_vmult_float_emulated.h 125 0x15c0 20 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x15c0 21 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x15c0 22 +add_reduce.hpp 324 0x15c0 23 +add_reduce.hpp 324 0x15c0 24 +add_reduce.hpp 324 0x15c0 25 +add_reduce.hpp 324 0x15c0 26 +add_reduce.hpp 324 0x15c0 27 +add_reduce.hpp 324 0x15c0 28 +add_reduce.hpp 324 0x15c0 29 +add_reduce.hpp 324 0x15c0 30 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 200 0x15c0 31 +reduce_mean_c8_impl.h 200 0x15c0 32 +reduce_mean_c8_impl.h 200 0x15c0 33 +reduce_mean_c8_impl.h 223 0x15c0 34 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x15cc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 200 0x15cc 1 x +reduce_mean_c8_impl.h 200 0x15e0 +reduce_mean_c8_impl.h 223 0x15f0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x15fc +vector.hpp 1289 0x15fc 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 112 0x1608 +me_vmult_float_emulated.h 112 0x1608 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1608 2 +vector.hpp 57 0x1608 3 +vector.hpp 1280 0x1608 4 +vector.hpp 1285 0x1608 5 +vector.hpp 1287 0x1608 6 +vector.hpp 1288 0x1608 7 +vector.hpp 1289 0x1608 8 +vector.hpp 1292 0x1608 9 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 223 0x1608 10 x +reduce_mean_c8_impl.h 268 0x1608 11 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1614 +vector.hpp 915 0x1614 1 +vector.hpp 1280 0x1614 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x161e +add_reduce.hpp 322 0x161e 1 +add_reduce.hpp 322 0x161e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 223 0x161e 3 x +reduce_mean_c8_impl.h 223 0x1628 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1286 0x1632 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1638 +me_vmult_float_emulated.h 108 0x1638 1 +me_vmult_float_emulated.h 109 0x1638 2 +me_vmult_float_emulated.h 110 0x1638 3 +me_vmult_float_emulated.h 110 0x1638 4 +me_vmult_float_emulated.h 111 0x1638 5 +me_vmult_float_emulated.h 111 0x1638 6 +me_vmult_float_emulated.h 111 0x1638 7 +me_vmult_float_emulated.h 112 0x1638 8 +me_vmult_float_emulated.h 112 0x1638 9 +me_vmult_float_emulated.h 113 0x1638 10 +me_vmult_float_emulated.h 114 0x1638 11 +me_vmult_float_emulated.h 114 0x1638 12 +me_vmult_float_emulated.h 115 0x1638 13 +me_vmult_float_emulated.h 115 0x1638 14 +me_vmult_float_emulated.h 115 0x1638 15 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1286 0x1638 16 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1108 0x1638 17 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 112 0x163c x +me_vmult_float_emulated.h 112 0x163c 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 223 0x163c 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1644 +me_vmult_float_emulated.h 108 0x1644 1 +me_vmult_float_emulated.h 109 0x1644 2 +me_vmult_float_emulated.h 110 0x1644 3 +me_vmult_float_emulated.h 110 0x1644 4 +me_vmult_float_emulated.h 111 0x1644 5 +me_vmult_float_emulated.h 111 0x1644 6 +me_vmult_float_emulated.h 111 0x1644 7 +me_vmult_float_emulated.h 113 0x1644 8 +me_vmult_float_emulated.h 114 0x1644 9 +me_vmult_float_emulated.h 114 0x1644 10 +me_vmult_float_emulated.h 115 0x1644 11 +me_vmult_float_emulated.h 115 0x1644 12 +me_vmult_float_emulated.h 115 0x1644 13 +me_vmult_float_emulated.h 108 0x1648 +me_vmult_float_emulated.h 108 0x1648 1 +me_vmult_float_emulated.h 109 0x1648 2 +me_vmult_float_emulated.h 110 0x1648 3 +me_vmult_float_emulated.h 110 0x1648 4 +me_vmult_float_emulated.h 111 0x1648 5 +me_vmult_float_emulated.h 111 0x1648 6 +me_vmult_float_emulated.h 111 0x1648 7 +me_vmult_float_emulated.h 113 0x1648 8 x +me_vmult_float_emulated.h 115 0x1648 9 +me_vmult_float_emulated.h 115 0x1648 10 +me_vmult_float_emulated.h 115 0x1648 11 +me_vmult_float_emulated.h 108 0x1650 +me_vmult_float_emulated.h 108 0x1650 1 +me_vmult_float_emulated.h 109 0x1650 2 +me_vmult_float_emulated.h 110 0x1650 3 +me_vmult_float_emulated.h 110 0x1650 4 +me_vmult_float_emulated.h 111 0x1650 5 +me_vmult_float_emulated.h 111 0x1650 6 +me_vmult_float_emulated.h 111 0x1650 7 +me_vmult_float_emulated.h 113 0x165c +me_vmult_float_emulated.h 114 0x165c 1 x +me_vmult_float_emulated.h 114 0x165c 2 x +me_vmult_float_emulated.h 115 0x1662 x +me_vmult_float_emulated.h 115 0x1670 +me_vmult_float_emulated.h 115 0x1670 1 +me_vmult_float_emulated.h 115 0x1670 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1670 3 +add_reduce.hpp 322 0x1670 4 +add_reduce.hpp 322 0x1670 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1680 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 226 0x1680 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1280 0x168a +vector.hpp 1280 0x168e x +vector.hpp 1285 0x1692 x +vector.hpp 1285 0x1692 1 x +vector.hpp 1285 0x1698 +vector.hpp 1286 0x169c x +vector.hpp 1285 0x16a0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x16a0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16a6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 142 0x16aa x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16aa 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 142 0x16ae + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16ae 1 x +accum.hpp 199 0x16ba x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x16ba 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x16c2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 142 0x16c6 x +vector.hpp 243 0x16c6 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x16c6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16ce x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x16d2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16d6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x16d6 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16de +accum.hpp 151 0x16e2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 243 0x16e6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 151 0x16e6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x16ea x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16ee x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x16ee 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16f6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x16fa +add_reduce.hpp 322 0x16fe x +add_reduce.hpp 324 0x1702 x +add_reduce.hpp 324 0x1702 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x170a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x170e x +add_reduce.hpp 324 0x170e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1716 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x171a x +add_reduce.hpp 322 0x171e x +add_reduce.hpp 324 0x171e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1726 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x172a x +add_reduce.hpp 322 0x172e x +add_reduce.hpp 324 0x172e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1736 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x173a x +add_reduce.hpp 322 0x173e x +add_reduce.hpp 324 0x1742 x +add_reduce.hpp 324 0x1742 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x174a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x174e x +add_reduce.hpp 324 0x174e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1756 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x175a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x175e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1762 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1766 x +vector.hpp 1288 0x1766 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x176c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1770 x +vector.hpp 1287 0x1770 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1770 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1776 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 853 0x177a x +vector.hpp 853 0x177e +vector.hpp 142 0x1782 x +vector.hpp 1413 0x1782 1 x +vector.hpp 142 0x1786 +vector.hpp 1413 0x1786 1 +vector.hpp 142 0x178a +vector.hpp 1413 0x178a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x178e x +blend.hpp 170 0x1792 +blend.hpp 170 0x1796 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x179a +me_vmult_float_emulated.h 108 0x179a 1 +me_vmult_float_emulated.h 108 0x179e +me_vmult_float_emulated.h 108 0x179e 1 +me_vmult_float_emulated.h 109 0x179e 2 +me_vmult_float_emulated.h 110 0x179e 3 +me_vmult_float_emulated.h 110 0x179e 4 +me_vmult_float_emulated.h 111 0x179e 5 +me_vmult_float_emulated.h 111 0x179e 6 +me_vmult_float_emulated.h 111 0x179e 7 +me_vmult_float_emulated.h 108 0x17a2 x +me_vmult_float_emulated.h 108 0x17a2 1 x +me_vmult_float_emulated.h 109 0x17a2 2 x +me_vmult_float_emulated.h 108 0x17aa +me_vmult_float_emulated.h 108 0x17aa 1 +me_vmult_float_emulated.h 109 0x17aa 2 +me_vmult_float_emulated.h 110 0x17aa 3 +me_vmult_float_emulated.h 110 0x17aa 4 +me_vmult_float_emulated.h 111 0x17aa 5 +me_vmult_float_emulated.h 111 0x17aa 6 +me_vmult_float_emulated.h 111 0x17aa 7 +me_vmult_float_emulated.h 109 0x17ae + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x17ae 1 +vector.hpp 1285 0x17ae 2 x +vector.hpp 1289 0x17ae 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 120 0x17b8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x17b8 1 +vector.hpp 1289 0x17b8 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x17c0 +me_vmult_float_emulated.h 108 0x17c0 1 +me_vmult_float_emulated.h 109 0x17c0 2 +me_vmult_float_emulated.h 110 0x17c0 3 +me_vmult_float_emulated.h 110 0x17c0 4 +me_vmult_float_emulated.h 111 0x17c0 5 +me_vmult_float_emulated.h 111 0x17c0 6 +me_vmult_float_emulated.h 111 0x17c0 7 +me_vmult_float_emulated.h 124 0x17c0 8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x17c0 9 x +vector.hpp 1289 0x17c0 10 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 125 0x17ca x +me_vmult_float_emulated.h 109 0x17d2 x +me_vmult_float_emulated.h 110 0x17d2 1 x +me_vmult_float_emulated.h 110 0x17d2 2 x +me_vmult_float_emulated.h 111 0x17d8 x +me_vmult_float_emulated.h 111 0x17e6 +me_vmult_float_emulated.h 111 0x17e6 1 +me_vmult_float_emulated.h 111 0x17e6 2 +me_vmult_float_emulated.h 117 0x17ec x +me_vmult_float_emulated.h 118 0x17f0 x +me_vmult_float_emulated.h 119 0x17fa x +me_vmult_float_emulated.h 117 0x17fe x +me_vmult_float_emulated.h 118 0x1802 x +me_vmult_float_emulated.h 118 0x1806 +me_vmult_float_emulated.h 122 0x1810 x +me_vmult_float_emulated.h 118 0x1814 x +me_vmult_float_emulated.h 119 0x1818 x +me_vmult_float_emulated.h 119 0x181c +me_vmult_float_emulated.h 121 0x1826 x +me_vmult_float_emulated.h 119 0x182a x +me_vmult_float_emulated.h 120 0x182e x +me_vmult_float_emulated.h 120 0x1832 +me_vmult_float_emulated.h 123 0x183c x +me_vmult_float_emulated.h 120 0x1840 x +me_vmult_float_emulated.h 121 0x1844 x +me_vmult_float_emulated.h 121 0x1848 +me_vmult_float_emulated.h 121 0x1854 +me_vmult_float_emulated.h 122 0x1858 x +me_vmult_float_emulated.h 122 0x185c +me_vmult_float_emulated.h 122 0x1868 +me_vmult_float_emulated.h 123 0x186c x +me_vmult_float_emulated.h 123 0x1870 +me_vmult_float_emulated.h 123 0x187c +me_vmult_float_emulated.h 124 0x1880 x +me_vmult_float_emulated.h 124 0x1884 +me_vmult_float_emulated.h 124 0x1890 +me_vmult_float_emulated.h 125 0x1894 x +me_vmult_float_emulated.h 125 0x1898 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1286 0x18a4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1108 0x18a4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1286 0x18aa +vector.hpp 1289 0x18ae x +vector.hpp 57 0x18b4 x +vector.hpp 1292 0x18b4 1 x +vector.hpp 57 0x18c0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x18c0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 352 0x18f0 x +reduce_base_c8.h 352 0x18f4 +reduce_base_c8.h 352 0x18fe +reduce_base_c8.h 353 0x1902 x +reduce_base_c8.h 352 0x190e x +reduce_base_c8.h 352 0x1912 +reduce_base_c8.h 420 0x1920 +reduce_base_c8.h 353 0x1928 x +reduce_base_c8.h 420 0x192c x +reduce_base_c8.h 420 0x1938 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1950 +blend.hpp 170 0x195a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 184 0x1970 +reduce_mean_c8_impl.h 184 0x1974 x +reduce_mean_c8_impl.h 184 0x1978 +reduce_mean_c8_impl.h 184 0x1988 +reduce_mean_c8_impl.h 184 0x198c +reduce_mean_c8_impl.h 184 0x1990 +reduce_mean_c8_impl.h 200 0x1996 +reduce_mean_c8_impl.h 200 0x19b0 x +reduce_mean_c8_impl.h 202 0x19b0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x19ba + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 202 0x19ba 1 x +reduce_mean_c8_impl.h 202 0x19c0 +reduce_mean_c8_impl.h 200 0x19ce x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x19d2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x19d2 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 206 0x19d2 2 x +reduce_mean_c8_impl.h 206 0x19d2 3 +reduce_mean_c8_impl.h 209 0x19d2 4 +reduce_mean_c8_impl.h 206 0x19de +reduce_mean_c8_impl.h 206 0x19de 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x19ea x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x19ea 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 209 0x19ea 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x19f0 +accum.hpp 199 0x19f6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x19f6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 206 0x1a00 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1a10 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x1a10 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 209 0x1a10 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 150 0x1a50 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x1a60 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x1a70 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1a80 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1a80 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1a8a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1a8a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1a8a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1a94 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 150 0x1a9a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1aa0 +add_reduce.hpp 322 0x1aa4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1aa8 +me_vmult_float_emulated.h 108 0x1aa8 1 +me_vmult_float_emulated.h 109 0x1aa8 2 +me_vmult_float_emulated.h 110 0x1aa8 3 +me_vmult_float_emulated.h 110 0x1aa8 4 +me_vmult_float_emulated.h 111 0x1aa8 5 +me_vmult_float_emulated.h 111 0x1aa8 6 +me_vmult_float_emulated.h 111 0x1aa8 7 +me_vmult_float_emulated.h 112 0x1aa8 8 +me_vmult_float_emulated.h 112 0x1aa8 9 +me_vmult_float_emulated.h 113 0x1aa8 10 +me_vmult_float_emulated.h 114 0x1aa8 11 +me_vmult_float_emulated.h 114 0x1aa8 12 +me_vmult_float_emulated.h 115 0x1aa8 13 +me_vmult_float_emulated.h 115 0x1aa8 14 +me_vmult_float_emulated.h 115 0x1aa8 15 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1aa8 16 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1aa8 17 x +accum.hpp 1108 0x1aa8 18 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1aa8 19 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1ab2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1ab6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 112 0x1aba +me_vmult_float_emulated.h 112 0x1aba 1 +me_vmult_float_emulated.h 113 0x1aba 2 +me_vmult_float_emulated.h 113 0x1ac0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1ac4 x +add_reduce.hpp 322 0x1ac8 x +add_reduce.hpp 324 0x1ac8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1ad0 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1ad4 +me_vmult_float_emulated.h 108 0x1ad4 1 +me_vmult_float_emulated.h 109 0x1ad4 2 +me_vmult_float_emulated.h 110 0x1ad4 3 +me_vmult_float_emulated.h 110 0x1ad4 4 +me_vmult_float_emulated.h 111 0x1ad4 5 +me_vmult_float_emulated.h 111 0x1ad4 6 +me_vmult_float_emulated.h 111 0x1ad4 7 +me_vmult_float_emulated.h 113 0x1ad4 8 +me_vmult_float_emulated.h 114 0x1ad4 9 +me_vmult_float_emulated.h 114 0x1ad4 10 +me_vmult_float_emulated.h 115 0x1ad4 11 +me_vmult_float_emulated.h 115 0x1ad4 12 +me_vmult_float_emulated.h 115 0x1ad4 13 +me_vmult_float_emulated.h 112 0x1ada x +me_vmult_float_emulated.h 112 0x1ada 1 x +me_vmult_float_emulated.h 113 0x1ae0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1ae0 1 x +add_reduce.hpp 322 0x1ae8 x +add_reduce.hpp 324 0x1ae8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1af0 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1af4 +me_vmult_float_emulated.h 108 0x1af4 1 +me_vmult_float_emulated.h 109 0x1af4 2 +me_vmult_float_emulated.h 110 0x1af4 3 +me_vmult_float_emulated.h 110 0x1af4 4 +me_vmult_float_emulated.h 111 0x1af4 5 +me_vmult_float_emulated.h 111 0x1af4 6 +me_vmult_float_emulated.h 111 0x1af4 7 +me_vmult_float_emulated.h 115 0x1af4 8 +me_vmult_float_emulated.h 115 0x1af4 9 +me_vmult_float_emulated.h 115 0x1af4 10 +me_vmult_float_emulated.h 113 0x1afc x +me_vmult_float_emulated.h 114 0x1afc 1 x +me_vmult_float_emulated.h 114 0x1afc 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1b00 x +add_reduce.hpp 322 0x1b04 x +add_reduce.hpp 324 0x1b04 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1b0c x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1b10 +me_vmult_float_emulated.h 108 0x1b10 1 +me_vmult_float_emulated.h 109 0x1b10 2 +me_vmult_float_emulated.h 110 0x1b10 3 +me_vmult_float_emulated.h 110 0x1b10 4 +me_vmult_float_emulated.h 111 0x1b10 5 +me_vmult_float_emulated.h 111 0x1b10 6 +me_vmult_float_emulated.h 111 0x1b10 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1b1a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1b1e x +vector.hpp 856 0x1b24 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1b28 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1b2c +me_vmult_float_emulated.h 108 0x1b2c 1 +me_vmult_float_emulated.h 109 0x1b30 +me_vmult_float_emulated.h 110 0x1b30 1 +me_vmult_float_emulated.h 110 0x1b30 2 +me_vmult_float_emulated.h 111 0x1b30 3 +me_vmult_float_emulated.h 111 0x1b30 4 +me_vmult_float_emulated.h 111 0x1b30 5 +me_vmult_float_emulated.h 108 0x1b34 x +me_vmult_float_emulated.h 108 0x1b34 1 x +me_vmult_float_emulated.h 111 0x1b34 2 +me_vmult_float_emulated.h 111 0x1b34 3 +me_vmult_float_emulated.h 111 0x1b34 4 +me_vmult_float_emulated.h 109 0x1b3e x +me_vmult_float_emulated.h 124 0x1b42 x +me_vmult_float_emulated.h 109 0x1b4e x +me_vmult_float_emulated.h 110 0x1b4e 1 x +me_vmult_float_emulated.h 110 0x1b4e 2 x +me_vmult_float_emulated.h 115 0x1b52 x +me_vmult_float_emulated.h 111 0x1b56 x +me_vmult_float_emulated.h 115 0x1b62 x +me_vmult_float_emulated.h 115 0x1b62 1 x +me_vmult_float_emulated.h 115 0x1b62 2 x +me_vmult_float_emulated.h 111 0x1b66 x +me_vmult_float_emulated.h 111 0x1b66 1 x +me_vmult_float_emulated.h 111 0x1b66 2 x +me_vmult_float_emulated.h 117 0x1b6c x +me_vmult_float_emulated.h 118 0x1b70 x +me_vmult_float_emulated.h 119 0x1b7a x +me_vmult_float_emulated.h 117 0x1b7e x +me_vmult_float_emulated.h 118 0x1b82 x +me_vmult_float_emulated.h 118 0x1b86 +me_vmult_float_emulated.h 120 0x1b90 x +me_vmult_float_emulated.h 118 0x1b94 x +me_vmult_float_emulated.h 119 0x1b98 x +me_vmult_float_emulated.h 119 0x1b9c +me_vmult_float_emulated.h 121 0x1ba6 x +me_vmult_float_emulated.h 119 0x1baa x +me_vmult_float_emulated.h 120 0x1bae x +me_vmult_float_emulated.h 120 0x1bb2 +me_vmult_float_emulated.h 120 0x1bbe +me_vmult_float_emulated.h 121 0x1bc2 x +me_vmult_float_emulated.h 121 0x1bc6 +me_vmult_float_emulated.h 122 0x1bce x +me_vmult_float_emulated.h 121 0x1bd4 x +me_vmult_float_emulated.h 122 0x1bd8 x +me_vmult_float_emulated.h 122 0x1bdc +me_vmult_float_emulated.h 123 0x1be4 x +me_vmult_float_emulated.h 122 0x1bea x +me_vmult_float_emulated.h 123 0x1bee x +me_vmult_float_emulated.h 123 0x1bf2 +me_vmult_float_emulated.h 123 0x1bfe +me_vmult_float_emulated.h 124 0x1bfe 1 x +me_vmult_float_emulated.h 124 0x1c06 +me_vmult_float_emulated.h 125 0x1c06 1 x +me_vmult_float_emulated.h 125 0x1c14 +me_vmult_float_emulated.h 124 0x1c18 x +me_vmult_float_emulated.h 125 0x1c2a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c30 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1108 0x1c30 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1c40 +blend.hpp 170 0x1c4a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 184 0x1c80 +reduce_mean_c8_impl.h 184 0x1c84 x +reduce_mean_c8_impl.h 184 0x1c88 +reduce_mean_c8_impl.h 184 0x1c9c +reduce_mean_c8_impl.h 184 0x1ca6 +reduce_mean_c8_impl.h 184 0x1caa +reduce_mean_c8_impl.h 184 0x1cba +reduce_mean_c8_impl.h 184 0x1cbe +reduce_mean_c8_impl.h 200 0x1cc4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1ce0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1cea + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1cea 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x1cea 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1cf0 +blend.hpp 170 0x1d06 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1d0c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x1d0c 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1d20 +me_vmult_float_emulated.h 108 0x1d20 1 +me_vmult_float_emulated.h 109 0x1d20 2 +me_vmult_float_emulated.h 110 0x1d20 3 +me_vmult_float_emulated.h 110 0x1d20 4 +me_vmult_float_emulated.h 111 0x1d20 5 +me_vmult_float_emulated.h 111 0x1d20 6 +me_vmult_float_emulated.h 111 0x1d20 7 +me_vmult_float_emulated.h 112 0x1d20 8 +me_vmult_float_emulated.h 112 0x1d20 9 +me_vmult_float_emulated.h 113 0x1d20 10 +me_vmult_float_emulated.h 114 0x1d20 11 +me_vmult_float_emulated.h 114 0x1d20 12 +me_vmult_float_emulated.h 115 0x1d20 13 +me_vmult_float_emulated.h 115 0x1d20 14 +me_vmult_float_emulated.h 115 0x1d20 15 +me_vmult_float_emulated.h 109 0x1d2a +me_vmult_float_emulated.h 111 0x1d2a 1 +me_vmult_float_emulated.h 113 0x1d2a 2 +me_vmult_float_emulated.h 115 0x1d2a 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1d2a 4 +add_reduce.hpp 322 0x1d2a 5 +add_reduce.hpp 322 0x1d2a 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1d2a 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1d34 +vector.hpp 57 0x1d34 1 +vector.hpp 1139 0x1d34 2 +vector.hpp 1280 0x1d34 3 +vector.hpp 1287 0x1d34 4 +vector.hpp 1288 0x1d34 5 +vector.hpp 1292 0x1d34 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1d34 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 226 0x1d34 8 +reduce_mean_c8_impl.h 268 0x1d34 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1d3e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1d3e 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x1d3e 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1d44 +blend.hpp 170 0x1d48 +blend.hpp 170 0x1d5a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1d60 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x1d60 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 472 0x1d70 +superkernels.cpp 472 0x1d70 1 x +superkernels.cpp 477 0x1d76 +superkernels.cpp 477 0x1d80 x +superkernels.cpp 474 0x1d8a x +superkernels.cpp 569 0x1d8a 1 +superkernels.cpp 474 0x1d94 +superkernels.cpp 477 0x1da4 x +superkernels.cpp 477 0x1da4 1 x +superkernels.cpp 474 0x1db6 +superkernels.cpp 474 0x1dbc x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1dc0 +io_buffer_main.h 218 0x1dc0 1 +io_buffer_main.h 324 0x1dc0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1dc0 3 +tile.hpp 74 0x1dc0 4 +tile.hpp 74 0x1dcc x +tile.hpp 86 0x1dcc 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 483 0x1dd6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1dd6 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 480 0x1ddc x +superkernels.cpp 480 0x1de2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1dec + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 481 0x1e00 +superkernels.cpp 487 0x1e00 1 +superkernels.cpp 481 0x1e0a +superkernels.cpp 481 0x1e0a 1 x +superkernels.cpp 481 0x1e14 +superkernels.cpp 481 0x1e14 1 +superkernels.cpp 481 0x1e1e +superkernels.cpp 482 0x1e1e 1 +superkernels.cpp 481 0x1e28 +superkernels.cpp 482 0x1e28 1 x +superkernels.cpp 481 0x1e32 x +superkernels.cpp 483 0x1e32 1 +superkernels.cpp 483 0x1e38 +superkernels.cpp 487 0x1e3c +superkernels.cpp 483 0x1e42 +superkernels.cpp 481 0x1e48 +superkernels.cpp 491 0x1e4c +superkernels.cpp 481 0x1e52 +superkernels.cpp 482 0x1e52 1 x +superkernels.cpp 481 0x1e5a x +superkernels.cpp 481 0x1e60 +superkernels.cpp 483 0x1e64 x +superkernels.cpp 487 0x1e68 x +superkernels.cpp 487 0x1e6c +superkernels.cpp 487 0x1e70 +superkernels.cpp 487 0x1e74 +superkernels.cpp 487 0x1e78 +superkernels.cpp 487 0x1e7c +superkernels.cpp 483 0x1e80 x +superkernels.cpp 487 0x1e84 x +superkernels.cpp 487 0x1e88 +superkernels.cpp 487 0x1e8c +superkernels.cpp 491 0x1e90 x +superkernels.cpp 491 0x1ea0 +superkernels.cpp 491 0x1ea4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1eaa +io_buffer_main.h 218 0x1eaa 1 +io_buffer_main.h 324 0x1eaa 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 491 0x1eb8 +superkernels.cpp 491 0x1ed6 +superkernels.cpp 491 0x1ef0 +superkernels.cpp 491 0x1f00 +superkernels.cpp 491 0x1f10 +superkernels.cpp 491 0x1f16 +superkernels.cpp 491 0x1f1a +superkernels.cpp 491 0x1f20 +superkernels.cpp 491 0x1f30 +superkernels.cpp 491 0x1f30 1 +superkernels.cpp 491 0x1f30 2 +superkernels.cpp 491 0x1f3a +superkernels.cpp 492 0x1f3a 1 +superkernels.cpp 492 0x1f3a 2 +superkernels.cpp 498 0x1f44 +superkernels.cpp 498 0x1f44 1 +superkernels.cpp 499 0x1f4e +superkernels.cpp 505 0x1f54 +superkernels.cpp 508 0x1f54 1 +superkernels.cpp 511 0x1f54 2 +superkernels.cpp 491 0x1f5c +superkernels.cpp 491 0x1f60 +superkernels.cpp 491 0x1f64 +superkernels.cpp 491 0x1f6a +superkernels.cpp 492 0x1f72 x +superkernels.cpp 494 0x1f82 x +superkernels.cpp 495 0x1f86 x +superkernels.cpp 496 0x1f8a x +superkernels.cpp 498 0x1f8e x +superkernels.cpp 498 0x1f9e +superkernels.cpp 499 0x1fa2 x +superkernels.cpp 499 0x1fb2 +superkernels.cpp 500 0x1fb6 x +superkernels.cpp 500 0x1fc2 +superkernels.cpp 500 0x1fd0 +superkernels.cpp 505 0x1fe0 +superkernels.cpp 508 0x1fe0 1 +superkernels.cpp 511 0x1fe0 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1fea +io_buffer_main.h 218 0x1fea 1 +io_buffer_main.h 324 0x1fea 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 505 0x1ff0 x +superkernels.cpp 505 0x1ff0 1 +superkernels.cpp 505 0x2002 +superkernels.cpp 505 0x2006 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x200c x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 505 0x2018 +superkernels.cpp 505 0x201e x +superkernels.cpp 505 0x201e 1 +superkernels.cpp 505 0x2028 +superkernels.cpp 505 0x2030 +superkernels.cpp 505 0x2036 +superkernels.cpp 505 0x203c +superkernels.cpp 505 0x2040 +superkernels.cpp 505 0x2040 1 +superkernels.cpp 505 0x2046 +superkernels.cpp 505 0x2050 +superkernels.cpp 505 0x2050 1 +superkernels.cpp 505 0x2056 +superkernels.cpp 505 0x205a +superkernels.cpp 505 0x205a 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x206a +io_buffer_main.h 395 0x206a 1 +io_buffer_main.h 218 0x2070 x +io_buffer_main.h 218 0x2074 +io_buffer_main.h 218 0x2078 +io_buffer_main.h 235 0x207e x +io_buffer_main.h 218 0x208a x +io_buffer_main.h 218 0x208a 1 x +io_buffer_main.h 218 0x208e +io_buffer_main.h 395 0x209a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 508 0x20a6 x +superkernels.cpp 508 0x20b0 +superkernels.cpp 522 0x20b0 1 +superkernels.cpp 558 0x20b0 2 +superkernels.cpp 508 0x20be +superkernels.cpp 508 0x20c2 +superkernels.cpp 508 0x20d2 +superkernels.cpp 508 0x20d8 +superkernels.cpp 508 0x20d8 1 +superkernels.cpp 508 0x20e2 +superkernels.cpp 508 0x20ea +superkernels.cpp 508 0x20f0 +superkernels.cpp 508 0x20f6 +superkernels.cpp 508 0x20fa +superkernels.cpp 508 0x20fa 1 +superkernels.cpp 508 0x2100 +superkernels.cpp 508 0x2110 +superkernels.cpp 508 0x2110 1 +superkernels.cpp 508 0x2116 +superkernels.cpp 508 0x211a +superkernels.cpp 508 0x211a 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x212a +io_buffer_main.h 395 0x212a 1 +io_buffer_main.h 218 0x2130 x +io_buffer_main.h 218 0x2134 +io_buffer_main.h 218 0x2138 +io_buffer_main.h 235 0x213e x +io_buffer_main.h 218 0x214a x +io_buffer_main.h 218 0x214a 1 x +io_buffer_main.h 218 0x214e +io_buffer_main.h 395 0x215a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 511 0x2166 x +superkernels.cpp 511 0x2166 1 +superkernels.cpp 511 0x217a +superkernels.cpp 511 0x217e +superkernels.cpp 511 0x2182 +superkernels.cpp 511 0x2188 +superkernels.cpp 511 0x2194 +superkernels.cpp 511 0x2198 +superkernels.cpp 511 0x2198 1 +superkernels.cpp 511 0x219e +superkernels.cpp 511 0x21a6 +superkernels.cpp 511 0x21b0 +superkernels.cpp 511 0x21b4 +superkernels.cpp 511 0x21b4 1 +superkernels.cpp 511 0x21ba +superkernels.cpp 511 0x21c0 +superkernels.cpp 511 0x21c0 1 +superkernels.cpp 511 0x21c6 +superkernels.cpp 511 0x21ca +superkernels.cpp 511 0x21ca 1 +superkernels.cpp 516 0x21da +superkernels.cpp 522 0x21da 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x21da 2 x +io_buffer_main.h 395 0x21da 3 +io_buffer_main.h 218 0x21e4 +io_buffer_main.h 218 0x21e8 +io_buffer_main.h 235 0x21ee x +io_buffer_main.h 218 0x21fa x +io_buffer_main.h 218 0x21fa 1 x +io_buffer_main.h 218 0x21fe +io_buffer_main.h 395 0x220e x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 516 0x2226 +superkernels.cpp 522 0x2226 1 +superkernels.cpp 516 0x2240 +superkernels.cpp 522 0x2240 1 +superkernels.cpp 516 0x2250 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2250 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 516 0x225a x +superkernels.cpp 522 0x225a 1 +superkernels.cpp 514 0x2264 +superkernels.cpp 522 0x2264 1 x +superkernels.cpp 514 0x226e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2278 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 516 0x227c x +superkernels.cpp 522 0x2280 x +superkernels.cpp 522 0x2284 +superkernels.cpp 514 0x228a x +superkernels.cpp 514 0x228e +superkernels.cpp 516 0x2294 x +superkernels.cpp 516 0x2298 +superkernels.cpp 522 0x2298 1 +superkernels.cpp 522 0x229e x +superkernels.cpp 522 0x22a2 +superkernels.cpp 522 0x22b2 +superkernels.cpp 522 0x22b6 +superkernels.cpp 523 0x22bc +superkernels.cpp 523 0x22ca x +superkernels.cpp 523 0x22ca 1 +superkernels.cpp 523 0x22d4 +superkernels.cpp 524 0x22d4 1 +superkernels.cpp 524 0x22de +superkernels.cpp 524 0x22de 1 x +superkernels.cpp 523 0x22ee x +superkernels.cpp 524 0x22f4 x +superkernels.cpp 524 0x22f4 1 x +superkernels.cpp 524 0x22fa +superkernels.cpp 524 0x22fe +superkernels.cpp 524 0x2302 +superkernels.cpp 524 0x2306 +superkernels.cpp 525 0x230a x +superkernels.cpp 526 0x230e x +superkernels.cpp 547 0x2312 x +superkernels.cpp 525 0x2318 +superkernels.cpp 525 0x231e x +superkernels.cpp 554 0x232e +superkernels.cpp 558 0x232e 1 +superkernels.cpp 552 0x2338 +superkernels.cpp 554 0x2338 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2338 2 +io_buffer_main.h 327 0x2338 3 +io_buffer_main.h 425 0x2338 4 +io_buffer_main.h 425 0x2338 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 554 0x2342 +superkernels.cpp 555 0x2342 1 +superkernels.cpp 558 0x2342 2 +superkernels.cpp 559 0x2342 3 +superkernels.cpp 562 0x2342 4 +superkernels.cpp 563 0x2342 5 +superkernels.cpp 567 0x2342 6 +superkernels.cpp 554 0x2356 +superkernels.cpp 558 0x2356 1 +superkernels.cpp 552 0x2360 +superkernels.cpp 554 0x2360 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2360 2 +io_buffer_main.h 327 0x2360 3 +io_buffer_main.h 425 0x2360 4 +io_buffer_main.h 425 0x2360 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 554 0x236a +superkernels.cpp 555 0x236a 1 +superkernels.cpp 558 0x236a 2 +superkernels.cpp 559 0x236a 3 +superkernels.cpp 562 0x236a 4 +superkernels.cpp 563 0x236a 5 +superkernels.cpp 567 0x236a 6 +superkernels.cpp 532 0x2380 +superkernels.cpp 533 0x2380 1 +superkernels.cpp 554 0x2380 2 +superkernels.cpp 555 0x2380 3 +superkernels.cpp 558 0x2380 4 +superkernels.cpp 559 0x2380 5 +superkernels.cpp 562 0x2380 6 +superkernels.cpp 563 0x2380 7 +superkernels.cpp 567 0x2380 8 +superkernels.cpp 532 0x238a x +superkernels.cpp 532 0x238a 1 +superkernels.cpp 552 0x238a 2 +superkernels.cpp 532 0x2394 +superkernels.cpp 533 0x2394 1 +superkernels.cpp 533 0x239e x +superkernels.cpp 554 0x239e 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x239e 2 +io_buffer_main.h 327 0x239e 3 +io_buffer_main.h 425 0x239e 4 +io_buffer_main.h 425 0x239e 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 532 0x23ae x +superkernels.cpp 533 0x23b4 x +superkernels.cpp 533 0x23b4 1 x +superkernels.cpp 533 0x23ba +superkernels.cpp 533 0x23be +superkernels.cpp 533 0x23c2 +superkernels.cpp 533 0x23c6 +superkernels.cpp 534 0x23ca x +superkernels.cpp 535 0x23ce x +superkernels.cpp 547 0x23d2 x +superkernels.cpp 534 0x23d8 +superkernels.cpp 534 0x23de x +superkernels.cpp 554 0x23e6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x23f0 +io_buffer_main.h 324 0x23f0 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 541 0x2410 +superkernels.cpp 541 0x2416 x +superkernels.cpp 541 0x2416 1 +superkernels.cpp 541 0x2420 +superkernels.cpp 542 0x2420 1 +superkernels.cpp 542 0x242a x +superkernels.cpp 541 0x2438 x +superkernels.cpp 542 0x243e x +superkernels.cpp 542 0x243e 1 x +superkernels.cpp 542 0x2444 +superkernels.cpp 542 0x2448 +superkernels.cpp 542 0x244c +superkernels.cpp 542 0x244c 1 +superkernels.cpp 542 0x2452 +superkernels.cpp 543 0x2456 x +superkernels.cpp 544 0x245a x +superkernels.cpp 547 0x245e x +superkernels.cpp 543 0x2464 +superkernels.cpp 543 0x246a x +superkernels.cpp 554 0x2480 +superkernels.cpp 558 0x2480 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2480 2 +io_buffer_main.h 125 0x2480 3 x +io_buffer_main.h 324 0x2480 4 +io_buffer_main.h 327 0x2480 5 +io_buffer_main.h 327 0x2480 6 +io_buffer_main.h 425 0x2480 7 +io_buffer_main.h 425 0x2480 8 +io_buffer_main.h 125 0x248c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 287 0x2494 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 552 0x249a +superkernels.cpp 554 0x249e +superkernels.cpp 555 0x249e 1 +superkernels.cpp 558 0x249e 2 +superkernels.cpp 559 0x249e 3 +superkernels.cpp 562 0x249e 4 +superkernels.cpp 563 0x249e 5 +superkernels.cpp 567 0x249e 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 287 0x24a6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 552 0x24b0 +superkernels.cpp 552 0x24b0 1 +superkernels.cpp 554 0x24ba + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x24c0 x +io_buffer_main.h 324 0x24c0 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 552 0x24c4 x +superkernels.cpp 554 0x24e0 x +superkernels.cpp 554 0x24f0 +superkernels.cpp 554 0x24f4 +superkernels.cpp 554 0x2504 +superkernels.cpp 555 0x2504 1 +superkernels.cpp 554 0x250a +superkernels.cpp 554 0x250a 1 +superkernels.cpp 554 0x2514 +superkernels.cpp 554 0x251e +superkernels.cpp 554 0x2526 +superkernels.cpp 554 0x252a +superkernels.cpp 554 0x252a 1 +superkernels.cpp 554 0x2530 +superkernels.cpp 554 0x2530 1 +superkernels.cpp 554 0x2536 +superkernels.cpp 554 0x2540 +superkernels.cpp 554 0x2540 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2540 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 554 0x254a +superkernels.cpp 554 0x254e +superkernels.cpp 554 0x254e 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2554 +io_buffer_main.h 327 0x2554 1 +io_buffer_main.h 327 0x2554 2 +io_buffer_main.h 425 0x2554 3 +io_buffer_main.h 425 0x2554 4 +io_buffer_main.h 425 0x2554 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 555 0x2560 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2560 1 x +io_buffer_main.h 425 0x2572 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 558 0x2576 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2576 1 x +io_buffer_main.h 327 0x2590 +io_buffer_main.h 327 0x2594 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 558 0x25a0 +superkernels.cpp 558 0x25b0 x +superkernels.cpp 558 0x25c0 +superkernels.cpp 558 0x25ce +superkernels.cpp 558 0x25d2 +superkernels.cpp 558 0x25d8 +superkernels.cpp 559 0x25d8 1 +superkernels.cpp 558 0x25de +superkernels.cpp 558 0x25ea +superkernels.cpp 558 0x25ee +superkernels.cpp 558 0x25f8 +superkernels.cpp 558 0x2600 +superkernels.cpp 558 0x2604 +superkernels.cpp 558 0x2604 1 +superkernels.cpp 558 0x260a +superkernels.cpp 558 0x260a 1 +superkernels.cpp 558 0x2610 +superkernels.cpp 558 0x2620 +superkernels.cpp 558 0x2620 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2620 2 +io_buffer_main.h 324 0x2620 3 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 558 0x262a +superkernels.cpp 558 0x262e +superkernels.cpp 558 0x262e 1 +superkernels.cpp 562 0x2634 +superkernels.cpp 559 0x2642 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2642 1 x +io_buffer_main.h 425 0x2654 x +io_buffer_main.h 327 0x2658 x +io_buffer_main.h 327 0x2668 +io_buffer_main.h 327 0x266c +io_buffer_main.h 324 0x2676 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 562 0x2690 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2690 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 562 0x26a0 x +superkernels.cpp 562 0x26a0 1 +superkernels.cpp 562 0x26b2 +superkernels.cpp 562 0x26b6 +superkernels.cpp 562 0x26bc +superkernels.cpp 562 0x26ca +superkernels.cpp 562 0x26ca 1 +superkernels.cpp 562 0x26d4 +superkernels.cpp 562 0x26de +superkernels.cpp 562 0x26e6 +superkernels.cpp 562 0x26ea +superkernels.cpp 562 0x26ea 1 +superkernels.cpp 562 0x26f0 +superkernels.cpp 562 0x26f0 1 +superkernels.cpp 562 0x26f6 +superkernels.cpp 562 0x2700 +superkernels.cpp 562 0x2700 1 +superkernels.cpp 562 0x2706 +superkernels.cpp 562 0x270a +superkernels.cpp 562 0x270a 1 +superkernels.cpp 563 0x2710 +superkernels.cpp 563 0x271e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x271e 1 x +io_buffer_main.h 425 0x2730 x +io_buffer_main.h 327 0x2734 x +io_buffer_main.h 327 0x2744 +io_buffer_main.h 327 0x2748 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 566 0x2750 +superkernels.cpp 567 0x2750 1 +superkernels.cpp 566 0x2756 x +superkernels.cpp 566 0x2756 1 +superkernels.cpp 566 0x2760 +superkernels.cpp 566 0x2770 +superkernels.cpp 566 0x2774 +superkernels.cpp 567 0x278a x +superkernels.cpp 569 0x2790 +superkernels.cpp 569 0x279e x +superkernels.cpp 569 0x27a6 +superkernels.cpp 554 0x27c0 +superkernels.cpp 555 0x27c0 1 +superkernels.cpp 558 0x27c0 2 +superkernels.cpp 559 0x27c0 3 +superkernels.cpp 562 0x27c0 4 +superkernels.cpp 563 0x27c0 5 +superkernels.cpp 567 0x27c0 6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x27c0 7 +io_buffer_main.h 324 0x27c0 8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 554 0x27cc +superkernels.cpp 558 0x27cc 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x27cc 2 +io_buffer_main.h 327 0x27cc 3 +io_buffer_main.h 425 0x27cc 4 +io_buffer_main.h 425 0x27cc 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 552 0x27d2 +superkernels.cpp 554 0x27d8 +superkernels.cpp - 0x27d9 + + +superkernels.cpp: +File name Line number Starting address View Stmt + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 29 0x930 x +0_0_reloadable2.cc 31 0x930 1 x +0_0_reloadable2.cc 29 0x936 +0_0_reloadable2.cc 31 0x93c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x93c 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 17 0x944 +0_0_reloadable2.cc 31 0x944 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0x956 x +io_buffer_compiler.h 590 0x95a +io_buffer_compiler.h 590 0x95e +io_buffer_compiler.h 590 0x962 +io_buffer_compiler.h 590 0x966 +io_buffer_compiler.h 195 0x976 x +io_buffer_compiler.h 195 0x976 1 x +io_buffer_compiler.h 194 0x97a x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x97e +io_buffer_main.h 410 0x988 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 17 0x992 x +0_0_reloadable2.cc 18 0x996 x +0_0_reloadable2.cc 19 0x99a x +0_0_reloadable2.cc 16 0x99e x +0_0_reloadable2.cc 38 0x9b0 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0x9b4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 605 0x9c2 x +io_buffer_compiler.h 605 0x9c6 +io_buffer_compiler.h 606 0x9ca + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0x9ca 1 +io_buffer_main.h 440 0x9d8 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 41 0x9dc + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x9dc 1 +io_buffer_compiler.h 606 0x9e2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 41 0x9f0 x +0_0_reloadable2.cc 41 0x9f8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x9fc x +io_buffer_compiler.h 606 0xa00 +io_buffer_compiler.h 606 0xa04 +io_buffer_compiler.h - 0xa05 + + +CU: me_div.c: +File name Line number Starting address View Stmt + +./me_div.c:[++] +me_div.c 108 0x27f0 +me_div.c 108 0x27f0 1 +me_div.c 115 0x27f0 2 x +me_div.c 108 0x27f6 +me_div.c 108 0x27fa +me_div.c 108 0x27fe +me_div.c 108 0x2802 +me_div.c 108 0x2806 +me_div.c 108 0x280a +me_div.c 108 0x280e +me_div.c 108 0x2812 +me_div.c 108 0x2816 +me_div.c 108 0x281a +me_div.c 108 0x281e +me_div.c 108 0x2822 +me_div.c 108 0x2826 +me_div.c 108 0x282a +me_div.c 108 0x282e +me_div.c 108 0x2832 +me_div.c 108 0x2836 +me_div.c 108 0x283a +me_div.c 108 0x283e +me_div.c 108 0x2842 +me_div.c 108 0x2846 +me_div.c 108 0x284a +me_div.c 108 0x284e +me_div.c 108 0x2852 +me_div.c 108 0x2856 +me_div.c 108 0x285a +me_div.c 108 0x285e +me_div.c 108 0x2862 +me_div.c 119 0x2866 x +me_div.c 108 0x286a x +me_div.c 108 0x286e +me_div.c 108 0x2872 +me_div.c 108 0x2876 +me_div.c - 0x2877 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: softfloat-specialize: +File name Line number Starting address View Stmt + +./softfloat-specialize:[++] +softfloat-specialize 78 0x2880 +softfloat-specialize 137 0x2880 1 +softfloat-specialize 139 0x2880 2 +softfloat-specialize 143 0x2880 3 x +softfloat-specialize 137 0x288a +softfloat-specialize 139 0x288a 1 +softfloat-specialize 140 0x288a 2 +softfloat-specialize 141 0x288a 3 +softfloat-specialize 78 0x2894 +softfloat-specialize 137 0x2894 1 +softfloat-specialize 139 0x2894 2 +softfloat-specialize 140 0x2894 3 x +softfloat-specialize 141 0x289e x +softfloat-specialize 137 0x28a2 x +softfloat-specialize 139 0x28a6 x +softfloat-specialize 139 0x28aa +softfloat-specialize 137 0x28ae x +softfloat-specialize 137 0x28b2 +softfloat-specialize 78 0x28b6 x +softfloat-specialize 78 0x28ba +softfloat-specialize 143 0x28be x +softfloat-specialize 137 0x28c2 +softfloat-specialize 139 0x28c2 1 +softfloat-specialize 139 0x28c8 x +softfloat-specialize 139 0x28cc +softfloat-specialize 137 0x28d0 x +softfloat-specialize 137 0x28d4 +softfloat-specialize 143 0x28d8 x +softfloat-specialize 137 0x28dc x +softfloat-specialize 139 0x28e0 x +softfloat-specialize 143 0x28e4 x +softfloat-specialize 139 0x28e8 x +softfloat-specialize 143 0x28ec x + +./softfloat.c:[++] +softfloat.c 154 0x28f0 x +softfloat.c 161 0x28f0 1 +softfloat.c 203 0x28f0 2 +softfloat.c 161 0x28fa x +softfloat.c 171 0x28fa 1 +softfloat.c 174 0x28fa 2 +softfloat.c 178 0x28fa 3 +softfloat.c 194 0x28fa 4 +softfloat.c 162 0x290c x +softfloat.c 164 0x290c 1 x +softfloat.c 182 0x2912 +softfloat.c 185 0x2912 1 +softfloat.c 202 0x2912 2 +softfloat.c 165 0x291e +softfloat.c 171 0x291e 1 +softfloat.c 171 0x291e 2 +softfloat.c 174 0x291e 3 +softfloat.c 174 0x291e 4 +softfloat.c 165 0x2928 +softfloat.c 171 0x2928 1 x +softfloat.c 171 0x292e +softfloat.c 174 0x2932 x +softfloat.c 170 0x2936 +softfloat.c 174 0x2936 1 +softfloat.c 170 0x293c x +softfloat.c 170 0x293c 1 x +softfloat.c 165 0x2940 x +softfloat.c 165 0x2944 +softfloat.c 179 0x2950 +softfloat.c 179 0x2950 1 x +softfloat.c 180 0x2950 2 +softfloat.c 181 0x2950 3 +softfloat.c 179 0x2956 +softfloat.c 179 0x295a +softfloat.c 178 0x2960 x + +./softfloat-macros:[++] +softfloat-macros 50 0x2964 + +./softfloat.c:[++] +softfloat.c 128 0x2964 1 +softfloat.c 128 0x2968 x +softfloat.c 181 0x2970 x +softfloat.c 182 0x2970 1 x +softfloat.c 182 0x2970 2 +softfloat.c 182 0x297a +softfloat.c 180 0x297e x +softfloat.c 182 0x2982 x +softfloat.c 181 0x2986 x +softfloat.c 180 0x298a x + +./softfloat-macros:[++] +softfloat-macros 50 0x2990 + +./softfloat.c:[++] +softfloat.c 187 0x2990 1 +softfloat.c 192 0x2990 2 +softfloat.c 204 0x2990 3 +softfloat.c 204 0x2990 4 +softfloat.c 187 0x299c x +softfloat.c 187 0x29a0 +softfloat.c 192 0x29b0 x + +./softfloat-macros:[++] +softfloat-macros 46 0x29b4 x +softfloat-macros 46 0x29b4 1 x +softfloat-macros 49 0x29c4 +softfloat-macros 50 0x29c4 1 x +softfloat-macros 50 0x29ca +softfloat-macros 50 0x29ce +softfloat-macros 50 0x29d2 +softfloat-macros 49 0x29d6 x +softfloat-macros 50 0x29da x +softfloat-macros 53 0x29de x +softfloat-macros 50 0x29e2 x +softfloat-macros 49 0x29e6 x + +./softfloat.c:[++] +softfloat.c 194 0x29f6 x +softfloat.c 204 0x29fa +softfloat.c 204 0x29fa 1 +softfloat.c 204 0x2a10 +softfloat.c 204 0x2a10 1 +softfloat.c 202 0x2a20 x +softfloat.c 202 0x2a20 1 +softfloat.c 203 0x2a20 2 x +softfloat.c 128 0x2a2a +softfloat.c 203 0x2a2a 1 +softfloat.c 203 0x2a2a 2 +softfloat.c 203 0x2a34 +softfloat.c 202 0x2a38 +softfloat.c 203 0x2a3c +softfloat.c 205 0x2a40 x +softfloat.c 203 0x2a44 x +softfloat.c 204 0x2a48 x +softfloat.c 204 0x2a48 1 x +softfloat.c 128 0x2a4c x +softfloat.c 128 0x2a50 +softfloat.c 128 0x2a54 +softfloat.c 185 0x2a60 x +softfloat.c 128 0x2a64 +softfloat.c 128 0x2a6a x +softfloat.c 185 0x2a6e x +softfloat.c 185 0x2a72 +softfloat.c 218 0x2a80 x +softfloat.c 224 0x2a80 1 x + +./softfloat-macros:[++] +softfloat-macros 552 0x2a86 x + +./softfloat.c:[++] +softfloat.c 223 0x2a8a x +softfloat.c 224 0x2a8e x +softfloat.c 224 0x2a92 +softfloat.c 477 0x2aa0 x +softfloat.c 481 0x2aa0 1 +softfloat.c 481 0x2aa0 2 x +softfloat.c 482 0x2ab0 +softfloat.c 482 0x2ab6 x +softfloat.c 482 0x2aba +softfloat.c 484 0x2aca +softfloat.c 484 0x2aca 1 x +softfloat.c 484 0x2ad4 +softfloat.c 484 0x2ad4 1 +softfloat.c 483 0x2ad8 +softfloat.c 483 0x2adc x +softfloat.c 481 0x2af0 x +softfloat.c 482 0x2b00 x +softfloat.c 70 0x2b20 +softfloat.c 81 0x2b20 1 +softfloat.c 734 0x2b20 2 x +softfloat.c 81 0x2b2a x +softfloat.c 81 0x2b2e +softfloat.c 81 0x2b32 +softfloat.c 81 0x2b36 + +./softfloat-macros:[++] +softfloat-macros 50 0x2b3a + +./softfloat.c:[++] +softfloat.c 744 0x2b3a 1 x +softfloat.c 747 0x2b3a 2 +softfloat.c 761 0x2b3a 3 +softfloat.c 772 0x2b3a 4 +softfloat.c 788 0x2b3a 5 +softfloat.c 747 0x2b40 x +softfloat.c 747 0x2b44 +softfloat.c 70 0x2b4a x +softfloat.c 70 0x2b4e +softfloat.c 745 0x2b4e 1 +softfloat.c 746 0x2b4e 2 +softfloat.c 745 0x2b54 x +softfloat.c 746 0x2b58 x +softfloat.c 748 0x2b58 1 +softfloat.c 762 0x2b58 2 + +./softfloat-macros:[++] +softfloat-macros 50 0x2b5e + +./softfloat.c:[++] +softfloat.c 128 0x2b5e 1 +softfloat.c 748 0x2b5e 2 x +softfloat.c 761 0x2b64 x +softfloat.c 761 0x2b68 +softfloat.c 128 0x2b6e x +softfloat.c 762 0x2b7a x +softfloat.c 762 0x2b7e +softfloat.c 793 0x2b8e +softfloat.c 787 0x2b92 +softfloat.c 767 0x2b96 x +softfloat.c 766 0x2b9a x +softfloat.c 772 0x2b9e x + +./softfloat-macros:[++] +softfloat-macros 46 0x2ba2 x +softfloat-macros 46 0x2ba2 1 x + +./softfloat.c:[++] +softfloat.c 770 0x2ba8 +softfloat.c 785 0x2ba8 1 +softfloat.c 770 0x2bae x +softfloat.c 766 0x2bb2 x + +./softfloat-macros:[++] +softfloat-macros 49 0x2bba +softfloat-macros 50 0x2bba 1 x +softfloat-macros 50 0x2bc0 +softfloat-macros 50 0x2bc4 +softfloat-macros 49 0x2bc8 x +softfloat-macros 50 0x2bd2 x +softfloat-macros 50 0x2bd6 +softfloat-macros 53 0x2bda x +softfloat-macros 50 0x2bde x +softfloat-macros 49 0x2be2 x + +./softfloat.c:[++] +softfloat.c 748 0x2bf0 x +softfloat.c 756 0x2bf6 +softfloat.c 785 0x2bf6 1 +softfloat.c 793 0x2c04 +softfloat.c 753 0x2c08 x +softfloat.c 787 0x2c08 1 +softfloat.c 752 0x2c0e +softfloat.c 752 0x2c0e 1 +softfloat.c 752 0x2c12 x +softfloat.c 752 0x2c12 1 x + +./softfloat-macros:[++] +softfloat-macros 46 0x2c16 x +softfloat-macros 46 0x2c16 1 x + +./softfloat.c:[++] +softfloat.c 756 0x2c1c x +softfloat.c 752 0x2c20 x +softfloat.c 752 0x2c20 1 x + +./softfloat-macros:[++] +softfloat-macros 49 0x2c2a +softfloat-macros 50 0x2c2a 1 x +softfloat-macros 50 0x2c30 +softfloat-macros 50 0x2c34 +softfloat-macros 50 0x2c38 +softfloat-macros 49 0x2c3c x +softfloat-macros 50 0x2c40 x +softfloat-macros 53 0x2c44 x +softfloat-macros 50 0x2c48 x +softfloat-macros 49 0x2c4c x + +./softfloat.c:[++] +softfloat.c 785 0x2c50 x +softfloat.c 786 0x2c50 1 +softfloat.c 787 0x2c50 2 x +softfloat.c 786 0x2c5a x +softfloat.c 790 0x2c5a 1 x +softfloat.c 786 0x2c60 +softfloat.c 788 0x2c64 x +softfloat.c 788 0x2c68 +softfloat.c 788 0x2c6c +softfloat.c 793 0x2c70 x +softfloat.c 763 0x2c80 x +softfloat.c 764 0x2c90 x +softfloat.c 128 0x2c94 +softfloat.c 128 0x2c9a x +softfloat.c 776 0x2cb0 x +softfloat.c 780 0x2cc0 x +softfloat.c 793 0x2cd0 +softfloat.c 781 0x2cda +softfloat.c 781 0x2ce0 x +softfloat.c 793 0x2ce0 1 +softfloat.c 781 0x2ce6 +softfloat.c 749 0x2cf0 x +softfloat.c 750 0x2d00 x +softfloat.c 763 0x2d10 x +softfloat.c 777 0x2d20 x +softfloat.c 777 0x2d24 +softfloat.c 778 0x2d34 x +softfloat.c 780 0x2d50 x +softfloat.c 780 0x2d50 1 x +softfloat.c 780 0x2d56 +softfloat.c 780 0x2d5a +softfloat.c 128 0x2d5e x +softfloat.c 749 0x2d70 x +softfloat.c 777 0x2d80 x +softfloat.c 70 0x2d90 +softfloat.c 81 0x2d90 1 +softfloat.c 805 0x2d90 2 x +softfloat.c 81 0x2d9a x +softfloat.c 81 0x2d9e +softfloat.c 70 0x2da2 x +softfloat.c 81 0x2da6 x +softfloat.c 81 0x2daa +softfloat.c 70 0x2dae x +softfloat.c 816 0x2dae 1 +softfloat.c 817 0x2dae 2 +softfloat.c 816 0x2db4 x + +./softfloat-macros:[++] +softfloat-macros 50 0x2db8 + +./softfloat.c:[++] +softfloat.c 815 0x2db8 1 x +softfloat.c 818 0x2db8 2 +softfloat.c 819 0x2db8 3 +softfloat.c 843 0x2db8 4 +softfloat.c 818 0x2dbe x +softfloat.c 818 0x2dc2 +softfloat.c 817 0x2dc8 x +softfloat.c 833 0x2dcc +softfloat.c 851 0x2dcc 1 +softfloat.c 859 0x2dcc 2 +softfloat.c 862 0x2dcc 3 +softfloat.c 851 0x2dd6 x +softfloat.c 862 0x2dda x +softfloat.c 859 0x2dde x +softfloat.c 819 0x2de2 x +softfloat.c 819 0x2de6 +softfloat.c 825 0x2dec +softfloat.c 835 0x2dec 1 +softfloat.c 835 0x2df0 x +softfloat.c 833 0x2dfa x +softfloat.c 833 0x2dfe +softfloat.c 868 0x2e0e +softfloat.c 838 0x2e12 x +softfloat.c 837 0x2e16 x +softfloat.c 843 0x2e1a x + +./softfloat-macros:[++] +softfloat-macros 46 0x2e1e x +softfloat-macros 46 0x2e1e 1 x + +./softfloat.c:[++] +softfloat.c 837 0x2e24 x + +./softfloat-macros:[++] +softfloat-macros 49 0x2e30 +softfloat-macros 50 0x2e30 1 x +softfloat-macros 50 0x2e30 2 +softfloat-macros 50 0x2e3a +softfloat-macros 50 0x2e3e +softfloat-macros 53 0x2e42 x +softfloat-macros 49 0x2e46 x +softfloat-macros 50 0x2e4a x +softfloat-macros 50 0x2e4e +softfloat-macros 50 0x2e52 +softfloat-macros 49 0x2e56 x + +./softfloat.c:[++] +softfloat.c 846 0x2e66 x +softfloat.c 851 0x2e80 x +softfloat.c 867 0x2e90 +softfloat.c 868 0x2e94 +softfloat.c 855 0x2e98 +softfloat.c 855 0x2e98 1 +softfloat.c 867 0x2e9c +softfloat.c 856 0x2ea0 x +softfloat.c 855 0x2ea4 x +softfloat.c 855 0x2ea4 1 x + +./softfloat-macros:[++] +softfloat-macros 46 0x2ea8 x +softfloat-macros 46 0x2ea8 1 x + +./softfloat.c:[++] +softfloat.c 855 0x2eae x +softfloat.c 855 0x2eae 1 x + +./softfloat-macros:[++] +softfloat-macros 49 0x2eba +softfloat-macros 50 0x2eba 1 x +softfloat-macros 50 0x2eba 2 +softfloat-macros 50 0x2ec4 +softfloat-macros 50 0x2ec8 +softfloat-macros 50 0x2ecc +softfloat-macros 49 0x2ed0 x +softfloat-macros 50 0x2ed4 x +softfloat-macros 53 0x2ed8 x +softfloat-macros 50 0x2edc x +softfloat-macros 49 0x2ee0 x + +./softfloat.c:[++] +softfloat.c 864 0x2ef0 x +softfloat.c 868 0x2f00 x +softfloat.c 867 0x2f06 x +softfloat.c 820 0x2f20 x +softfloat.c 829 0x2f30 x +softfloat.c 829 0x2f34 +softfloat.c 825 0x2f3a x +softfloat.c 825 0x2f3e +softfloat.c 825 0x2f42 +softfloat.c 830 0x2f4a x +softfloat.c 830 0x2f4e +softfloat.c 128 0x2f5e +softfloat.c 831 0x2f62 +softfloat.c 831 0x2f68 x +softfloat.c 831 0x2f70 +softfloat.c 831 0x2f74 +softfloat.c 831 0x2f7c +softfloat.c 128 0x2f80 x +softfloat.c 834 0x2f90 x +softfloat.c 128 0x2fa0 +softfloat.c 835 0x2fa4 x +softfloat.c 128 0x2fa8 x +softfloat.c 128 0x2fac +softfloat.c 128 0x2fb2 +softfloat.c 852 0x2fc0 x +softfloat.c 853 0x2fd0 x +softfloat.c 821 0x2fe0 x +softfloat.c 821 0x2fe4 +softfloat.c 823 0x2ff4 x +softfloat.c 868 0x3016 +softfloat.c 864 0x301a +softfloat.c 846 0x3036 +softfloat.c 867 0x303a +softfloat.c 868 0x303e +softfloat.c 834 0x3050 x +softfloat.c 852 0x3060 x +softfloat.c 821 0x3070 x +softfloat.c 92 0x3080 +softfloat.c 878 0x3080 1 x +softfloat.c 92 0x3084 x +softfloat.c 92 0x3088 +softfloat.c 884 0x308c x +softfloat.c 884 0x3090 +softfloat.c 888 0x30a0 x +softfloat.c 885 0x30b0 x +softfloat.c - 0x30b1 + + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/scripts/0_2_reloadable6.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/scripts/0_2_reloadable6.bcf new file mode 100644 index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/scripts/0_2_reloadable6.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x930 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x930 + +_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7ba80 0x40 //reserved for sync buffer +_stack DM_stack 0x7bac0 0x940 //stack for core +_reserved DMb 0x7c400 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c440 0x880//heap +_reserved DMb 0x40000 0x3b280 + +_reserved DMb 0x7ccc0 0x3340 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/scripts/0_2_reloadable6.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/scripts/0_2_reloadable6.prx new file mode 100644 index 0000000000000000000000000000000000000000..5e3707bb19575a3232b0261f586da1fed8993d26 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/scripts/0_2_reloadable6.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/src/0_2_reloadable6.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/src/0_2_reloadable6.cc new file mode 100644 index 0000000000000000000000000000000000000000..7b211124072bdc08c2e3d113228cd9b65f8857a3 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable6/src/0_2_reloadable6.cc @@ -0,0 +1,41 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void superkernel_reduce_mean_c8(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); + +// Declare Kernel objects and external arrays + + +void _b961_wrapper(void* args[]) +{ + superkernel_reduce_mean_c8( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[1] = { + _b961_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + idx += numAsyncIn; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + idx += numAsyncIn; +} diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.calltree new file mode 100644 index 0000000000000000000000000000000000000000..a9aa937024e08d6db65ac17b5f174a0a1241e359 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.calltree @@ -0,0 +1,108 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:20 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z13_b896_wrapperPPv (referenced text) + _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + _Z13_b901_wrapperPPv (referenced text) + _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _Z13_b906_wrapperPPv (referenced text) + _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + _Z13_b881_wrapperPPv (referenced text) + _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + _Z13_b891_wrapperPPv (referenced text) + _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + _Z13_b924_wrapperPPv (referenced text) + _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE + _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (*) + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (*) + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*) + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (*) + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (*) + _Z13_b919_wrapperPPv (referenced text) + _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 320 0 0 390 13150 _Z13kernelWrapperPPvjjjj + 0 192 1 1 36 4714 _Z13_b896_wrapperPPv + 64 192 1 2 568 4678 _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 0 0 3 4 270 270 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + 0 192 1 1 32 1252 _Z13_b901_wrapperPPv + 64 192 1 2 488 1220 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 128 2 3 62 304 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + 64 64 3 4 162 186 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + 128 128 2 3 114 428 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + 0 0 3 4 314 314 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 0 64 1 1 32 862 _Z13_b906_wrapperPPv + 64 64 1 2 488 830 _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 0 0 2 3 100 100 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + 0 0 2 3 242 242 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + 0 256 1 1 32 1394 _Z13_b881_wrapperPPv + 64 256 1 2 488 1362 _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 74 190 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + 64 192 2 3 150 684 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + 128 128 3 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 128 1 1 36 1092 _Z13_b891_wrapperPPv + 64 128 1 2 602 1056 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 192 1 1 40 6494 _Z13_b924_wrapperPPv + 64 192 1 2 1126 6454 _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE + 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (*) + 64 64 2 3 98 214 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*) + 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 128 2 3 16 550 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 128 128 2 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 192 1 1 36 2050 _Z13_b919_wrapperPPv + 128 192 1 2 478 2014 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 672 814 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 0 2 3 722 722 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + +Maximum call level : 5 +Maximum stack level: 4 +Maximum stack size : 320 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..cc24263e196c609ab062129e37812e382b48d43f --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.cmic2 @@ -0,0 +1,19187 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable5.cc" 94 first +.src_ref 0 "0_0_reloadable5.cc" 96 60 first +.src_ref 0 "0_0_reloadable5.cc" 96 110 +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.function_start + 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00100001" // /* MW 4 */ + 2355 "11010001" // /* MW 3 */ + 2356 "11000110" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 94 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ + 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "11010000" // /* MW 6 */ + 2367 "00101011" // /* MW 5 */ + 2368 "00000000" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "11110011" // /* MW 2 */ + 2371 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 96 110 +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2373 "01110000" // /* MW 7 */ + 2374 "10010000" // /* MW 6 */ + 2375 "11101000" // /* MW 5 */ + 2376 "00000001" // /* MW 4 */ + 2377 "10110000" // /* MW 3 */ + 2378 "10000111" // /* MW 2 */ + 2379 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 96 110 first + 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2381 "11100000" // /* MW 5 */ + 2382 "11000001" // /* MW 4 */ + 2383 "10110111" // /* MW 3 */ + 2384 "00000110" // /* MW 2 */ + 2385 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2387 "00100000" // /* MW 3 */ + 2388 "10011000" // /* MW 2 */ + 2389 "00011110" // /* MW 1 */ + 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2391 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2393 "10000010" // /* MW 3 */ + 2394 "01101000" // /* MW 2 */ + 2395 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2397 "00110110" // /* MW 3 */ + 2398 "00011110" // /* MW 2 */ + 2399 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2401 "01110110" // /* MW 3 */ + 2402 "00111110" // /* MW 2 */ + 2403 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2405 "01010110" // /* MW 3 */ + 2406 "11101110" // /* MW 2 */ + 2407 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2409 "01110110" // /* MW 3 */ + 2410 "00000111" // /* MW 2 */ + 2411 "00000111" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ + 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2423 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2425 "00110010" // /* MW 3 */ + 2426 "01100011" // /* MW 2 */ + 2427 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2429 "00110001" // /* MW 3 */ + 2430 "11010110" // /* MW 2 */ + 2431 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 + 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2433 "11111101" // /* MW 3 */ + 2434 "11100010" // /* MW 2 */ + 2435 "00010111" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ + 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2441 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2443 "00011000" // /* MW 3 */ + 2444 "10010111" // /* MW 2 */ + 2445 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2447 "00001001" // /* MW 3 */ + 2448 "00100100" // /* MW 2 */ + 2449 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 first + 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00101101" // /* MW 3 */ + 2452 "00101001" // /* MW 2 */ + 2453 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "00100000" // /* MW 3 */ + 2456 "10001010" // /* MW 2 */ + 2457 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2459 "10001011" // /* MW 5 */ + 2460 "11011000" // /* MW 4 */ + 2461 "11011111" // /* MW 3 */ + 2462 "01001110" // /* MW 2 */ + 2463 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2465 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2467 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2469 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2471 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2473 "00000101" // /* MW 3 */ + 2474 "00100110" // /* MW 2 */ + 2475 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2477 "11111100" // /* MW 3 */ + 2478 "11110100" // /* MW 2 */ + 2479 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2481 "00000000" // /* MW 7 */ + 2482 "11000001" // /* MW 6 */ + 2483 "10110100" // /* MW 5 */ + 2484 "00000011" // /* MW 4 */ + 2485 "10110000" // /* MW 3 */ + 2486 "01101010" // /* MW 2 */ + 2487 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2489 "01110110" // /* MW 3 */ + 2490 "00011110" // /* MW 2 */ + 2491 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2493 "10110110" // /* MW 3 */ + 2494 "00111110" // /* MW 2 */ + 2495 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2497 "10010110" // /* MW 3 */ + 2498 "11101110" // /* MW 2 */ + 2499 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2501 "01110110" // /* MW 3 */ + 2502 "00000111" // /* MW 2 */ + 2503 "00000111" // /* MW 1 */ + 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2505 "00000000" // /* MW 1 */ + 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2507 "00000000" // /* MW 1 */ + 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2509 "00000000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2517 "01010010" // /* MW 3 */ + 2518 "11100111" // /* MW 2 */ + 2519 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "01110001" // /* MW 3 */ + 2522 "11010110" // /* MW 2 */ + 2523 "00001111" // /* MW 1 */ + 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2525 "00000000" // /* MW 1 */ + 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2527 "00000000" // /* MW 1 */ + 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2529 "00000000" // /* MW 1 */ + 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2531 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2533 "00011000" // /* MW 3 */ + 2534 "00010111" // /* MW 2 */ + 2535 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 first + 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2537 "00101101" // /* MW 3 */ + 2538 "00100011" // /* MW 2 */ + 2539 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "10100000" // /* MW 3 */ + 2542 "10001000" // /* MW 2 */ + 2543 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000000" // /* MW 5 */ + 2546 "11001001" // /* MW 4 */ + 2547 "11001110" // /* MW 3 */ + 2548 "00000111" // /* MW 2 */ + 2549 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2551 "00101011" // /* MW 5 */ + 2552 "11010100" // /* MW 4 */ + 2553 "11011111" // /* MW 3 */ + 2554 "00010011" // /* MW 2 */ + 2555 "11100000" // /* MW 1 */ + 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2557 "00000000" // /* MW 1 */ + 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2559 "00000000" // /* MW 1 */ + 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2561 "00000000" // /* MW 1 */ + 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2563 "00000000" // /* MW 1 */ + 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2565 "00000000" // /* MW 1 */ + 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2567 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 4 +.no_stack_arguments + 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2569 "01000000" // /* MW 3 */ + 2570 "00110000" // /* MW 2 */ + 2571 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 105 60 +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.delay_slot + 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "11000000" // /* MW 3 */ + 2574 "01100000" // /* MW 2 */ + 2575 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2583 "01111110" // /* MW 9 */ + 2584 "10100101" // /* MW 8 */ + 2585 "00000001" // /* MW 7 */ + 2586 "00000000" // /* MW 6 */ + 2587 "00010000" // /* MW 5 */ + 2588 "00000000" // /* MW 4 */ + 2589 "11110000" // /* MW 3 */ + 2590 "00101100" // /* MW 2 */ + 2591 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 105 60 first +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.return_address + 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2593 "00001010" // /* MW 5 */ + 2594 "01000000" // /* MW 4 */ + 2595 "11010000" // /* MW 3 */ + 2596 "11000110" // /* MW 2 */ + 2597 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2599 "01010001" // /* MW 3 */ + 2600 "11101011" // /* MW 2 */ + 2601 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 + 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2603 "01000001" // /* MW 3 */ + 2604 "11101100" // /* MW 2 */ + 2605 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2607 "00101001" // /* MW 3 */ + 2608 "11110000" // /* MW 2 */ + 2609 "00000111" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ + 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2613 "00000000" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "10001000" // /* MW 3 */ + 2618 "01101000" // /* MW 2 */ + 2619 "00011001" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00110110" // /* MW 3 */ + 2622 "00000110" // /* MW 2 */ + 2623 "00000001" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ + 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2629 "00000000" // /* MW 1 */ + 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "00011100" // /* MW 3 */ + 2636 "10100000" // /* MW 2 */ + 2637 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "00001000" // /* MW 3 */ + 2640 "01010101" // /* MW 2 */ + 2641 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2643 "01000001" // /* MW 5 */ + 2644 "10101111" // /* MW 4 */ + 2645 "11011101" // /* MW 3 */ + 2646 "11000110" // /* MW 2 */ + 2647 "00111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 first + 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2649 "01010110" // /* MW 3 */ + 2650 "00000010" // /* MW 2 */ + 2651 "00000111" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ + 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2655 "00000000" // /* MW 1 */ + 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2657 "00000000" // /* MW 1 */ + 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2659 "00000000" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2663 "00010001" // /* MW 3 */ + 2664 "00100111" // /* MW 2 */ + 2665 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2667 "00010000" // /* MW 5 */ + 2668 "11010010" // /* MW 4 */ + 2669 "01000000" // /* MW 3 */ + 2670 "01100110" // /* MW 2 */ + 2671 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2673 "01100011" // /* MW 5 */ + 2674 "11101100" // /* MW 4 */ + 2675 "11010011" // /* MW 3 */ + 2676 "11000110" // /* MW 2 */ + 2677 "00000000" // /* MW 1 */ + 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2679 "00000000" // /* MW 1 */ + 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2681 "00000000" // /* MW 1 */ + 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2683 "00000000" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2691 "00001000" // /* MW 3 */ + 2692 "01010101" // /* MW 2 */ + 2693 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 + 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2695 "00111001" // /* MW 3 */ + 2696 "11111100" // /* MW 2 */ + 2697 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2699 "00110110" // /* MW 3 */ + 2700 "11110110" // /* MW 2 */ + 2701 "00000000" // /* MW 1 */ + 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "10011001" // /* MW 3 */ + 2704 "11110111" // /* MW 2 */ + 2705 "00000111" // /* MW 1 */ + 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11110001" // /* MW 3 */ + 2708 "11111001" // /* MW 2 */ + 2709 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 first + 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2711 "00000001" // /* MW 5 */ + 2712 "00000000" // /* MW 4 */ + 2713 "00000000" // /* MW 3 */ + 2714 "11111000" // /* MW 2 */ + 2715 "11111111" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ + 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2719 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 + 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2721 "00000000" // /* MW 3 */ + 2722 "00101000" // /* MW 2 */ + 2723 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "00011100" // /* MW 3 */ + 2726 "11100000" // /* MW 2 */ + 2727 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "00010001" // /* MW 3 */ + 2730 "00100001" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2733 "00000010" // /* MW 3 */ + 2734 "01100001" // /* MW 2 */ + 2735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2737 "00010001" // /* MW 3 */ + 2738 "11110110" // /* MW 2 */ + 2739 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2741 "00000000" // /* MW 1 */ +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 432 first +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.function_start + 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2753 "01111000" // /* MW 9 */ + 2754 "01100000" // /* MW 8 */ + 2755 "01001001" // /* MW 7 */ + 2756 "10001000" // /* MW 6 */ + 2757 "01000000" // /* MW 5 */ + 2758 "00000000" // /* MW 4 */ + 2759 "11010000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2763 "01001000" // /* MW 9 */ + 2764 "10000010" // /* MW 8 */ + 2765 "00110000" // /* MW 7 */ + 2766 "11101001" // /* MW 6 */ + 2767 "01010111" // /* MW 5 */ + 2768 "00111110" // /* MW 4 */ + 2769 "11010000" // /* MW 3 */ + 2770 "10000001" // /* MW 2 */ + 2771 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 432 +.src_ref 2 "conv2d_bf16_params.h" 444 52 + 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2773 "01110000" // /* MW 9 */ + 2774 "00000000" // /* MW 8 */ + 2775 "00000000" // /* MW 7 */ + 2776 "00000000" // /* MW 6 */ + 2777 "00000010" // /* MW 5 */ + 2778 "00000000" // /* MW 4 */ + 2779 "00000000" // /* MW 3 */ + 2780 "10000001" // /* MW 2 */ + 2781 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 30 +.src_ref 2 "conv2d_bf16_params.h" 458 30 + 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2783 "01011000" // /* MW 11 */ + 2784 "00010000" // /* MW 10 */ + 2785 "00000000" // /* MW 9 */ + 2786 "00101000" // /* MW 8 */ + 2787 "00000000" // /* MW 7 */ + 2788 "10000001" // /* MW 6 */ + 2789 "10110101" // /* MW 5 */ + 2790 "11111101" // /* MW 4 */ + 2791 "00000111" // /* MW 3 */ + 2792 "10000110" // /* MW 2 */ + 2793 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2795 "01011000" // /* MW 11 */ + 2796 "00001111" // /* MW 10 */ + 2797 "10001000" // /* MW 9 */ + 2798 "10101010" // /* MW 8 */ + 2799 "01010111" // /* MW 7 */ + 2800 "10111111" // /* MW 6 */ + 2801 "11010101" // /* MW 5 */ + 2802 "11111001" // /* MW 4 */ + 2803 "00000111" // /* MW 3 */ + 2804 "01100011" // /* MW 2 */ + 2805 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "00000010" // /* MW 5 */ + 2808 "01100000" // /* MW 4 */ + 2809 "10110000" // /* MW 3 */ + 2810 "10111110" // /* MW 2 */ + 2811 "11111110" // /* MW 1 */ + 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2815 "00101001" // /* MW 3 */ + 2816 "00011100" // /* MW 2 */ + 2817 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2819 "00001001" // /* MW 3 */ + 2820 "00011100" // /* MW 2 */ + 2821 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00101110" // /* MW 3 */ + 2824 "00011100" // /* MW 2 */ + 2825 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "00001110" // /* MW 3 */ + 2828 "00011100" // /* MW 2 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2841 "00101001" // /* MW 3 */ + 2842 "00011100" // /* MW 2 */ + 2843 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "00001001" // /* MW 3 */ + 2846 "00011100" // /* MW 2 */ + 2847 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00101110" // /* MW 3 */ + 2850 "00011100" // /* MW 2 */ + 2851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00001110" // /* MW 3 */ + 2854 "00011100" // /* MW 2 */ + 2855 "00000000" // /* MW 1 */ + 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2857 "00000000" // /* MW 1 */ + 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2859 "00000000" // /* MW 1 */ + 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2861 "00000000" // /* MW 1 */ + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2867 "00101001" // /* MW 3 */ + 2868 "00011100" // /* MW 2 */ + 2869 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001001" // /* MW 3 */ + 2872 "00011100" // /* MW 2 */ + 2873 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00001110" // /* MW 3 */ + 2876 "00000100" // /* MW 2 */ + 2877 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101110" // /* MW 3 */ + 2880 "00010100" // /* MW 2 */ + 2881 "00000000" // /* MW 1 */ + 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2883 "00000000" // /* MW 1 */ + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2893 "00001001" // /* MW 3 */ + 2894 "00000100" // /* MW 2 */ + 2895 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2897 "00101001" // /* MW 3 */ + 2898 "00010100" // /* MW 2 */ + 2899 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 40 first + 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "10101010" // /* MW 3 */ + 2902 "11011101" // /* MW 2 */ + 2903 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 447 34 first + 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2905 "00101010" // /* MW 3 */ + 2906 "00011110" // /* MW 2 */ + 2907 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 448 34 first + 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2909 "11001010" // /* MW 3 */ + 2910 "10111101" // /* MW 2 */ + 2911 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "11111010" // /* MW 3 */ + 2914 "11111101" // /* MW 2 */ + 2915 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first + 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "01101010" // /* MW 3 */ + 2918 "00001010" // /* MW 2 */ + 2919 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 20 first + 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2921 "11101010" // /* MW 3 */ + 2922 "10101100" // /* MW 2 */ + 2923 "00000010" // /* MW 1 */ + 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2925 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first + 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2927 "00011101" // /* MW 3 */ + 2928 "01000010" // /* MW 2 */ + 2929 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first +.src_ref 2 "conv2d_bf16_params.h" 462 7 first + 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2931 "00000001" // /* MW 5 */ + 2932 "00110001" // /* MW 4 */ + 2933 "11111001" // /* MW 3 */ + 2934 "00100000" // /* MW 2 */ + 2935 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "01011101" // /* MW 3 */ + 2938 "10100100" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2941 "01000111" // /* MW 3 */ + 2942 "11110110" // /* MW 2 */ + 2943 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2945 "00111001" // /* MW 5 */ + 2946 "10110111" // /* MW 4 */ + 2947 "01000000" // /* MW 3 */ + 2948 "01001010" // /* MW 2 */ + 2949 "11000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2951 "00100010" // /* MW 3 */ + 2952 "01111011" // /* MW 2 */ + 2953 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first + 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2955 "01100111" // /* MW 3 */ + 2956 "11001100" // /* MW 2 */ + 2957 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 + 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00000100" // /* MW 3 */ + 2960 "10110111" // /* MW 2 */ + 2961 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 first + 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2963 "01000001" // /* MW 5 */ + 2964 "10111011" // /* MW 4 */ + 2965 "10111100" // /* MW 3 */ + 2966 "11101011" // /* MW 2 */ + 2967 "01111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first + 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2969 "00000100" // /* MW 5 */ + 2970 "10011011" // /* MW 4 */ + 2971 "10110011" // /* MW 3 */ + 2972 "10111110" // /* MW 2 */ + 2973 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first + 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 2975 "00000001" // /* MW 5 */ + 2976 "01000000" // /* MW 4 */ + 2977 "11111000" // /* MW 3 */ + 2978 "00000101" // /* MW 2 */ + 2979 "11001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first +.delay_slot + 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2981 "01000111" // /* MW 3 */ + 2982 "10110110" // /* MW 2 */ + 2983 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first +.delay_slot + 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2985 "01000100" // /* MW 3 */ + 2986 "01110001" // /* MW 2 */ + 2987 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.delay_slot + 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "01011101" // /* MW 3 */ + 2990 "11111100" // /* MW 2 */ + 2991 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 first +.delay_slot + 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2993 "01001101" // /* MW 3 */ + 2994 "11101000" // /* MW 2 */ + 2995 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.delay_slot + 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2997 "00110010" // /* MW 3 */ + 2998 "10001100" // /* MW 2 */ + 2999 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 + 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 3001 "00000001" // /* MW 5 */ + 3002 "01000000" // /* MW 4 */ + 3003 "11111000" // /* MW 3 */ + 3004 "00000101" // /* MW 2 */ + 3005 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */ + 3017 "00100000" // /* MW 9 */ + 3018 "00000000" // /* MW 8 */ + 3019 "00000000" // /* MW 7 */ + 3020 "10000100" // /* MW 6 */ + 3021 "00000001" // /* MW 5 */ + 3022 "00000000" // /* MW 4 */ + 3023 "00000000" // /* MW 3 */ + 3024 "00101111" // /* MW 2 */ + 3025 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3027 "01011000" // /* MW 9 */ + 3028 "00001100" // /* MW 8 */ + 3029 "10001000" // /* MW 7 */ + 3030 "10101011" // /* MW 6 */ + 3031 "01010111" // /* MW 5 */ + 3032 "00111110" // /* MW 4 */ + 3033 "00000000" // /* MW 3 */ + 3034 "00011010" // /* MW 2 */ + 3035 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3037 "01000001" // /* MW 5 */ + 3038 "00100000" // /* MW 4 */ + 3039 "00100001" // /* MW 3 */ + 3040 "01000010" // /* MW 2 */ + 3041 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.delay_slot + 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "00001101" // /* MW 3 */ + 3044 "00011010" // /* MW 2 */ + 3045 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.delay_slot + 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3047 "00111101" // /* MW 3 */ + 3048 "00001110" // /* MW 2 */ + 3049 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3051 "11100010" // /* MW 5 */ + 3052 "10010001" // /* MW 4 */ + 3053 "11111111" // /* MW 3 */ + 3054 "00101100" // /* MW 2 */ + 3055 "00000000" // /* MW 1 */ +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3057 "01011000" // /* MW 11 */ + 3058 "11111100" // /* MW 10 */ + 3059 "10001111" // /* MW 9 */ + 3060 "10001000" // /* MW 8 */ + 3061 "01010000" // /* MW 7 */ + 3062 "00000001" // /* MW 6 */ + 3063 "00001011" // /* MW 5 */ + 3064 "10000010" // /* MW 4 */ + 3065 "10000001" // /* MW 3 */ + 3066 "00000010" // /* MW 2 */ + 3067 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3069 "01011000" // /* MW 9 */ + 3070 "00001100" // /* MW 8 */ + 3071 "10001000" // /* MW 7 */ + 3072 "00001011" // /* MW 6 */ + 3073 "10100000" // /* MW 5 */ + 3074 "00000001" // /* MW 4 */ + 3075 "11100000" // /* MW 3 */ + 3076 "00011000" // /* MW 2 */ + 3077 "00100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3079 "01011000" // /* MW 9 */ + 3080 "00000001" // /* MW 8 */ + 3081 "11101000" // /* MW 7 */ + 3082 "10101001" // /* MW 6 */ + 3083 "01010111" // /* MW 5 */ + 3084 "00111110" // /* MW 4 */ + 3085 "00000000" // /* MW 3 */ + 3086 "00000010" // /* MW 2 */ + 3087 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 87 + 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3089 "00000000" // /* MW 15 */ + 3090 "00000000" // /* MW 14 */ + 3091 "01011000" // /* MW 13 */ + 3092 "00000011" // /* MW 12 */ + 3093 "10101000" // /* MW 11 */ + 3094 "11101001" // /* MW 10 */ + 3095 "01110001" // /* MW 9 */ + 3096 "00000000" // /* MW 8 */ + 3097 "01011011" // /* MW 7 */ + 3098 "00000001" // /* MW 6 */ + 3099 "00100000" // /* MW 5 */ + 3100 "00000000" // /* MW 4 */ + 3101 "11110000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.src_ref 2 "conv2d_bf16_params.h" 495 68 first +.src_ref 2 "conv2d_bf16_params.h" 495 112 +.src_ref 2 "conv2d_bf16_params.h" 682 38 + 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00111100" // /* MW 8 */ + 3107 "00000000" // /* MW 7 */ + 3108 "00111100" // /* MW 6 */ + 3109 "10110011" // /* MW 5 */ + 3110 "00011011" // /* MW 4 */ + 3111 "01010000" // /* MW 3 */ + 3112 "11000101" // /* MW 2 */ + 3113 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 first +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 495 112 + 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01011000" // /* MW 9 */ + 3116 "11001101" // /* MW 8 */ + 3117 "10000111" // /* MW 7 */ + 3118 "00010010" // /* MW 6 */ + 3119 "00101101" // /* MW 5 */ + 3120 "00000011" // /* MW 4 */ + 3121 "01010000" // /* MW 3 */ + 3122 "00000101" // /* MW 2 */ + 3123 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 first +.src_ref 2 "conv2d_bf16_params.h" 496 68 +.src_ref 2 "conv2d_bf16_params.h" 504 35 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 578 47 + 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3125 "01011000" // /* MW 9 */ + 3126 "00110111" // /* MW 8 */ + 3127 "10000000" // /* MW 7 */ + 3128 "10010001" // /* MW 6 */ + 3129 "11011010" // /* MW 5 */ + 3130 "00111011" // /* MW 4 */ + 3131 "00000000" // /* MW 3 */ + 3132 "01010111" // /* MW 2 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.src_ref 2 "conv2d_bf16_params.h" 504 45 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 +.src_ref 2 "conv2d_bf16_params.h" 519 42 +.src_ref 2 "conv2d_bf16_params.h" 700 34 + 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3135 "01011000" // /* MW 9 */ + 3136 "10111100" // /* MW 8 */ + 3137 "00000111" // /* MW 7 */ + 3138 "00111101" // /* MW 6 */ + 3139 "10110000" // /* MW 5 */ + 3140 "00101011" // /* MW 4 */ + 3141 "00000000" // /* MW 3 */ + 3142 "00000011" // /* MW 2 */ + 3143 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 492 25 first +.src_ref 2 "conv2d_bf16_params.h" 497 46 +.src_ref 2 "conv2d_bf16_params.h" 509 50 + 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3145 "01011000" // /* MW 9 */ + 3146 "01110000" // /* MW 8 */ + 3147 "10000000" // /* MW 7 */ + 3148 "01101100" // /* MW 6 */ + 3149 "01101100" // /* MW 5 */ + 3150 "00011111" // /* MW 4 */ + 3151 "00000000" // /* MW 3 */ + 3152 "00010000" // /* MW 2 */ + 3153 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 520 34 first + 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3155 "01011101" // /* MW 5 */ + 3156 "00011110" // /* MW 4 */ + 3157 "00001000" // /* MW 3 */ + 3158 "10010010" // /* MW 2 */ + 3159 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 +.src_ref 2 "conv2d_bf16_params.h" 520 48 + 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3161 "01011001" // /* MW 9 */ + 3162 "00110001" // /* MW 8 */ + 3163 "10000000" // /* MW 7 */ + 3164 "01101111" // /* MW 6 */ + 3165 "01100001" // /* MW 5 */ + 3166 "00101101" // /* MW 4 */ + 3167 "10110000" // /* MW 3 */ + 3168 "01011010" // /* MW 2 */ + 3169 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 +.src_ref 2 "conv2d_bf16_params.h" 507 42 first + 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3171 "00000101" // /* MW 5 */ + 3172 "00011111" // /* MW 4 */ + 3173 "00111100" // /* MW 3 */ + 3174 "10111010" // /* MW 2 */ + 3175 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 99 first + 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3177 "00010001" // /* MW 3 */ + 3178 "11000010" // /* MW 2 */ + 3179 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 610 64 +.src_ref 2 "conv2d_bf16_params.h" 709 96 + 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3181 "00011101" // /* MW 5 */ + 3182 "10100000" // /* MW 4 */ + 3183 "11110000" // /* MW 3 */ + 3184 "11000011" // /* MW 2 */ + 3185 "10001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first + 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3187 "00100001" // /* MW 3 */ + 3188 "10100011" // /* MW 2 */ + 3189 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 first + 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00011101" // /* MW 3 */ + 3192 "11111110" // /* MW 2 */ + 3193 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 506 48 +.src_ref 2 "conv2d_bf16_params.h" 519 42 first + 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3195 "01011001" // /* MW 9 */ + 3196 "01010111" // /* MW 8 */ + 3197 "10000000" // /* MW 7 */ + 3198 "11101110" // /* MW 6 */ + 3199 "11110001" // /* MW 5 */ + 3200 "00111011" // /* MW 4 */ + 3201 "00110000" // /* MW 3 */ + 3202 "01111110" // /* MW 2 */ + 3203 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 68 first +.src_ref 2 "conv2d_bf16_params.h" 504 35 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 + 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3205 "01011000" // /* MW 9 */ + 3206 "10110010" // /* MW 8 */ + 3207 "10000111" // /* MW 7 */ + 3208 "00111101" // /* MW 6 */ + 3209 "00110000" // /* MW 5 */ + 3210 "00101111" // /* MW 4 */ + 3211 "01010000" // /* MW 3 */ + 3212 "01010101" // /* MW 2 */ + 3213 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3215 "01111011" // /* MW 5 */ + 3216 "11001100" // /* MW 4 */ + 3217 "10111001" // /* MW 3 */ + 3218 "01001110" // /* MW 2 */ + 3219 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 first +.src_ref 2 "conv2d_bf16_params.h" 520 19 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3221 "01011000" // /* MW 9 */ + 3222 "11110110" // /* MW 8 */ + 3223 "00000000" // /* MW 7 */ + 3224 "00101101" // /* MW 6 */ + 3225 "01101011" // /* MW 5 */ + 3226 "00111111" // /* MW 4 */ + 3227 "11100000" // /* MW 3 */ + 3228 "01010100" // /* MW 2 */ + 3229 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 46 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3231 "01011000" // /* MW 9 */ + 3232 "01010000" // /* MW 8 */ + 3233 "10000111" // /* MW 7 */ + 3234 "00010000" // /* MW 6 */ + 3235 "00111000" // /* MW 5 */ + 3236 "00100111" // /* MW 4 */ + 3237 "01010000" // /* MW 3 */ + 3238 "01000011" // /* MW 2 */ + 3239 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3241 "01100111" // /* MW 3 */ + 3242 "11111110" // /* MW 2 */ + 3243 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "01100111" // /* MW 3 */ + 3246 "11100000" // /* MW 2 */ + 3247 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "00000101" // /* MW 3 */ + 3250 "11110111" // /* MW 2 */ + 3251 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3253 "01010100" // /* MW 3 */ + 3254 "11101011" // /* MW 2 */ + 3255 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3257 "01100001" // /* MW 5 */ + 3258 "10100000" // /* MW 4 */ + 3259 "11011000" // /* MW 3 */ + 3260 "10100011" // /* MW 2 */ + 3261 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 first +.src_ref 2 "conv2d_bf16_params.h" 507 34 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3263 "01001001" // /* MW 9 */ + 3264 "10000000" // /* MW 8 */ + 3265 "11001111" // /* MW 7 */ + 3266 "01101111" // /* MW 6 */ + 3267 "00101001" // /* MW 5 */ + 3268 "00011111" // /* MW 4 */ + 3269 "10110000" // /* MW 3 */ + 3270 "01000010" // /* MW 2 */ + 3271 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 first + 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3273 "00111011" // /* MW 5 */ + 3274 "01000110" // /* MW 4 */ + 3275 "00111111" // /* MW 3 */ + 3276 "11101010" // /* MW 2 */ + 3277 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3279 "01010000" // /* MW 7 */ + 3280 "10101000" // /* MW 6 */ + 3281 "00000000" // /* MW 5 */ + 3282 "00000010" // /* MW 4 */ + 3283 "00110000" // /* MW 3 */ + 3284 "01101010" // /* MW 2 */ + 3285 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 first +.src_ref 2 "conv2d_bf16_params.h" 509 19 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3287 "01111000" // /* MW 11 */ + 3288 "11001110" // /* MW 10 */ + 3289 "00001101" // /* MW 9 */ + 3290 "00101100" // /* MW 8 */ + 3291 "10110000" // /* MW 7 */ + 3292 "10100111" // /* MW 6 */ + 3293 "11110101" // /* MW 5 */ + 3294 "11100111" // /* MW 4 */ + 3295 "01010111" // /* MW 3 */ + 3296 "01001001" // /* MW 2 */ + 3297 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 19 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3299 "00010101" // /* MW 3 */ + 3300 "11100011" // /* MW 2 */ + 3301 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3303 "10000001" // /* MW 3 */ + 3304 "10110111" // /* MW 2 */ + 3305 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 47 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3307 "10010000" // /* MW 3 */ + 3308 "10110000" // /* MW 2 */ + 3309 "00010100" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 57 first + 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3317 "00100001" // /* MW 3 */ + 3318 "11100101" // /* MW 2 */ + 3319 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 + 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3321 "01010001" // /* MW 3 */ + 3322 "11001010" // /* MW 2 */ + 3323 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 48 first + 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3325 "01001010" // /* MW 3 */ + 3326 "10101010" // /* MW 2 */ + 3327 "00000010" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 62 + 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3341 "11100001" // /* MW 3 */ + 3342 "10100100" // /* MW 2 */ + 3343 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 + 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3345 "10111110" // /* MW 3 */ + 3346 "10100101" // /* MW 2 */ + 3347 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 first + 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3349 "00101101" // /* MW 3 */ + 3350 "10100100" // /* MW 2 */ + 3351 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3353 "00000000" // /* MW 5 */ + 3354 "10100000" // /* MW 4 */ + 3355 "00001101" // /* MW 3 */ + 3356 "00000001" // /* MW 2 */ + 3357 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3359 "00100000" // /* MW 3 */ + 3360 "11100101" // /* MW 2 */ + 3361 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3363 "00000000" // /* MW 5 */ + 3364 "10100000" // /* MW 4 */ + 3365 "00001101" // /* MW 3 */ + 3366 "11111111" // /* MW 2 */ + 3367 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 642 99 + 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3369 "11000001" // /* MW 5 */ + 3370 "00111111" // /* MW 4 */ + 3371 "10011001" // /* MW 3 */ + 3372 "11100100" // /* MW 2 */ + 3373 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 19 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "11100001" // /* MW 5 */ + 3376 "10111111" // /* MW 4 */ + 3377 "10111000" // /* MW 3 */ + 3378 "11100010" // /* MW 2 */ + 3379 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 512 64 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 first + 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3381 "00111011" // /* MW 5 */ + 3382 "11001110" // /* MW 4 */ + 3383 "00111001" // /* MW 3 */ + 3384 "11101110" // /* MW 2 */ + 3385 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3387 "00110001" // /* MW 3 */ + 3388 "10110101" // /* MW 2 */ + 3389 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3391 "10101101" // /* MW 3 */ + 3392 "00101001" // /* MW 2 */ + 3393 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 520 19 first + 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3395 "01100101" // /* MW 3 */ + 3396 "10110101" // /* MW 2 */ + 3397 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 36 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 first + 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3399 "00100000" // /* MW 5 */ + 3400 "01101001" // /* MW 4 */ + 3401 "00111111" // /* MW 3 */ + 3402 "01101010" // /* MW 2 */ + 3403 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 65 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 first +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3405 "10101000" // /* MW 9 */ + 3406 "10101000" // /* MW 8 */ + 3407 "11001110" // /* MW 7 */ + 3408 "01101111" // /* MW 6 */ + 3409 "01001001" // /* MW 5 */ + 3410 "00110111" // /* MW 4 */ + 3411 "01010000" // /* MW 3 */ + 3412 "01100101" // /* MW 2 */ + 3413 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3415 "11111001" // /* MW 5 */ + 3416 "10100011" // /* MW 4 */ + 3417 "10111000" // /* MW 3 */ + 3418 "10100011" // /* MW 2 */ + 3419 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 45 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3421 "00011111" // /* MW 5 */ + 3422 "01101011" // /* MW 4 */ + 3423 "11101101" // /* MW 3 */ + 3424 "01100100" // /* MW 2 */ + 3425 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3427 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3429 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 48 first +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3435 "11001010" // /* MW 5 */ + 3436 "10110101" // /* MW 4 */ + 3437 "10111101" // /* MW 3 */ + 3438 "01011111" // /* MW 2 */ + 3439 "10000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3441 "00000001" // /* MW 5 */ + 3442 "01000000" // /* MW 4 */ + 3443 "11111000" // /* MW 3 */ + 3444 "00000110" // /* MW 2 */ + 3445 "11111000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 76 first +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3447 "11110010" // /* MW 5 */ + 3448 "10111011" // /* MW 4 */ + 3449 "11101101" // /* MW 3 */ + 3450 "01000001" // /* MW 2 */ + 3451 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3453 "01011101" // /* MW 3 */ + 3454 "11101011" // /* MW 2 */ + 3455 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 first +.delay_slot + 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3457 "00010100" // /* MW 3 */ + 3458 "01100011" // /* MW 2 */ + 3459 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.src_ref 2 "conv2d_bf16_params.h" 539 139 first +.src_ref 2 "conv2d_bf16_params.h" 555 59 +.src_ref 2 "conv2d_bf16_params.h" 559 59 +.src_ref 2 "conv2d_bf16_params.h" 700 17 +.delay_slot + 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3461 "01011001" // /* MW 9 */ + 3462 "00000001" // /* MW 8 */ + 3463 "00101000" // /* MW 7 */ + 3464 "00111110" // /* MW 6 */ + 3465 "10111110" // /* MW 5 */ + 3466 "00001101" // /* MW 4 */ + 3467 "00110000" // /* MW 3 */ + 3468 "01000110" // /* MW 2 */ + 3469 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3471 "10011100" // /* MW 3 */ + 3472 "10011011" // /* MW 2 */ + 3473 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3475 "10010001" // /* MW 3 */ + 3476 "11100011" // /* MW 2 */ + 3477 "00000111" // /* MW 1 */ + 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3479 "00000000" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3491 "00000001" // /* MW 5 */ + 3492 "01000000" // /* MW 4 */ + 3493 "11111000" // /* MW 3 */ + 3494 "00000110" // /* MW 2 */ + 3495 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3507 "01011000" // /* MW 9 */ + 3508 "01000000" // /* MW 8 */ + 3509 "00101000" // /* MW 7 */ + 3510 "10001011" // /* MW 6 */ + 3511 "00010000" // /* MW 5 */ + 3512 "00000001" // /* MW 4 */ + 3513 "00000000" // /* MW 3 */ + 3514 "10111100" // /* MW 2 */ + 3515 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "11010010" // /* MW 3 */ + 3518 "01111110" // /* MW 2 */ + 3519 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3521 "01100111" // /* MW 3 */ + 3522 "01110110" // /* MW 2 */ + 3523 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3525 "00000001" // /* MW 5 */ + 3526 "10100000" // /* MW 4 */ + 3527 "01001111" // /* MW 3 */ + 3528 "00111000" // /* MW 2 */ + 3529 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 46 + 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3531 "01010000" // /* MW 3 */ + 3532 "00110010" // /* MW 2 */ + 3533 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 44 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3535 "11101111" // /* MW 3 */ + 3536 "01111101" // /* MW 2 */ + 3537 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3539 "00111001" // /* MW 5 */ + 3540 "11000100" // /* MW 4 */ + 3541 "01011101" // /* MW 3 */ + 3542 "11100011" // /* MW 2 */ + 3543 "11001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3545 "10000010" // /* MW 3 */ + 3546 "11100011" // /* MW 2 */ + 3547 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 79 + 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3549 "11101111" // /* MW 3 */ + 3550 "01100011" // /* MW 2 */ + 3551 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "11000001" // /* MW 3 */ + 3554 "11111001" // /* MW 2 */ + 3555 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3557 "11001110" // /* MW 3 */ + 3558 "01100011" // /* MW 2 */ + 3559 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 55 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3561 "00011100" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00000000" // /* MW 5 */ + 3564 "10000001" // /* MW 4 */ + 3565 "00010100" // /* MW 3 */ + 3566 "00100011" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01111000" // /* MW 9 */ + 3570 "00001110" // /* MW 8 */ + 3571 "01110000" // /* MW 7 */ + 3572 "11101011" // /* MW 6 */ + 3573 "11000111" // /* MW 5 */ + 3574 "00111111" // /* MW 4 */ + 3575 "00000000" // /* MW 3 */ + 3576 "00011001" // /* MW 2 */ + 3577 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3579 "11000010" // /* MW 3 */ + 3580 "01111111" // /* MW 2 */ + 3581 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 34 first +.src_ref 2 "conv2d_bf16_params.h" 641 32 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3583 "10101000" // /* MW 9 */ + 3584 "01110100" // /* MW 8 */ + 3585 "01001111" // /* MW 7 */ + 3586 "10000011" // /* MW 6 */ + 3587 "00000100" // /* MW 5 */ + 3588 "00100001" // /* MW 4 */ + 3589 "00100000" // /* MW 3 */ + 3590 "01101110" // /* MW 2 */ + 3591 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 61 first +.src_ref 2 "conv2d_bf16_params.h" 640 16 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3593 "01011000" // /* MW 9 */ + 3594 "00001001" // /* MW 8 */ + 3595 "10101000" // /* MW 7 */ + 3596 "10000011" // /* MW 6 */ + 3597 "01000100" // /* MW 5 */ + 3598 "00101001" // /* MW 4 */ + 3599 "00000000" // /* MW 3 */ + 3600 "00011110" // /* MW 2 */ + 3601 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3603 "11100010" // /* MW 3 */ + 3604 "01110011" // /* MW 2 */ + 3605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 47 first + 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3607 "10001000" // /* MW 3 */ + 3608 "11111001" // /* MW 2 */ + 3609 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 640 16 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00111101" // /* MW 3 */ + 3612 "01111011" // /* MW 2 */ + 3613 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3615 "00010000" // /* MW 9 */ + 3616 "00000100" // /* MW 8 */ + 3617 "00001010" // /* MW 7 */ + 3618 "00000011" // /* MW 6 */ + 3619 "00000000" // /* MW 5 */ + 3620 "00000000" // /* MW 4 */ + 3621 "00100000" // /* MW 3 */ + 3622 "11011110" // /* MW 2 */ + 3623 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 641 44 first +.src_ref 2 "conv2d_bf16_params.h" 642 45 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "11111111" // /* MW 5 */ + 3626 "00111010" // /* MW 4 */ + 3627 "10111111" // /* MW 3 */ + 3628 "11100111" // /* MW 2 */ + 3629 "11001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "11100110" // /* MW 3 */ + 3632 "11001111" // /* MW 2 */ + 3633 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 55 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3635 "00101001" // /* MW 5 */ + 3636 "10101000" // /* MW 4 */ + 3637 "00001011" // /* MW 3 */ + 3638 "11010010" // /* MW 2 */ + 3639 "10110100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3641 "00000001" // /* MW 5 */ + 3642 "00100001" // /* MW 4 */ + 3643 "01001101" // /* MW 3 */ + 3644 "10110000" // /* MW 2 */ + 3645 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3647 "00111001" // /* MW 5 */ + 3648 "11000010" // /* MW 4 */ + 3649 "00011101" // /* MW 3 */ + 3650 "10110101" // /* MW 2 */ + 3651 "00110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 99 first + 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3653 "00100100" // /* MW 3 */ + 3654 "11001111" // /* MW 2 */ + 3655 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3657 "01000001" // /* MW 5 */ + 3658 "10100110" // /* MW 4 */ + 3659 "01001101" // /* MW 3 */ + 3660 "11011110" // /* MW 2 */ + 3661 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3663 "01111101" // /* MW 5 */ + 3664 "00100000" // /* MW 4 */ + 3665 "01001001" // /* MW 3 */ + 3666 "00001000" // /* MW 2 */ + 3667 "00101001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 first + 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3669 "00100100" // /* MW 3 */ + 3670 "11101111" // /* MW 2 */ + 3671 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 15 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3673 "01011000" // /* MW 9 */ + 3674 "01110000" // /* MW 8 */ + 3675 "01001111" // /* MW 7 */ + 3676 "01101110" // /* MW 6 */ + 3677 "01000010" // /* MW 5 */ + 3678 "00100000" // /* MW 4 */ + 3679 "00000000" // /* MW 3 */ + 3680 "00011110" // /* MW 2 */ + 3681 "11011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3683 "00100010" // /* MW 3 */ + 3684 "10111101" // /* MW 2 */ + 3685 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 85 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3687 "01011000" // /* MW 9 */ + 3688 "00100000" // /* MW 8 */ + 3689 "00001001" // /* MW 7 */ + 3690 "11111110" // /* MW 6 */ + 3691 "10101001" // /* MW 5 */ + 3692 "00101111" // /* MW 4 */ + 3693 "00000000" // /* MW 3 */ + 3694 "00000101" // /* MW 2 */ + 3695 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3697 "01010010" // /* MW 3 */ + 3698 "00100000" // /* MW 2 */ + 3699 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 559 59 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3701 "11110010" // /* MW 5 */ + 3702 "10111101" // /* MW 4 */ + 3703 "11111101" // /* MW 3 */ + 3704 "00001001" // /* MW 2 */ + 3705 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 41 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3707 "00111001" // /* MW 5 */ + 3708 "11000100" // /* MW 4 */ + 3709 "10111101" // /* MW 3 */ + 3710 "00111111" // /* MW 2 */ + 3711 "10000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 117 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3713 "01011111" // /* MW 5 */ + 3714 "01101011" // /* MW 4 */ + 3715 "10110111" // /* MW 3 */ + 3716 "11101110" // /* MW 2 */ + 3717 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 700 34 first + 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3719 "00110010" // /* MW 3 */ + 3720 "10000100" // /* MW 2 */ + 3721 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 52 first + 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3723 "00001100" // /* MW 3 */ + 3724 "01111110" // /* MW 2 */ + 3725 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 92 first + 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3727 "10001111" // /* MW 3 */ + 3728 "00110001" // /* MW 2 */ + 3729 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 36 first + 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3731 "11000101" // /* MW 3 */ + 3732 "11110111" // /* MW 2 */ + 3733 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 64 first +.src_ref 2 "conv2d_bf16_params.h" 611 47 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 629 82 + 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3735 "01011000" // /* MW 11 */ + 3736 "00000000" // /* MW 10 */ + 3737 "10001001" // /* MW 9 */ + 3738 "11101110" // /* MW 8 */ + 3739 "11000000" // /* MW 7 */ + 3740 "10110111" // /* MW 6 */ + 3741 "10010101" // /* MW 5 */ + 3742 "11101110" // /* MW 4 */ + 3743 "00000111" // /* MW 3 */ + 3744 "00000011" // /* MW 2 */ + 3745 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 649 41 + 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3747 "00111001" // /* MW 5 */ + 3748 "10110111" // /* MW 4 */ + 3749 "01000000" // /* MW 3 */ + 3750 "00101000" // /* MW 2 */ + 3751 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3753 "00001100" // /* MW 5 */ + 3754 "10101100" // /* MW 4 */ + 3755 "00001111" // /* MW 3 */ + 3756 "00000000" // /* MW 2 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 60 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first + 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3759 "11001001" // /* MW 9 */ + 3760 "00111111" // /* MW 8 */ + 3761 "10001001" // /* MW 7 */ + 3762 "00111100" // /* MW 6 */ + 3763 "10110000" // /* MW 5 */ + 3764 "00011111" // /* MW 4 */ + 3765 "10110000" // /* MW 3 */ + 3766 "00010010" // /* MW 2 */ + 3767 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 53 +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 555 59 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3769 "11001000" // /* MW 11 */ + 3770 "01111111" // /* MW 10 */ + 3771 "11001100" // /* MW 9 */ + 3772 "10010010" // /* MW 8 */ + 3773 "11111111" // /* MW 7 */ + 3774 "10101101" // /* MW 6 */ + 3775 "10010001" // /* MW 5 */ + 3776 "00011100" // /* MW 4 */ + 3777 "10000010" // /* MW 3 */ + 3778 "10001100" // /* MW 2 */ + 3779 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 240 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3781 "01111001" // /* MW 9 */ + 3782 "10001110" // /* MW 8 */ + 3783 "01110000" // /* MW 7 */ + 3784 "11101111" // /* MW 6 */ + 3785 "01010111" // /* MW 5 */ + 3786 "00101011" // /* MW 4 */ + 3787 "00110000" // /* MW 3 */ + 3788 "01011010" // /* MW 2 */ + 3789 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 53 first +.src_ref 2 "conv2d_bf16_params.h" 559 53 +.src_ref 2 "conv2d_bf16_params.h" 621 140 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3791 "01011000" // /* MW 11 */ + 3792 "01011000" // /* MW 10 */ + 3793 "00000000" // /* MW 9 */ + 3794 "00001110" // /* MW 8 */ + 3795 "01001110" // /* MW 7 */ + 3796 "10101001" // /* MW 6 */ + 3797 "01010001" // /* MW 5 */ + 3798 "00011111" // /* MW 4 */ + 3799 "00000010" // /* MW 3 */ + 3800 "11011001" // /* MW 2 */ + 3801 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 53 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3803 "00100100" // /* MW 5 */ + 3804 "11100011" // /* MW 4 */ + 3805 "00111111" // /* MW 3 */ + 3806 "01100010" // /* MW 2 */ + 3807 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 47 first +.src_ref 2 "conv2d_bf16_params.h" 621 222 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3809 "01111000" // /* MW 11 */ + 3810 "10010000" // /* MW 10 */ + 3811 "01101001" // /* MW 9 */ + 3812 "00001111" // /* MW 8 */ + 3813 "11001110" // /* MW 7 */ + 3814 "10101011" // /* MW 6 */ + 3815 "10010001" // /* MW 5 */ + 3816 "11101111" // /* MW 4 */ + 3817 "00100010" // /* MW 3 */ + 3818 "01101110" // /* MW 2 */ + 3819 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 661 61 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3821 "11001000" // /* MW 9 */ + 3822 "11111111" // /* MW 8 */ + 3823 "10001100" // /* MW 7 */ + 3824 "00010010" // /* MW 6 */ + 3825 "11001110" // /* MW 5 */ + 3826 "00101001" // /* MW 4 */ + 3827 "00000000" // /* MW 3 */ + 3828 "11110011" // /* MW 2 */ + 3829 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 710 60 +.src_ref 2 "conv2d_bf16_params.h" 710 65 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3831 "01111000" // /* MW 9 */ + 3832 "10001110" // /* MW 8 */ + 3833 "01110000" // /* MW 7 */ + 3834 "01110011" // /* MW 6 */ + 3835 "11101010" // /* MW 5 */ + 3836 "00111011" // /* MW 4 */ + 3837 "00000000" // /* MW 3 */ + 3838 "00011101" // /* MW 2 */ + 3839 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000100" // /* MW 5 */ + 3842 "11001010" // /* MW 4 */ + 3843 "00101110" // /* MW 3 */ + 3844 "11101110" // /* MW 2 */ + 3845 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 649 41 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111000" // /* MW 9 */ + 3848 "10010000" // /* MW 8 */ + 3849 "01101001" // /* MW 7 */ + 3850 "10010011" // /* MW 6 */ + 3851 "00111001" // /* MW 5 */ + 3852 "00111111" // /* MW 4 */ + 3853 "00000000" // /* MW 3 */ + 3854 "00011111" // /* MW 2 */ + 3855 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00100010" // /* MW 3 */ + 3858 "11000100" // /* MW 2 */ + 3859 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 82 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3861 "01010001" // /* MW 3 */ + 3862 "11101011" // /* MW 2 */ + 3863 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 611 47 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3865 "01011001" // /* MW 9 */ + 3866 "11000000" // /* MW 8 */ + 3867 "01101111" // /* MW 7 */ + 3868 "10010000" // /* MW 6 */ + 3869 "00100111" // /* MW 5 */ + 3870 "00000100" // /* MW 4 */ + 3871 "00110000" // /* MW 3 */ + 3872 "10001110" // /* MW 2 */ + 3873 "01000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3875 "00110010" // /* MW 3 */ + 3876 "00111000" // /* MW 2 */ + 3877 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 643 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3879 "01111111" // /* MW 3 */ + 3880 "11111110" // /* MW 2 */ + 3881 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3883 "01100100" // /* MW 5 */ + 3884 "00001100" // /* MW 4 */ + 3885 "00101110" // /* MW 3 */ + 3886 "11000110" // /* MW 2 */ + 3887 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 47 first +.src_ref 2 "conv2d_bf16_params.h" 629 45 +.src_ref 2 "conv2d_bf16_params.h" 684 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3889 "01011001" // /* MW 9 */ + 3890 "00101000" // /* MW 8 */ + 3891 "10000000" // /* MW 7 */ + 3892 "01111100" // /* MW 6 */ + 3893 "00101001" // /* MW 5 */ + 3894 "00110101" // /* MW 4 */ + 3895 "00110000" // /* MW 3 */ + 3896 "10001110" // /* MW 2 */ + 3897 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 45 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3899 "11100100" // /* MW 5 */ + 3900 "00001101" // /* MW 4 */ + 3901 "00110001" // /* MW 3 */ + 3902 "01010110" // /* MW 2 */ + 3903 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 644 22 +.src_ref 2 "conv2d_bf16_params.h" 700 17 first +.src_ref 2 "conv2d_bf16_params.h" 705 50 +.src_ref 2 "conv2d_bf16_params.h" 705 61 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "10101000" // /* MW 9 */ + 3906 "11111100" // /* MW 8 */ + 3907 "10101001" // /* MW 7 */ + 3908 "11111110" // /* MW 6 */ + 3909 "00111000" // /* MW 5 */ + 3910 "00000110" // /* MW 4 */ + 3911 "00100000" // /* MW 3 */ + 3912 "00000010" // /* MW 2 */ + 3913 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 38 first +.src_ref 2 "conv2d_bf16_params.h" 700 111 +.src_ref 2 "conv2d_bf16_params.h" 700 149 +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "00000110" // /* MW 9 */ + 3916 "00000110" // /* MW 8 */ + 3917 "00000101" // /* MW 7 */ + 3918 "10000000" // /* MW 6 */ + 3919 "00010001" // /* MW 5 */ + 3920 "00011111" // /* MW 4 */ + 3921 "00100010" // /* MW 3 */ + 3922 "11000110" // /* MW 2 */ + 3923 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 +.src_ref 2 "conv2d_bf16_params.h" 649 38 first +.src_ref 2 "conv2d_bf16_params.h" 674 24 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3925 "00010001" // /* MW 9 */ + 3926 "11111000" // /* MW 8 */ + 3927 "01101111" // /* MW 7 */ + 3928 "00111110" // /* MW 6 */ + 3929 "00000000" // /* MW 5 */ + 3930 "00000000" // /* MW 4 */ + 3931 "00110000" // /* MW 3 */ + 3932 "11001110" // /* MW 2 */ + 3933 "01001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 first +.src_ref 2 "conv2d_bf16_params.h" 662 61 +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3935 "11001001" // /* MW 9 */ + 3936 "10111111" // /* MW 8 */ + 3937 "01001011" // /* MW 7 */ + 3938 "10100100" // /* MW 6 */ + 3939 "01001001" // /* MW 5 */ + 3940 "00111111" // /* MW 4 */ + 3941 "00110000" // /* MW 3 */ + 3942 "11010010" // /* MW 2 */ + 3943 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 663 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3945 "10011100" // /* MW 5 */ + 3946 "01010110" // /* MW 4 */ + 3947 "00110001" // /* MW 3 */ + 3948 "11000110" // /* MW 2 */ + 3949 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first + 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3951 "10000001" // /* MW 5 */ + 3952 "01111010" // /* MW 4 */ + 3953 "00111111" // /* MW 3 */ + 3954 "10001010" // /* MW 2 */ + 3955 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3957 "11100011" // /* MW 5 */ + 3958 "01110011" // /* MW 4 */ + 3959 "00111000" // /* MW 3 */ + 3960 "11111010" // /* MW 2 */ + 3961 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3963 "01011001" // /* MW 9 */ + 3964 "00000000" // /* MW 8 */ + 3965 "01100000" // /* MW 7 */ + 3966 "00110000" // /* MW 6 */ + 3967 "11111000" // /* MW 5 */ + 3968 "00101101" // /* MW 4 */ + 3969 "00110000" // /* MW 3 */ + 3970 "11010110" // /* MW 2 */ + 3971 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3973 "11001001" // /* MW 9 */ + 3974 "01111111" // /* MW 8 */ + 3975 "00101100" // /* MW 7 */ + 3976 "01111110" // /* MW 6 */ + 3977 "00100000" // /* MW 5 */ + 3978 "00111110" // /* MW 4 */ + 3979 "00110000" // /* MW 3 */ + 3980 "10001100" // /* MW 2 */ + 3981 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 705 50 first +.src_ref 2 "conv2d_bf16_params.h" 705 61 first + 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3983 "00001100" // /* MW 5 */ + 3984 "10111000" // /* MW 4 */ + 3985 "00111000" // /* MW 3 */ + 3986 "10001100" // /* MW 2 */ + 3987 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 +.src_ref 2 "conv2d_bf16_params.h" 674 24 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 682 38 +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.src_ref 2 "conv2d_bf16_params.h" 720 50 + 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3989 "01011001" // /* MW 9 */ + 3990 "00000000" // /* MW 8 */ + 3991 "01001000" // /* MW 7 */ + 3992 "00100100" // /* MW 6 */ + 3993 "00000001" // /* MW 5 */ + 3994 "00100111" // /* MW 4 */ + 3995 "00110000" // /* MW 3 */ + 3996 "11011010" // /* MW 2 */ + 3997 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3999 "01111001" // /* MW 9 */ + 4000 "00001110" // /* MW 8 */ + 4001 "01110000" // /* MW 7 */ + 4002 "10001111" // /* MW 6 */ + 4003 "00011111" // /* MW 5 */ + 4004 "00000101" // /* MW 4 */ + 4005 "00110000" // /* MW 3 */ + 4006 "11110010" // /* MW 2 */ + 4007 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 707 61 first + 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4009 "11011111" // /* MW 5 */ + 4010 "10111001" // /* MW 4 */ + 4011 "00111011" // /* MW 3 */ + 4012 "10010010" // /* MW 2 */ + 4013 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 674 22 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4015 "01011001" // /* MW 9 */ + 4016 "00000110" // /* MW 8 */ + 4017 "00001000" // /* MW 7 */ + 4018 "10001100" // /* MW 6 */ + 4019 "00001111" // /* MW 5 */ + 4020 "00100001" // /* MW 4 */ + 4021 "00110000" // /* MW 3 */ + 4022 "11000110" // /* MW 2 */ + 4023 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4025 "01111000" // /* MW 11 */ + 4026 "10010000" // /* MW 10 */ + 4027 "01101001" // /* MW 9 */ + 4028 "00010011" // /* MW 8 */ + 4029 "00000000" // /* MW 7 */ + 4030 "10011011" // /* MW 6 */ + 4031 "00010001" // /* MW 5 */ + 4032 "00011110" // /* MW 4 */ + 4033 "00000010" // /* MW 3 */ + 4034 "00000000" // /* MW 2 */ + 4035 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4037 "10100100" // /* MW 5 */ + 4038 "00010100" // /* MW 4 */ + 4039 "00100000" // /* MW 3 */ + 4040 "00010110" // /* MW 2 */ + 4041 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 691 56 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4043 "10101111" // /* MW 3 */ + 4044 "01100011" // /* MW 2 */ + 4045 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 709 71 first + 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4047 "01011001" // /* MW 9 */ + 4048 "11001000" // /* MW 8 */ + 4049 "00000111" // /* MW 7 */ + 4050 "01101101" // /* MW 6 */ + 4051 "00001000" // /* MW 5 */ + 4052 "00000111" // /* MW 4 */ + 4053 "00110000" // /* MW 3 */ + 4054 "10001100" // /* MW 2 */ + 4055 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 706 23 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4057 "11001000" // /* MW 11 */ + 4058 "11000001" // /* MW 10 */ + 4059 "10101000" // /* MW 9 */ + 4060 "11101101" // /* MW 8 */ + 4061 "11110111" // /* MW 7 */ + 4062 "10100000" // /* MW 6 */ + 4063 "01100001" // /* MW 5 */ + 4064 "01001000" // /* MW 4 */ + 4065 "00000010" // /* MW 3 */ + 4066 "01100011" // /* MW 2 */ + 4067 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 682 38 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4069 "01111011" // /* MW 5 */ + 4070 "11000000" // /* MW 4 */ + 4071 "00110110" // /* MW 3 */ + 4072 "00001010" // /* MW 2 */ + 4073 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 first + 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4075 "01000001" // /* MW 5 */ + 4076 "10001110" // /* MW 4 */ + 4077 "00111000" // /* MW 3 */ + 4078 "11011010" // /* MW 2 */ + 4079 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 + 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4081 "10011100" // /* MW 5 */ + 4082 "11001000" // /* MW 4 */ + 4083 "00111000" // /* MW 3 */ + 4084 "11001010" // /* MW 2 */ + 4085 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4087 "11011011" // /* MW 5 */ + 4088 "10010100" // /* MW 4 */ + 4089 "00110010" // /* MW 3 */ + 4090 "10010010" // /* MW 2 */ + 4091 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 706 28 first + 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4093 "01011001" // /* MW 9 */ + 4094 "11111101" // /* MW 8 */ + 4095 "00001111" // /* MW 7 */ + 4096 "00000100" // /* MW 6 */ + 4097 "00111000" // /* MW 5 */ + 4098 "00011010" // /* MW 4 */ + 4099 "00110000" // /* MW 3 */ + 4100 "10001110" // /* MW 2 */ + 4101 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4103 "00001110" // /* MW 3 */ + 4104 "11000000" // /* MW 2 */ + 4105 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 707 66 first + 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4107 "00011111" // /* MW 5 */ + 4108 "00010000" // /* MW 4 */ + 4109 "00110111" // /* MW 3 */ + 4110 "11001010" // /* MW 2 */ + 4111 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 709 96 first + 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4113 "00111011" // /* MW 5 */ + 4114 "00001100" // /* MW 4 */ + 4115 "00110000" // /* MW 3 */ + 4116 "10001100" // /* MW 2 */ + 4117 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 709 90 + 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4119 "00110001" // /* MW 9 */ + 4120 "11000110" // /* MW 8 */ + 4121 "00000011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "01100001" // /* MW 5 */ + 4124 "00011100" // /* MW 4 */ + 4125 "00100010" // /* MW 3 */ + 4126 "10110110" // /* MW 2 */ + 4127 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 707 50 first +.src_ref 2 "conv2d_bf16_params.h" 708 59 +.src_ref 2 "conv2d_bf16_params.h" 710 60 first +.src_ref 2 "conv2d_bf16_params.h" 710 65 first + 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4129 "11001000" // /* MW 11 */ + 4130 "00111111" // /* MW 10 */ + 4131 "00101000" // /* MW 9 */ + 4132 "00110000" // /* MW 8 */ + 4133 "01110000" // /* MW 7 */ + 4134 "10111010" // /* MW 6 */ + 4135 "10010001" // /* MW 5 */ + 4136 "00011100" // /* MW 4 */ + 4137 "00100010" // /* MW 3 */ + 4138 "00111010" // /* MW 2 */ + 4139 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 708 48 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4141 "10101111" // /* MW 9 */ + 4142 "01000001" // /* MW 8 */ + 4143 "00000001" // /* MW 7 */ + 4144 "10000000" // /* MW 6 */ + 4145 "00110001" // /* MW 5 */ + 4146 "00011100" // /* MW 4 */ + 4147 "00100010" // /* MW 3 */ + 4148 "10111110" // /* MW 2 */ + 4149 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 709 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first + 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4151 "00000000" // /* MW 5 */ + 4152 "01010000" // /* MW 4 */ + 4153 "00110000" // /* MW 3 */ + 4154 "10001110" // /* MW 2 */ + 4155 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 710 50 first +.delay_slot + 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4157 "11110001" // /* MW 3 */ + 4158 "01011100" // /* MW 2 */ + 4159 "00001010" // /* MW 1 */ +.delay_slot + 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4161 "00010001" // /* MW 3 */ + 4162 "00011100" // /* MW 2 */ + 4163 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 first +.delay_slot + 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4165 "01010001" // /* MW 3 */ + 4166 "00011100" // /* MW 2 */ + 4167 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.delay_slot + 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "01010001" // /* MW 3 */ + 4170 "00000100" // /* MW 2 */ + 4171 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 720 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first +.delay_slot + 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4173 "01110001" // /* MW 9 */ + 4174 "00000000" // /* MW 8 */ + 4175 "00000000" // /* MW 7 */ + 4176 "00000000" // /* MW 6 */ + 4177 "11111110" // /* MW 5 */ + 4178 "00111111" // /* MW 4 */ + 4179 "00110000" // /* MW 3 */ + 4180 "10001010" // /* MW 2 */ +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + 4181 "01000010" // /* MW 1 */ +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 689 first +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 704 12 +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.function_start + 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4193 "01111000" // /* MW 11 */ + 4194 "01100000" // /* MW 10 */ + 4195 "00001010" // /* MW 9 */ + 4196 "00001000" // /* MW 8 */ + 4197 "10000000" // /* MW 7 */ + 4198 "00000001" // /* MW 6 */ + 4199 "10001011" // /* MW 5 */ + 4200 "10000100" // /* MW 4 */ + 4201 "10000010" // /* MW 3 */ + 4202 "00000011" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 526 11 +.src_ref 2 "conv2d_bf16.h" 698 28 first +.src_ref 2 "conv2d_bf16.h" 704 12 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 + 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4205 "01100000" // /* MW 13 */ + 4206 "00001001" // /* MW 12 */ + 4207 "00100000" // /* MW 11 */ + 4208 "00100001" // /* MW 10 */ + 4209 "00000000" // /* MW 9 */ + 4210 "00110110" // /* MW 8 */ + 4211 "00000001" // /* MW 7 */ + 4212 "00110100" // /* MW 6 */ + 4213 "00101000" // /* MW 5 */ + 4214 "00101000" // /* MW 4 */ + 4215 "10001000" // /* MW 3 */ + 4216 "00000110" // /* MW 2 */ + 4217 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 702 37 + 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4219 "00010000" // /* MW 9 */ + 4220 "00110100" // /* MW 8 */ + 4221 "00110010" // /* MW 7 */ + 4222 "11110010" // /* MW 6 */ + 4223 "00000001" // /* MW 5 */ + 4224 "00000000" // /* MW 4 */ + 4225 "11010000" // /* MW 3 */ + 4226 "10010100" // /* MW 2 */ + 4227 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 43 +.src_ref 2 "conv2d_bf16.h" 702 4 first + 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4229 "00010000" // /* MW 9 */ + 4230 "01111000" // /* MW 8 */ + 4231 "01111000" // /* MW 7 */ + 4232 "00000100" // /* MW 6 */ + 4233 "00000000" // /* MW 5 */ + 4234 "00000000" // /* MW 4 */ + 4235 "11010000" // /* MW 3 */ + 4236 "10010000" // /* MW 2 */ + 4237 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 699 43 first +.src_ref 2 "conv2d_bf16.h" 702 4 + 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4239 "00010000" // /* MW 9 */ + 4240 "10010000" // /* MW 8 */ + 4241 "10111000" // /* MW 7 */ + 4242 "00000101" // /* MW 6 */ + 4243 "00000000" // /* MW 5 */ + 4244 "00000000" // /* MW 4 */ + 4245 "11010000" // /* MW 3 */ + 4246 "10000000" // /* MW 2 */ + 4247 "01100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 702 37 first + 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4249 "00000001" // /* MW 5 */ + 4250 "00000000" // /* MW 4 */ + 4251 "11010001" // /* MW 3 */ + 4252 "10000010" // /* MW 2 */ + 4253 "01111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 37 + 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4255 "00100010" // /* MW 3 */ + 4256 "00000100" // /* MW 2 */ + 4257 "00000100" // /* MW 1 */ + 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4259 "00000000" // /* MW 1 */ + 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4261 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 705 66 first + 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4263 "00000001" // /* MW 5 */ + 4264 "10000101" // /* MW 4 */ + 4265 "10000000" // /* MW 3 */ + 4266 "00001010" // /* MW 2 */ + 4267 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first + 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4269 "00010100" // /* MW 3 */ + 4270 "00110000" // /* MW 2 */ + 4271 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "00010100" // /* MW 3 */ + 4274 "00010100" // /* MW 2 */ + 4275 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 4 first +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4277 "11111101" // /* MW 5 */ + 4278 "11100000" // /* MW 4 */ + 4279 "10001010" // /* MW 3 */ + 4280 "00001010" // /* MW 2 */ + 4281 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4283 "00000000" // /* MW 5 */ + 4284 "11110101" // /* MW 4 */ + 4285 "10000000" // /* MW 3 */ + 4286 "00000010" // /* MW 2 */ + 4287 "11000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4289 "00010100" // /* MW 3 */ + 4290 "00010100" // /* MW 2 */ + 4291 "00111100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4293 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4295 "01111110" // /* MW 9 */ + 4296 "10100101" // /* MW 8 */ + 4297 "00000001" // /* MW 7 */ + 4298 "00000000" // /* MW 6 */ + 4299 "01010100" // /* MW 5 */ + 4300 "00000000" // /* MW 4 */ + 4301 "11110000" // /* MW 3 */ + 4302 "00101100" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "11000101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00101000" // /* MW 5 */ + 4316 "01100000" // /* MW 4 */ + 4317 "11111100" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "11000101" // /* MW 12 */ + 4325 "01000000" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.src_ref 2 "conv2d_bf16.h" 704 12 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00101000" // /* MW 5 */ + 4348 "00101000" // /* MW 4 */ + 4349 "11111000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "00000011" // /* MW 7 */ + 4362 "10000000" // /* MW 6 */ + 4363 "10101101" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "11000101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "00000011" // /* MW 7 */ + 4378 "00000000" // /* MW 6 */ + 4379 "00101001" // /* MW 5 */ + 4380 "01100000" // /* MW 4 */ + 4381 "11111100" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 3 "utils.h" 531 4 first +.src_ref 2 "conv2d_bf16.h" 706 18 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "11000101" // /* MW 12 */ + 4389 "01000000" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "00000011" // /* MW 7 */ + 4394 "00000000" // /* MW 6 */ + 4395 "00100011" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4401 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4403 "00000011" // /* MW 3 */ + 4404 "10000000" // /* MW 2 */ + 4405 "00001101" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4407 "01110000" // /* MW 7 */ + 4408 "11000101" // /* MW 6 */ + 4409 "00000001" // /* MW 5 */ + 4410 "00000000" // /* MW 4 */ + 4411 "01100000" // /* MW 3 */ + 4412 "00000000" // /* MW 2 */ + 4413 "00100000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4415 "10001010" // /* MW 3 */ + 4416 "10000001" // /* MW 2 */ + 4417 "00011000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4419 "00000011" // /* MW 3 */ + 4420 "00000000" // /* MW 2 */ + 4421 "00001011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first + 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4423 "01110000" // /* MW 7 */ + 4424 "11000101" // /* MW 6 */ + 4425 "00000001" // /* MW 5 */ + 4426 "00000000" // /* MW 4 */ + 4427 "01100000" // /* MW 3 */ + 4428 "00000000" // /* MW 2 */ + 4429 "10110000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.src_ref 2 "conv2d_bf16.h" 708 12 first + 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4431 "01110000" // /* MW 7 */ + 4432 "11000101" // /* MW 6 */ + 4433 "01000000" // /* MW 5 */ + 4434 "00000000" // /* MW 4 */ + 4435 "01100000" // /* MW 3 */ + 4436 "00000000" // /* MW 2 */ + 4437 "00100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4439 "00000011" // /* MW 3 */ + 4440 "00000000" // /* MW 2 */ + 4441 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.src_ref 2 "conv2d_bf16.h" 723 first + 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4443 "00000000" // /* MW 5 */ + 4444 "01010000" // /* MW 4 */ + 4445 "01100000" // /* MW 3 */ + 4446 "00000000" // /* MW 2 */ + 4447 "10110000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 708 12 first +.delay_slot + 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "00000011" // /* MW 3 */ + 4450 "00000000" // /* MW 2 */ + 4451 "00001001" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first +.delay_slot + 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4453 "00000011" // /* MW 3 */ + 4454 "00000000" // /* MW 2 */ + 4455 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + 4461 "00000000" // /* MW 1 */ +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.function_start + 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4465 "01100000" // /* MW 13 */ + 4466 "00010001" // /* MW 12 */ + 4467 "10010001" // /* MW 11 */ + 4468 "00001110" // /* MW 10 */ + 4469 "00000000" // /* MW 9 */ + 4470 "00000000" // /* MW 8 */ + 4471 "10000000" // /* MW 7 */ + 4472 "00000000" // /* MW 6 */ + 4473 "00100000" // /* MW 5 */ + 4474 "00111111" // /* MW 4 */ + 4475 "10000110" // /* MW 3 */ + 4476 "11100000" // /* MW 2 */ + 4477 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 241 95 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4479 "01111000" // /* MW 11 */ + 4480 "01100000" // /* MW 10 */ + 4481 "00101011" // /* MW 9 */ + 4482 "00001010" // /* MW 8 */ + 4483 "11000101" // /* MW 7 */ + 4484 "10111111" // /* MW 6 */ + 4485 "10010101" // /* MW 5 */ + 4486 "11110001" // /* MW 4 */ + 4487 "00000111" // /* MW 3 */ + 4488 "01110011" // /* MW 2 */ + 4489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 94 +.src_ref 2 "conv2d_bf16_params.h" 242 100 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 245 28 +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4491 "00001000" // /* MW 11 */ + 4492 "01000111" // /* MW 10 */ + 4493 "00110100" // /* MW 9 */ + 4494 "00101001" // /* MW 8 */ + 4495 "00010000" // /* MW 7 */ + 4496 "10000001" // /* MW 6 */ + 4497 "00110101" // /* MW 5 */ + 4498 "11011010" // /* MW 4 */ + 4499 "00000111" // /* MW 3 */ + 4500 "00011001" // /* MW 2 */ + 4501 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 240 68 first + 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4503 "00010000" // /* MW 11 */ + 4504 "00000000" // /* MW 10 */ + 4505 "10101000" // /* MW 9 */ + 4506 "00000011" // /* MW 8 */ + 4507 "01000000" // /* MW 7 */ + 4508 "10000000" // /* MW 6 */ + 4509 "00110101" // /* MW 5 */ + 4510 "11110101" // /* MW 4 */ + 4511 "11010111" // /* MW 3 */ + 4512 "11001010" // /* MW 2 */ + 4513 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.src_ref 2 "conv2d_bf16_params.h" 245 20 + 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4515 "10010000" // /* MW 11 */ + 4516 "11111111" // /* MW 10 */ + 4517 "11101111" // /* MW 9 */ + 4518 "11111111" // /* MW 8 */ + 4519 "01111111" // /* MW 7 */ + 4520 "10000000" // /* MW 6 */ + 4521 "11010101" // /* MW 5 */ + 4522 "11111101" // /* MW 4 */ + 4523 "10000111" // /* MW 3 */ + 4524 "00011000" // /* MW 2 */ + 4525 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4527 "01011000" // /* MW 11 */ + 4528 "11101100" // /* MW 10 */ + 4529 "00000111" // /* MW 9 */ + 4530 "00001010" // /* MW 8 */ + 4531 "01100001" // /* MW 7 */ + 4532 "10000001" // /* MW 6 */ + 4533 "10110101" // /* MW 5 */ + 4534 "11100001" // /* MW 4 */ + 4535 "00000111" // /* MW 3 */ + 4536 "10110100" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 39 +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.src_ref 2 "conv2d_bf16_params.h" 250 71 + 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4539 "01011000" // /* MW 11 */ + 4540 "11000100" // /* MW 10 */ + 4541 "10000111" // /* MW 9 */ + 4542 "11001010" // /* MW 8 */ + 4543 "01110111" // /* MW 7 */ + 4544 "10000111" // /* MW 6 */ + 4545 "11110101" // /* MW 5 */ + 4546 "11101101" // /* MW 4 */ + 4547 "00000111" // /* MW 3 */ + 4548 "10010101" // /* MW 2 */ + 4549 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 + 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4551 "01010000" // /* MW 7 */ + 4552 "01000000" // /* MW 6 */ + 4553 "10000000" // /* MW 5 */ + 4554 "00000011" // /* MW 4 */ + 4555 "10110000" // /* MW 3 */ + 4556 "01110011" // /* MW 2 */ + 4557 "11111111" // /* MW 1 */ + 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4559 "00111101" // /* MW 3 */ + 4560 "11100100" // /* MW 2 */ + 4561 "00001111" // /* MW 1 */ + 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4563 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 + 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "00100000" // /* MW 3 */ + 4566 "01011001" // /* MW 2 */ + 4567 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 241 95 first + 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "10011011" // /* MW 5 */ + 4570 "01110111" // /* MW 4 */ + 4571 "00110110" // /* MW 3 */ + 4572 "00110010" // /* MW 2 */ + 4573 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 first +.src_ref 2 "conv2d_bf16_params.h" 242 94 first + 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4575 "00101111" // /* MW 5 */ + 4576 "11110010" // /* MW 4 */ + 4577 "01011110" // /* MW 3 */ + 4578 "11111001" // /* MW 2 */ + 4579 "01011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 20 first + 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4581 "00101010" // /* MW 3 */ + 4582 "11001001" // /* MW 2 */ + 4583 "00000010" // /* MW 1 */ + 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4585 "00000000" // /* MW 1 */ + 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4587 "00000000" // /* MW 1 */ + 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4589 "00000000" // /* MW 1 */ + 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4591 "00000000" // /* MW 1 */ + 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4593 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 174 first + 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4595 "11101100" // /* MW 3 */ + 4596 "01110111" // /* MW 2 */ + 4597 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 + 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4599 "00110010" // /* MW 3 */ + 4600 "01011101" // /* MW 2 */ + 4601 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4603 "11001100" // /* MW 3 */ + 4604 "11110110" // /* MW 2 */ + 4605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 241 95 first +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4607 "11001111" // /* MW 5 */ + 4608 "10110111" // /* MW 4 */ + 4609 "11101110" // /* MW 3 */ + 4610 "01110000" // /* MW 2 */ + 4611 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 100 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4613 "00011101" // /* MW 3 */ + 4614 "01111111" // /* MW 2 */ + 4615 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4617 "11100010" // /* MW 3 */ + 4618 "01011000" // /* MW 2 */ + 4619 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 98 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4621 "11000101" // /* MW 3 */ + 4622 "11111001" // /* MW 2 */ + 4623 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 151 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "01100010" // /* MW 5 */ + 4626 "00111100" // /* MW 4 */ + 4627 "10011110" // /* MW 3 */ + 4628 "11111101" // /* MW 2 */ + 4629 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "11000010" // /* MW 3 */ + 4632 "01111001" // /* MW 2 */ + 4633 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 80 first + 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11001100" // /* MW 3 */ + 4636 "01111111" // /* MW 2 */ + 4637 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 117 first +.src_ref 2 "conv2d_bf16_params.h" 243 39 + 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4639 "11010001" // /* MW 5 */ + 4640 "11110111" // /* MW 4 */ + 4641 "00111110" // /* MW 3 */ + 4642 "01111110" // /* MW 2 */ + 4643 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 first +.src_ref 2 "conv2d_bf16_params.h" 245 28 first + 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4645 "00110001" // /* MW 5 */ + 4646 "10110010" // /* MW 4 */ + 4647 "01010100" // /* MW 3 */ + 4648 "01111001" // /* MW 2 */ + 4649 "01011101" // /* MW 1 */ + 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4651 "00000000" // /* MW 1 */ + 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4653 "00000000" // /* MW 1 */ + 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4655 "00000000" // /* MW 1 */ + 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4657 "00000000" // /* MW 1 */ + 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4659 "00000000" // /* MW 1 */ + 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4661 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 87 + 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4663 "11111100" // /* MW 5 */ + 4664 "10111110" // /* MW 4 */ + 4665 "00011111" // /* MW 3 */ + 4666 "10101101" // /* MW 2 */ + 4667 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4669 "00000001" // /* MW 5 */ + 4670 "01000000" // /* MW 4 */ + 4671 "01000000" // /* MW 3 */ + 4672 "00001001" // /* MW 2 */ + 4673 "01100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.delay_slot + 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4675 "01001000" // /* MW 3 */ + 4676 "10010011" // /* MW 2 */ + 4677 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4679 "10010000" // /* MW 3 */ + 4680 "11111110" // /* MW 2 */ + 4681 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4683 "01100100" // /* MW 3 */ + 4684 "01101101" // /* MW 2 */ + 4685 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4687 "01111100" // /* MW 3 */ + 4688 "11101111" // /* MW 2 */ + 4689 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 132 +.delay_slot + 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4691 "01100100" // /* MW 3 */ + 4692 "11100001" // /* MW 2 */ + 4693 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4695 "00000001" // /* MW 5 */ + 4696 "01000000" // /* MW 4 */ + 4697 "01000000" // /* MW 3 */ + 4698 "00001001" // /* MW 2 */ + 4699 "11101000" // /* MW 1 */ +.delay_slot + 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4701 "00011101" // /* MW 3 */ + 4702 "11101011" // /* MW 2 */ + 4703 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4711 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 + 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */ + 4713 "00100000" // /* MW 9 */ + 4714 "00000000" // /* MW 8 */ + 4715 "00000000" // /* MW 7 */ + 4716 "01010110" // /* MW 6 */ + 4717 "00000010" // /* MW 5 */ + 4718 "00000000" // /* MW 4 */ + 4719 "00000000" // /* MW 3 */ + 4720 "00111011" // /* MW 2 */ + 4721 "00000000" // /* MW 1 */ +.delay_slot + 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4723 "10011100" // /* MW 3 */ + 4724 "00011001" // /* MW 2 */ + 4725 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1849 12 +.delay_slot + 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4727 "00000101" // /* MW 3 */ + 4728 "00100110" // /* MW 2 */ + 4729 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4735 "00000000" // /* MW 1 */ +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 2 "conv2d_bf16_params.h" 250 71 first +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4737 "01111000" // /* MW 11 */ + 4738 "11001110" // /* MW 10 */ + 4739 "00001100" // /* MW 9 */ + 4740 "00111100" // /* MW 8 */ + 4741 "10111111" // /* MW 7 */ + 4742 "10101011" // /* MW 6 */ + 4743 "00011101" // /* MW 5 */ + 4744 "11101011" // /* MW 4 */ + 4745 "00000111" // /* MW 3 */ + 4746 "10010101" // /* MW 2 */ + 4747 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4749 "01011101" // /* MW 3 */ + 4750 "10101011" // /* MW 2 */ + 4751 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 + 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4753 "10010010" // /* MW 3 */ + 4754 "01101011" // /* MW 2 */ + 4755 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4757 "11100111" // /* MW 3 */ + 4758 "11110111" // /* MW 2 */ + 4759 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4761 "01000001" // /* MW 5 */ + 4762 "10110000" // /* MW 4 */ + 4763 "01001101" // /* MW 3 */ + 4764 "11110010" // /* MW 2 */ + 4765 "10101100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4767 "00110010" // /* MW 3 */ + 4768 "01100111" // /* MW 2 */ + 4769 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 first + 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4771 "01000100" // /* MW 3 */ + 4772 "00101001" // /* MW 2 */ + 4773 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4775 "11110000" // /* MW 3 */ + 4776 "00110110" // /* MW 2 */ + 4777 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 152 first + 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4779 "10001011" // /* MW 5 */ + 4780 "11001111" // /* MW 4 */ + 4781 "11111001" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 +.src_ref 2 "conv2d_bf16_params.h" 258 8 first + 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4785 "01100000" // /* MW 11 */ + 4786 "00000000" // /* MW 10 */ + 4787 "00010000" // /* MW 9 */ + 4788 "01011100" // /* MW 8 */ + 4789 "00000010" // /* MW 7 */ + 4790 "10111010" // /* MW 6 */ + 4791 "01110001" // /* MW 5 */ + 4792 "01101111" // /* MW 4 */ + 4793 "10000010" // /* MW 3 */ + 4794 "10010000" // /* MW 2 */ + 4795 "00000001" // /* MW 1 */ +.delay_slot + 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100111" // /* MW 3 */ + 4798 "10001010" // /* MW 2 */ + 4799 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4807 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 + 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4809 "11111110" // /* MW 5 */ + 4810 "00111111" // /* MW 4 */ + 4811 "11111010" // /* MW 3 */ + 4812 "11111111" // /* MW 2 */ + 4813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 first + 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "01000100" // /* MW 3 */ + 4816 "10100101" // /* MW 2 */ + 4817 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4819 "00011100" // /* MW 13 */ + 4820 "00000000" // /* MW 12 */ + 4821 "00000000" // /* MW 11 */ + 4822 "01010111" // /* MW 10 */ + 4823 "00011010" // /* MW 9 */ + 4824 "01000000" // /* MW 8 */ + 4825 "00000000" // /* MW 7 */ + 4826 "00000000" // /* MW 6 */ + 4827 "10100011" // /* MW 5 */ + 4828 "11101100" // /* MW 4 */ + 4829 "11110110" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.src_ref 2 "conv2d_bf16.h" 1841 65 first +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16.h" 1849 12 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4833 "01011000" // /* MW 9 */ + 4834 "11111101" // /* MW 8 */ + 4835 "11001111" // /* MW 7 */ + 4836 "10000010" // /* MW 6 */ + 4837 "01000100" // /* MW 5 */ + 4838 "00100111" // /* MW 4 */ + 4839 "11010000" // /* MW 3 */ + 4840 "11010010" // /* MW 2 */ + 4841 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1841 34 +.src_ref 2 "conv2d_bf16.h" 1842 36 +.src_ref 2 "conv2d_bf16.h" 1842 67 +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4843 "01011000" // /* MW 9 */ + 4844 "00100100" // /* MW 8 */ + 4845 "00000000" // /* MW 7 */ + 4846 "11111010" // /* MW 6 */ + 4847 "01011111" // /* MW 5 */ + 4848 "00101001" // /* MW 4 */ + 4849 "00000000" // /* MW 3 */ + 4850 "01010010" // /* MW 2 */ + 4851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 67 first +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4853 "01011000" // /* MW 11 */ + 4854 "11001100" // /* MW 10 */ + 4855 "00000111" // /* MW 9 */ + 4856 "00100110" // /* MW 8 */ + 4857 "01101011" // /* MW 7 */ + 4858 "10101011" // /* MW 6 */ + 4859 "00101101" // /* MW 5 */ + 4860 "11010000" // /* MW 4 */ + 4861 "11010111" // /* MW 3 */ + 4862 "01011010" // /* MW 2 */ + 4863 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1845 80 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4865 "01011000" // /* MW 11 */ + 4866 "11000100" // /* MW 10 */ + 4867 "00000000" // /* MW 9 */ + 4868 "11101010" // /* MW 8 */ + 4869 "00110111" // /* MW 7 */ + 4870 "10111111" // /* MW 6 */ + 4871 "11010101" // /* MW 5 */ + 4872 "11011110" // /* MW 4 */ + 4873 "11010111" // /* MW 3 */ + 4874 "01011110" // /* MW 2 */ + 4875 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 63 first + 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4877 "10110110" // /* MW 3 */ + 4878 "11111111" // /* MW 2 */ + 4879 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 first + 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4881 "11110110" // /* MW 3 */ + 4882 "10001011" // /* MW 2 */ + 4883 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4885 "10110110" // /* MW 3 */ + 4886 "00000110" // /* MW 2 */ + 4887 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1841 34 first + 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4889 "01011011" // /* MW 5 */ + 4890 "00100110" // /* MW 4 */ + 4891 "11011010" // /* MW 3 */ + 4892 "11010010" // /* MW 2 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4895 "11010110" // /* MW 3 */ + 4896 "00000111" // /* MW 2 */ + 4897 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first + 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4899 "00101101" // /* MW 3 */ + 4900 "10101101" // /* MW 2 */ + 4901 "00010101" // /* MW 1 */ + 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4903 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 80 first + 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00111110" // /* MW 3 */ + 4906 "01100111" // /* MW 2 */ + 4907 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 first + 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4909 "00011000" // /* MW 3 */ + 4910 "11100011" // /* MW 2 */ + 4911 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 12 + 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */ + 4913 "00000001" // /* MW 5 */ + 4914 "01000000" // /* MW 4 */ + 4915 "11010000" // /* MW 3 */ + 4916 "00001001" // /* MW 2 */ + 4917 "10001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first +.src_ref 2 "conv2d_bf16.h" 1842 75 first +.delay_slot + 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4919 "10110010" // /* MW 5 */ + 4920 "10110101" // /* MW 4 */ + 4921 "10111010" // /* MW 3 */ + 4922 "10100101" // /* MW 2 */ + 4923 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4925 "10110010" // /* MW 5 */ + 4926 "10010101" // /* MW 4 */ + 4927 "10110000" // /* MW 3 */ + 4928 "01100101" // /* MW 2 */ + 4929 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1841 34 first +.delay_slot + 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4931 "10100000" // /* MW 7 */ + 4932 "01101000" // /* MW 6 */ + 4933 "11001010" // /* MW 5 */ + 4934 "00000001" // /* MW 4 */ + 4935 "10110000" // /* MW 3 */ + 4936 "10000100" // /* MW 2 */ + 4937 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4939 "10000000" // /* MW 3 */ + 4940 "11010000" // /* MW 2 */ + 4941 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4943 "11111001" // /* MW 3 */ + 4944 "01101010" // /* MW 2 */ + 4945 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4947 "11010000" // /* MW 5 */ + 4948 "11001000" // /* MW 4 */ + 4949 "11001110" // /* MW 3 */ + 4950 "00000111" // /* MW 2 */ + 4951 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 first + 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4953 "10000000" // /* MW 5 */ + 4954 "10110100" // /* MW 4 */ + 4955 "01010000" // /* MW 3 */ + 4956 "11000100" // /* MW 2 */ + 4957 "11100000" // /* MW 1 */ + 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4961 "00000000" // /* MW 5 */ + 4962 "00100000" // /* MW 4 */ + 4963 "00001010" // /* MW 3 */ + 4964 "01111111" // /* MW 2 */ + 4965 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4967 "10010001" // /* MW 3 */ + 4968 "00000010" // /* MW 2 */ + 4969 "00011000" // /* MW 1 */ + 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4971 "11100000" // /* MW 3 */ + 4972 "00010101" // /* MW 2 */ + 4973 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4975 "01011111" // /* MW 3 */ + 4976 "01101010" // /* MW 2 */ + 4977 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4979 "00100101" // /* MW 5 */ + 4980 "00000001" // /* MW 4 */ + 4981 "11100000" // /* MW 3 */ + 4982 "11000110" // /* MW 2 */ + 4983 "11100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4985 "10000000" // /* MW 3 */ + 4986 "01111010" // /* MW 2 */ + 4987 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4989 "00010110" // /* MW 3 */ + 4990 "01000000" // /* MW 2 */ + 4991 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4993 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4995 "00000001" // /* MW 3 */ + 4996 "01000001" // /* MW 2 */ + 4997 "00011100" // /* MW 1 */ + 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4999 "00000000" // /* MW 1 */ + 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5001 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5003 "00110010" // /* MW 3 */ + 5004 "00000110" // /* MW 2 */ + 5005 "00000111" // /* MW 1 */ + 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5007 "00000000" // /* MW 1 */ + 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5009 "00000000" // /* MW 1 */ + 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5011 "00000000" // /* MW 1 */ + 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5013 "00000000" // /* MW 1 */ + 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5015 "00000000" // /* MW 1 */ + 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5017 "00000000" // /* MW 1 */ + 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "01101011" // /* MW 5 */ + 5020 "10100100" // /* MW 4 */ + 5021 "11111111" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.src_ref 2 "conv2d_bf16.h" 881 76 +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5025 "00010000" // /* MW 11 */ + 5026 "00110100" // /* MW 10 */ + 5027 "10110010" // /* MW 9 */ + 5028 "11110001" // /* MW 8 */ + 5029 "00000001" // /* MW 7 */ + 5030 "00000000" // /* MW 6 */ + 5031 "00001011" // /* MW 5 */ + 5032 "10001110" // /* MW 4 */ + 5033 "10000001" // /* MW 3 */ + 5034 "10010000" // /* MW 2 */ + 5035 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.src_ref 2 "conv2d_bf16.h" 876 51 first +.src_ref 2 "conv2d_bf16.h" 881 76 first +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5037 "01011000" // /* MW 11 */ + 5038 "00001011" // /* MW 10 */ + 5039 "01101000" // /* MW 9 */ + 5040 "10010010" // /* MW 8 */ + 5041 "00011001" // /* MW 7 */ + 5042 "00110011" // /* MW 6 */ + 5043 "10001011" // /* MW 5 */ + 5044 "10000100" // /* MW 4 */ + 5045 "01010000" // /* MW 3 */ + 5046 "01000101" // /* MW 2 */ + 5047 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5049 "01111000" // /* MW 9 */ + 5050 "01100000" // /* MW 8 */ + 5051 "10101010" // /* MW 7 */ + 5052 "01100101" // /* MW 6 */ + 5053 "10111001" // /* MW 5 */ + 5054 "00111001" // /* MW 4 */ + 5055 "00000000" // /* MW 3 */ + 5056 "10010110" // /* MW 2 */ + 5057 "01100001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 883 4 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5059 "01100111" // /* MW 3 */ + 5060 "00000110" // /* MW 2 */ + 5061 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5063 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 884 4 first +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5065 "00000001" // /* MW 5 */ + 5066 "00000000" // /* MW 4 */ + 5067 "00110000" // /* MW 3 */ + 5068 "00001000" // /* MW 2 */ + 5069 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5071 "00101101" // /* MW 3 */ + 5072 "01101011" // /* MW 2 */ + 5073 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.delay_slot + 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5075 "11111001" // /* MW 3 */ + 5076 "01101010" // /* MW 2 */ + 5077 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 first +.delay_slot + 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5079 "00010001" // /* MW 3 */ + 5080 "01100011" // /* MW 2 */ + 5081 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.delay_slot + 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5083 "00110101" // /* MW 5 */ + 5084 "00101100" // /* MW 4 */ + 5085 "10111010" // /* MW 3 */ + 5086 "01100101" // /* MW 2 */ + 5087 "10001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.delay_slot + 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5089 "00000000" // /* MW 15 */ + 5090 "00000000" // /* MW 14 */ + 5091 "10101000" // /* MW 13 */ + 5092 "11100010" // /* MW 12 */ + 5093 "10001011" // /* MW 11 */ + 5094 "00010001" // /* MW 10 */ + 5095 "10011010" // /* MW 9 */ + 5096 "00101100" // /* MW 8 */ + 5097 "01011011" // /* MW 7 */ + 5098 "00000001" // /* MW 6 */ + 5099 "00100000" // /* MW 5 */ + 5100 "00000000" // /* MW 4 */ + 5101 "11110000" // /* MW 3 */ + 5102 "00101100" // /* MW 2 */ + 5103 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.return_address + 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5105 "10011001" // /* MW 3 */ + 5106 "11010100" // /* MW 2 */ + 5107 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 first +.no_stack_arguments + 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5109 "00000001" // /* MW 5 */ + 5110 "00000000" // /* MW 4 */ + 5111 "00110000" // /* MW 3 */ + 5112 "00001000" // /* MW 2 */ + 5113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5117 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.delay_slot + 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5119 "10010000" // /* MW 3 */ + 5120 "01010110" // /* MW 2 */ + 5121 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5123 "10100000" // /* MW 3 */ + 5124 "01100110" // /* MW 2 */ + 5125 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5127 "00000000" // /* MW 9 */ + 5128 "00000000" // /* MW 8 */ + 5129 "00000000" // /* MW 7 */ + 5130 "00000000" // /* MW 6 */ + 5131 "00001011" // /* MW 5 */ + 5132 "10001111" // /* MW 4 */ + 5133 "11110000" // /* MW 3 */ + 5134 "00101100" // /* MW 2 */ + 5135 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.return_address + 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5137 "00001000" // /* MW 9 */ + 5138 "01100011" // /* MW 8 */ + 5139 "00110011" // /* MW 7 */ + 5140 "11101010" // /* MW 6 */ + 5141 "00110111" // /* MW 5 */ + 5142 "00000001" // /* MW 4 */ + 5143 "10000000" // /* MW 3 */ + 5144 "10011010" // /* MW 2 */ + 5145 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 886 4 +.src_ref 2 "conv2d_bf16.h" 896 23 first +.src_ref 2 "conv2d_bf16.h" 1123 71 + 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5147 "01100010" // /* MW 5 */ + 5148 "00110100" // /* MW 4 */ + 5149 "11010000" // /* MW 3 */ + 5150 "10000100" // /* MW 2 */ + 5151 "10000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "01000110" // /* MW 3 */ + 5154 "00011100" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5157 "00100110" // /* MW 3 */ + 5158 "00011110" // /* MW 2 */ + 5159 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "01000110" // /* MW 3 */ + 5162 "00011110" // /* MW 2 */ + 5163 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5165 "00000110" // /* MW 3 */ + 5166 "00011100" // /* MW 2 */ + 5167 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5169 "01100110" // /* MW 3 */ + 5170 "00011100" // /* MW 2 */ + 5171 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5173 "01100110" // /* MW 3 */ + 5174 "00011110" // /* MW 2 */ + 5175 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 first + 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5177 "11010110" // /* MW 3 */ + 5178 "00011110" // /* MW 2 */ + 5179 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5181 "00110110" // /* MW 3 */ + 5182 "00011110" // /* MW 2 */ + 5183 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5185 "10010110" // /* MW 3 */ + 5186 "00011111" // /* MW 2 */ + 5187 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "10110110" // /* MW 3 */ + 5190 "00011110" // /* MW 2 */ + 5191 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "11110110" // /* MW 3 */ + 5194 "00011110" // /* MW 2 */ + 5195 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "10011110" // /* MW 3 */ + 5198 "00011101" // /* MW 2 */ + 5199 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5201 "00100110" // /* MW 3 */ + 5202 "00011101" // /* MW 2 */ + 5203 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5205 "10100110" // /* MW 3 */ + 5206 "00011100" // /* MW 2 */ + 5207 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5209 "11000110" // /* MW 3 */ + 5210 "00011100" // /* MW 2 */ + 5211 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5213 "10100110" // /* MW 3 */ + 5214 "00011110" // /* MW 2 */ + 5215 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5217 "11010110" // /* MW 3 */ + 5218 "00011111" // /* MW 2 */ + 5219 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5221 "10110110" // /* MW 3 */ + 5222 "00011111" // /* MW 2 */ + 5223 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5225 "11100110" // /* MW 3 */ + 5226 "00011100" // /* MW 2 */ + 5227 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5229 "01001010" // /* MW 3 */ + 5230 "11000010" // /* MW 2 */ + 5231 "00000100" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 + 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5233 "10010001" // /* MW 3 */ + 5234 "11010010" // /* MW 2 */ + 5235 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5237 "01010110" // /* MW 3 */ + 5238 "00000100" // /* MW 2 */ + 5239 "00000100" // /* MW 1 */ + 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5241 "00000000" // /* MW 1 */ + 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5243 "00000000" // /* MW 1 */ + 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5245 "00000000" // /* MW 1 */ + 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5247 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5249 "00101100" // /* MW 3 */ + 5250 "11100111" // /* MW 2 */ + 5251 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 12 + 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */ + 5253 "00000001" // /* MW 5 */ + 5254 "01000000" // /* MW 4 */ + 5255 "00010000" // /* MW 3 */ + 5256 "00001100" // /* MW 2 */ + 5257 "10011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 +.delay_slot + 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5259 "11010000" // /* MW 5 */ + 5260 "11001000" // /* MW 4 */ + 5261 "11000100" // /* MW 3 */ + 5262 "00000111" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 first +.delay_slot + 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "10100111" // /* MW 3 */ + 5266 "00000101" // /* MW 2 */ + 5267 "00000010" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5269 "01110010" // /* MW 3 */ + 5270 "11010001" // /* MW 2 */ + 5271 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 71 first + 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5277 "01011000" // /* MW 9 */ + 5278 "10000100" // /* MW 8 */ + 5279 "10000000" // /* MW 7 */ + 5280 "00111111" // /* MW 6 */ + 5281 "10111001" // /* MW 5 */ + 5282 "00011011" // /* MW 4 */ + 5283 "00100000" // /* MW 3 */ + 5284 "01000011" // /* MW 2 */ + 5285 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 +.src_ref 2 "conv2d_bf16.h" 1154 80 + 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5287 "01011000" // /* MW 9 */ + 5288 "00111100" // /* MW 8 */ + 5289 "00000000" // /* MW 7 */ + 5290 "00001010" // /* MW 6 */ + 5291 "00100000" // /* MW 5 */ + 5292 "00111101" // /* MW 4 */ + 5293 "00000000" // /* MW 3 */ + 5294 "00010011" // /* MW 2 */ + 5295 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5297 "01111000" // /* MW 9 */ + 5298 "11010000" // /* MW 8 */ + 5299 "11100100" // /* MW 7 */ + 5300 "00001011" // /* MW 6 */ + 5301 "10100000" // /* MW 5 */ + 5302 "00000001" // /* MW 4 */ + 5303 "10000000" // /* MW 3 */ + 5304 "00010100" // /* MW 2 */ + 5305 "11111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 746 83 + 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5307 "01111000" // /* MW 11 */ + 5308 "11000000" // /* MW 10 */ + 5309 "10100111" // /* MW 9 */ + 5310 "00000001" // /* MW 8 */ + 5311 "11010100" // /* MW 7 */ + 5312 "00011011" // /* MW 6 */ + 5313 "01001011" // /* MW 5 */ + 5314 "00011100" // /* MW 4 */ + 5315 "10000010" // /* MW 3 */ + 5316 "10011000" // /* MW 2 */ + 5317 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.src_ref 2 "conv2d_bf16.h" 1199 26 +.src_ref 2 "conv2d_bf16.h" 1200 26 +.src_ref 2 "conv2d_bf16.h" 1201 26 +.src_ref 2 "conv2d_bf16.h" 1202 26 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5319 "01011000" // /* MW 11 */ + 5320 "00000111" // /* MW 10 */ + 5321 "11101000" // /* MW 9 */ + 5322 "10001001" // /* MW 8 */ + 5323 "11110111" // /* MW 7 */ + 5324 "00000001" // /* MW 6 */ + 5325 "01001011" // /* MW 5 */ + 5326 "00011100" // /* MW 4 */ + 5327 "00100110" // /* MW 3 */ + 5328 "10010110" // /* MW 2 */ + 5329 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 12 +.src_ref 2 "conv2d_bf16.h" 1218 20 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5331 "00010000" // /* MW 9 */ + 5332 "10100000" // /* MW 8 */ + 5333 "00110010" // /* MW 7 */ + 5334 "00000101" // /* MW 6 */ + 5335 "00000000" // /* MW 5 */ + 5336 "00000000" // /* MW 4 */ + 5337 "00100000" // /* MW 3 */ + 5338 "11001010" // /* MW 2 */ + 5339 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 749 26 +.src_ref 2 "conv2d_bf16.h" 750 26 +.src_ref 2 "conv2d_bf16.h" 751 26 +.src_ref 2 "conv2d_bf16.h" 752 26 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5341 "01011000" // /* MW 9 */ + 5342 "00001100" // /* MW 8 */ + 5343 "10001011" // /* MW 7 */ + 5344 "00010010" // /* MW 6 */ + 5345 "01101001" // /* MW 5 */ + 5346 "00110100" // /* MW 4 */ + 5347 "00100000" // /* MW 3 */ + 5348 "00110110" // /* MW 2 */ + 5349 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1873 + 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5351 "01011000" // /* MW 11 */ + 5352 "00000000" // /* MW 10 */ + 5353 "00001000" // /* MW 9 */ + 5354 "00001011" // /* MW 8 */ + 5355 "10010000" // /* MW 7 */ + 5356 "00000001" // /* MW 6 */ + 5357 "00100000" // /* MW 5 */ + 5358 "11010111" // /* MW 4 */ + 5359 "00101001" // /* MW 3 */ + 5360 "10000111" // /* MW 2 */ + 5361 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5363 "00010110" // /* MW 3 */ + 5364 "10001000" // /* MW 2 */ + 5365 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5367 "00100110" // /* MW 3 */ + 5368 "10101011" // /* MW 2 */ + 5369 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "01110110" // /* MW 3 */ + 5372 "00101111" // /* MW 2 */ + 5373 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 80 first + 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "10000110" // /* MW 3 */ + 5376 "00011110" // /* MW 2 */ + 5377 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 80 first + 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "11000110" // /* MW 3 */ + 5380 "10001010" // /* MW 2 */ + 5381 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 87 first + 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5383 "00000110" // /* MW 3 */ + 5384 "10011110" // /* MW 2 */ + 5385 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 83 first + 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5387 "00110110" // /* MW 3 */ + 5388 "00011100" // /* MW 2 */ + 5389 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 83 first +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5391 "00000010" // /* MW 5 */ + 5392 "00000110" // /* MW 4 */ + 5393 "11011101" // /* MW 3 */ + 5394 "00000010" // /* MW 2 */ + 5395 "10011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 66 first + 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "01110110" // /* MW 3 */ + 5398 "00010100" // /* MW 2 */ + 5399 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1206 63 first + 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5401 "10010110" // /* MW 3 */ + 5402 "00000100" // /* MW 2 */ + 5403 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 + 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5405 "00000000" // /* MW 3 */ + 5406 "11011010" // /* MW 2 */ + 5407 "00011001" // /* MW 1 */ + 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "10011001" // /* MW 3 */ + 5410 "10000011" // /* MW 2 */ + 5411 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 + 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5413 "00000000" // /* MW 3 */ + 5414 "00011011" // /* MW 2 */ + 5415 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5417 "10011001" // /* MW 3 */ + 5418 "00001101" // /* MW 2 */ + 5419 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 first + 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "11100000" // /* MW 3 */ + 5422 "00000011" // /* MW 2 */ + 5423 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 first + 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5425 "11000000" // /* MW 5 */ + 5426 "00010000" // /* MW 4 */ + 5427 "11101110" // /* MW 3 */ + 5428 "11111111" // /* MW 2 */ + 5429 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5431 "01111110" // /* MW 9 */ + 5432 "10000000" // /* MW 8 */ + 5433 "10000010" // /* MW 7 */ + 5434 "00000000" // /* MW 6 */ + 5435 "00010000" // /* MW 5 */ + 5436 "00000000" // /* MW 4 */ + 5437 "11110000" // /* MW 3 */ + 5438 "00101100" // /* MW 2 */ + 5439 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1147 31 first +.src_ref 2 "conv2d_bf16.h" 1187 40 first +.loop_nesting 1 + 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5441 "01111000" // /* MW 11 */ + 5442 "10010000" // /* MW 10 */ + 5443 "00110011" // /* MW 9 */ + 5444 "11101100" // /* MW 8 */ + 5445 "11100111" // /* MW 7 */ + 5446 "00000100" // /* MW 6 */ + 5447 "00001011" // /* MW 5 */ + 5448 "10000101" // /* MW 4 */ + 5449 "01110001" // /* MW 3 */ + 5450 "10000101" // /* MW 2 */ + 5451 "11000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1188 50 first + 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5453 "10100000" // /* MW 11 */ + 5454 "10011000" // /* MW 10 */ + 5455 "00110011" // /* MW 9 */ + 5456 "00000010" // /* MW 8 */ + 5457 "01001011" // /* MW 7 */ + 5458 "00001110" // /* MW 6 */ + 5459 "00101011" // /* MW 5 */ + 5460 "00101000" // /* MW 4 */ + 5461 "01111000" // /* MW 3 */ + 5462 "10000001" // /* MW 2 */ + 5463 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first + 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5465 "01110000" // /* MW 11 */ + 5466 "10000000" // /* MW 10 */ + 5467 "11000110" // /* MW 9 */ + 5468 "00000011" // /* MW 8 */ + 5469 "01001011" // /* MW 7 */ + 5470 "01011010" // /* MW 6 */ + 5471 "00101111" // /* MW 5 */ + 5472 "00101000" // /* MW 4 */ + 5473 "01111000" // /* MW 3 */ + 5474 "00111001" // /* MW 2 */ + 5475 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1149 31 first + 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5477 "01110000" // /* MW 11 */ + 5478 "00000000" // /* MW 10 */ + 5479 "10000010" // /* MW 9 */ + 5480 "00000001" // /* MW 8 */ + 5481 "00001011" // /* MW 7 */ + 5482 "01010011" // /* MW 6 */ + 5483 "00101011" // /* MW 5 */ + 5484 "00000011" // /* MW 4 */ + 5485 "01110100" // /* MW 3 */ + 5486 "00001101" // /* MW 2 */ + 5487 "11011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 743 30 first + 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5489 "01011110" // /* MW 9 */ + 5490 "00000000" // /* MW 8 */ + 5491 "11000000" // /* MW 7 */ + 5492 "00000001" // /* MW 6 */ + 5493 "11010100" // /* MW 5 */ + 5494 "00010010" // /* MW 4 */ + 5495 "01110100" // /* MW 3 */ + 5496 "01000001" // /* MW 2 */ + 5497 "01110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1152 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first + 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5499 "00010000" // /* MW 11 */ + 5500 "01000000" // /* MW 10 */ + 5501 "10111011" // /* MW 9 */ + 5502 "00000101" // /* MW 8 */ + 5503 "00000000" // /* MW 7 */ + 5504 "00000000" // /* MW 6 */ + 5505 "00101000" // /* MW 5 */ + 5506 "00101000" // /* MW 4 */ + 5507 "01111000" // /* MW 3 */ + 5508 "10010101" // /* MW 2 */ + 5509 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 1154 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 + 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5511 "00010000" // /* MW 11 */ + 5512 "00101000" // /* MW 10 */ + 5513 "01111011" // /* MW 9 */ + 5514 "00000100" // /* MW 8 */ + 5515 "00000000" // /* MW 7 */ + 5516 "00000000" // /* MW 6 */ + 5517 "00101000" // /* MW 5 */ + 5518 "00101000" // /* MW 4 */ + 5519 "01111000" // /* MW 3 */ + 5520 "00011101" // /* MW 2 */ + 5521 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first + 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5523 "00101000" // /* MW 5 */ + 5524 "00000001" // /* MW 4 */ + 5525 "01110100" // /* MW 3 */ + 5526 "10110101" // /* MW 2 */ + 5527 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1157 31 first + 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5529 "00101000" // /* MW 5 */ + 5530 "00100010" // /* MW 4 */ + 5531 "01111000" // /* MW 3 */ + 5532 "10100101" // /* MW 2 */ + 5533 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1159 31 first + 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5535 "00101000" // /* MW 5 */ + 5536 "00101000" // /* MW 4 */ + 5537 "01111000" // /* MW 3 */ + 5538 "00101101" // /* MW 2 */ + 5539 "11011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5541 "00101000" // /* MW 5 */ + 5542 "00101000" // /* MW 4 */ + 5543 "01111000" // /* MW 3 */ + 5544 "10000001" // /* MW 2 */ + 5545 "00100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1192 29 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5547 "00101000" // /* MW 5 */ + 5548 "00000001" // /* MW 4 */ + 5549 "01110100" // /* MW 3 */ + 5550 "10111101" // /* MW 2 */ + 5551 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "11101110" // /* MW 9 */ + 5554 "11000011" // /* MW 8 */ + 5555 "10011010" // /* MW 7 */ + 5556 "00000010" // /* MW 6 */ + 5557 "00010100" // /* MW 5 */ + 5558 "00010001" // /* MW 4 */ + 5559 "01110100" // /* MW 3 */ + 5560 "11001101" // /* MW 2 */ + 5561 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1162 81 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5563 "11100000" // /* MW 11 */ + 5564 "11000001" // /* MW 10 */ + 5565 "10011010" // /* MW 9 */ + 5566 "00000001" // /* MW 8 */ + 5567 "10001011" // /* MW 7 */ + 5568 "10011000" // /* MW 6 */ + 5569 "00101100" // /* MW 5 */ + 5570 "00101000" // /* MW 4 */ + 5571 "01111000" // /* MW 3 */ + 5572 "11000101" // /* MW 2 */ + 5573 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5575 "11101001" // /* MW 9 */ + 5576 "00010100" // /* MW 8 */ + 5577 "01001000" // /* MW 7 */ + 5578 "00011101" // /* MW 6 */ + 5579 "01010100" // /* MW 5 */ + 5580 "00000000" // /* MW 4 */ + 5581 "01110011" // /* MW 3 */ + 5582 "10000001" // /* MW 2 */ + 5583 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5585 "11101001" // /* MW 13 */ + 5586 "00101100" // /* MW 12 */ + 5587 "01001001" // /* MW 11 */ + 5588 "00000111" // /* MW 10 */ + 5589 "01011000" // /* MW 9 */ + 5590 "01011100" // /* MW 8 */ + 5591 "00000000" // /* MW 7 */ + 5592 "00000000" // /* MW 6 */ + 5593 "10010110" // /* MW 5 */ + 5594 "10010100" // /* MW 4 */ + 5595 "01110110" // /* MW 3 */ + 5596 "00110101" // /* MW 2 */ + 5597 "11001111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1162 81 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5599 "00001001" // /* MW 13 */ + 5600 "01010101" // /* MW 12 */ + 5601 "01001010" // /* MW 11 */ + 5602 "00111110" // /* MW 10 */ + 5603 "10010000" // /* MW 9 */ + 5604 "01001100" // /* MW 8 */ + 5605 "00000000" // /* MW 7 */ + 5606 "00000000" // /* MW 6 */ + 5607 "10010110" // /* MW 5 */ + 5608 "00111000" // /* MW 4 */ + 5609 "01111010" // /* MW 3 */ + 5610 "10111101" // /* MW 2 */ + 5611 "10000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1199 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5613 "00111101" // /* MW 13 */ + 5614 "01100000" // /* MW 12 */ + 5615 "11111000" // /* MW 11 */ + 5616 "00011110" // /* MW 10 */ + 5617 "10010000" // /* MW 9 */ + 5618 "01010100" // /* MW 8 */ + 5619 "00000000" // /* MW 7 */ + 5620 "00000000" // /* MW 6 */ + 5621 "10010110" // /* MW 5 */ + 5622 "10011000" // /* MW 4 */ + 5623 "01110100" // /* MW 3 */ + 5624 "00000001" // /* MW 2 */ + 5625 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1200 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5627 "00111101" // /* MW 7 */ + 5628 "01100100" // /* MW 6 */ + 5629 "11111001" // /* MW 5 */ + 5630 "00000100" // /* MW 4 */ + 5631 "01110000" // /* MW 3 */ + 5632 "10000001" // /* MW 2 */ + 5633 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1201 26 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5635 "00111101" // /* MW 7 */ + 5636 "10001000" // /* MW 6 */ + 5637 "11111010" // /* MW 5 */ + 5638 "00000100" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00001001" // /* MW 2 */ + 5641 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5643 "00001001" // /* MW 7 */ + 5644 "01101101" // /* MW 6 */ + 5645 "01001011" // /* MW 5 */ + 5646 "00000100" // /* MW 4 */ + 5647 "01110000" // /* MW 3 */ + 5648 "00000001" // /* MW 2 */ + 5649 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5651 "00101000" // /* MW 5 */ + 5652 "00000001" // /* MW 4 */ + 5653 "01110100" // /* MW 3 */ + 5654 "10000001" // /* MW 2 */ + 5655 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "00010100" // /* MW 3 */ + 5658 "00010001" // /* MW 2 */ + 5659 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1202 26 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5661 "00111101" // /* MW 11 */ + 5662 "10001100" // /* MW 10 */ + 5663 "11111011" // /* MW 9 */ + 5664 "10000010" // /* MW 8 */ + 5665 "01111101" // /* MW 7 */ + 5666 "01110010" // /* MW 6 */ + 5667 "00101101" // /* MW 5 */ + 5668 "00101000" // /* MW 4 */ + 5669 "01111000" // /* MW 3 */ + 5670 "00001001" // /* MW 2 */ + 5671 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00101001" // /* MW 9 */ + 5674 "00000110" // /* MW 8 */ + 5675 "10100000" // /* MW 7 */ + 5676 "00011101" // /* MW 6 */ + 5677 "00010100" // /* MW 5 */ + 5678 "00010100" // /* MW 4 */ + 5679 "01110100" // /* MW 3 */ + 5680 "00000001" // /* MW 2 */ + 5681 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5683 "00001001" // /* MW 13 */ + 5684 "01000110" // /* MW 12 */ + 5685 "10100010" // /* MW 11 */ + 5686 "00001111" // /* MW 10 */ + 5687 "10101010" // /* MW 9 */ + 5688 "01011000" // /* MW 8 */ + 5689 "00000000" // /* MW 7 */ + 5690 "00000000" // /* MW 6 */ + 5691 "00101000" // /* MW 5 */ + 5692 "00000001" // /* MW 4 */ + 5693 "01110100" // /* MW 3 */ + 5694 "10000001" // /* MW 2 */ + 5695 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5697 "01010001" // /* MW 15 */ + 5698 "00001001" // /* MW 14 */ + 5699 "11101101" // /* MW 13 */ + 5700 "00000011" // /* MW 12 */ + 5701 "11001001" // /* MW 11 */ + 5702 "00000000" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "00000000" // /* MW 8 */ + 5705 "01011011" // /* MW 7 */ + 5706 "00000001" // /* MW 6 */ + 5707 "00101000" // /* MW 5 */ + 5708 "00100010" // /* MW 4 */ + 5709 "11111000" // /* MW 3 */ + 5710 "00101100" // /* MW 2 */ + 5711 "00000000" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "01010000" // /* MW 15 */ + 5714 "00011011" // /* MW 14 */ + 5715 "11101101" // /* MW 13 */ + 5716 "00000001" // /* MW 12 */ + 5717 "01001001" // /* MW 11 */ + 5718 "00000001" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "00101000" // /* MW 5 */ + 5724 "00101000" // /* MW 4 */ + 5725 "01111000" // /* MW 3 */ + 5726 "00001001" // /* MW 2 */ + 5727 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00110001" // /* MW 15 */ + 5730 "00000000" // /* MW 14 */ + 5731 "01111101" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "01011011" // /* MW 7 */ + 5738 "00000001" // /* MW 6 */ + 5739 "00101000" // /* MW 5 */ + 5740 "00101000" // /* MW 4 */ + 5741 "01111000" // /* MW 3 */ + 5742 "00000001" // /* MW 2 */ + 5743 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00110000" // /* MW 15 */ + 5746 "00010010" // /* MW 14 */ + 5747 "01111101" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "01011011" // /* MW 7 */ + 5754 "00000001" // /* MW 6 */ + 5755 "00101000" // /* MW 5 */ + 5756 "00000001" // /* MW 4 */ + 5757 "01110100" // /* MW 3 */ + 5758 "10000001" // /* MW 2 */ + 5759 "00100010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "01010001" // /* MW 15 */ + 5762 "00001001" // /* MW 14 */ + 5763 "11101101" // /* MW 13 */ + 5764 "00000011" // /* MW 12 */ + 5765 "11001001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "01011011" // /* MW 7 */ + 5770 "00000001" // /* MW 6 */ + 5771 "00101000" // /* MW 5 */ + 5772 "00100010" // /* MW 4 */ + 5773 "11111000" // /* MW 3 */ + 5774 "00101100" // /* MW 2 */ + 5775 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5777 "00001001" // /* MW 13 */ + 5778 "01101010" // /* MW 12 */ + 5779 "10100011" // /* MW 11 */ + 5780 "00011110" // /* MW 10 */ + 5781 "10010000" // /* MW 9 */ + 5782 "01010100" // /* MW 8 */ + 5783 "00000000" // /* MW 7 */ + 5784 "00000000" // /* MW 6 */ + 5785 "10010110" // /* MW 5 */ + 5786 "10111100" // /* MW 4 */ + 5787 "01111100" // /* MW 3 */ + 5788 "00001001" // /* MW 2 */ + 5789 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5791 "00101001" // /* MW 13 */ + 5792 "00000110" // /* MW 12 */ + 5793 "10100000" // /* MW 11 */ + 5794 "00000111" // /* MW 10 */ + 5795 "00111000" // /* MW 9 */ + 5796 "01111100" // /* MW 8 */ + 5797 "00000000" // /* MW 7 */ + 5798 "00000000" // /* MW 6 */ + 5799 "10010110" // /* MW 5 */ + 5800 "00011100" // /* MW 4 */ + 5801 "01111110" // /* MW 3 */ + 5802 "00000001" // /* MW 2 */ + 5803 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5805 "00001001" // /* MW 9 */ + 5806 "01000110" // /* MW 8 */ + 5807 "10100010" // /* MW 7 */ + 5808 "11100100" // /* MW 6 */ + 5809 "00000000" // /* MW 5 */ + 5810 "01010101" // /* MW 4 */ + 5811 "01100001" // /* MW 3 */ + 5812 "10010001" // /* MW 2 */ + 5813 "01100001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5815 "00101001" // /* MW 9 */ + 5816 "00101010" // /* MW 8 */ + 5817 "10100001" // /* MW 7 */ + 5818 "11000100" // /* MW 6 */ + 5819 "00000111" // /* MW 5 */ + 5820 "10010010" // /* MW 4 */ + 5821 "01100001" // /* MW 3 */ + 5822 "11000001" // /* MW 2 */ + 5823 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5825 "00001001" // /* MW 9 */ + 5826 "01101010" // /* MW 8 */ + 5827 "10100011" // /* MW 7 */ + 5828 "11000100" // /* MW 6 */ + 5829 "00000011" // /* MW 5 */ + 5830 "10010010" // /* MW 4 */ + 5831 "01100010" // /* MW 3 */ + 5832 "10000001" // /* MW 2 */ + 5833 "11101011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1285 32 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5835 "00101001" // /* MW 11 */ + 5836 "00000110" // /* MW 10 */ + 5837 "10100000" // /* MW 9 */ + 5838 "11100110" // /* MW 8 */ + 5839 "00000000" // /* MW 7 */ + 5840 "10001111" // /* MW 6 */ + 5841 "00100010" // /* MW 5 */ + 5842 "01010111" // /* MW 4 */ + 5843 "01101111" // /* MW 3 */ + 5844 "10010001" // /* MW 2 */ + 5845 "10110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5847 "00001001" // /* MW 9 */ + 5848 "01000110" // /* MW 8 */ + 5849 "10100010" // /* MW 7 */ + 5850 "11100100" // /* MW 6 */ + 5851 "00000000" // /* MW 5 */ + 5852 "00000110" // /* MW 4 */ + 5853 "01100010" // /* MW 3 */ + 5854 "10010001" // /* MW 2 */ + 5855 "10010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5857 "00101001" // /* MW 7 */ + 5858 "00101010" // /* MW 6 */ + 5859 "10100001" // /* MW 5 */ + 5860 "11000110" // /* MW 4 */ + 5861 "00000011" // /* MW 3 */ + 5862 "10010010" // /* MW 2 */ + 5863 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5865 "00001001" // /* MW 7 */ + 5866 "01101010" // /* MW 6 */ + 5867 "10100011" // /* MW 5 */ + 5868 "11000110" // /* MW 4 */ + 5869 "00000111" // /* MW 3 */ + 5870 "10010010" // /* MW 2 */ + 5871 "00000001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 + 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5873 "00000000" // /* MW 3 */ + 5874 "10001011" // /* MW 2 */ + 5875 "00011111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 750 26 first + 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5877 "00101001" // /* MW 7 */ + 5878 "00101010" // /* MW 6 */ + 5879 "10100001" // /* MW 5 */ + 5880 "11100110" // /* MW 4 */ + 5881 "10100000" // /* MW 3 */ + 5882 "00001011" // /* MW 2 */ + 5883 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 749 26 first + 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5885 "00101001" // /* MW 7 */ + 5886 "00000110" // /* MW 6 */ + 5887 "10100000" // /* MW 5 */ + 5888 "11100110" // /* MW 4 */ + 5889 "10100000" // /* MW 3 */ + 5890 "10001000" // /* MW 2 */ + 5891 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 + 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5893 "00001001" // /* MW 9 */ + 5894 "01101010" // /* MW 8 */ + 5895 "10100011" // /* MW 7 */ + 5896 "11100110" // /* MW 6 */ + 5897 "00000000" // /* MW 5 */ + 5898 "00000101" // /* MW 4 */ + 5899 "00100011" // /* MW 3 */ + 5900 "11110111" // /* MW 2 */ + 5901 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 first + 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5903 "00001001" // /* MW 11 */ + 5904 "01000110" // /* MW 10 */ + 5905 "10100010" // /* MW 9 */ + 5906 "11100110" // /* MW 8 */ + 5907 "10100000" // /* MW 7 */ + 5908 "10000010" // /* MW 6 */ + 5909 "00100101" // /* MW 5 */ + 5910 "11010111" // /* MW 4 */ + 5911 "01101110" // /* MW 3 */ + 5912 "10001001" // /* MW 2 */ + 5913 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 + 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5915 "01110000" // /* MW 7 */ + 5916 "10000000" // /* MW 6 */ + 5917 "11000101" // /* MW 5 */ + 5918 "00000011" // /* MW 4 */ + 5919 "01100000" // /* MW 3 */ + 5920 "10001001" // /* MW 2 */ + 5921 "01100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5923 "01110000" // /* MW 7 */ + 5924 "00000000" // /* MW 6 */ + 5925 "10000001" // /* MW 5 */ + 5926 "00000001" // /* MW 4 */ + 5927 "01100000" // /* MW 3 */ + 5928 "01000001" // /* MW 2 */ + 5929 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5931 "01110000" // /* MW 7 */ + 5932 "01010000" // /* MW 6 */ + 5933 "10000111" // /* MW 5 */ + 5934 "00000000" // /* MW 4 */ + 5935 "11000000" // /* MW 3 */ + 5936 "00010010" // /* MW 2 */ + 5937 "10110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5939 "01110000" // /* MW 7 */ + 5940 "10010000" // /* MW 6 */ + 5941 "11000111" // /* MW 5 */ + 5942 "00000010" // /* MW 4 */ + 5943 "11000000" // /* MW 3 */ + 5944 "00000010" // /* MW 2 */ + 5945 "10100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "01110110" // /* MW 9 */ + 5948 "01100000" // /* MW 8 */ + 5949 "11001000" // /* MW 7 */ + 5950 "00000001" // /* MW 6 */ + 5951 "10010000" // /* MW 5 */ + 5952 "00111011" // /* MW 4 */ + 5953 "01100001" // /* MW 3 */ + 5954 "10010001" // /* MW 2 */ + 5955 "00010011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5957 "01110000" // /* MW 7 */ + 5958 "00000000" // /* MW 6 */ + 5959 "10000011" // /* MW 5 */ + 5960 "00000000" // /* MW 4 */ + 5961 "11000000" // /* MW 3 */ + 5962 "00001010" // /* MW 2 */ + 5963 "01100010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1218 20 first +.src_ref 2 "conv2d_bf16.h" 1287 37 first + 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */ + 5965 "01100000" // /* MW 11 */ + 5966 "00000000" // /* MW 10 */ + 5967 "00000000" // /* MW 9 */ + 5968 "11111010" // /* MW 8 */ + 5969 "00000010" // /* MW 7 */ + 5970 "00100100" // /* MW 6 */ + 5971 "00100000" // /* MW 5 */ + 5972 "01010111" // /* MW 4 */ + 5973 "11000000" // /* MW 3 */ + 5974 "00100010" // /* MW 2 */ + 5975 "01010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 738 8 +.delay_slot + 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5977 "01110000" // /* MW 7 */ + 5978 "01100000" // /* MW 6 */ + 5979 "10101001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "11000000" // /* MW 3 */ + 5982 "00011010" // /* MW 2 */ + 5983 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5985 "01110000" // /* MW 7 */ + 5986 "11000000" // /* MW 6 */ + 5987 "10100111" // /* MW 5 */ + 5988 "00000011" // /* MW 4 */ + 5989 "11000000" // /* MW 3 */ + 5990 "00110010" // /* MW 2 */ + 5991 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5993 "01110110" // /* MW 9 */ + 5994 "01100000" // /* MW 8 */ + 5995 "10110101" // /* MW 7 */ + 5996 "00000000" // /* MW 6 */ + 5997 "10010000" // /* MW 5 */ + 5998 "00101011" // /* MW 4 */ + 5999 "11000101" // /* MW 3 */ + 6000 "00111010" // /* MW 2 */ + 6001 "00010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.delay_slot + 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6003 "01110000" // /* MW 7 */ + 6004 "10000000" // /* MW 6 */ + 6005 "11000010" // /* MW 5 */ + 6006 "00000010" // /* MW 4 */ + 6007 "11000000" // /* MW 3 */ + 6008 "00101010" // /* MW 2 */ + 6009 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.delay_slot + 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6011 "01110000" // /* MW 7 */ + 6012 "11000000" // /* MW 6 */ + 6013 "01001101" // /* MW 5 */ + 6014 "00000000" // /* MW 4 */ + 6015 "01100000" // /* MW 3 */ + 6016 "10001001" // /* MW 2 */ + 6017 "11100001" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "11101100" // /* MW 3 */ + 6020 "11011100" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "11101100" // /* MW 3 */ + 6024 "10111100" // /* MW 2 */ + 6025 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6027 "01110000" // /* MW 7 */ + 6028 "01110110" // /* MW 6 */ + 6029 "10101010" // /* MW 5 */ + 6030 "00000010" // /* MW 4 */ + 6031 "01100000" // /* MW 3 */ + 6032 "01011010" // /* MW 2 */ + 6033 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6035 "01110000" // /* MW 7 */ + 6036 "01110110" // /* MW 6 */ + 6037 "11011010" // /* MW 5 */ + 6038 "00000001" // /* MW 4 */ + 6039 "01100000" // /* MW 3 */ + 6040 "10111010" // /* MW 2 */ + 6041 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */ + 6043 "00100001" // /* MW 9 */ + 6044 "00000000" // /* MW 8 */ + 6045 "00000000" // /* MW 7 */ + 6046 "11111110" // /* MW 6 */ + 6047 "00000010" // /* MW 5 */ + 6048 "00000000" // /* MW 4 */ + 6049 "01100000" // /* MW 3 */ + 6050 "11010010" // /* MW 2 */ + 6051 "00100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6053 "01110000" // /* MW 7 */ + 6054 "01110110" // /* MW 6 */ + 6055 "10100010" // /* MW 5 */ + 6056 "00000010" // /* MW 4 */ + 6057 "01100000" // /* MW 3 */ + 6058 "10111010" // /* MW 2 */ + 6059 "00100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "11101100" // /* MW 3 */ + 6062 "10001100" // /* MW 2 */ + 6063 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6065 "01110000" // /* MW 7 */ + 6066 "01110110" // /* MW 6 */ + 6067 "10010110" // /* MW 5 */ + 6068 "00000010" // /* MW 4 */ + 6069 "01100000" // /* MW 3 */ + 6070 "11010010" // /* MW 2 */ + 6071 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "01110000" // /* MW 7 */ + 6074 "01110110" // /* MW 6 */ + 6075 "10001010" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "01100000" // /* MW 3 */ + 6078 "10111010" // /* MW 2 */ + 6079 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6081 "00000000" // /* MW 15 */ + 6082 "00000000" // /* MW 14 */ + 6083 "01111000" // /* MW 13 */ + 6084 "10100101" // /* MW 12 */ + 6085 "00000001" // /* MW 11 */ + 6086 "00000000" // /* MW 10 */ + 6087 "00000000" // /* MW 9 */ + 6088 "00000000" // /* MW 8 */ + 6089 "10010011" // /* MW 7 */ + 6090 "10100010" // /* MW 6 */ + 6091 "00100100" // /* MW 5 */ + 6092 "00000000" // /* MW 4 */ + 6093 "11110000" // /* MW 3 */ + 6094 "00101100" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 +.src_ref 4 "vector.hpp" 1152 43 + 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6097 "10100011" // /* MW 3 */ + 6098 "11100000" // /* MW 2 */ + 6099 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6101 "11100011" // /* MW 3 */ + 6102 "00010100" // /* MW 2 */ + 6103 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "00100011" // /* MW 3 */ + 6106 "00000100" // /* MW 2 */ + 6107 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6109 "01100011" // /* MW 3 */ + 6110 "00010100" // /* MW 2 */ + 6111 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6113 "00010011" // /* MW 3 */ + 6114 "00000110" // /* MW 2 */ + 6115 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6117 "11100011" // /* MW 3 */ + 6118 "00010101" // /* MW 2 */ + 6119 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6121 "01110000" // /* MW 7 */ + 6122 "10100101" // /* MW 6 */ + 6123 "00000001" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "01100000" // /* MW 3 */ + 6126 "00100100" // /* MW 2 */ + 6127 "10010100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1143 12 first + 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6129 "01011000" // /* MW 11 */ + 6130 "00000000" // /* MW 10 */ + 6131 "01000000" // /* MW 9 */ + 6132 "00000001" // /* MW 8 */ + 6133 "00110101" // /* MW 7 */ + 6134 "00000110" // /* MW 6 */ + 6135 "00100000" // /* MW 5 */ + 6136 "01010111" // /* MW 4 */ + 6137 "01101111" // /* MW 3 */ + 6138 "10010010" // /* MW 2 */ + 6139 "11100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.delay_slot + 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6141 "10000000" // /* MW 3 */ + 6142 "01000100" // /* MW 2 */ + 6143 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.delay_slot + 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6145 "10100000" // /* MW 3 */ + 6146 "01001001" // /* MW 2 */ + 6147 "00011010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6149 "00000001" // /* MW 5 */ + 6150 "00011110" // /* MW 4 */ + 6151 "00000101" // /* MW 3 */ + 6152 "01110010" // /* MW 2 */ + 6153 "11101011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.delay_slot + 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "10000000" // /* MW 3 */ + 6156 "01001110" // /* MW 2 */ + 6157 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */ + 6161 "00000000" // /* MW 5 */ + 6162 "00000000" // /* MW 4 */ + 6163 "01011000" // /* MW 3 */ + 6164 "00001101" // /* MW 2 */ + 6165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 1364 80 +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6177 "01111000" // /* MW 11 */ + 6178 "10010000" // /* MW 10 */ + 6179 "10110011" // /* MW 9 */ + 6180 "00001000" // /* MW 8 */ + 6181 "11100001" // /* MW 7 */ + 6182 "00000100" // /* MW 6 */ + 6183 "10001011" // /* MW 5 */ + 6184 "00001100" // /* MW 4 */ + 6185 "00100010" // /* MW 3 */ + 6186 "01111110" // /* MW 2 */ + 6187 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1369 80 + 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6189 "01111000" // /* MW 11 */ + 6190 "01000000" // /* MW 10 */ + 6191 "01100010" // /* MW 9 */ + 6192 "00000011" // /* MW 8 */ + 6193 "11010100" // /* MW 7 */ + 6194 "00011011" // /* MW 6 */ + 6195 "00001011" // /* MW 5 */ + 6196 "01010110" // /* MW 4 */ + 6197 "10000010" // /* MW 3 */ + 6198 "10010000" // /* MW 2 */ + 6199 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 807 26 +.src_ref 2 "conv2d_bf16.h" 808 26 +.src_ref 2 "conv2d_bf16.h" 809 26 +.src_ref 2 "conv2d_bf16.h" 810 26 +.src_ref 2 "conv2d_bf16.h" 1436 26 +.src_ref 2 "conv2d_bf16.h" 1437 26 +.src_ref 2 "conv2d_bf16.h" 1438 26 +.src_ref 2 "conv2d_bf16.h" 1439 26 + 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111000" // /* MW 9 */ + 6202 "11010000" // /* MW 8 */ + 6203 "00000101" // /* MW 7 */ + 6204 "10001001" // /* MW 6 */ + 6205 "00110001" // /* MW 5 */ + 6206 "00011001" // /* MW 4 */ + 6207 "00000000" // /* MW 3 */ + 6208 "10010100" // /* MW 2 */ + 6209 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 802 83 +.src_ref 2 "conv2d_bf16.h" 1428 39 + 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6211 "01111000" // /* MW 11 */ + 6212 "10010000" // /* MW 10 */ + 6213 "11000111" // /* MW 9 */ + 6214 "11001010" // /* MW 8 */ + 6215 "00100000" // /* MW 7 */ + 6216 "00000001" // /* MW 6 */ + 6217 "00001011" // /* MW 5 */ + 6218 "01011100" // /* MW 4 */ + 6219 "10000110" // /* MW 3 */ + 6220 "10011000" // /* MW 2 */ + 6221 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 794 8 + 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6223 "01111000" // /* MW 11 */ + 6224 "01010000" // /* MW 10 */ + 6225 "10000111" // /* MW 9 */ + 6226 "00001000" // /* MW 8 */ + 6227 "10010000" // /* MW 7 */ + 6228 "00000001" // /* MW 6 */ + 6229 "00001011" // /* MW 5 */ + 6230 "00000010" // /* MW 4 */ + 6231 "00100101" // /* MW 3 */ + 6232 "10000011" // /* MW 2 */ + 6233 "11111010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 794 8 +.src_ref 2 "conv2d_bf16.h" 1455 20 + 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6235 "01111000" // /* MW 9 */ + 6236 "01010000" // /* MW 8 */ + 6237 "01000101" // /* MW 7 */ + 6238 "00001011" // /* MW 6 */ + 6239 "10000000" // /* MW 5 */ + 6240 "00000001" // /* MW 4 */ + 6241 "00100000" // /* MW 3 */ + 6242 "11010110" // /* MW 2 */ + 6243 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 12 + 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6245 "00010000" // /* MW 9 */ + 6246 "01011000" // /* MW 8 */ + 6247 "00110100" // /* MW 7 */ + 6248 "00000101" // /* MW 6 */ + 6249 "00000000" // /* MW 5 */ + 6250 "00000000" // /* MW 4 */ + 6251 "00100000" // /* MW 3 */ + 6252 "00110110" // /* MW 2 */ + 6253 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 first +.src_ref 2 "conv2d_bf16.h" 1873 + 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6255 "01110010" // /* MW 5 */ + 6256 "11011111" // /* MW 4 */ + 6257 "00100110" // /* MW 3 */ + 6258 "10000111" // /* MW 2 */ + 6259 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "11000110" // /* MW 3 */ + 6262 "00011101" // /* MW 2 */ + 6263 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 80 first + 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00000110" // /* MW 3 */ + 6266 "10001010" // /* MW 2 */ + 6267 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 799 87 first + 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "10000110" // /* MW 3 */ + 6270 "10011110" // /* MW 2 */ + 6271 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 83 first + 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "11010110" // /* MW 3 */ + 6274 "00011110" // /* MW 2 */ + 6275 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 83 first + 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11110110" // /* MW 3 */ + 6278 "11001010" // /* MW 2 */ + 6279 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 66 first + 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6281 "10110110" // /* MW 3 */ + 6282 "00010111" // /* MW 2 */ + 6283 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1443 71 first + 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6285 "10010110" // /* MW 3 */ + 6286 "00000111" // /* MW 2 */ + 6287 "00000011" // /* MW 1 */ + 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6289 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1369 89 + 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6291 "00000000" // /* MW 3 */ + 6292 "10011000" // /* MW 2 */ + 6293 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 +.src_ref 2 "conv2d_bf16.h" 1518 37 + 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6295 "00000000" // /* MW 3 */ + 6296 "00000111" // /* MW 2 */ + 6297 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 + 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6299 "00000000" // /* MW 3 */ + 6300 "11011100" // /* MW 2 */ + 6301 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 first + 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6303 "11100000" // /* MW 3 */ + 6304 "00001111" // /* MW 2 */ + 6305 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 89 first + 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6307 "11000000" // /* MW 5 */ + 6308 "00011110" // /* MW 4 */ + 6309 "11101110" // /* MW 3 */ + 6310 "01111111" // /* MW 2 */ + 6311 "11101111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 + 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6313 "01110000" // /* MW 7 */ + 6314 "10010000" // /* MW 6 */ + 6315 "11000111" // /* MW 5 */ + 6316 "00000011" // /* MW 4 */ + 6317 "01100000" // /* MW 3 */ + 6318 "00101011" // /* MW 2 */ + 6319 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1362 31 first +.src_ref 2 "conv2d_bf16.h" 1429 50 +.src_ref 2 "conv2d_bf16.h" 1443 16 first +.loop_nesting 1 + 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6321 "01100000" // /* MW 13 */ + 6322 "10000001" // /* MW 12 */ + 6323 "01110001" // /* MW 11 */ + 6324 "00000010" // /* MW 10 */ + 6325 "10010110" // /* MW 9 */ + 6326 "10001111" // /* MW 8 */ + 6327 "00000000" // /* MW 7 */ + 6328 "00000000" // /* MW 6 */ + 6329 "00101000" // /* MW 5 */ + 6330 "00101000" // /* MW 4 */ + 6331 "01111010" // /* MW 3 */ + 6332 "10000101" // /* MW 2 */ + 6333 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1364 31 first +.src_ref 2 "conv2d_bf16.h" 1443 16 + 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6335 "00010000" // /* MW 11 */ + 6336 "11001000" // /* MW 10 */ + 6337 "10111100" // /* MW 9 */ + 6338 "00000101" // /* MW 8 */ + 6339 "00000000" // /* MW 7 */ + 6340 "00000000" // /* MW 6 */ + 6341 "00101000" // /* MW 5 */ + 6342 "00101000" // /* MW 4 */ + 6343 "01111010" // /* MW 3 */ + 6344 "00001101" // /* MW 2 */ + 6345 "11001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1428 39 first +.src_ref 2 "conv2d_bf16.h" 1443 16 first + 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6347 "01001000" // /* MW 11 */ + 6348 "00111111" // /* MW 10 */ + 6349 "10111111" // /* MW 9 */ + 6350 "01101110" // /* MW 8 */ + 6351 "11101001" // /* MW 7 */ + 6352 "00000101" // /* MW 6 */ + 6353 "00101000" // /* MW 5 */ + 6354 "00000101" // /* MW 4 */ + 6355 "01110110" // /* MW 3 */ + 6356 "10000001" // /* MW 2 */ + 6357 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6359 "01111110" // /* MW 9 */ + 6360 "10010000" // /* MW 8 */ + 6361 "01000111" // /* MW 7 */ + 6362 "00000001" // /* MW 6 */ + 6363 "00010100" // /* MW 5 */ + 6364 "00000001" // /* MW 4 */ + 6365 "01110011" // /* MW 3 */ + 6366 "01011001" // /* MW 2 */ + 6367 "01010101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1367 31 first + 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "00101000" // /* MW 5 */ + 6370 "00000001" // /* MW 4 */ + 6371 "01110110" // /* MW 3 */ + 6372 "10010101" // /* MW 2 */ + 6373 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1369 31 first + 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6375 "10101000" // /* MW 5 */ + 6376 "00100001" // /* MW 4 */ + 6377 "01111010" // /* MW 3 */ + 6378 "00011101" // /* MW 2 */ + 6379 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1372 31 first + 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6381 "00101000" // /* MW 5 */ + 6382 "00101000" // /* MW 4 */ + 6383 "01111010" // /* MW 3 */ + 6384 "10100101" // /* MW 2 */ + 6385 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1374 31 first + 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6387 "00101000" // /* MW 5 */ + 6388 "00101000" // /* MW 4 */ + 6389 "01111010" // /* MW 3 */ + 6390 "00101101" // /* MW 2 */ + 6391 "11001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1377 31 first + 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6393 "10101000" // /* MW 5 */ + 6394 "00000000" // /* MW 4 */ + 6395 "01110110" // /* MW 3 */ + 6396 "10110101" // /* MW 2 */ + 6397 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1379 31 first + 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6399 "00101000" // /* MW 5 */ + 6400 "00000011" // /* MW 4 */ + 6401 "01110110" // /* MW 3 */ + 6402 "00111101" // /* MW 2 */ + 6403 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 first + 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6405 "10101000" // /* MW 5 */ + 6406 "00000011" // /* MW 4 */ + 6407 "01110110" // /* MW 3 */ + 6408 "01000101" // /* MW 2 */ + 6409 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6411 "11101110" // /* MW 9 */ + 6412 "00101101" // /* MW 8 */ + 6413 "01101001" // /* MW 7 */ + 6414 "00000001" // /* MW 6 */ + 6415 "00010100" // /* MW 5 */ + 6416 "00010010" // /* MW 4 */ + 6417 "01110101" // /* MW 3 */ + 6418 "01001101" // /* MW 2 */ + 6419 "01101000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6421 "11101110" // /* MW 9 */ + 6422 "00101111" // /* MW 8 */ + 6423 "10101001" // /* MW 7 */ + 6424 "00000010" // /* MW 6 */ + 6425 "00010100" // /* MW 5 */ + 6426 "00010100" // /* MW 4 */ + 6427 "01110101" // /* MW 3 */ + 6428 "10000001" // /* MW 2 */ + 6429 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6431 "01101001" // /* MW 11 */ + 6432 "00001011" // /* MW 10 */ + 6433 "01001000" // /* MW 9 */ + 6434 "11000010" // /* MW 8 */ + 6435 "11011011" // /* MW 7 */ + 6436 "00010001" // /* MW 6 */ + 6437 "00101010" // /* MW 5 */ + 6438 "00101000" // /* MW 4 */ + 6439 "01111010" // /* MW 3 */ + 6440 "00000001" // /* MW 2 */ + 6441 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6443 "01101001" // /* MW 9 */ + 6444 "00110101" // /* MW 8 */ + 6445 "01001001" // /* MW 7 */ + 6446 "11000010" // /* MW 6 */ + 6447 "11011111" // /* MW 5 */ + 6448 "00010001" // /* MW 4 */ + 6449 "01110101" // /* MW 3 */ + 6450 "10000001" // /* MW 2 */ + 6451 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6453 "01101001" // /* MW 3 */ + 6454 "01001001" // /* MW 2 */ + 6455 "01001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6457 "01101001" // /* MW 3 */ + 6458 "01110101" // /* MW 2 */ + 6459 "01001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.src_ref 2 "conv2d_bf16.h" 1437 26 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6461 "00111101" // /* MW 9 */ + 6462 "10000100" // /* MW 8 */ + 6463 "10100001" // /* MW 7 */ + 6464 "11000110" // /* MW 6 */ + 6465 "01011111" // /* MW 5 */ + 6466 "10001011" // /* MW 4 */ + 6467 "10101010" // /* MW 3 */ + 6468 "00000000" // /* MW 2 */ + 6469 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1436 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6471 "00111101" // /* MW 7 */ + 6472 "10000000" // /* MW 6 */ + 6473 "10100000" // /* MW 5 */ + 6474 "00000000" // /* MW 4 */ + 6475 "10010100" // /* MW 3 */ + 6476 "00000001" // /* MW 2 */ + 6477 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1438 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6479 "00111101" // /* MW 7 */ + 6480 "10001000" // /* MW 6 */ + 6481 "10100010" // /* MW 5 */ + 6482 "00000000" // /* MW 4 */ + 6483 "11010100" // /* MW 3 */ + 6484 "00000001" // /* MW 2 */ + 6485 "00000011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1439 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6487 "00111101" // /* MW 9 */ + 6488 "10001100" // /* MW 8 */ + 6489 "10100011" // /* MW 7 */ + 6490 "00011101" // /* MW 6 */ + 6491 "00010100" // /* MW 5 */ + 6492 "00010010" // /* MW 4 */ + 6493 "01110101" // /* MW 3 */ + 6494 "00000001" // /* MW 2 */ + 6495 "01010101" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6497 "10110111" // /* MW 5 */ + 6498 "00010110" // /* MW 4 */ + 6499 "10000010" // /* MW 3 */ + 6500 "10000010" // /* MW 2 */ + 6501 "10100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6503 "00001001" // /* MW 9 */ + 6504 "00101010" // /* MW 8 */ + 6505 "10011001" // /* MW 7 */ + 6506 "11000110" // /* MW 6 */ + 6507 "01011111" // /* MW 5 */ + 6508 "00111100" // /* MW 4 */ + 6509 "00101010" // /* MW 3 */ + 6510 "00101000" // /* MW 2 */ + 6511 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6513 "00001001" // /* MW 9 */ + 6514 "00000100" // /* MW 8 */ + 6515 "10011000" // /* MW 7 */ + 6516 "11000110" // /* MW 6 */ + 6517 "01011011" // /* MW 5 */ + 6518 "10111100" // /* MW 4 */ + 6519 "10101001" // /* MW 3 */ + 6520 "00000000" // /* MW 2 */ + 6521 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6523 "00001001" // /* MW 7 */ + 6524 "01101000" // /* MW 6 */ + 6525 "10011011" // /* MW 5 */ + 6526 "00000000" // /* MW 4 */ + 6527 "10010100" // /* MW 3 */ + 6528 "00000001" // /* MW 2 */ + 6529 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6531 "00001001" // /* MW 13 */ + 6532 "01000110" // /* MW 12 */ + 6533 "10011010" // /* MW 11 */ + 6534 "01101100" // /* MW 10 */ + 6535 "00000101" // /* MW 9 */ + 6536 "00000000" // /* MW 8 */ + 6537 "00000000" // /* MW 7 */ + 6538 "00000000" // /* MW 6 */ + 6539 "10101000" // /* MW 5 */ + 6540 "00000011" // /* MW 4 */ + 6541 "01110110" // /* MW 3 */ + 6542 "10000001" // /* MW 2 */ + 6543 "00000010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6545 "00000000" // /* MW 15 */ + 6546 "00000000" // /* MW 14 */ + 6547 "11101000" // /* MW 13 */ + 6548 "10101111" // /* MW 12 */ + 6549 "01000101" // /* MW 11 */ + 6550 "00000001" // /* MW 10 */ + 6551 "00000000" // /* MW 9 */ + 6552 "00000000" // /* MW 8 */ + 6553 "01011011" // /* MW 7 */ + 6554 "00000001" // /* MW 6 */ + 6555 "00101000" // /* MW 5 */ + 6556 "00100100" // /* MW 4 */ + 6557 "01111010" // /* MW 3 */ + 6558 "00000001" // /* MW 2 */ + 6559 "01010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 801 30 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6561 "11100000" // /* MW 11 */ + 6562 "10101101" // /* MW 10 */ + 6563 "10000101" // /* MW 9 */ + 6564 "00000000" // /* MW 8 */ + 6565 "10001011" // /* MW 7 */ + 6566 "10011100" // /* MW 6 */ + 6567 "00100101" // /* MW 5 */ + 6568 "10010111" // /* MW 4 */ + 6569 "11111111" // /* MW 3 */ + 6570 "00001100" // /* MW 2 */ + 6571 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.src_ref 2 "conv2d_bf16.h" 1517 32 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6573 "00001001" // /* MW 11 */ + 6574 "00101010" // /* MW 10 */ + 6575 "10011001" // /* MW 9 */ + 6576 "11000110" // /* MW 8 */ + 6577 "01011111" // /* MW 7 */ + 6578 "00111100" // /* MW 6 */ + 6579 "00100010" // /* MW 5 */ + 6580 "00010111" // /* MW 4 */ + 6581 "01101111" // /* MW 3 */ + 6582 "10010001" // /* MW 2 */ + 6583 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.src_ref 2 "conv2d_bf16.h" 1518 37 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6585 "00001001" // /* MW 11 */ + 6586 "00000100" // /* MW 10 */ + 6587 "10011000" // /* MW 9 */ + 6588 "11000110" // /* MW 8 */ + 6589 "01011011" // /* MW 7 */ + 6590 "10111100" // /* MW 6 */ + 6591 "00100001" // /* MW 5 */ + 6592 "10010111" // /* MW 4 */ + 6593 "01101111" // /* MW 3 */ + 6594 "10010001" // /* MW 2 */ + 6595 "01110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6597 "00001001" // /* MW 7 */ + 6598 "01101000" // /* MW 6 */ + 6599 "10011011" // /* MW 5 */ + 6600 "11100110" // /* MW 4 */ + 6601 "10100000" // /* MW 3 */ + 6602 "10001000" // /* MW 2 */ + 6603 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.src_ref 2 "conv2d_bf16.h" 1428 39 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6605 "00001001" // /* MW 9 */ + 6606 "01000110" // /* MW 8 */ + 6607 "10011010" // /* MW 7 */ + 6608 "11100110" // /* MW 6 */ + 6609 "10000000" // /* MW 5 */ + 6610 "10011011" // /* MW 4 */ + 6611 "00100000" // /* MW 3 */ + 6612 "10110111" // /* MW 2 */ + 6613 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 30 first + 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6615 "01011011" // /* MW 3 */ + 6616 "00001011" // /* MW 2 */ + 6617 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6619 "01011111" // /* MW 3 */ + 6620 "10001011" // /* MW 2 */ + 6621 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6623 "00001001" // /* MW 7 */ + 6624 "00000100" // /* MW 6 */ + 6625 "10011000" // /* MW 5 */ + 6626 "11000110" // /* MW 4 */ + 6627 "01011011" // /* MW 3 */ + 6628 "10111100" // /* MW 2 */ + 6629 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6631 "00001001" // /* MW 7 */ + 6632 "00101010" // /* MW 6 */ + 6633 "10011001" // /* MW 5 */ + 6634 "11000110" // /* MW 4 */ + 6635 "01011111" // /* MW 3 */ + 6636 "00111100" // /* MW 2 */ + 6637 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6639 "00001001" // /* MW 3 */ + 6640 "01000110" // /* MW 2 */ + 6641 "10011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first + 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6643 "00001001" // /* MW 3 */ + 6644 "01101000" // /* MW 2 */ + 6645 "10011011" // /* MW 1 */ + 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6647 "00000000" // /* MW 1 */ + 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6649 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6651 "00010110" // /* MW 3 */ + 6652 "00010000" // /* MW 2 */ + 6653 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6655 "10010110" // /* MW 3 */ + 6656 "10010000" // /* MW 2 */ + 6657 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1455 20 first + 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */ + 6659 "01100001" // /* MW 9 */ + 6660 "00000000" // /* MW 8 */ + 6661 "00000000" // /* MW 7 */ + 6662 "01001110" // /* MW 6 */ + 6663 "00000011" // /* MW 5 */ + 6664 "00101010" // /* MW 4 */ + 6665 "11000000" // /* MW 3 */ + 6666 "00011010" // /* MW 2 */ + 6667 "00010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.delay_slot + 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6669 "01010110" // /* MW 3 */ + 6670 "00010000" // /* MW 2 */ + 6671 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6673 "10010110" // /* MW 3 */ + 6674 "00010001" // /* MW 2 */ + 6675 "00001001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6677 "11010110" // /* MW 3 */ + 6678 "10010001" // /* MW 2 */ + 6679 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6681 "00010110" // /* MW 3 */ + 6682 "10010001" // /* MW 2 */ + 6683 "00001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6685 "01010110" // /* MW 3 */ + 6686 "00010001" // /* MW 2 */ + 6687 "00001100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6689 "11101100" // /* MW 3 */ + 6690 "11011100" // /* MW 2 */ + 6691 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6693 "11101100" // /* MW 3 */ + 6694 "10001100" // /* MW 2 */ + 6695 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6697 "01110000" // /* MW 7 */ + 6698 "01110110" // /* MW 6 */ + 6699 "10101010" // /* MW 5 */ + 6700 "00000010" // /* MW 4 */ + 6701 "01100000" // /* MW 3 */ + 6702 "01011010" // /* MW 2 */ + 6703 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6705 "01110000" // /* MW 7 */ + 6706 "01110110" // /* MW 6 */ + 6707 "01011010" // /* MW 5 */ + 6708 "00000000" // /* MW 4 */ + 6709 "01100000" // /* MW 3 */ + 6710 "10001010" // /* MW 2 */ + 6711 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */ + 6713 "00100001" // /* MW 9 */ + 6714 "00000000" // /* MW 8 */ + 6715 "00000000" // /* MW 7 */ + 6716 "01010010" // /* MW 6 */ + 6717 "00000011" // /* MW 5 */ + 6718 "00000000" // /* MW 4 */ + 6719 "01100000" // /* MW 3 */ + 6720 "11010010" // /* MW 2 */ + 6721 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6723 "01110000" // /* MW 7 */ + 6724 "01110110" // /* MW 6 */ + 6725 "10001010" // /* MW 5 */ + 6726 "00000010" // /* MW 4 */ + 6727 "01100000" // /* MW 3 */ + 6728 "10001010" // /* MW 2 */ + 6729 "10100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6731 "11101100" // /* MW 3 */ + 6732 "10111100" // /* MW 2 */ + 6733 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6735 "01110000" // /* MW 7 */ + 6736 "01110110" // /* MW 6 */ + 6737 "10010110" // /* MW 5 */ + 6738 "00000010" // /* MW 4 */ + 6739 "01100000" // /* MW 3 */ + 6740 "01010010" // /* MW 2 */ + 6741 "01101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6743 "01110010" // /* MW 9 */ + 6744 "01110110" // /* MW 8 */ + 6745 "00100010" // /* MW 7 */ + 6746 "00000010" // /* MW 6 */ + 6747 "01010011" // /* MW 5 */ + 6748 "00010100" // /* MW 4 */ + 6749 "11110111" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "00000000" // /* MW 15 */ + 6754 "00000000" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "10010011" // /* MW 7 */ + 6762 "11100010" // /* MW 6 */ + 6763 "00100100" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 +.src_ref 4 "vector.hpp" 1152 43 + 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "10100011" // /* MW 3 */ + 6770 "01100000" // /* MW 2 */ + 6771 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6773 "11100011" // /* MW 3 */ + 6774 "00010100" // /* MW 2 */ + 6775 "00001100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6777 "00100011" // /* MW 3 */ + 6778 "00000100" // /* MW 2 */ + 6779 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6781 "01100011" // /* MW 3 */ + 6782 "00010100" // /* MW 2 */ + 6783 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6785 "10100011" // /* MW 3 */ + 6786 "01100001" // /* MW 2 */ + 6787 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6789 "11100011" // /* MW 3 */ + 6790 "00010101" // /* MW 2 */ + 6791 "00001111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6793 "01110000" // /* MW 7 */ + 6794 "10100101" // /* MW 6 */ + 6795 "00000001" // /* MW 5 */ + 6796 "00000000" // /* MW 4 */ + 6797 "01100000" // /* MW 3 */ + 6798 "00100100" // /* MW 2 */ + 6799 "10011100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1337 12 first + 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6801 "01000000" // /* MW 5 */ + 6802 "11110101" // /* MW 4 */ + 6803 "01101110" // /* MW 3 */ + 6804 "11000010" // /* MW 2 */ + 6805 "01100010" // /* MW 1 */ +.delay_slot + 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "10010000" // /* MW 3 */ + 6808 "10001011" // /* MW 2 */ + 6809 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6811 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6813 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6817 "00000000" // /* MW 15 */ + 6818 "00000000" // /* MW 14 */ + 6819 "01111000" // /* MW 13 */ + 6820 "10100101" // /* MW 12 */ + 6821 "00000001" // /* MW 11 */ + 6822 "00000000" // /* MW 10 */ + 6823 "00000000" // /* MW 9 */ + 6824 "00000000" // /* MW 8 */ + 6825 "01011011" // /* MW 7 */ + 6826 "00000001" // /* MW 6 */ + 6827 "00100000" // /* MW 5 */ + 6828 "00000000" // /* MW 4 */ + 6829 "11110000" // /* MW 3 */ + 6830 "00101100" // /* MW 2 */ + 6831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "11110001" // /* MW 3 */ + 6834 "11101101" // /* MW 2 */ + 6835 "00000111" // /* MW 1 */ + 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "10010001" // /* MW 3 */ + 6838 "11110001" // /* MW 2 */ + 6839 "00000111" // /* MW 1 */ + 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6841 "00110001" // /* MW 3 */ + 6842 "11110101" // /* MW 2 */ + 6843 "00000111" // /* MW 1 */ + 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6845 "00011001" // /* MW 3 */ + 6846 "11101011" // /* MW 2 */ + 6847 "00000111" // /* MW 1 */ + 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "10011001" // /* MW 3 */ + 6850 "11111011" // /* MW 2 */ + 6851 "00000111" // /* MW 1 */ + 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "11010001" // /* MW 3 */ + 6854 "11111101" // /* MW 2 */ + 6855 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 first + 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6857 "00000000" // /* MW 3 */ + 6858 "00101000" // /* MW 2 */ + 6859 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 +.delay_slot + 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6861 "00000001" // /* MW 5 */ + 6862 "00000000" // /* MW 4 */ + 6863 "00000000" // /* MW 3 */ + 6864 "11110000" // /* MW 2 */ + 6865 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6871 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + 6873 "00000000" // /* MW 1 */ +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 74 first +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 81 4 +.function_start + 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6881 "00010000" // /* MW 9 */ + 6882 "00100000" // /* MW 8 */ + 6883 "00110010" // /* MW 7 */ + 6884 "11110010" // /* MW 6 */ + 6885 "00000001" // /* MW 5 */ + 6886 "00000000" // /* MW 4 */ + 6887 "00000000" // /* MW 3 */ + 6888 "00100000" // /* MW 2 */ + 6889 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 first +.src_ref 7 "superkernels.cpp" 81 4 + 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6891 "01111000" // /* MW 9 */ + 6892 "11010000" // /* MW 8 */ + 6893 "01001011" // /* MW 7 */ + 6894 "00001000" // /* MW 6 */ + 6895 "00010000" // /* MW 5 */ + 6896 "00000000" // /* MW 4 */ + 6897 "11010000" // /* MW 3 */ + 6898 "11000010" // /* MW 2 */ + 6899 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 74 + 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6901 "00000001" // /* MW 5 */ + 6902 "00000000" // /* MW 4 */ + 6903 "00000000" // /* MW 3 */ + 6904 "00001000" // /* MW 2 */ + 6905 "00000000" // /* MW 1 */ + 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6907 "01010101" // /* MW 3 */ + 6908 "11110000" // /* MW 2 */ + 6909 "00001111" // /* MW 1 */ + 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6911 "00000000" // /* MW 1 */ + 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6913 "00000000" // /* MW 1 */ + 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6915 "00000000" // /* MW 1 */ + 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 79 16 + 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 6919 "00000001" // /* MW 5 */ + 6920 "01000000" // /* MW 4 */ + 6921 "11011000" // /* MW 3 */ + 6922 "00001101" // /* MW 2 */ + 6923 "10000000" // /* MW 1 */ +.delay_slot + 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10011101" // /* MW 3 */ + 6926 "11111011" // /* MW 2 */ + 6927 "00001111" // /* MW 1 */ +.delay_slot + 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "00011101" // /* MW 3 */ + 6930 "11111111" // /* MW 2 */ + 6931 "00001111" // /* MW 1 */ +.delay_slot + 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "10011101" // /* MW 3 */ + 6934 "11101101" // /* MW 2 */ + 6935 "00001111" // /* MW 1 */ +.delay_slot + 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6937 "00111101" // /* MW 3 */ + 6938 "11110100" // /* MW 2 */ + 6939 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6940 "01000100" // MOVXM r15, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6941 "00000000" // /* MW 5 */ + 6942 "10101100" // /* MW 4 */ + 6943 "11000111" // /* MW 3 */ + 6944 "00000111" // /* MW 2 */ + 6945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6947 "00010001" // /* MW 9 */ + 6948 "00110100" // /* MW 8 */ + 6949 "10110010" // /* MW 7 */ + 6950 "11110011" // /* MW 6 */ + 6951 "00000001" // /* MW 5 */ + 6952 "00000000" // /* MW 4 */ + 6953 "01100000" // /* MW 3 */ + 6954 "10010001" // /* MW 2 */ + 6955 "11010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6957 "00010000" // /* MW 11 */ + 6958 "00110010" // /* MW 10 */ + 6959 "10110010" // /* MW 9 */ + 6960 "11110011" // /* MW 8 */ + 6961 "00000001" // /* MW 7 */ + 6962 "00000000" // /* MW 6 */ + 6963 "00001011" // /* MW 5 */ + 6964 "10001111" // /* MW 4 */ + 6965 "11100001" // /* MW 3 */ + 6966 "11000000" // /* MW 2 */ + 6967 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6969 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 6973 "00000001" // /* MW 5 */ + 6974 "00000000" // /* MW 4 */ + 6975 "01100000" // /* MW 3 */ + 6976 "00000101" // /* MW 2 */ + 6977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6981 "00110001" // /* MW 3 */ + 6982 "00100000" // /* MW 2 */ + 6983 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6985 "00000101" // /* MW 3 */ + 6986 "00100000" // /* MW 2 */ + 6987 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6989 "01110000" // /* MW 7 */ + 6990 "01100000" // /* MW 6 */ + 6991 "10110000" // /* MW 5 */ + 6992 "00000011" // /* MW 4 */ + 6993 "00110000" // /* MW 3 */ + 6994 "11000010" // /* MW 2 */ + 6995 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6997 "01110000" // /* MW 11 */ + 6998 "01100000" // /* MW 10 */ + 6999 "00110010" // /* MW 9 */ + 7000 "00000000" // /* MW 8 */ + 7001 "01011011" // /* MW 7 */ + 7002 "00000001" // /* MW 6 */ + 7003 "00100000" // /* MW 5 */ + 7004 "00000000" // /* MW 4 */ + 7005 "11110000" // /* MW 3 */ + 7006 "00101100" // /* MW 2 */ + 7007 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.return_address + 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7009 "10000101" // /* MW 3 */ + 7010 "01100111" // /* MW 2 */ + 7011 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 +.src_ref 7 "superkernels.cpp" 87 35 first + 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7013 "00010000" // /* MW 9 */ + 7014 "00100010" // /* MW 8 */ + 7015 "10110010" // /* MW 7 */ + 7016 "11110000" // /* MW 6 */ + 7017 "00000001" // /* MW 5 */ + 7018 "00000000" // /* MW 4 */ + 7019 "01010000" // /* MW 3 */ + 7020 "11000001" // /* MW 2 */ + 7021 "01001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 37 first +.src_ref 7 "superkernels.cpp" 89 13 + 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7023 "00010000" // /* MW 9 */ + 7024 "00110000" // /* MW 8 */ + 7025 "00110010" // /* MW 7 */ + 7026 "11110000" // /* MW 6 */ + 7027 "00000001" // /* MW 5 */ + 7028 "00000000" // /* MW 4 */ + 7029 "01010000" // /* MW 3 */ + 7030 "11001111" // /* MW 2 */ + 7031 "01000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 73 + 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7033 "00111010" // /* MW 3 */ + 7034 "00000110" // /* MW 2 */ + 7035 "00000010" // /* MW 1 */ + 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7037 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 110 + 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "01011010" // /* MW 3 */ + 7040 "00010110" // /* MW 2 */ + 7041 "00000010" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 first +.src_ref 7 "superkernels.cpp" 113 2 + 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7047 "01110000" // /* MW 7 */ + 7048 "01100000" // /* MW 6 */ + 7049 "10110110" // /* MW 5 */ + 7050 "00000000" // /* MW 4 */ + 7051 "00110000" // /* MW 3 */ + 7052 "11000010" // /* MW 2 */ + 7053 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 57 first + 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7055 "00001111" // /* MW 3 */ + 7056 "11100001" // /* MW 2 */ + 7057 "00010100" // /* MW 1 */ + 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7059 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 94 + 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001111" // /* MW 3 */ + 7062 "01100001" // /* MW 2 */ + 7063 "00010100" // /* MW 1 */ + 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 28 first + 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7067 "00001111" // /* MW 3 */ + 7068 "10100001" // /* MW 2 */ + 7069 "00010100" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 13 +.src_ref 7 "superkernels.cpp" 113 2 + 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7073 "00000000" // /* MW 15 */ + 7074 "00000000" // /* MW 14 */ + 7075 "01111000" // /* MW 13 */ + 7076 "01100000" // /* MW 12 */ + 7077 "00110111" // /* MW 11 */ + 7078 "00000000" // /* MW 10 */ + 7079 "00000000" // /* MW 9 */ + 7080 "10000000" // /* MW 8 */ + 7081 "00010001" // /* MW 7 */ + 7082 "00000110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 106 12 +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 7 "superkernels.cpp" 117 6 +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00100100" // /* MW 8 */ + 7091 "00110010" // /* MW 7 */ + 7092 "11110011" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "00100000" // /* MW 3 */ + 7096 "10111110" // /* MW 2 */ + 7097 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.src_ref 7 "superkernels.cpp" 108 13 + 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "00010000" // /* MW 9 */ + 7100 "00100110" // /* MW 8 */ + 7101 "00110010" // /* MW 7 */ + 7102 "11110001" // /* MW 6 */ + 7103 "00000001" // /* MW 5 */ + 7104 "00000000" // /* MW 4 */ + 7105 "11010000" // /* MW 3 */ + 7106 "11000010" // /* MW 2 */ + 7107 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 +.src_ref 7 "superkernels.cpp" 108 13 first +.src_ref 7 "superkernels.cpp" 139 6 +.src_ref 7 "superkernels.cpp" 140 14 + 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "00010000" // /* MW 9 */ + 7110 "00100000" // /* MW 8 */ + 7111 "10110010" // /* MW 7 */ + 7112 "11110011" // /* MW 6 */ + 7113 "00000001" // /* MW 5 */ + 7114 "00000000" // /* MW 4 */ + 7115 "11010000" // /* MW 3 */ + 7116 "11000110" // /* MW 2 */ + 7117 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first + 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "01010110" // /* MW 3 */ + 7120 "00000110" // /* MW 2 */ + 7121 "00000111" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ + 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7125 "00000000" // /* MW 1 */ + 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7127 "00000000" // /* MW 1 */ + 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7129 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 110 6 first +.src_ref 7 "superkernels.cpp" 110 17 first + 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */ + 7131 "00000001" // /* MW 5 */ + 7132 "01000000" // /* MW 4 */ + 7133 "00011000" // /* MW 3 */ + 7134 "00001110" // /* MW 2 */ + 7135 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 108 13 first +.delay_slot + 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "00000111" // /* MW 3 */ + 7138 "01100010" // /* MW 2 */ + 7139 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.src_ref 7 "superkernels.cpp" 108 13 +.delay_slot + 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7141 "00001110" // /* MW 5 */ + 7142 "01000100" // /* MW 4 */ + 7143 "00111001" // /* MW 3 */ + 7144 "11000110" // /* MW 2 */ + 7145 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.delay_slot + 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00000111" // /* MW 3 */ + 7148 "00100110" // /* MW 2 */ + 7149 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 +.delay_slot + 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "01110001" // /* MW 3 */ + 7152 "00000110" // /* MW 2 */ + 7153 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.delay_slot + 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "00110001" // /* MW 3 */ + 7156 "00000110" // /* MW 2 */ + 7157 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7159 "10000110" // /* MW 3 */ + 7160 "01100111" // /* MW 2 */ + 7161 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7163 "01110110" // /* MW 3 */ + 7164 "11111111" // /* MW 2 */ + 7165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7167 "00010110" // /* MW 3 */ + 7168 "11111110" // /* MW 2 */ + 7169 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7171 "00110110" // /* MW 3 */ + 7172 "11111110" // /* MW 2 */ + 7173 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "00010110" // /* MW 3 */ + 7178 "01000110" // /* MW 2 */ + 7179 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7181 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00000010" // /* MW 3 */ + 7190 "01100001" // /* MW 2 */ + 7191 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010001" // /* MW 3 */ + 7194 "00000110" // /* MW 2 */ + 7195 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7197 "11111101" // /* MW 3 */ + 7198 "11100010" // /* MW 2 */ + 7199 "00010111" // /* MW 1 */ + 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7201 "00000000" // /* MW 1 */ + 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7203 "00000000" // /* MW 1 */ + 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7207 "00011000" // /* MW 9 */ + 7208 "00010011" // /* MW 8 */ + 7209 "00000100" // /* MW 7 */ + 7210 "00000000" // /* MW 6 */ + 7211 "01011011" // /* MW 5 */ + 7212 "00000001" // /* MW 4 */ + 7213 "11110000" // /* MW 3 */ + 7214 "00101100" // /* MW 2 */ + 7215 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.src_ref 7 "superkernels.cpp" 113 2 first +.no_stack_arguments + 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 7217 "00000001" // /* MW 5 */ + 7218 "00000000" // /* MW 4 */ + 7219 "10111000" // /* MW 3 */ + 7220 "00001000" // /* MW 2 */ + 7221 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7222 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7223 "00000000" // /* MW 5 */ + 7224 "11001100" // /* MW 4 */ + 7225 "11000110" // /* MW 3 */ + 7226 "00000111" // /* MW 2 */ + 7227 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7233 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7235 "00011100" // /* MW 13 */ + 7236 "00000000" // /* MW 12 */ + 7237 "00000000" // /* MW 11 */ + 7238 "00000111" // /* MW 10 */ + 7239 "00111101" // /* MW 9 */ + 7240 "01010011" // /* MW 8 */ + 7241 "00000000" // /* MW 7 */ + 7242 "00000000" // /* MW 6 */ + 7243 "10110110" // /* MW 5 */ + 7244 "00000010" // /* MW 4 */ + 7245 "11110000" // /* MW 3 */ + 7246 "00101100" // /* MW 2 */ + 7247 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 first +.src_ref 7 "superkernels.cpp" 117 20 +.return_address + 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7249 "00010000" // /* MW 9 */ + 7250 "00100010" // /* MW 8 */ + 7251 "10110010" // /* MW 7 */ + 7252 "11110000" // /* MW 6 */ + 7253 "00000001" // /* MW 5 */ + 7254 "00000000" // /* MW 4 */ + 7255 "11010000" // /* MW 3 */ + 7256 "11000010" // /* MW 2 */ + 7257 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 20 + 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7259 "00110110" // /* MW 3 */ + 7260 "00000110" // /* MW 2 */ + 7261 "00000001" // /* MW 1 */ + 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7263 "00010001" // /* MW 3 */ + 7264 "11110000" // /* MW 2 */ + 7265 "00000111" // /* MW 1 */ + 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7267 "00000000" // /* MW 1 */ + 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7269 "00000000" // /* MW 1 */ + 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7271 "00000000" // /* MW 1 */ + 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7273 "00000000" // /* MW 1 */ + 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7275 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 17 + 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7277 "00001000" // /* MW 3 */ + 7278 "01100001" // /* MW 2 */ + 7279 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 + 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */ + 7281 "00000001" // /* MW 5 */ + 7282 "01000000" // /* MW 4 */ + 7283 "01100000" // /* MW 3 */ + 7284 "00001110" // /* MW 2 */ + 7285 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 7 "superkernels.cpp" 140 14 +.delay_slot + 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7287 "00000001" // /* MW 3 */ + 7288 "00110000" // /* MW 2 */ + 7289 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7299 "00010100" // /* MW 5 */ + 7300 "11001111" // /* MW 4 */ + 7301 "10100010" // /* MW 3 */ + 7302 "00000000" // /* MW 2 */ + 7303 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00110110" // /* MW 3 */ + 7306 "00000110" // /* MW 2 */ + 7307 "00000001" // /* MW 1 */ + 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7309 "00000000" // /* MW 1 */ + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ + 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7313 "00000000" // /* MW 1 */ + 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7315 "00000000" // /* MW 1 */ + 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7317 "00000000" // /* MW 1 */ + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "00001000" // /* MW 3 */ + 7322 "01010001" // /* MW 2 */ + 7323 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 first +.src_ref 1 "io_buffer_main.h" 327 40 first + 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7325 "00100011" // /* MW 5 */ + 7326 "00001110" // /* MW 4 */ + 7327 "11011100" // /* MW 3 */ + 7328 "11000110" // /* MW 2 */ + 7329 "00111100" // /* MW 1 */ + 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7331 "00000000" // /* MW 1 */ + 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7333 "00000000" // /* MW 1 */ + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ + 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7337 "00000000" // /* MW 1 */ + 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7339 "00000000" // /* MW 1 */ + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7343 "00010001" // /* MW 3 */ + 7344 "00100001" // /* MW 2 */ + 7345 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7347 "00011100" // /* MW 13 */ + 7348 "00000000" // /* MW 12 */ + 7349 "00000000" // /* MW 11 */ + 7350 "01010111" // /* MW 10 */ + 7351 "00011010" // /* MW 9 */ + 7352 "01000000" // /* MW 8 */ + 7353 "00000000" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "00100011" // /* MW 5 */ + 7356 "11001100" // /* MW 4 */ + 7357 "11110011" // /* MW 3 */ + 7358 "00101100" // /* MW 2 */ + 7359 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 +.src_ref 7 "superkernels.cpp" 139 6 first +.src_ref 7 "superkernels.cpp" 139 19 + 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7361 "00010000" // /* MW 9 */ + 7362 "00110000" // /* MW 8 */ + 7363 "00110010" // /* MW 7 */ + 7364 "11110011" // /* MW 6 */ + 7365 "00000001" // /* MW 5 */ + 7366 "00000000" // /* MW 4 */ + 7367 "11010000" // /* MW 3 */ + 7368 "11000010" // /* MW 2 */ + 7369 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 19 + 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7371 "00110110" // /* MW 3 */ + 7372 "00000110" // /* MW 2 */ + 7373 "00000110" // /* MW 1 */ + 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7375 "10011001" // /* MW 3 */ + 7376 "11111000" // /* MW 2 */ + 7377 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 + 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7379 "00111001" // /* MW 3 */ + 7380 "11110100" // /* MW 2 */ + 7381 "00000111" // /* MW 1 */ + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 16 + 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7391 "00001000" // /* MW 3 */ + 7392 "01100001" // /* MW 2 */ + 7393 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 6 + 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7395 "00000001" // /* MW 5 */ + 7396 "01000000" // /* MW 4 */ + 7397 "10000000" // /* MW 3 */ + 7398 "00001110" // /* MW 2 */ + 7399 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7407 "00000000" // /* MW 1 */ +.delay_slot + 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00100000" // /* MW 3 */ + 7410 "11010000" // /* MW 2 */ + 7411 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 140 14 first + 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7413 "11000001" // /* MW 11 */ + 7414 "10001000" // /* MW 10 */ + 7415 "10000011" // /* MW 9 */ + 7416 "00000011" // /* MW 8 */ + 7417 "00000000" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00100000" // /* MW 5 */ + 7420 "00000000" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7425 "00011001" // /* MW 3 */ + 7426 "11111111" // /* MW 2 */ + 7427 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 first + 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7429 "00000000" // /* MW 3 */ + 7430 "00101000" // /* MW 2 */ + 7431 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 +.delay_slot + 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7433 "00000001" // /* MW 5 */ + 7434 "00000000" // /* MW 4 */ + 7435 "00000000" // /* MW 3 */ + 7436 "11111000" // /* MW 2 */ + 7437 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot + 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "10001011" // /* MW 3 */ + 7446 "10000100" // /* MW 2 */ +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 7447 "00001111" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7457 "00000001" // /* MW 5 */ + 7458 "00100001" // /* MW 4 */ + 7459 "00000000" // /* MW 3 */ + 7460 "00000000" // /* MW 2 */ + 7461 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000000" // /* MW 3 */ + 7464 "01010000" // /* MW 2 */ + 7465 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "10010000" // /* MW 3 */ + 7468 "01100000" // /* MW 2 */ + 7469 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7471 "00010001" // /* MW 3 */ + 7472 "00000100" // /* MW 2 */ + 7473 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7475 "00010001" // /* MW 3 */ + 7476 "00010100" // /* MW 2 */ + 7477 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7479 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "00101110" // /* MW 3 */ + 7490 "00011100" // /* MW 2 */ + 7491 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7493 "00000001" // /* MW 5 */ + 7494 "00000000" // /* MW 4 */ + 7495 "00000000" // /* MW 3 */ + 7496 "00001000" // /* MW 2 */ + 7497 "00000000" // /* MW 1 */ + 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7499 "00111101" // /* MW 3 */ + 7500 "11111000" // /* MW 2 */ + 7501 "00001111" // /* MW 1 */ + 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7503 "11110101" // /* MW 3 */ + 7504 "11111101" // /* MW 2 */ + 7505 "00001111" // /* MW 1 */ + 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7507 "00000000" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ + 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7511 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7513 "00101001" // /* MW 3 */ + 7514 "00011100" // /* MW 2 */ + 7515 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7517 "00101110" // /* MW 3 */ + 7518 "00011100" // /* MW 2 */ + 7519 "00000001" // /* MW 1 */ + 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7521 "00000000" // /* MW 1 */ + 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7523 "00000000" // /* MW 1 */ + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7527 "00000000" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7533 "00101001" // /* MW 3 */ + 7534 "00011100" // /* MW 2 */ + 7535 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7537 "00101110" // /* MW 3 */ + 7538 "00000100" // /* MW 2 */ + 7539 "00000001" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ + 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7551 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7553 "00101001" // /* MW 3 */ + 7554 "00011100" // /* MW 2 */ + 7555 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7557 "00101110" // /* MW 3 */ + 7558 "00010100" // /* MW 2 */ + 7559 "00000001" // /* MW 1 */ + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7565 "00000001" // /* MW 5 */ + 7566 "00000000" // /* MW 4 */ + 7567 "10010000" // /* MW 3 */ + 7568 "00001110" // /* MW 2 */ + 7569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7575 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00101001" // /* MW 3 */ + 7578 "11011100" // /* MW 2 */ + 7579 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.delay_slot + 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7581 "11000000" // /* MW 3 */ + 7582 "11010000" // /* MW 2 */ + 7583 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7585 "00001000" // /* MW 9 */ + 7586 "11000100" // /* MW 8 */ + 7587 "00110011" // /* MW 7 */ + 7588 "01101000" // /* MW 6 */ + 7589 "00000000" // /* MW 5 */ + 7590 "00000001" // /* MW 4 */ + 7591 "00100000" // /* MW 3 */ + 7592 "00000111" // /* MW 2 */ + 7593 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7595 "01011000" // /* MW 9 */ + 7596 "11111101" // /* MW 8 */ + 7597 "00000111" // /* MW 7 */ + 7598 "00001000" // /* MW 6 */ + 7599 "10000000" // /* MW 5 */ + 7600 "00000001" // /* MW 4 */ + 7601 "10000000" // /* MW 3 */ + 7602 "11100010" // /* MW 2 */ + 7603 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7605 "00000001" // /* MW 9 */ + 7606 "10100000" // /* MW 8 */ + 7607 "00000111" // /* MW 7 */ + 7608 "10000000" // /* MW 6 */ + 7609 "00010001" // /* MW 5 */ + 7610 "00001010" // /* MW 4 */ + 7611 "00100000" // /* MW 3 */ + 7612 "10111110" // /* MW 2 */ + 7613 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7615 "01001010" // /* MW 3 */ + 7616 "00000110" // /* MW 2 */ + 7617 "00000000" // /* MW 1 */ + 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7619 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7621 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "00010111" // /* MW 3 */ + 7624 "00000010" // /* MW 2 */ + 7625 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7627 "00000000" // /* MW 3 */ + 7628 "00101000" // /* MW 2 */ + 7629 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "00000101" // /* MW 3 */ + 7632 "00100010" // /* MW 2 */ + 7633 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7635 "00000001" // /* MW 5 */ + 7636 "00000000" // /* MW 4 */ + 7637 "00000000" // /* MW 3 */ + 7638 "11111000" // /* MW 2 */ + 7639 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7641 "00100111" // /* MW 3 */ + 7642 "01110111" // /* MW 2 */ + 7643 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7645 "10000010" // /* MW 3 */ + 7646 "00100001" // /* MW 2 */ + 7647 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7649 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 81 first +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 +.function_start + 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7665 "01111000" // /* MW 9 */ + 7666 "01100000" // /* MW 8 */ + 7667 "00001000" // /* MW 7 */ + 7668 "11001000" // /* MW 6 */ + 7669 "00010000" // /* MW 5 */ + 7670 "00000000" // /* MW 4 */ + 7671 "10000000" // /* MW 3 */ + 7672 "10000000" // /* MW 2 */ + 7673 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 + 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7675 "00001100" // /* MW 5 */ + 7676 "11000000" // /* MW 4 */ + 7677 "10100000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first + 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7681 "01001010" // /* MW 3 */ + 7682 "00001000" // /* MW 2 */ + 7683 "00000000" // /* MW 1 */ + 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7685 "00000000" // /* MW 1 */ + 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7687 "00000000" // /* MW 1 */ + 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7689 "00000000" // /* MW 1 */ + 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7691 "00000000" // /* MW 1 */ + 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7693 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first + 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7695 "00000000" // /* MW 3 */ + 7696 "00101000" // /* MW 2 */ + 7697 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.delay_slot + 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7699 "00001000" // /* MW 3 */ + 7700 "10000000" // /* MW 2 */ + 7701 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first +.delay_slot + 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7703 "00011101" // /* MW 3 */ + 7704 "00000000" // /* MW 2 */ + 7705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.src_ref 3 "elementwise_binary_broadcasting.h" 83 23 +.delay_slot + 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7707 "11100000" // /* MW 5 */ + 7708 "00001101" // /* MW 4 */ + 7709 "00110001" // /* MW 3 */ + 7710 "10000010" // /* MW 2 */ + 7711 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.delay_slot + 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00011101" // /* MW 3 */ + 7714 "11000100" // /* MW 2 */ + 7715 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 23 +.delay_slot + 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01010001" // /* MW 3 */ + 7718 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7719 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_broadcasting.h" 76 +.src_ref 3 "elementwise_binary_broadcasting.h" 76 first +.function_start + 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7729 "00000001" // /* MW 5 */ + 7730 "00000000" // /* MW 4 */ + 7731 "00000000" // /* MW 3 */ + 7732 "00001000" // /* MW 2 */ + 7733 "00000000" // /* MW 1 */ + 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7735 "00111101" // /* MW 3 */ + 7736 "11111100" // /* MW 2 */ + 7737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first +.no_stack_arguments + 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */ + 7739 "00000001" // /* MW 5 */ + 7740 "00000000" // /* MW 4 */ + 7741 "10100000" // /* MW 3 */ + 7742 "00001110" // /* MW 2 */ + 7743 "00000000" // /* MW 1 */ +.delay_slot + 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7745 "10011101" // /* MW 3 */ + 7746 "11111011" // /* MW 2 */ + 7747 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot + 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7749 "11000000" // /* MW 3 */ + 7750 "01100000" // /* MW 2 */ + 7751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7757 "01100111" // /* MW 3 */ + 7758 "00000001" // /* MW 2 */ + 7759 "00000000" // /* MW 1 */ +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7761 "10011001" // /* MW 3 */ + 7762 "11111011" // /* MW 2 */ + 7763 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7765 "00111001" // /* MW 3 */ + 7766 "11111100" // /* MW 2 */ + 7767 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */ + 7769 "00000000" // /* MW 5 */ + 7770 "00000000" // /* MW 4 */ + 7771 "11111000" // /* MW 3 */ + 7772 "00001110" // /* MW 2 */ + 7773 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7775 "11000000" // /* MW 3 */ + 7776 "01101110" // /* MW 2 */ + 7777 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first +.delay_slot + 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7779 "00000001" // /* MW 5 */ + 7780 "00000000" // /* MW 4 */ + 7781 "00000000" // /* MW 3 */ + 7782 "11111000" // /* MW 2 */ + 7783 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7789 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 89 first +.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 +.function_start + 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7793 "01010001" // /* MW 5 */ + 7794 "00000000" // /* MW 4 */ + 7795 "11010000" // /* MW 3 */ + 7796 "10000010" // /* MW 2 */ + 7797 "01100111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7799 "10000001" // /* MW 5 */ + 7800 "11001101" // /* MW 4 */ + 7801 "01011000" // /* MW 3 */ + 7802 "00000101" // /* MW 2 */ + 7803 "01100001" // /* MW 1 */ + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ + 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7807 "00000000" // /* MW 1 */ + 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7809 "00000000" // /* MW 1 */ + 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7811 "00000000" // /* MW 1 */ + 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7813 "00000000" // /* MW 1 */ + 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7815 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 12 +.src_ref 3 "elementwise_binary_broadcasting.h" 102 35 + 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */ + 7817 "00000001" // /* MW 5 */ + 7818 "01000000" // /* MW 4 */ + 7819 "01100000" // /* MW 3 */ + 7820 "00001111" // /* MW 2 */ + 7821 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 +.delay_slot + 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7823 "11101001" // /* MW 3 */ + 7824 "11000100" // /* MW 2 */ + 7825 "00010111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first +.delay_slot + 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7827 "00101101" // /* MW 3 */ + 7828 "00000000" // /* MW 2 */ + 7829 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first + 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "00110010" // /* MW 3 */ + 7838 "00000100" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ + 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7841 "00000000" // /* MW 1 */ + 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7843 "00000000" // /* MW 1 */ + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ + 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */ + 7847 "00000000" // /* MW 5 */ + 7848 "00000000" // /* MW 4 */ + 7849 "01110000" // /* MW 3 */ + 7850 "00001111" // /* MW 2 */ + 7851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7857 "01110010" // /* MW 3 */ + 7858 "00000101" // /* MW 2 */ + 7859 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7863 "00000000" // /* MW 9 */ + 7864 "00000000" // /* MW 8 */ + 7865 "00000000" // /* MW 7 */ + 7866 "00000000" // /* MW 6 */ + 7867 "00010011" // /* MW 5 */ + 7868 "00000100" // /* MW 4 */ + 7869 "11110000" // /* MW 3 */ + 7870 "00101100" // /* MW 2 */ + 7871 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 +.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first + 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00110010" // /* MW 3 */ + 7874 "00000100" // /* MW 2 */ + 7875 "00000001" // /* MW 1 */ + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ + 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7881 "00000000" // /* MW 1 */ + 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7883 "00000000" // /* MW 1 */ + 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7885 "00000000" // /* MW 1 */ + 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7887 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7889 "01110010" // /* MW 3 */ + 7890 "00000101" // /* MW 2 */ + 7891 "00011000" // /* MW 1 */ + 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7895 "00000000" // /* MW 9 */ + 7896 "00000000" // /* MW 8 */ + 7897 "00000000" // /* MW 7 */ + 7898 "00000000" // /* MW 6 */ + 7899 "00010011" // /* MW 5 */ + 7900 "00000100" // /* MW 4 */ + 7901 "11110001" // /* MW 3 */ + 7902 "00101100" // /* MW 2 */ + 7903 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first + 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7905 "01001000" // /* MW 9 */ + 7906 "00111111" // /* MW 8 */ + 7907 "10111000" // /* MW 7 */ + 7908 "10001010" // /* MW 6 */ + 7909 "00000111" // /* MW 5 */ + 7910 "00000000" // /* MW 4 */ + 7911 "11010000" // /* MW 3 */ + 7912 "10000000" // /* MW 2 */ + 7913 "10001010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7915 "00010000" // /* MW 9 */ + 7916 "10101000" // /* MW 8 */ + 7917 "01111111" // /* MW 7 */ + 7918 "00000100" // /* MW 6 */ + 7919 "00000000" // /* MW 5 */ + 7920 "00000000" // /* MW 4 */ + 7921 "11010000" // /* MW 3 */ + 7922 "10010000" // /* MW 2 */ + 7923 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7925 "11100000" // /* MW 5 */ + 7926 "11111110" // /* MW 4 */ + 7927 "00010110" // /* MW 3 */ + 7928 "00000000" // /* MW 2 */ + 7929 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7931 "11010000" // /* MW 5 */ + 7932 "11001000" // /* MW 4 */ + 7933 "11001000" // /* MW 3 */ + 7934 "00000111" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7937 "00100010" // /* MW 3 */ + 7938 "00000100" // /* MW 2 */ + 7939 "00000100" // /* MW 1 */ + 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7941 "00000000" // /* MW 1 */ + 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first + 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7945 "10101011" // /* MW 3 */ + 7946 "00001000" // /* MW 2 */ + 7947 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 189 20 first + 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7949 "00101011" // /* MW 3 */ + 7950 "00101001" // /* MW 2 */ + 7951 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first + 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7953 "00101011" // /* MW 3 */ + 7954 "00001000" // /* MW 2 */ + 7955 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7957 "00101011" // /* MW 3 */ + 7958 "00101010" // /* MW 2 */ + 7959 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7961 "00000000" // /* MW 5 */ + 7962 "11110101" // /* MW 4 */ + 7963 "01110000" // /* MW 3 */ + 7964 "00010101" // /* MW 2 */ + 7965 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7967 "00111101" // /* MW 7 */ + 7968 "00101000" // /* MW 6 */ + 7969 "00000011" // /* MW 5 */ + 7970 "00000100" // /* MW 4 */ + 7971 "01110000" // /* MW 3 */ + 7972 "00100101" // /* MW 2 */ + 7973 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "00101011" // /* MW 3 */ + 7976 "00001000" // /* MW 2 */ + 7977 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7979 "00111101" // /* MW 7 */ + 7980 "00010000" // /* MW 6 */ + 7981 "00000100" // /* MW 5 */ + 7982 "00000100" // /* MW 4 */ + 7983 "01110000" // /* MW 3 */ + 7984 "01000101" // /* MW 2 */ + 7985 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "10101011" // /* MW 3 */ + 7988 "00001000" // /* MW 2 */ + 7989 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7991 "00111101" // /* MW 7 */ + 7992 "00101000" // /* MW 6 */ + 7993 "00000011" // /* MW 5 */ + 7994 "00000100" // /* MW 4 */ + 7995 "01110000" // /* MW 3 */ + 7996 "00100101" // /* MW 2 */ + 7997 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7999 "00101011" // /* MW 3 */ + 8000 "00001000" // /* MW 2 */ + 8001 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8003 "00111101" // /* MW 13 */ + 8004 "00010000" // /* MW 12 */ + 8005 "00000100" // /* MW 11 */ + 8006 "01010111" // /* MW 10 */ + 8007 "00011010" // /* MW 9 */ + 8008 "01000000" // /* MW 8 */ + 8009 "00000000" // /* MW 7 */ + 8010 "00000000" // /* MW 6 */ + 8011 "01000110" // /* MW 5 */ + 8012 "00111011" // /* MW 4 */ + 8013 "01110100" // /* MW 3 */ + 8014 "01000101" // /* MW 2 */ + 8015 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "10101011" // /* MW 3 */ + 8018 "00001000" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8021 "00111101" // /* MW 11 */ + 8022 "00101000" // /* MW 10 */ + 8023 "00000011" // /* MW 9 */ + 8024 "10001110" // /* MW 8 */ + 8025 "00010001" // /* MW 7 */ + 8026 "00001111" // /* MW 6 */ + 8027 "00100001" // /* MW 5 */ + 8028 "00000000" // /* MW 4 */ + 8029 "01110000" // /* MW 3 */ + 8030 "00100101" // /* MW 2 */ + 8031 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8033 "00000000" // /* MW 15 */ + 8034 "00000000" // /* MW 14 */ + 8035 "01111000" // /* MW 13 */ + 8036 "10100101" // /* MW 12 */ + 8037 "00000001" // /* MW 11 */ + 8038 "00000000" // /* MW 10 */ + 8039 "00000000" // /* MW 9 */ + 8040 "00000000" // /* MW 8 */ + 8041 "01011011" // /* MW 7 */ + 8042 "00000001" // /* MW 6 */ + 8043 "00100000" // /* MW 5 */ + 8044 "00000000" // /* MW 4 */ + 8045 "01110000" // /* MW 3 */ + 8046 "00000101" // /* MW 2 */ + 8047 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8049 "10000001" // /* MW 15 */ + 8050 "00100000" // /* MW 14 */ + 8051 "01111000" // /* MW 13 */ + 8052 "10100101" // /* MW 12 */ + 8053 "00000001" // /* MW 11 */ + 8054 "00000000" // /* MW 10 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "10100011" // /* MW 7 */ + 8058 "00011101" // /* MW 6 */ + 8059 "00100010" // /* MW 5 */ + 8060 "00000000" // /* MW 4 */ + 8061 "01110000" // /* MW 3 */ + 8062 "01000101" // /* MW 2 */ + 8063 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8067 "00111101" // /* MW 7 */ + 8068 "00101000" // /* MW 6 */ + 8069 "00000011" // /* MW 5 */ + 8070 "00000010" // /* MW 4 */ + 8071 "01100000" // /* MW 3 */ + 8072 "11000100" // /* MW 2 */ + 8073 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8075 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8077 "00111101" // /* MW 7 */ + 8078 "00010000" // /* MW 6 */ + 8079 "00000100" // /* MW 5 */ + 8080 "00000010" // /* MW 4 */ + 8081 "01100000" // /* MW 3 */ + 8082 "10110100" // /* MW 2 */ + 8083 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8087 "00000000" // /* MW 5 */ + 8088 "01010000" // /* MW 4 */ + 8089 "01100000" // /* MW 3 */ + 8090 "11000100" // /* MW 2 */ + 8091 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "10100011" // /* MW 3 */ + 8096 "00011101" // /* MW 2 */ + 8097 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8099 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8101 "00100011" // /* MW 3 */ + 8102 "00011110" // /* MW 2 */ + 8103 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8105 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first +.function_start + 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8113 "00000001" // /* MW 5 */ + 8114 "00000000" // /* MW 4 */ + 8115 "00000000" // /* MW 3 */ + 8116 "00010000" // /* MW 2 */ + 8117 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 + 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8119 "01110000" // /* MW 7 */ + 8120 "01100000" // /* MW 6 */ + 8121 "00001010" // /* MW 5 */ + 8122 "00000010" // /* MW 4 */ + 8123 "10110000" // /* MW 3 */ + 8124 "10000111" // /* MW 2 */ + 8125 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 + 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8127 "00000000" // /* MW 7 */ + 8128 "00000011" // /* MW 6 */ + 8129 "10110100" // /* MW 5 */ + 8130 "00000001" // /* MW 4 */ + 8131 "01100000" // /* MW 3 */ + 8132 "10010001" // /* MW 2 */ + 8133 "01010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 + 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8135 "10000001" // /* MW 5 */ + 8136 "00100001" // /* MW 4 */ + 8137 "01011000" // /* MW 3 */ + 8138 "11101101" // /* MW 2 */ + 8139 "01100101" // /* MW 1 */ + 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8141 "11000001" // /* MW 5 */ + 8142 "10101011" // /* MW 4 */ + 8143 "01011000" // /* MW 3 */ + 8144 "11001010" // /* MW 2 */ + 8145 "01110011" // /* MW 1 */ + 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8147 "11000000" // /* MW 3 */ + 8148 "01101000" // /* MW 2 */ + 8149 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8151 "00101011" // /* MW 3 */ + 8152 "00000111" // /* MW 2 */ + 8153 "00001000" // /* MW 1 */ + 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8155 "01010111" // /* MW 3 */ + 8156 "00000110" // /* MW 2 */ + 8157 "00000000" // /* MW 1 */ + 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8159 "00000000" // /* MW 1 */ + 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8161 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first +.no_stack_arguments + 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */ + 8163 "00000001" // /* MW 5 */ + 8164 "00000000" // /* MW 4 */ + 8165 "00111000" // /* MW 3 */ + 8166 "00001111" // /* MW 2 */ + 8167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.delay_slot + 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8169 "11000000" // /* MW 3 */ + 8170 "01010000" // /* MW 2 */ + 8171 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first +.delay_slot + 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8175 "00010010" // /* MW 3 */ + 8176 "00100101" // /* MW 2 */ + 8177 "00010100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8179 "01000001" // /* MW 5 */ + 8180 "11010010" // /* MW 4 */ + 8181 "01000010" // /* MW 3 */ + 8182 "00100000" // /* MW 2 */ + 8183 "10001100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8185 "01110000" // /* MW 7 */ + 8186 "00010000" // /* MW 6 */ + 8187 "00110100" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "01100000" // /* MW 3 */ + 8190 "00101011" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.return_address + 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8193 "00111001" // /* MW 3 */ + 8194 "11111100" // /* MW 2 */ + 8195 "00000111" // /* MW 1 */ + 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8197 "00000000" // /* MW 1 */ + 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8199 "00000000" // /* MW 1 */ + 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8201 "00000000" // /* MW 1 */ + 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8203 "00000000" // /* MW 1 */ + 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8205 "00000000" // /* MW 1 */ + 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8207 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first + 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8209 "00000000" // /* MW 3 */ + 8210 "00101000" // /* MW 2 */ + 8211 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.delay_slot + 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8213 "00000001" // /* MW 5 */ + 8214 "00000000" // /* MW 4 */ + 8215 "00000000" // /* MW 3 */ + 8216 "11110000" // /* MW 2 */ + 8217 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8221 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8223 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8225 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 147 first +.src_ref 7 "superkernels.cpp" 152 6 +.function_start + 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8241 "10000000" // /* MW 5 */ + 8242 "11001000" // /* MW 4 */ + 8243 "11000110" // /* MW 3 */ + 8244 "00000111" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 first + 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8247 "11000001" // /* MW 5 */ + 8248 "10110101" // /* MW 4 */ + 8249 "11011000" // /* MW 3 */ + 8250 "11000010" // /* MW 2 */ + 8251 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 147 + 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8253 "00000001" // /* MW 5 */ + 8254 "00000000" // /* MW 4 */ + 8255 "00000000" // /* MW 3 */ + 8256 "00001000" // /* MW 2 */ + 8257 "00000000" // /* MW 1 */ + 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8259 "01110000" // /* MW 7 */ + 8260 "11010000" // /* MW 6 */ + 8261 "00001011" // /* MW 5 */ + 8262 "00000000" // /* MW 4 */ + 8263 "10110000" // /* MW 3 */ + 8264 "01100011" // /* MW 2 */ + 8265 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 + 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8267 "00010001" // /* MW 9 */ + 8268 "00101000" // /* MW 8 */ + 8269 "00110010" // /* MW 7 */ + 8270 "11110011" // /* MW 6 */ + 8271 "00000001" // /* MW 5 */ + 8272 "00000000" // /* MW 4 */ + 8273 "10110000" // /* MW 3 */ + 8274 "10000010" // /* MW 2 */ + 8275 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "11000000" // /* MW 3 */ + 8278 "11010100" // /* MW 2 */ + 8279 "00011011" // /* MW 1 */ + 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8281 "00000000" // /* MW 1 */ + 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8283 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 +.src_ref 7 "superkernels.cpp" 152 16 + 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */ + 8285 "00000001" // /* MW 5 */ + 8286 "01000000" // /* MW 4 */ + 8287 "10000000" // /* MW 3 */ + 8288 "00010000" // /* MW 2 */ + 8289 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 22 first +.delay_slot + 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8291 "10010000" // /* MW 3 */ + 8292 "01100010" // /* MW 2 */ + 8293 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 30 +.delay_slot + 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8295 "11111011" // /* MW 3 */ + 8296 "01100011" // /* MW 2 */ + 8297 "00010100" // /* MW 1 */ +.delay_slot + 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8299 "00111101" // /* MW 3 */ + 8300 "11110100" // /* MW 2 */ + 8301 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8303 "01110000" // /* MW 7 */ + 8304 "01100000" // /* MW 6 */ + 8305 "00110000" // /* MW 5 */ + 8306 "00000011" // /* MW 4 */ + 8307 "00110000" // /* MW 3 */ + 8308 "11000110" // /* MW 2 */ + 8309 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 +.src_ref 7 "superkernels.cpp" 166 2 +.delay_slot + 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8311 "10000000" // /* MW 5 */ + 8312 "11001001" // /* MW 4 */ + 8313 "11000000" // /* MW 3 */ + 8314 "00000111" // /* MW 2 */ + 8315 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8317 "11010000" // /* MW 5 */ + 8318 "11001000" // /* MW 4 */ + 8319 "11000100" // /* MW 3 */ + 8320 "00000111" // /* MW 2 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8323 "00010000" // /* MW 9 */ + 8324 "00110010" // /* MW 8 */ + 8325 "00110010" // /* MW 7 */ + 8326 "11110001" // /* MW 6 */ + 8327 "00000001" // /* MW 5 */ + 8328 "00000000" // /* MW 4 */ + 8329 "11100000" // /* MW 3 */ + 8330 "11000000" // /* MW 2 */ + 8331 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8333 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */ + 8335 "00000001" // /* MW 5 */ + 8336 "00000000" // /* MW 4 */ + 8337 "00011000" // /* MW 3 */ + 8338 "00001111" // /* MW 2 */ + 8339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "00110001" // /* MW 3 */ + 8346 "00100000" // /* MW 2 */ + 8347 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "00000101" // /* MW 3 */ + 8350 "00100000" // /* MW 2 */ + 8351 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8353 "00000000" // /* MW 15 */ + 8354 "00000000" // /* MW 14 */ + 8355 "01111000" // /* MW 13 */ + 8356 "10100101" // /* MW 12 */ + 8357 "00000001" // /* MW 11 */ + 8358 "00000000" // /* MW 10 */ + 8359 "00000000" // /* MW 9 */ + 8360 "10000000" // /* MW 8 */ + 8361 "00010001" // /* MW 7 */ + 8362 "00000110" // /* MW 6 */ + 8363 "00100010" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11110000" // /* MW 3 */ + 8366 "00101100" // /* MW 2 */ + 8367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 +.return_address + 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8369 "10100000" // /* MW 5 */ + 8370 "11001000" // /* MW 4 */ + 8371 "11000100" // /* MW 3 */ + 8372 "00000111" // /* MW 2 */ + 8373 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 first +.src_ref 7 "superkernels.cpp" 159 65 + 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8375 "00010000" // /* MW 9 */ + 8376 "01100000" // /* MW 8 */ + 8377 "00110010" // /* MW 7 */ + 8378 "11110001" // /* MW 6 */ + 8379 "00000001" // /* MW 5 */ + 8380 "00000000" // /* MW 4 */ + 8381 "11010000" // /* MW 3 */ + 8382 "11000010" // /* MW 2 */ + 8383 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 +.src_ref 7 "superkernels.cpp" 159 65 +.src_ref 7 "superkernels.cpp" 166 2 + 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8385 "00010000" // /* MW 9 */ + 8386 "01100000" // /* MW 8 */ + 8387 "00110010" // /* MW 7 */ + 8388 "11110001" // /* MW 6 */ + 8389 "00000001" // /* MW 5 */ + 8390 "00000000" // /* MW 4 */ + 8391 "11010000" // /* MW 3 */ + 8392 "11000110" // /* MW 2 */ + 8393 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 first +.src_ref 7 "superkernels.cpp" 159 16 +.src_ref 7 "superkernels.cpp" 164 47 + 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8395 "00010000" // /* MW 9 */ + 8396 "00101010" // /* MW 8 */ + 8397 "10110010" // /* MW 7 */ + 8398 "11110000" // /* MW 6 */ + 8399 "00000001" // /* MW 5 */ + 8400 "00000000" // /* MW 4 */ + 8401 "01010000" // /* MW 3 */ + 8402 "11001011" // /* MW 2 */ + 8403 "01001010" // /* MW 1 */ + 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8405 "00000000" // /* MW 1 */ + 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8407 "00000000" // /* MW 1 */ + 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */ + 8409 "00000000" // /* MW 5 */ + 8410 "00000000" // /* MW 4 */ + 8411 "10001000" // /* MW 3 */ + 8412 "00010000" // /* MW 2 */ + 8413 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 +.delay_slot + 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8415 "11000000" // /* MW 5 */ + 8416 "11001000" // /* MW 4 */ + 8417 "11000000" // /* MW 3 */ + 8418 "00000111" // /* MW 2 */ + 8419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8421 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 27 first +.delay_slot + 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001111" // /* MW 3 */ + 8424 "01100001" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 first +.delay_slot + 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8427 "10100011" // /* MW 5 */ + 8428 "00001100" // /* MW 4 */ + 8429 "11110000" // /* MW 3 */ + 8430 "00101100" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 16 first +.delay_slot + 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8433 "00000000" // /* MW 15 */ + 8434 "00000000" // /* MW 14 */ + 8435 "01111000" // /* MW 13 */ + 8436 "10100101" // /* MW 12 */ + 8437 "00000001" // /* MW 11 */ + 8438 "00000000" // /* MW 10 */ + 8439 "00000000" // /* MW 9 */ + 8440 "10000000" // /* MW 8 */ + 8441 "00010001" // /* MW 7 */ + 8442 "00000110" // /* MW 6 */ + 8443 "00100001" // /* MW 5 */ + 8444 "00000000" // /* MW 4 */ + 8445 "11110000" // /* MW 3 */ + 8446 "00101100" // /* MW 2 */ + 8447 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 164 47 +.src_ref 7 "superkernels.cpp" 166 2 + 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "00010000" // /* MW 13 */ + 8452 "00101010" // /* MW 12 */ + 8453 "10110010" // /* MW 11 */ + 8454 "11110000" // /* MW 10 */ + 8455 "00000001" // /* MW 9 */ + 8456 "00000000" // /* MW 8 */ + 8457 "10001011" // /* MW 7 */ + 8458 "10000000" // /* MW 6 */ + 8459 "00100010" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8465 "00000000" // /* MW 7 */ + 8466 "11000011" // /* MW 6 */ + 8467 "10110011" // /* MW 5 */ + 8468 "00000011" // /* MW 4 */ + 8469 "01100000" // /* MW 3 */ + 8470 "10010001" // /* MW 2 */ + 8471 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8473 "00010000" // /* MW 9 */ + 8474 "00100000" // /* MW 8 */ + 8475 "00110010" // /* MW 7 */ + 8476 "11110000" // /* MW 6 */ + 8477 "00000001" // /* MW 5 */ + 8478 "00000000" // /* MW 4 */ + 8479 "11010000" // /* MW 3 */ + 8480 "11101110" // /* MW 2 */ + 8481 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8483 "00010110" // /* MW 3 */ + 8484 "11111110" // /* MW 2 */ + 8485 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8487 "00110110" // /* MW 3 */ + 8488 "11111110" // /* MW 2 */ + 8489 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8491 "01010110" // /* MW 3 */ + 8492 "01000110" // /* MW 2 */ + 8493 "00000111" // /* MW 1 */ + 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8495 "00000000" // /* MW 1 */ + 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8497 "00000000" // /* MW 1 */ + 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8499 "00000000" // /* MW 1 */ + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ + 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8503 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "00000010" // /* MW 3 */ + 8506 "01100001" // /* MW 2 */ + 8507 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8509 "00010001" // /* MW 3 */ + 8510 "00000110" // /* MW 2 */ + 8511 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8513 "11111101" // /* MW 3 */ + 8514 "11100000" // /* MW 2 */ + 8515 "00010111" // /* MW 1 */ + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001000" // /* MW 3 */ + 8524 "10010011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 + 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8527 "10000001" // /* MW 5 */ + 8528 "10101101" // /* MW 4 */ + 8529 "10100111" // /* MW 3 */ + 8530 "00000000" // /* MW 2 */ + 8531 "00000100" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first + 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8537 "00110110" // /* MW 3 */ + 8538 "00000110" // /* MW 2 */ + 8539 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8541 "10000001" // /* MW 5 */ + 8542 "11011101" // /* MW 4 */ + 8543 "11011100" // /* MW 3 */ + 8544 "11001010" // /* MW 2 */ + 8545 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 47 first + 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8547 "01110110" // /* MW 3 */ + 8548 "00000110" // /* MW 2 */ + 8549 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8551 "10011110" // /* MW 3 */ + 8552 "01011100" // /* MW 2 */ + 8553 "00000111" // /* MW 1 */ + 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 166 2 first +.no_stack_arguments + 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */ + 8557 "00000001" // /* MW 5 */ + 8558 "00000000" // /* MW 4 */ + 8559 "11011000" // /* MW 3 */ + 8560 "00001111" // /* MW 2 */ + 8561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first +.delay_slot + 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8565 "00000111" // /* MW 3 */ + 8566 "01100010" // /* MW 2 */ + 8567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.delay_slot + 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8569 "00110001" // /* MW 3 */ + 8570 "00000110" // /* MW 2 */ + 8571 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 first +.delay_slot + 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8573 "00001101" // /* MW 3 */ + 8574 "11100001" // /* MW 2 */ + 8575 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 +.delay_slot + 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8577 "00000000" // /* MW 15 */ + 8578 "00000000" // /* MW 14 */ + 8579 "10101000" // /* MW 13 */ + 8580 "10100000" // /* MW 12 */ + 8581 "00110100" // /* MW 11 */ + 8582 "00000000" // /* MW 10 */ + 8583 "00000000" // /* MW 9 */ + 8584 "00000000" // /* MW 8 */ + 8585 "01011011" // /* MW 7 */ + 8586 "00000001" // /* MW 6 */ + 8587 "00100000" // /* MW 5 */ + 8588 "00000000" // /* MW 4 */ + 8589 "11110000" // /* MW 3 */ + 8590 "00101100" // /* MW 2 */ + 8591 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 +.src_ref 7 "superkernels.cpp" 169 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8593 "00010000" // /* MW 9 */ + 8594 "00100000" // /* MW 8 */ + 8595 "00110010" // /* MW 7 */ + 8596 "11110011" // /* MW 6 */ + 8597 "00000001" // /* MW 5 */ + 8598 "00000000" // /* MW 4 */ + 8599 "11010000" // /* MW 3 */ + 8600 "11000110" // /* MW 2 */ + 8601 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8603 "00000101" // /* MW 3 */ + 8604 "00100000" // /* MW 2 */ + 8605 "00010000" // /* MW 1 */ + 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8607 "00000000" // /* MW 1 */ + 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ + 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8617 "00001000" // /* MW 3 */ + 8618 "01010001" // /* MW 2 */ + 8619 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8621 "00010000" // /* MW 9 */ + 8622 "00110000" // /* MW 8 */ + 8623 "00110010" // /* MW 7 */ + 8624 "11110001" // /* MW 6 */ + 8625 "00000001" // /* MW 5 */ + 8626 "00000000" // /* MW 4 */ + 8627 "11010000" // /* MW 3 */ + 8628 "11001110" // /* MW 2 */ + 8629 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 first + 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "00110110" // /* MW 3 */ + 8632 "00000110" // /* MW 2 */ + 8633 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 + 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8635 "01010110" // /* MW 3 */ + 8636 "00000110" // /* MW 2 */ + 8637 "00000010" // /* MW 1 */ + 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8639 "00000000" // /* MW 1 */ + 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8641 "00000000" // /* MW 1 */ + 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8643 "00000000" // /* MW 1 */ + 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8645 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00110001" // /* MW 3 */ + 8648 "00100001" // /* MW 2 */ + 8649 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8651 "00010001" // /* MW 3 */ + 8652 "11100110" // /* MW 2 */ + 8653 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 16 first + 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8655 "00101000" // /* MW 3 */ + 8656 "01100001" // /* MW 2 */ + 8657 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 + 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */ + 8659 "00000001" // /* MW 5 */ + 8660 "01000000" // /* MW 4 */ + 8661 "11111000" // /* MW 3 */ + 8662 "00010000" // /* MW 2 */ + 8663 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8673 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 + 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8675 "00000001" // /* MW 3 */ + 8676 "00100000" // /* MW 2 */ + 8677 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 first + 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8679 "00000000" // /* MW 9 */ + 8680 "00000000" // /* MW 8 */ + 8681 "00000000" // /* MW 7 */ + 8682 "10000000" // /* MW 6 */ + 8683 "00010001" // /* MW 5 */ + 8684 "00000110" // /* MW 4 */ + 8685 "11110110" // /* MW 3 */ + 8686 "00101100" // /* MW 2 */ + 8687 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 171 + 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8689 "00111001" // /* MW 3 */ + 8690 "11110100" // /* MW 2 */ + 8691 "00000111" // /* MW 1 */ + 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8693 "00011001" // /* MW 3 */ + 8694 "11111011" // /* MW 2 */ + 8695 "00000111" // /* MW 1 */ + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ + 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8699 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "11110001" // /* MW 3 */ + 8704 "11111101" // /* MW 2 */ + 8705 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8709 "00000000" // /* MW 3 */ + 8710 "00101000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8713 "10100000" // /* MW 3 */ + 8714 "01100111" // /* MW 2 */ + 8715 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 +.delay_slot + 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8717 "00000001" // /* MW 5 */ + 8718 "00000000" // /* MW 4 */ + 8719 "00000000" // /* MW 3 */ + 8720 "11111000" // /* MW 2 */ + 8721 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 8727 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 3 "elementwise_unary.h" 124 first +.src_ref 3 "elementwise_unary.h" 126 24 first +.function_start + 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8737 "00101110" // /* MW 3 */ + 8738 "00011100" // /* MW 2 */ + 8739 "00000001" // /* MW 1 */ + 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8741 "00000000" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8745 "00000000" // /* MW 1 */ + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 126 22 first + 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8753 "00101001" // /* MW 3 */ + 8754 "00011100" // /* MW 2 */ + 8755 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 24 first + 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8757 "00101110" // /* MW 3 */ + 8758 "00011100" // /* MW 2 */ + 8759 "00000001" // /* MW 1 */ + 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8761 "00000000" // /* MW 1 */ + 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8763 "00000000" // /* MW 1 */ + 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8765 "00000000" // /* MW 1 */ + 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8767 "00000000" // /* MW 1 */ + 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8769 "00000000" // /* MW 1 */ + 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8771 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 22 + 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8773 "00101001" // /* MW 3 */ + 8774 "00011100" // /* MW 2 */ + 8775 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 24 first + 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8777 "00101110" // /* MW 3 */ + 8778 "01101100" // /* MW 2 */ + 8779 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8781 "00010010" // /* MW 3 */ + 8782 "00000100" // /* MW 2 */ + 8783 "00000001" // /* MW 1 */ + 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8785 "00000000" // /* MW 1 */ + 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8787 "00000000" // /* MW 1 */ + 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8789 "00000000" // /* MW 1 */ + 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8791 "00000000" // /* MW 1 */ + 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8793 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 22 first + 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8795 "00101001" // /* MW 3 */ + 8796 "01101100" // /* MW 2 */ + 8797 "00001000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010111" // /* MW 3 */ + 8800 "00000100" // /* MW 2 */ + 8801 "00000000" // /* MW 1 */ + 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8803 "00000000" // /* MW 1 */ + 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8805 "00000000" // /* MW 1 */ + 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8807 "00000000" // /* MW 1 */ + 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8809 "00000000" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 first + 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "00010010" // /* MW 3 */ + 8816 "00100100" // /* MW 2 */ + 8817 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 + 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8819 "00010111" // /* MW 3 */ + 8820 "00010100" // /* MW 2 */ + 8821 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 130 4 first + 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8823 "00000000" // /* MW 3 */ + 8824 "00101000" // /* MW 2 */ + 8825 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8829 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 8835 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 136 first +.src_ref 3 "elementwise_unary.h" 142 37 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 171 19 +.function_start + 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8849 "00010000" // /* MW 11 */ + 8850 "10001000" // /* MW 10 */ + 8851 "01111001" // /* MW 9 */ + 8852 "00001000" // /* MW 8 */ + 8853 "00000000" // /* MW 7 */ + 8854 "00000000" // /* MW 6 */ + 8855 "01101000" // /* MW 5 */ + 8856 "00111010" // /* MW 4 */ + 8857 "10000000" // /* MW 3 */ + 8858 "11000010" // /* MW 2 */ + 8859 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 142 78 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 190 19 first + 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8861 "00010000" // /* MW 11 */ + 8862 "10100000" // /* MW 10 */ + 8863 "10111001" // /* MW 9 */ + 8864 "00001001" // /* MW 8 */ + 8865 "00000000" // /* MW 7 */ + 8866 "00000000" // /* MW 6 */ + 8867 "01101000" // /* MW 5 */ + 8868 "00111001" // /* MW 4 */ + 8869 "00000000" // /* MW 3 */ + 8870 "01010001" // /* MW 2 */ + 8871 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 + 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8873 "11000000" // /* MW 3 */ + 8874 "00010100" // /* MW 2 */ + 8875 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 first + 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8877 "00010000" // /* MW 3 */ + 8878 "01100000" // /* MW 2 */ + 8879 "00011010" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 103 16 first + 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8881 "01010010" // /* MW 3 */ + 8882 "00011100" // /* MW 2 */ + 8883 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 142 37 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8885 "00010110" // /* MW 3 */ + 8886 "00000000" // /* MW 2 */ + 8887 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 8 "clip_impl.h" 104 16 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8889 "01101000" // /* MW 5 */ + 8890 "00111010" // /* MW 4 */ + 8891 "01010000" // /* MW 3 */ + 8892 "10000110" // /* MW 2 */ + 8893 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8895 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8901 "10110100" // /* MW 3 */ + 8902 "00011100" // /* MW 2 */ + 8903 "00111000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8905 "01110010" // /* MW 3 */ + 8906 "00001001" // /* MW 2 */ + 8907 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 142 78 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8909 "01111000" // /* MW 9 */ + 8910 "00110110" // /* MW 8 */ + 8911 "01010000" // /* MW 7 */ + 8912 "11101101" // /* MW 6 */ + 8913 "00011000" // /* MW 5 */ + 8914 "00000001" // /* MW 4 */ + 8915 "01101000" // /* MW 3 */ + 8916 "00111010" // /* MW 2 */ + 8917 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 154 8 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8919 "11111110" // /* MW 3 */ + 8920 "01111000" // /* MW 2 */ + 8921 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8923 "01110010" // /* MW 3 */ + 8924 "10000101" // /* MW 2 */ + 8925 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8927 "10101100" // /* MW 3 */ + 8928 "10101000" // /* MW 2 */ + 8929 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8931 "01100000" // /* MW 13 */ + 8932 "00101011" // /* MW 12 */ + 8933 "00000000" // /* MW 11 */ + 8934 "11001111" // /* MW 10 */ + 8935 "00000110" // /* MW 9 */ + 8936 "00110001" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "01101000" // /* MW 5 */ + 8940 "00111001" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8945 "00000000" // /* MW 15 */ + 8946 "00000000" // /* MW 14 */ + 8947 "01111000" // /* MW 13 */ + 8948 "01010110" // /* MW 12 */ + 8949 "11011000" // /* MW 11 */ + 8950 "00000001" // /* MW 10 */ + 8951 "00000000" // /* MW 9 */ + 8952 "00000000" // /* MW 8 */ + 8953 "11010011" // /* MW 7 */ + 8954 "00011100" // /* MW 6 */ + 8955 "00100001" // /* MW 5 */ + 8956 "00000000" // /* MW 4 */ + 8957 "11110000" // /* MW 3 */ + 8958 "00101100" // /* MW 2 */ + 8959 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8961 "00000000" // /* MW 15 */ + 8962 "00000000" // /* MW 14 */ + 8963 "01111000" // /* MW 13 */ + 8964 "00110110" // /* MW 12 */ + 8965 "01010000" // /* MW 11 */ + 8966 "00000001" // /* MW 10 */ + 8967 "00000000" // /* MW 9 */ + 8968 "00000000" // /* MW 8 */ + 8969 "01011011" // /* MW 7 */ + 8970 "00000001" // /* MW 6 */ + 8971 "00100000" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "11110000" // /* MW 3 */ + 8974 "00101100" // /* MW 2 */ + 8975 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8977 "00000000" // /* MW 15 */ + 8978 "00000000" // /* MW 14 */ + 8979 "01111000" // /* MW 13 */ + 8980 "01010110" // /* MW 12 */ + 8981 "11010100" // /* MW 11 */ + 8982 "00000000" // /* MW 10 */ + 8983 "00000000" // /* MW 9 */ + 8984 "00000000" // /* MW 8 */ + 8985 "11010011" // /* MW 7 */ + 8986 "00011101" // /* MW 6 */ + 8987 "01101001" // /* MW 5 */ + 8988 "00111010" // /* MW 4 */ + 8989 "11110000" // /* MW 3 */ + 8990 "00101100" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8993 "00000000" // /* MW 15 */ + 8994 "00000000" // /* MW 14 */ + 8995 "01111000" // /* MW 13 */ + 8996 "00110110" // /* MW 12 */ + 8997 "10001000" // /* MW 11 */ + 8998 "00000001" // /* MW 10 */ + 8999 "00000000" // /* MW 9 */ + 9000 "00000000" // /* MW 8 */ + 9001 "01011011" // /* MW 7 */ + 9002 "00000001" // /* MW 6 */ + 9003 "01101000" // /* MW 5 */ + 9004 "00111001" // /* MW 4 */ + 9005 "11110000" // /* MW 3 */ + 9006 "00101100" // /* MW 2 */ + 9007 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9009 "00000000" // /* MW 15 */ + 9010 "00000000" // /* MW 14 */ + 9011 "01111000" // /* MW 13 */ + 9012 "01010110" // /* MW 12 */ + 9013 "11011000" // /* MW 11 */ + 9014 "00000001" // /* MW 10 */ + 9015 "00000000" // /* MW 9 */ + 9016 "00000000" // /* MW 8 */ + 9017 "11010011" // /* MW 7 */ + 9018 "00011100" // /* MW 6 */ + 9019 "00100001" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "11110000" // /* MW 3 */ + 9022 "00101100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.src_ref 4 "max_min.hpp" 20 104 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9025 "00000000" // /* MW 15 */ + 9026 "00000000" // /* MW 14 */ + 9027 "01111000" // /* MW 13 */ + 9028 "00110110" // /* MW 12 */ + 9029 "01010000" // /* MW 11 */ + 9030 "00000001" // /* MW 10 */ + 9031 "00000000" // /* MW 9 */ + 9032 "00000000" // /* MW 8 */ + 9033 "01011011" // /* MW 7 */ + 9034 "00000001" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9041 "01110000" // /* MW 7 */ + 9042 "01010110" // /* MW 6 */ + 9043 "11010100" // /* MW 5 */ + 9044 "00000000" // /* MW 4 */ + 9045 "01100000" // /* MW 3 */ + 9046 "10111010" // /* MW 2 */ + 9047 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9049 "01101100" // /* MW 3 */ + 9050 "00010000" // /* MW 2 */ + 9051 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first + 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9053 "01110000" // /* MW 7 */ + 9054 "01010110" // /* MW 6 */ + 9055 "11011000" // /* MW 5 */ + 9056 "00000001" // /* MW 4 */ + 9057 "01100000" // /* MW 3 */ + 9058 "10011010" // /* MW 2 */ + 9059 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 158 4 first + 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9061 "11011001" // /* MW 5 */ + 9062 "01000000" // /* MW 4 */ + 9063 "00000101" // /* MW 3 */ + 9064 "00000000" // /* MW 2 */ + 9065 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9067 "01110000" // /* MW 7 */ + 9068 "01010110" // /* MW 6 */ + 9069 "11010100" // /* MW 5 */ + 9070 "00000000" // /* MW 4 */ + 9071 "01100000" // /* MW 3 */ + 9072 "10111010" // /* MW 2 */ + 9073 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9075 "01101100" // /* MW 3 */ + 9076 "00010000" // /* MW 2 */ + 9077 "00011011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.delay_slot + 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9079 "10101100" // /* MW 3 */ + 9080 "10110000" // /* MW 2 */ + 9081 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.delay_slot + 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9083 "11010011" // /* MW 3 */ + 9084 "00011100" // /* MW 2 */ + 9085 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9087 "11010011" // /* MW 3 */ + 9088 "00011101" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 9089 "00001001" // /* MW 1 */ +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 176 first +.src_ref 7 "superkernels.cpp" 181 6 +.function_start + 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9105 "10000000" // /* MW 5 */ + 9106 "11001000" // /* MW 4 */ + 9107 "11000110" // /* MW 3 */ + 9108 "00000111" // /* MW 2 */ + 9109 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 first + 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9111 "11000001" // /* MW 5 */ + 9112 "10110101" // /* MW 4 */ + 9113 "11011000" // /* MW 3 */ + 9114 "11000010" // /* MW 2 */ + 9115 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 176 + 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9117 "00000001" // /* MW 5 */ + 9118 "00000000" // /* MW 4 */ + 9119 "00000000" // /* MW 3 */ + 9120 "00001000" // /* MW 2 */ + 9121 "00000000" // /* MW 1 */ + 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9123 "01110000" // /* MW 7 */ + 9124 "11010000" // /* MW 6 */ + 9125 "00001011" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "10110000" // /* MW 3 */ + 9128 "01100011" // /* MW 2 */ + 9129 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 + 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9131 "00010001" // /* MW 9 */ + 9132 "00101000" // /* MW 8 */ + 9133 "00110010" // /* MW 7 */ + 9134 "11110011" // /* MW 6 */ + 9135 "00000001" // /* MW 5 */ + 9136 "00000000" // /* MW 4 */ + 9137 "10110000" // /* MW 3 */ + 9138 "10000010" // /* MW 2 */ + 9139 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9141 "11000000" // /* MW 3 */ + 9142 "11010100" // /* MW 2 */ + 9143 "00011011" // /* MW 1 */ + 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9145 "00000000" // /* MW 1 */ + 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9147 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 +.src_ref 7 "superkernels.cpp" 181 16 + 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */ + 9149 "00000001" // /* MW 5 */ + 9150 "01000000" // /* MW 4 */ + 9151 "00110000" // /* MW 3 */ + 9152 "00010010" // /* MW 2 */ + 9153 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 22 first +.delay_slot + 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "10010000" // /* MW 3 */ + 9156 "01100010" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 30 +.delay_slot + 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "11111011" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010100" // /* MW 1 */ +.delay_slot + 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00111101" // /* MW 3 */ + 9164 "11110100" // /* MW 2 */ + 9165 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9167 "01110000" // /* MW 7 */ + 9168 "01100000" // /* MW 6 */ + 9169 "00110000" // /* MW 5 */ + 9170 "00000011" // /* MW 4 */ + 9171 "00110000" // /* MW 3 */ + 9172 "11000110" // /* MW 2 */ + 9173 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 +.src_ref 7 "superkernels.cpp" 195 2 +.delay_slot + 9174 "01000100" // MOVXM p0, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9175 "10000000" // /* MW 5 */ + 9176 "11001011" // /* MW 4 */ + 9177 "11000000" // /* MW 3 */ + 9178 "00000111" // /* MW 2 */ + 9179 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9181 "11010000" // /* MW 5 */ + 9182 "11001000" // /* MW 4 */ + 9183 "11000100" // /* MW 3 */ + 9184 "00000111" // /* MW 2 */ + 9185 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9187 "00010000" // /* MW 9 */ + 9188 "00110010" // /* MW 8 */ + 9189 "00110010" // /* MW 7 */ + 9190 "11110001" // /* MW 6 */ + 9191 "00000001" // /* MW 5 */ + 9192 "00000000" // /* MW 4 */ + 9193 "11100000" // /* MW 3 */ + 9194 "11000000" // /* MW 2 */ + 9195 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */ + 9199 "00000001" // /* MW 5 */ + 9200 "00000000" // /* MW 4 */ + 9201 "00010000" // /* MW 3 */ + 9202 "00010001" // /* MW 2 */ + 9203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9205 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9207 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00110001" // /* MW 3 */ + 9210 "00100000" // /* MW 2 */ + 9211 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00000101" // /* MW 3 */ + 9214 "00100000" // /* MW 2 */ + 9215 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9217 "00000000" // /* MW 15 */ + 9218 "00000000" // /* MW 14 */ + 9219 "01111000" // /* MW 13 */ + 9220 "10100101" // /* MW 12 */ + 9221 "00000001" // /* MW 11 */ + 9222 "00000000" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "10000000" // /* MW 8 */ + 9225 "00010001" // /* MW 7 */ + 9226 "00000110" // /* MW 6 */ + 9227 "00100010" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 +.return_address + 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10100000" // /* MW 5 */ + 9234 "11001000" // /* MW 4 */ + 9235 "11000100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 first +.src_ref 7 "superkernels.cpp" 188 43 + 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "11100000" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110001" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11000010" // /* MW 2 */ + 9247 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 +.src_ref 7 "superkernels.cpp" 188 43 +.src_ref 7 "superkernels.cpp" 195 2 + 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "11100000" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110001" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000110" // /* MW 2 */ + 9257 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 first +.src_ref 7 "superkernels.cpp" 188 16 +.src_ref 7 "superkernels.cpp" 193 47 + 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9259 "00010000" // /* MW 9 */ + 9260 "00101010" // /* MW 8 */ + 9261 "10110010" // /* MW 7 */ + 9262 "11110000" // /* MW 6 */ + 9263 "00000001" // /* MW 5 */ + 9264 "00000000" // /* MW 4 */ + 9265 "01010000" // /* MW 3 */ + 9266 "11001011" // /* MW 2 */ + 9267 "01001000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ + 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */ + 9273 "00000000" // /* MW 5 */ + 9274 "00000000" // /* MW 4 */ + 9275 "00111000" // /* MW 3 */ + 9276 "00010010" // /* MW 2 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 +.delay_slot + 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "11000000" // /* MW 5 */ + 9280 "11001000" // /* MW 4 */ + 9281 "11000000" // /* MW 3 */ + 9282 "00000111" // /* MW 2 */ + 9283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 27 first +.delay_slot + 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9287 "00001111" // /* MW 3 */ + 9288 "01100001" // /* MW 2 */ + 9289 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 first +.delay_slot + 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9291 "10100011" // /* MW 5 */ + 9292 "00001100" // /* MW 4 */ + 9293 "11110000" // /* MW 3 */ + 9294 "00101100" // /* MW 2 */ + 9295 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 16 first +.delay_slot + 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9297 "00000000" // /* MW 15 */ + 9298 "00000000" // /* MW 14 */ + 9299 "01111000" // /* MW 13 */ + 9300 "10100101" // /* MW 12 */ + 9301 "00000001" // /* MW 11 */ + 9302 "00000000" // /* MW 10 */ + 9303 "00000000" // /* MW 9 */ + 9304 "10000000" // /* MW 8 */ + 9305 "00010001" // /* MW 7 */ + 9306 "00000110" // /* MW 6 */ + 9307 "00100001" // /* MW 5 */ + 9308 "00000000" // /* MW 4 */ + 9309 "11110000" // /* MW 3 */ + 9310 "00101100" // /* MW 2 */ + 9311 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 193 47 +.src_ref 7 "superkernels.cpp" 195 2 + 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9313 "00000000" // /* MW 15 */ + 9314 "00000000" // /* MW 14 */ + 9315 "00010000" // /* MW 13 */ + 9316 "00101010" // /* MW 12 */ + 9317 "10110010" // /* MW 11 */ + 9318 "11110000" // /* MW 10 */ + 9319 "00000001" // /* MW 9 */ + 9320 "00000000" // /* MW 8 */ + 9321 "10001011" // /* MW 7 */ + 9322 "10000000" // /* MW 6 */ + 9323 "00100010" // /* MW 5 */ + 9324 "00000000" // /* MW 4 */ + 9325 "11110000" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "00000000" // /* MW 7 */ + 9330 "11000011" // /* MW 6 */ + 9331 "10110011" // /* MW 5 */ + 9332 "00000011" // /* MW 4 */ + 9333 "01100000" // /* MW 3 */ + 9334 "10010001" // /* MW 2 */ + 9335 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9337 "00010000" // /* MW 9 */ + 9338 "00100000" // /* MW 8 */ + 9339 "00110010" // /* MW 7 */ + 9340 "11110000" // /* MW 6 */ + 9341 "00000001" // /* MW 5 */ + 9342 "00000000" // /* MW 4 */ + 9343 "11010000" // /* MW 3 */ + 9344 "11101110" // /* MW 2 */ + 9345 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9347 "00010110" // /* MW 3 */ + 9348 "11111110" // /* MW 2 */ + 9349 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9351 "00110110" // /* MW 3 */ + 9352 "11111110" // /* MW 2 */ + 9353 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9355 "01010110" // /* MW 3 */ + 9356 "01000110" // /* MW 2 */ + 9357 "00000111" // /* MW 1 */ + 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9359 "00000000" // /* MW 1 */ + 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9361 "00000000" // /* MW 1 */ + 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9363 "00000000" // /* MW 1 */ + 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9365 "00000000" // /* MW 1 */ + 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9367 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9369 "00000010" // /* MW 3 */ + 9370 "01100001" // /* MW 2 */ + 9371 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9373 "00010001" // /* MW 3 */ + 9374 "00000110" // /* MW 2 */ + 9375 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9377 "11111101" // /* MW 3 */ + 9378 "11100000" // /* MW 2 */ + 9379 "00010111" // /* MW 1 */ + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ + 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9383 "00000000" // /* MW 1 */ + 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9385 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9387 "00001000" // /* MW 3 */ + 9388 "10010011" // /* MW 2 */ + 9389 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 + 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9391 "10000001" // /* MW 5 */ + 9392 "10101101" // /* MW 4 */ + 9393 "10100111" // /* MW 3 */ + 9394 "00000000" // /* MW 2 */ + 9395 "00000100" // /* MW 1 */ + 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9397 "00000000" // /* MW 1 */ + 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first + 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "00110110" // /* MW 3 */ + 9402 "00000110" // /* MW 2 */ + 9403 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9405 "10000001" // /* MW 5 */ + 9406 "11011101" // /* MW 4 */ + 9407 "11011100" // /* MW 3 */ + 9408 "11001010" // /* MW 2 */ + 9409 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 47 first + 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9411 "01110110" // /* MW 3 */ + 9412 "00000110" // /* MW 2 */ + 9413 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9415 "10011110" // /* MW 3 */ + 9416 "01011100" // /* MW 2 */ + 9417 "00000111" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 195 2 first +.no_stack_arguments + 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */ + 9421 "00000001" // /* MW 5 */ + 9422 "00000000" // /* MW 4 */ + 9423 "01001000" // /* MW 3 */ + 9424 "00010001" // /* MW 2 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9427 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first +.delay_slot + 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9429 "00000111" // /* MW 3 */ + 9430 "01100010" // /* MW 2 */ + 9431 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.delay_slot + 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9433 "00110001" // /* MW 3 */ + 9434 "00000110" // /* MW 2 */ + 9435 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 first +.delay_slot + 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9437 "00001101" // /* MW 3 */ + 9438 "11100001" // /* MW 2 */ + 9439 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 +.delay_slot + 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9441 "00000000" // /* MW 15 */ + 9442 "00000000" // /* MW 14 */ + 9443 "10101000" // /* MW 13 */ + 9444 "10100000" // /* MW 12 */ + 9445 "00110100" // /* MW 11 */ + 9446 "00000000" // /* MW 10 */ + 9447 "00000000" // /* MW 9 */ + 9448 "00000000" // /* MW 8 */ + 9449 "01011011" // /* MW 7 */ + 9450 "00000001" // /* MW 6 */ + 9451 "00100000" // /* MW 5 */ + 9452 "00000000" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 +.src_ref 7 "superkernels.cpp" 198 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9457 "00010000" // /* MW 9 */ + 9458 "00100000" // /* MW 8 */ + 9459 "00110010" // /* MW 7 */ + 9460 "11110011" // /* MW 6 */ + 9461 "00000001" // /* MW 5 */ + 9462 "00000000" // /* MW 4 */ + 9463 "11010000" // /* MW 3 */ + 9464 "11000110" // /* MW 2 */ + 9465 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9467 "00000101" // /* MW 3 */ + 9468 "00100000" // /* MW 2 */ + 9469 "00010000" // /* MW 1 */ + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9481 "00001000" // /* MW 3 */ + 9482 "01010001" // /* MW 2 */ + 9483 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9485 "00010000" // /* MW 9 */ + 9486 "00110000" // /* MW 8 */ + 9487 "00110010" // /* MW 7 */ + 9488 "11110001" // /* MW 6 */ + 9489 "00000001" // /* MW 5 */ + 9490 "00000000" // /* MW 4 */ + 9491 "11010000" // /* MW 3 */ + 9492 "11001110" // /* MW 2 */ + 9493 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 first + 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9495 "00110110" // /* MW 3 */ + 9496 "00000110" // /* MW 2 */ + 9497 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 + 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9499 "01010110" // /* MW 3 */ + 9500 "00000110" // /* MW 2 */ + 9501 "00000010" // /* MW 1 */ + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ + 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9507 "00000000" // /* MW 1 */ + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00110001" // /* MW 3 */ + 9512 "00100001" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9515 "00010001" // /* MW 3 */ + 9516 "11100110" // /* MW 2 */ + 9517 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 16 first + 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9519 "00101000" // /* MW 3 */ + 9520 "01100001" // /* MW 2 */ + 9521 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 + 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */ + 9523 "00000001" // /* MW 5 */ + 9524 "01000000" // /* MW 4 */ + 9525 "10101000" // /* MW 3 */ + 9526 "00010010" // /* MW 2 */ + 9527 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9537 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 + 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9539 "00000001" // /* MW 3 */ + 9540 "00100000" // /* MW 2 */ + 9541 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 first + 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "00000000" // /* MW 7 */ + 9546 "10000000" // /* MW 6 */ + 9547 "00010001" // /* MW 5 */ + 9548 "00000110" // /* MW 4 */ + 9549 "11110110" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 200 + 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9553 "00111001" // /* MW 3 */ + 9554 "11110100" // /* MW 2 */ + 9555 "00000111" // /* MW 1 */ + 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00011001" // /* MW 3 */ + 9558 "11111011" // /* MW 2 */ + 9559 "00000111" // /* MW 1 */ + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9567 "11110001" // /* MW 3 */ + 9568 "11111101" // /* MW 2 */ + 9569 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9571 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9573 "00000000" // /* MW 3 */ + 9574 "00101000" // /* MW 2 */ + 9575 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9577 "10100000" // /* MW 3 */ + 9578 "01100111" // /* MW 2 */ + 9579 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 +.delay_slot + 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9581 "00000001" // /* MW 5 */ + 9582 "00000000" // /* MW 4 */ + 9583 "00000000" // /* MW 3 */ + 9584 "11111000" // /* MW 2 */ + 9585 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9587 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9591 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9601 "01011000" // /* MW 9 */ + 9602 "00000000" // /* MW 8 */ + 9603 "00001000" // /* MW 7 */ + 9604 "00001011" // /* MW 6 */ + 9605 "00100000" // /* MW 5 */ + 9606 "00001000" // /* MW 4 */ + 9607 "11010000" // /* MW 3 */ + 9608 "10000101" // /* MW 2 */ + 9609 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9611 "00000001" // /* MW 3 */ + 9612 "10000000" // /* MW 2 */ + 9613 "00010111" // /* MW 1 */ + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ + 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9617 "00000000" // /* MW 1 */ + 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9619 "00000000" // /* MW 1 */ + 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9621 "00000000" // /* MW 1 */ + 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9623 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9625 "00101001" // /* MW 3 */ + 9626 "00011100" // /* MW 2 */ + 9627 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9629 "00101110" // /* MW 3 */ + 9630 "00011100" // /* MW 2 */ + 9631 "00000001" // /* MW 1 */ + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ + 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9637 "00000000" // /* MW 1 */ + 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9639 "00000000" // /* MW 1 */ + 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9641 "00000000" // /* MW 1 */ + 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9643 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9645 "00101001" // /* MW 3 */ + 9646 "00011100" // /* MW 2 */ + 9647 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9649 "00101110" // /* MW 3 */ + 9650 "00000100" // /* MW 2 */ + 9651 "00000001" // /* MW 1 */ + 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9653 "00000000" // /* MW 1 */ + 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9655 "00000000" // /* MW 1 */ + 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9657 "00000000" // /* MW 1 */ + 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9659 "00000000" // /* MW 1 */ + 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9661 "00000000" // /* MW 1 */ + 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9663 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00101001" // /* MW 3 */ + 9666 "00011100" // /* MW 2 */ + 9667 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9669 "01110110" // /* MW 3 */ + 9670 "00010100" // /* MW 2 */ + 9671 "00000001" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ + 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9679 "00000000" // /* MW 1 */ + 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9681 "00000000" // /* MW 1 */ + 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9683 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9685 "01110001" // /* MW 3 */ + 9686 "01001100" // /* MW 2 */ + 9687 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9689 "00010111" // /* MW 3 */ + 9690 "00000100" // /* MW 2 */ + 9691 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9693 "00000000" // /* MW 3 */ + 9694 "00101000" // /* MW 2 */ + 9695 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9697 "00000000" // /* MW 5 */ + 9698 "10111110" // /* MW 4 */ + 9699 "11110000" // /* MW 3 */ + 9700 "00000000" // /* MW 2 */ + 9701 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9703 "00010100" // /* MW 3 */ + 9704 "11000010" // /* MW 2 */ + 9705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00100111" // /* MW 3 */ + 9708 "01110110" // /* MW 2 */ + 9709 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "10000010" // /* MW 3 */ + 9712 "00000001" // /* MW 2 */ + 9713 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9715 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9729 "00000001" // /* MW 5 */ + 9730 "00000000" // /* MW 4 */ + 9731 "00000000" // /* MW 3 */ + 9732 "00001000" // /* MW 2 */ + 9733 "00000000" // /* MW 1 */ + 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9735 "00111101" // /* MW 3 */ + 9736 "11111000" // /* MW 2 */ + 9737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */ + 9739 "00000001" // /* MW 5 */ + 9740 "00000000" // /* MW 4 */ + 9741 "11000000" // /* MW 3 */ + 9742 "00010010" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.delay_slot + 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "10011101" // /* MW 3 */ + 9746 "11111111" // /* MW 2 */ + 9747 "00001111" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot + 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9749 "11000000" // /* MW 3 */ + 9750 "01100000" // /* MW 2 */ + 9751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9757 "01100111" // /* MW 3 */ + 9758 "00000001" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.return_address + 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9761 "00111001" // /* MW 3 */ + 9762 "11111000" // /* MW 2 */ + 9763 "00000111" // /* MW 1 */ + 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9765 "00000000" // /* MW 1 */ + 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9767 "00000000" // /* MW 1 */ + 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9769 "00000000" // /* MW 1 */ + 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9771 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9773 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9775 "10011001" // /* MW 3 */ + 9776 "11111111" // /* MW 2 */ + 9777 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9779 "00000000" // /* MW 3 */ + 9780 "00101000" // /* MW 2 */ + 9781 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9789 "00001001" // /* MW 3 */ + 9790 "00100000" // /* MW 2 */ + 9791 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "mul_impl.h" 193 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9793 "01110001" // /* MW 9 */ + 9794 "00000000" // /* MW 8 */ + 9795 "00000000" // /* MW 7 */ + 9796 "00000000" // /* MW 6 */ + 9797 "11111110" // /* MW 5 */ + 9798 "00111111" // /* MW 4 */ + 9799 "00110000" // /* MW 3 */ + 9800 "11000010" // /* MW 2 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9801 "11101000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 3 "elementwise_binary_shared.h" 107 first +.src_ref 3 "elementwise_binary_shared.h" 119 37 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.function_start + 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9809 "11000000" // /* MW 3 */ + 9810 "00010110" // /* MW 2 */ + 9811 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first + 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "00000111" // /* MW 3 */ + 9814 "01100000" // /* MW 2 */ + 9815 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 122 22 first + 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "01010010" // /* MW 3 */ + 9818 "00011100" // /* MW 2 */ + 9819 "00000011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 15 first + 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9821 "10010110" // /* MW 3 */ + 9822 "00000100" // /* MW 2 */ + 9823 "00000011" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00001001" // /* MW 3 */ + 9834 "00000110" // /* MW 2 */ + 9835 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 107 + 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9837 "00000001" // /* MW 5 */ + 9838 "00000000" // /* MW 4 */ + 9839 "00000000" // /* MW 3 */ + 9840 "00010000" // /* MW 2 */ + 9841 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9843 "01001100" // /* MW 3 */ + 9844 "11000110" // /* MW 2 */ + 9845 "00010000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 +.src_ref 3 "elementwise_binary_shared.h" 124 8 + 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */ + 9847 "01100000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "00010000" // /* MW 7 */ + 9850 "11100010" // /* MW 6 */ + 9851 "00000100" // /* MW 5 */ + 9852 "00000110" // /* MW 4 */ + 9853 "00000000" // /* MW 3 */ + 9854 "00000001" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 first +.delay_slot + 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9857 "01110010" // /* MW 3 */ + 9858 "00000101" // /* MW 2 */ + 9859 "00011000" // /* MW 1 */ +.delay_slot + 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9861 "11000000" // /* MW 3 */ + 9862 "01011110" // /* MW 2 */ + 9863 "00011000" // /* MW 1 */ +.delay_slot + 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9865 "11100000" // /* MW 3 */ + 9866 "01100101" // /* MW 2 */ + 9867 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9869 "10000001" // /* MW 5 */ + 9870 "11011101" // /* MW 4 */ + 9871 "00001010" // /* MW 3 */ + 9872 "11110010" // /* MW 2 */ + 9873 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first +.delay_slot + 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9875 "00010011" // /* MW 3 */ + 9876 "00000100" // /* MW 2 */ + 9877 "00001111" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.src_ref 3 "elementwise_binary_shared.h" 131 19 + 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9879 "01110010" // /* MW 9 */ + 9880 "10111001" // /* MW 8 */ + 9881 "00000100" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "00001011" // /* MW 5 */ + 9884 "10000000" // /* MW 4 */ + 9885 "10000100" // /* MW 3 */ + 9886 "10000010" // /* MW 2 */ + 9887 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 126 34 first +.src_ref 3 "elementwise_binary_shared.h" 131 19 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000001" // /* MW 5 */ + 9890 "00000001" // /* MW 4 */ + 9891 "01010100" // /* MW 3 */ + 9892 "00000001" // /* MW 2 */ + 9893 "10000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 131 12 +.src_ref 3 "elementwise_binary_shared.h" 131 35 + 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */ + 9907 "00000001" // /* MW 5 */ + 9908 "01000000" // /* MW 4 */ + 9909 "01110000" // /* MW 3 */ + 9910 "00010011" // /* MW 2 */ + 9911 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9913 "00000000" // /* MW 3 */ + 9914 "00000000" // /* MW 2 */ + 9915 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11010000" // /* MW 5 */ + 9918 "11001000" // /* MW 4 */ + 9919 "11001000" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 9929 "00100000" // /* MW 9 */ + 9930 "00000000" // /* MW 8 */ + 9931 "00000000" // /* MW 7 */ + 9932 "11011110" // /* MW 6 */ + 9933 "00000100" // /* MW 5 */ + 9934 "00000000" // /* MW 4 */ + 9935 "10000000" // /* MW 3 */ + 9936 "00000100" // /* MW 2 */ + 9937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9947 "00100110" // /* MW 5 */ + 9948 "00001000" // /* MW 4 */ + 9949 "11110000" // /* MW 3 */ + 9950 "00101100" // /* MW 2 */ + 9951 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "10000000" // /* MW 3 */ + 9954 "00000000" // /* MW 2 */ + 9955 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01010000" // /* MW 11 */ + 9958 "00000000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "00000001" // /* MW 8 */ + 9961 "00010011" // /* MW 7 */ + 9962 "00000100" // /* MW 6 */ + 9963 "00100001" // /* MW 5 */ + 9964 "00000000" // /* MW 4 */ + 9965 "11110000" // /* MW 3 */ + 9966 "00101100" // /* MW 2 */ + 9967 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */ + 9969 "00000000" // /* MW 5 */ + 9970 "00000000" // /* MW 4 */ + 9971 "11001000" // /* MW 3 */ + 9972 "00010011" // /* MW 2 */ + 9973 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9975 "01110000" // /* MW 7 */ + 9976 "01100000" // /* MW 6 */ + 9977 "10110000" // /* MW 5 */ + 9978 "00000011" // /* MW 4 */ + 9979 "01100000" // /* MW 3 */ + 9980 "10010001" // /* MW 2 */ + 9981 "00010011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9989 "10000001" // /* MW 11 */ + 9990 "10101101" // /* MW 10 */ + 9991 "00000000" // /* MW 9 */ + 9992 "00000000" // /* MW 8 */ + 9993 "00000000" // /* MW 7 */ + 9994 "00000000" // /* MW 6 */ + 9995 "00100000" // /* MW 5 */ + 9996 "00000000" // /* MW 4 */ + 9997 "11110000" // /* MW 3 */ + 9998 "00101100" // /* MW 2 */ + 9999 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 +.src_ref 3 "elementwise_binary_shared.h" 150 97 + 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10001 "00001101" // /* MW 3 */ + 10002 "00000100" // /* MW 2 */ + 10003 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 97 first + 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10005 "01000111" // /* MW 3 */ + 10006 "10000100" // /* MW 2 */ + 10007 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */ + 10009 "00000001" // /* MW 5 */ + 10010 "01000000" // /* MW 4 */ + 10011 "10100000" // /* MW 3 */ + 10012 "00010011" // /* MW 2 */ + 10013 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "00000000" // /* MW 5 */ + 10016 "00100000" // /* MW 4 */ + 10017 "00000000" // /* MW 3 */ + 10018 "10000000" // /* MW 2 */ + 10019 "00111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10021 "11010000" // /* MW 5 */ + 10022 "11001000" // /* MW 4 */ + 10023 "11001000" // /* MW 3 */ + 10024 "00000111" // /* MW 2 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10033 "00000000" // /* MW 15 */ + 10034 "00000000" // /* MW 14 */ + 10035 "00010000" // /* MW 13 */ + 10036 "00000000" // /* MW 12 */ + 10037 "00001000" // /* MW 11 */ + 10038 "00000000" // /* MW 10 */ + 10039 "11100000" // /* MW 9 */ + 10040 "00101111" // /* MW 8 */ + 10041 "01011011" // /* MW 7 */ + 10042 "00000001" // /* MW 6 */ + 10043 "00100000" // /* MW 5 */ + 10044 "00000000" // /* MW 4 */ + 10045 "11110000" // /* MW 3 */ + 10046 "00101100" // /* MW 2 */ + 10047 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10049 "01011000" // /* MW 9 */ + 10050 "10111110" // /* MW 8 */ + 10051 "01000111" // /* MW 7 */ + 10052 "00000000" // /* MW 6 */ + 10053 "11010010" // /* MW 5 */ + 10054 "00000010" // /* MW 4 */ + 10055 "01010000" // /* MW 3 */ + 10056 "10000000" // /* MW 2 */ + 10057 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10059 "10000000" // /* MW 3 */ + 10060 "00000000" // /* MW 2 */ + 10061 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10063 "00000000" // /* MW 3 */ + 10064 "00000000" // /* MW 2 */ + 10065 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 171 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10067 "10000000" // /* MW 3 */ + 10068 "00000000" // /* MW 2 */ + 10069 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10073 "00010001" // /* MW 3 */ + 10074 "00000000" // /* MW 2 */ + 10075 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10077 "00100101" // /* MW 5 */ + 10078 "00000001" // /* MW 4 */ + 10079 "11100010" // /* MW 3 */ + 10080 "00000010" // /* MW 2 */ + 10081 "10100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10083 "10000000" // /* MW 3 */ + 10084 "00111010" // /* MW 2 */ + 10085 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10087 "10010110" // /* MW 3 */ + 10088 "01000000" // /* MW 2 */ + 10089 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10093 "00000001" // /* MW 3 */ + 10094 "00000001" // /* MW 2 */ + 10095 "00011000" // /* MW 1 */ + 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10097 "00000000" // /* MW 1 */ + 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10101 "00010010" // /* MW 3 */ + 10102 "00000000" // /* MW 2 */ + 10103 "00000101" // /* MW 1 */ + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10113 "00000000" // /* MW 1 */ + 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10115 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10117 "01110010" // /* MW 3 */ + 10118 "00000001" // /* MW 2 */ + 10119 "00011000" // /* MW 1 */ + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100110" // /* MW 5 */ + 10124 "11111000" // /* MW 4 */ + 10125 "11111111" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 166 4 first +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first + 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10129 "00010000" // /* MW 11 */ + 10130 "00000000" // /* MW 10 */ + 10131 "01111100" // /* MW 9 */ + 10132 "00001000" // /* MW 8 */ + 10133 "00000000" // /* MW 7 */ + 10134 "00000000" // /* MW 6 */ + 10135 "11101000" // /* MW 5 */ + 10136 "01010000" // /* MW 4 */ + 10137 "11011110" // /* MW 3 */ + 10138 "10001010" // /* MW 2 */ + 10139 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10141 "00010000" // /* MW 11 */ + 10142 "00011000" // /* MW 10 */ + 10143 "10111100" // /* MW 9 */ + 10144 "00001001" // /* MW 8 */ + 10145 "00000000" // /* MW 7 */ + 10146 "00000000" // /* MW 6 */ + 10147 "01101000" // /* MW 5 */ + 10148 "10010000" // /* MW 4 */ + 10149 "00000010" // /* MW 3 */ + 10150 "01100011" // /* MW 2 */ + 10151 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 177 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10153 "11110001" // /* MW 7 */ + 10154 "00000000" // /* MW 6 */ + 10155 "11101000" // /* MW 5 */ + 10156 "01010000" // /* MW 4 */ + 10157 "01111110" // /* MW 3 */ + 10158 "00000101" // /* MW 2 */ + 10159 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10161 "01101000" // /* MW 5 */ + 10162 "10010000" // /* MW 4 */ + 10163 "01010010" // /* MW 3 */ + 10164 "10010000" // /* MW 2 */ + 10165 "10000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10167 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10169 "00101011" // /* MW 3 */ + 10170 "00001000" // /* MW 2 */ + 10171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10175 "00111101" // /* MW 3 */ + 10176 "10000100" // /* MW 2 */ + 10177 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10179 "00000001" // /* MW 7 */ + 10180 "00000010" // /* MW 6 */ + 10181 "00000001" // /* MW 5 */ + 10182 "10000110" // /* MW 4 */ + 10183 "01111110" // /* MW 3 */ + 10184 "01110001" // /* MW 2 */ + 10185 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10187 "11101000" // /* MW 5 */ + 10188 "01010000" // /* MW 4 */ + 10189 "01111110" // /* MW 3 */ + 10190 "00000011" // /* MW 2 */ + 10191 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "11010100" // /* MW 9 */ + 10200 "00001001" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "00100000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "01110000" // /* MW 3 */ + 10206 "00000101" // /* MW 2 */ + 10207 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "00100000" // /* MW 5 */ + 10220 "00000000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00010000" // /* MW 15 */ + 10226 "00001000" // /* MW 14 */ + 10227 "01111000" // /* MW 13 */ + 10228 "10100101" // /* MW 12 */ + 10229 "00000001" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "01111000" // /* MW 13 */ + 10244 "10100101" // /* MW 12 */ + 10245 "00000001" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "01011011" // /* MW 7 */ + 10250 "00000001" // /* MW 6 */ + 10251 "11101000" // /* MW 5 */ + 10252 "01010000" // /* MW 4 */ + 10253 "01111110" // /* MW 3 */ + 10254 "00000011" // /* MW 2 */ + 10255 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "00000000" // /* MW 8 */ + 10265 "10100011" // /* MW 7 */ + 10266 "00011100" // /* MW 6 */ + 10267 "00100010" // /* MW 5 */ + 10268 "00000000" // /* MW 4 */ + 10269 "01110000" // /* MW 3 */ + 10270 "00000101" // /* MW 2 */ + 10271 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "01111000" // /* MW 13 */ + 10276 "10100101" // /* MW 12 */ + 10277 "00000001" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "00000000" // /* MW 8 */ + 10281 "01011011" // /* MW 7 */ + 10282 "00000001" // /* MW 6 */ + 10283 "00100000" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10289 "00010000" // /* MW 15 */ + 10290 "00001000" // /* MW 14 */ + 10291 "01111000" // /* MW 13 */ + 10292 "10100101" // /* MW 12 */ + 10293 "00000001" // /* MW 11 */ + 10294 "00000000" // /* MW 10 */ + 10295 "00000000" // /* MW 9 */ + 10296 "00000000" // /* MW 8 */ + 10297 "01011011" // /* MW 7 */ + 10298 "00000001" // /* MW 6 */ + 10299 "00100000" // /* MW 5 */ + 10300 "00000000" // /* MW 4 */ + 10301 "11110000" // /* MW 3 */ + 10302 "00101100" // /* MW 2 */ + 10303 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10305 "00000001" // /* MW 5 */ + 10306 "00000000" // /* MW 4 */ + 10307 "00000000" // /* MW 3 */ + 10308 "11110000" // /* MW 2 */ + 10309 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "10100011" // /* MW 3 */ + 10312 "00011100" // /* MW 2 */ + 10313 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10315 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10317 "00000001" // /* MW 3 */ + 10318 "00000010" // /* MW 2 */ + 10319 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10321 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10323 "00000000" // /* MW 3 */ + 10324 "00101000" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "10100011" // /* MW 3 */ + 10328 "00011100" // /* MW 2 */ + 10329 "00001010" // /* MW 1 */ +.delay_slot + 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "10100000" // /* MW 3 */ + 10332 "01100000" // /* MW 2 */ + 10333 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10335 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.delay_slot + 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10337 "10100011" // /* MW 3 */ + 10338 "00011100" // /* MW 2 */ + 10339 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 10341 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 4 "vector.hpp" 538 13 +.src_ref 3 "elementwise_binary_shared.h" 237 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.function_start + 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10353 "01110010" // /* MW 9 */ + 10354 "11110000" // /* MW 8 */ + 10355 "01100000" // /* MW 7 */ + 10356 "00000000" // /* MW 6 */ + 10357 "10001011" // /* MW 5 */ + 10358 "10001000" // /* MW 4 */ + 10359 "10000011" // /* MW 3 */ + 10360 "10000010" // /* MW 2 */ + 10361 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 first +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 + 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10363 "10000001" // /* MW 5 */ + 10364 "11000101" // /* MW 4 */ + 10365 "01010100" // /* MW 3 */ + 10366 "00000001" // /* MW 2 */ + 10367 "01000000" // /* MW 1 */ + 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10369 "00000000" // /* MW 1 */ + 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10371 "00000000" // /* MW 1 */ + 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10373 "00000000" // /* MW 1 */ + 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10375 "00000000" // /* MW 1 */ + 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10377 "00000000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 244 12 +.src_ref 3 "elementwise_binary_shared.h" 244 35 + 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */ + 10381 "00000001" // /* MW 5 */ + 10382 "00000000" // /* MW 4 */ + 10383 "01101000" // /* MW 3 */ + 10384 "00010100" // /* MW 2 */ + 10385 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 237 +.delay_slot + 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10387 "00000001" // /* MW 5 */ + 10388 "00000000" // /* MW 4 */ + 10389 "00000000" // /* MW 3 */ + 10390 "00001000" // /* MW 2 */ + 10391 "00000000" // /* MW 1 */ +.delay_slot + 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10393 "11100000" // /* MW 3 */ + 10394 "01010101" // /* MW 2 */ + 10395 "00011000" // /* MW 1 */ +.delay_slot + 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10397 "11100000" // /* MW 3 */ + 10398 "01100000" // /* MW 2 */ + 10399 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10401 "00101011" // /* MW 3 */ + 10402 "00000111" // /* MW 2 */ + 10403 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10405 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 247 12 first +.no_stack_arguments + 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10407 "00000001" // /* MW 5 */ + 10408 "00000000" // /* MW 4 */ + 10409 "00101000" // /* MW 3 */ + 10410 "00010011" // /* MW 2 */ + 10411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10421 "10000001" // /* MW 11 */ + 10422 "10101101" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "00000000" // /* MW 7 */ + 10426 "00000000" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.return_address + 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10433 "00000000" // /* MW 5 */ + 10434 "00000000" // /* MW 4 */ + 10435 "01111000" // /* MW 3 */ + 10436 "00010100" // /* MW 2 */ + 10437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10447 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 3 "elementwise_binary_shared.h" 245 12 first +.no_stack_arguments + 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10449 "00000001" // /* MW 5 */ + 10450 "00000000" // /* MW 4 */ + 10451 "00101000" // /* MW 3 */ + 10452 "00010011" // /* MW 2 */ + 10453 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.delay_slot + 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10455 "01110000" // /* MW 7 */ + 10456 "01100000" // /* MW 6 */ + 10457 "10110000" // /* MW 5 */ + 10458 "00000000" // /* MW 4 */ + 10459 "01100000" // /* MW 3 */ + 10460 "10010001" // /* MW 2 */ + 10461 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10469 "10000001" // /* MW 11 */ + 10470 "10101101" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00000000" // /* MW 7 */ + 10474 "00000000" // /* MW 6 */ + 10475 "00100000" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.return_address + 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10481 "10000000" // /* MW 3 */ + 10482 "01110001" // /* MW 2 */ + 10483 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 first + 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10485 "00000000" // /* MW 3 */ + 10486 "00101000" // /* MW 2 */ + 10487 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.delay_slot + 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10489 "00000001" // /* MW 5 */ + 10490 "00000000" // /* MW 4 */ + 10491 "00000000" // /* MW 3 */ + 10492 "11111000" // /* MW 2 */ + 10493 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 10501 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 205 first +.src_ref 7 "superkernels.cpp" 210 6 +.function_start + 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10513 "10000000" // /* MW 5 */ + 10514 "11001000" // /* MW 4 */ + 10515 "11000110" // /* MW 3 */ + 10516 "00000111" // /* MW 2 */ + 10517 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 first + 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10519 "11000001" // /* MW 5 */ + 10520 "10110101" // /* MW 4 */ + 10521 "11011000" // /* MW 3 */ + 10522 "11000010" // /* MW 2 */ + 10523 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 205 + 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10525 "00000001" // /* MW 5 */ + 10526 "00000000" // /* MW 4 */ + 10527 "00000000" // /* MW 3 */ + 10528 "00001000" // /* MW 2 */ + 10529 "00000000" // /* MW 1 */ + 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10531 "01110000" // /* MW 7 */ + 10532 "11010000" // /* MW 6 */ + 10533 "00001011" // /* MW 5 */ + 10534 "00000000" // /* MW 4 */ + 10535 "10110000" // /* MW 3 */ + 10536 "01100011" // /* MW 2 */ + 10537 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 + 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10539 "00010001" // /* MW 9 */ + 10540 "00101000" // /* MW 8 */ + 10541 "00110010" // /* MW 7 */ + 10542 "11110011" // /* MW 6 */ + 10543 "00000001" // /* MW 5 */ + 10544 "00000000" // /* MW 4 */ + 10545 "10110000" // /* MW 3 */ + 10546 "10000010" // /* MW 2 */ + 10547 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10549 "11000000" // /* MW 3 */ + 10550 "11010100" // /* MW 2 */ + 10551 "00011011" // /* MW 1 */ + 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10553 "00000000" // /* MW 1 */ + 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 +.src_ref 7 "superkernels.cpp" 210 16 + 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */ + 10557 "00000001" // /* MW 5 */ + 10558 "01000000" // /* MW 4 */ + 10559 "11110000" // /* MW 3 */ + 10560 "00010100" // /* MW 2 */ + 10561 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 22 first +.delay_slot + 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10563 "10010000" // /* MW 3 */ + 10564 "01100010" // /* MW 2 */ + 10565 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 30 +.delay_slot + 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10567 "11111011" // /* MW 3 */ + 10568 "01100011" // /* MW 2 */ + 10569 "00010100" // /* MW 1 */ +.delay_slot + 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10571 "00111101" // /* MW 3 */ + 10572 "11110100" // /* MW 2 */ + 10573 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10575 "01110000" // /* MW 7 */ + 10576 "01100000" // /* MW 6 */ + 10577 "00110000" // /* MW 5 */ + 10578 "00000011" // /* MW 4 */ + 10579 "00110000" // /* MW 3 */ + 10580 "11000110" // /* MW 2 */ + 10581 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 +.src_ref 7 "superkernels.cpp" 224 2 +.delay_slot + 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10583 "00000000" // /* MW 5 */ + 10584 "11001010" // /* MW 4 */ + 10585 "11000000" // /* MW 3 */ + 10586 "00000111" // /* MW 2 */ + 10587 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10589 "11010000" // /* MW 5 */ + 10590 "11001000" // /* MW 4 */ + 10591 "11000100" // /* MW 3 */ + 10592 "00000111" // /* MW 2 */ + 10593 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10595 "00010000" // /* MW 9 */ + 10596 "00110010" // /* MW 8 */ + 10597 "00110010" // /* MW 7 */ + 10598 "11110001" // /* MW 6 */ + 10599 "00000001" // /* MW 5 */ + 10600 "00000000" // /* MW 4 */ + 10601 "11100000" // /* MW 3 */ + 10602 "11000000" // /* MW 2 */ + 10603 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "00000000" // /* MW 3 */ + 10610 "00010011" // /* MW 2 */ + 10611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10615 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10617 "00110001" // /* MW 3 */ + 10618 "00100000" // /* MW 2 */ + 10619 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10621 "00000101" // /* MW 3 */ + 10622 "00100000" // /* MW 2 */ + 10623 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10625 "00000000" // /* MW 15 */ + 10626 "00000000" // /* MW 14 */ + 10627 "01111000" // /* MW 13 */ + 10628 "10100101" // /* MW 12 */ + 10629 "00000001" // /* MW 11 */ + 10630 "00000000" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "10000000" // /* MW 8 */ + 10633 "00010001" // /* MW 7 */ + 10634 "00000110" // /* MW 6 */ + 10635 "00100010" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 +.return_address + 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10641 "10100000" // /* MW 5 */ + 10642 "11001000" // /* MW 4 */ + 10643 "11000100" // /* MW 3 */ + 10644 "00000111" // /* MW 2 */ + 10645 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 first +.src_ref 7 "superkernels.cpp" 217 65 + 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10647 "00010000" // /* MW 9 */ + 10648 "10000000" // /* MW 8 */ + 10649 "00110010" // /* MW 7 */ + 10650 "11110001" // /* MW 6 */ + 10651 "00000001" // /* MW 5 */ + 10652 "00000000" // /* MW 4 */ + 10653 "11010000" // /* MW 3 */ + 10654 "11000010" // /* MW 2 */ + 10655 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 +.src_ref 7 "superkernels.cpp" 217 65 +.src_ref 7 "superkernels.cpp" 224 2 + 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10657 "00010000" // /* MW 9 */ + 10658 "10000000" // /* MW 8 */ + 10659 "00110010" // /* MW 7 */ + 10660 "11110001" // /* MW 6 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "11010000" // /* MW 3 */ + 10664 "11000110" // /* MW 2 */ + 10665 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 first +.src_ref 7 "superkernels.cpp" 217 16 +.src_ref 7 "superkernels.cpp" 222 47 + 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10667 "00010000" // /* MW 9 */ + 10668 "00101010" // /* MW 8 */ + 10669 "10110010" // /* MW 7 */ + 10670 "11110000" // /* MW 6 */ + 10671 "00000001" // /* MW 5 */ + 10672 "00000000" // /* MW 4 */ + 10673 "01010000" // /* MW 3 */ + 10674 "11001011" // /* MW 2 */ + 10675 "01001010" // /* MW 1 */ + 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10677 "00000000" // /* MW 1 */ + 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10679 "00000000" // /* MW 1 */ + 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */ + 10681 "00000000" // /* MW 5 */ + 10682 "00000000" // /* MW 4 */ + 10683 "11111000" // /* MW 3 */ + 10684 "00010100" // /* MW 2 */ + 10685 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 +.delay_slot + 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10687 "11000000" // /* MW 5 */ + 10688 "11001000" // /* MW 4 */ + 10689 "11000000" // /* MW 3 */ + 10690 "00000111" // /* MW 2 */ + 10691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 27 first +.delay_slot + 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10695 "00001111" // /* MW 3 */ + 10696 "01100001" // /* MW 2 */ + 10697 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 first +.delay_slot + 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "10100011" // /* MW 5 */ + 10700 "00001100" // /* MW 4 */ + 10701 "11110000" // /* MW 3 */ + 10702 "00101100" // /* MW 2 */ + 10703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 16 first +.delay_slot + 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10705 "00000000" // /* MW 15 */ + 10706 "00000000" // /* MW 14 */ + 10707 "01111000" // /* MW 13 */ + 10708 "10100101" // /* MW 12 */ + 10709 "00000001" // /* MW 11 */ + 10710 "00000000" // /* MW 10 */ + 10711 "00000000" // /* MW 9 */ + 10712 "10000000" // /* MW 8 */ + 10713 "00010001" // /* MW 7 */ + 10714 "00000110" // /* MW 6 */ + 10715 "00100001" // /* MW 5 */ + 10716 "00000000" // /* MW 4 */ + 10717 "11110000" // /* MW 3 */ + 10718 "00101100" // /* MW 2 */ + 10719 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 222 47 +.src_ref 7 "superkernels.cpp" 224 2 + 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10721 "00000000" // /* MW 15 */ + 10722 "00000000" // /* MW 14 */ + 10723 "00010000" // /* MW 13 */ + 10724 "00101010" // /* MW 12 */ + 10725 "10110010" // /* MW 11 */ + 10726 "11110000" // /* MW 10 */ + 10727 "00000001" // /* MW 9 */ + 10728 "00000000" // /* MW 8 */ + 10729 "10001011" // /* MW 7 */ + 10730 "10000000" // /* MW 6 */ + 10731 "00100010" // /* MW 5 */ + 10732 "00000000" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10737 "00000000" // /* MW 7 */ + 10738 "11000011" // /* MW 6 */ + 10739 "10110011" // /* MW 5 */ + 10740 "00000011" // /* MW 4 */ + 10741 "01100000" // /* MW 3 */ + 10742 "10010001" // /* MW 2 */ + 10743 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10745 "00010000" // /* MW 9 */ + 10746 "00100000" // /* MW 8 */ + 10747 "00110010" // /* MW 7 */ + 10748 "11110000" // /* MW 6 */ + 10749 "00000001" // /* MW 5 */ + 10750 "00000000" // /* MW 4 */ + 10751 "11010000" // /* MW 3 */ + 10752 "11101110" // /* MW 2 */ + 10753 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10755 "00010110" // /* MW 3 */ + 10756 "11111110" // /* MW 2 */ + 10757 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10759 "00110110" // /* MW 3 */ + 10760 "11111110" // /* MW 2 */ + 10761 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10763 "01010110" // /* MW 3 */ + 10764 "01000110" // /* MW 2 */ + 10765 "00000111" // /* MW 1 */ + 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10767 "00000000" // /* MW 1 */ + 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10769 "00000000" // /* MW 1 */ + 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10771 "00000000" // /* MW 1 */ + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00000010" // /* MW 3 */ + 10778 "01100001" // /* MW 2 */ + 10779 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00010001" // /* MW 3 */ + 10782 "00000110" // /* MW 2 */ + 10783 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10785 "11111101" // /* MW 3 */ + 10786 "11100000" // /* MW 2 */ + 10787 "00010111" // /* MW 1 */ + 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10789 "00000000" // /* MW 1 */ + 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10791 "00000000" // /* MW 1 */ + 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10793 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "00001000" // /* MW 3 */ + 10796 "10010011" // /* MW 2 */ + 10797 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 + 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10799 "10000001" // /* MW 5 */ + 10800 "10101101" // /* MW 4 */ + 10801 "10100111" // /* MW 3 */ + 10802 "00000000" // /* MW 2 */ + 10803 "00000100" // /* MW 1 */ + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first + 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00110110" // /* MW 3 */ + 10810 "00000110" // /* MW 2 */ + 10811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10813 "10000001" // /* MW 5 */ + 10814 "11011101" // /* MW 4 */ + 10815 "11011100" // /* MW 3 */ + 10816 "11001010" // /* MW 2 */ + 10817 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 47 first + 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10819 "01110110" // /* MW 3 */ + 10820 "00000110" // /* MW 2 */ + 10821 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10823 "10011110" // /* MW 3 */ + 10824 "01011100" // /* MW 2 */ + 10825 "00000111" // /* MW 1 */ + 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10827 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 224 2 first +.no_stack_arguments + 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */ + 10829 "00000001" // /* MW 5 */ + 10830 "00000000" // /* MW 4 */ + 10831 "00111000" // /* MW 3 */ + 10832 "00010100" // /* MW 2 */ + 10833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10835 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first +.delay_slot + 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00000111" // /* MW 3 */ + 10838 "01100010" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.delay_slot + 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00110001" // /* MW 3 */ + 10842 "00000110" // /* MW 2 */ + 10843 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 first +.delay_slot + 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10845 "00001101" // /* MW 3 */ + 10846 "11100001" // /* MW 2 */ + 10847 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 +.delay_slot + 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10849 "00000000" // /* MW 15 */ + 10850 "00000000" // /* MW 14 */ + 10851 "10101000" // /* MW 13 */ + 10852 "10100000" // /* MW 12 */ + 10853 "00110100" // /* MW 11 */ + 10854 "00000000" // /* MW 10 */ + 10855 "00000000" // /* MW 9 */ + 10856 "00000000" // /* MW 8 */ + 10857 "01011011" // /* MW 7 */ + 10858 "00000001" // /* MW 6 */ + 10859 "00100000" // /* MW 5 */ + 10860 "00000000" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 +.src_ref 7 "superkernels.cpp" 227 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10865 "00010000" // /* MW 9 */ + 10866 "00100000" // /* MW 8 */ + 10867 "00110010" // /* MW 7 */ + 10868 "11110011" // /* MW 6 */ + 10869 "00000001" // /* MW 5 */ + 10870 "00000000" // /* MW 4 */ + 10871 "11010000" // /* MW 3 */ + 10872 "11000110" // /* MW 2 */ + 10873 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10875 "00000101" // /* MW 3 */ + 10876 "00100000" // /* MW 2 */ + 10877 "00010000" // /* MW 1 */ + 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10879 "00000000" // /* MW 1 */ + 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10881 "00000000" // /* MW 1 */ + 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10883 "00000000" // /* MW 1 */ + 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10885 "00000000" // /* MW 1 */ + 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10887 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10889 "00001000" // /* MW 3 */ + 10890 "01010001" // /* MW 2 */ + 10891 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10893 "00010000" // /* MW 9 */ + 10894 "00110000" // /* MW 8 */ + 10895 "00110010" // /* MW 7 */ + 10896 "11110001" // /* MW 6 */ + 10897 "00000001" // /* MW 5 */ + 10898 "00000000" // /* MW 4 */ + 10899 "11010000" // /* MW 3 */ + 10900 "11001110" // /* MW 2 */ + 10901 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 first + 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10903 "00110110" // /* MW 3 */ + 10904 "00000110" // /* MW 2 */ + 10905 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 + 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10907 "01010110" // /* MW 3 */ + 10908 "00000110" // /* MW 2 */ + 10909 "00000010" // /* MW 1 */ + 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10911 "00000000" // /* MW 1 */ + 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10913 "00000000" // /* MW 1 */ + 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10915 "00000000" // /* MW 1 */ + 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10917 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00110001" // /* MW 3 */ + 10920 "00100001" // /* MW 2 */ + 10921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "00010001" // /* MW 3 */ + 10924 "11100110" // /* MW 2 */ + 10925 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 16 first + 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10927 "00101000" // /* MW 3 */ + 10928 "01100001" // /* MW 2 */ + 10929 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 + 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */ + 10931 "00000001" // /* MW 5 */ + 10932 "01000000" // /* MW 4 */ + 10933 "01101000" // /* MW 3 */ + 10934 "00010101" // /* MW 2 */ + 10935 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 + 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10947 "00000001" // /* MW 3 */ + 10948 "00100000" // /* MW 2 */ + 10949 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 first + 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10951 "00000000" // /* MW 9 */ + 10952 "00000000" // /* MW 8 */ + 10953 "00000000" // /* MW 7 */ + 10954 "10000000" // /* MW 6 */ + 10955 "00010001" // /* MW 5 */ + 10956 "00000110" // /* MW 4 */ + 10957 "11110110" // /* MW 3 */ + 10958 "00101100" // /* MW 2 */ + 10959 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 229 + 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10961 "00111001" // /* MW 3 */ + 10962 "11110100" // /* MW 2 */ + 10963 "00000111" // /* MW 1 */ + 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00011001" // /* MW 3 */ + 10966 "11111011" // /* MW 2 */ + 10967 "00000111" // /* MW 1 */ + 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10969 "00000000" // /* MW 1 */ + 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10971 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10973 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10975 "11110001" // /* MW 3 */ + 10976 "11111101" // /* MW 2 */ + 10977 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10981 "00000000" // /* MW 3 */ + 10982 "00101000" // /* MW 2 */ + 10983 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10985 "10100000" // /* MW 3 */ + 10986 "01100111" // /* MW 2 */ + 10987 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 +.delay_slot + 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10989 "00000001" // /* MW 5 */ + 10990 "00000000" // /* MW 4 */ + 10991 "00000000" // /* MW 3 */ + 10992 "11111000" // /* MW 2 */ + 10993 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10999 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 11008 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11009 "01011000" // /* MW 9 */ + 11010 "00000000" // /* MW 8 */ + 11011 "00001000" // /* MW 7 */ + 11012 "00001011" // /* MW 6 */ + 11013 "00100000" // /* MW 5 */ + 11014 "00001000" // /* MW 4 */ + 11015 "11010000" // /* MW 3 */ + 11016 "10000101" // /* MW 2 */ + 11017 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 11018 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11019 "00000001" // /* MW 3 */ + 11020 "10000000" // /* MW 2 */ + 11021 "00010111" // /* MW 1 */ + 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11023 "00000000" // /* MW 1 */ + 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11025 "00000000" // /* MW 1 */ + 11026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11027 "00000000" // /* MW 1 */ + 11028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11029 "00000000" // /* MW 1 */ + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 11032 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11033 "00101001" // /* MW 3 */ + 11034 "00011100" // /* MW 2 */ + 11035 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 11036 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11037 "00101110" // /* MW 3 */ + 11038 "00011100" // /* MW 2 */ + 11039 "00000001" // /* MW 1 */ + 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11041 "00000000" // /* MW 1 */ + 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11043 "00000000" // /* MW 1 */ + 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11045 "00000000" // /* MW 1 */ + 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11047 "00000000" // /* MW 1 */ + 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11049 "00000000" // /* MW 1 */ + 11050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11051 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 11052 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11053 "00101001" // /* MW 3 */ + 11054 "00011100" // /* MW 2 */ + 11055 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 11056 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11057 "00101110" // /* MW 3 */ + 11058 "00000100" // /* MW 2 */ + 11059 "00000001" // /* MW 1 */ + 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11061 "00000000" // /* MW 1 */ + 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11063 "00000000" // /* MW 1 */ + 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11065 "00000000" // /* MW 1 */ + 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11067 "00000000" // /* MW 1 */ + 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11069 "00000000" // /* MW 1 */ + 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 11072 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11073 "00101001" // /* MW 3 */ + 11074 "00011100" // /* MW 2 */ + 11075 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 11076 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11077 "01110110" // /* MW 3 */ + 11078 "00010100" // /* MW 2 */ + 11079 "00000001" // /* MW 1 */ + 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11081 "00000000" // /* MW 1 */ + 11082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11083 "00000000" // /* MW 1 */ + 11084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11085 "00000000" // /* MW 1 */ + 11086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11087 "00000000" // /* MW 1 */ + 11088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11089 "00000000" // /* MW 1 */ + 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11092 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11093 "01110001" // /* MW 3 */ + 11094 "01001100" // /* MW 2 */ + 11095 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11096 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11097 "00010111" // /* MW 3 */ + 11098 "00000100" // /* MW 2 */ + 11099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11100 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11101 "00000000" // /* MW 3 */ + 11102 "00101000" // /* MW 2 */ + 11103 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11104 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11105 "00000000" // /* MW 5 */ + 11106 "10111110" // /* MW 4 */ + 11107 "11110000" // /* MW 3 */ + 11108 "00000000" // /* MW 2 */ + 11109 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11110 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11111 "00010100" // /* MW 3 */ + 11112 "11000010" // /* MW 2 */ + 11113 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11114 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11115 "00100111" // /* MW 3 */ + 11116 "01110110" // /* MW 2 */ + 11117 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11118 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11119 "10000010" // /* MW 3 */ + 11120 "00000001" // /* MW 2 */ + 11121 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11123 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 11136 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11137 "00000001" // /* MW 5 */ + 11138 "00000000" // /* MW 4 */ + 11139 "00000000" // /* MW 3 */ + 11140 "00001000" // /* MW 2 */ + 11141 "00000000" // /* MW 1 */ + 11142 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11143 "00111101" // /* MW 3 */ + 11144 "11111000" // /* MW 2 */ + 11145 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 11146 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */ + 11147 "00000001" // /* MW 5 */ + 11148 "00000000" // /* MW 4 */ + 11149 "10000000" // /* MW 3 */ + 11150 "00010101" // /* MW 2 */ + 11151 "00000000" // /* MW 1 */ +.delay_slot + 11152 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11153 "10100000" // /* MW 3 */ + 11154 "00010111" // /* MW 2 */ + 11155 "00011000" // /* MW 1 */ +.delay_slot + 11156 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11157 "00010101" // /* MW 3 */ + 11158 "11111100" // /* MW 2 */ + 11159 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.delay_slot + 11160 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11161 "11000000" // /* MW 3 */ + 11162 "11010000" // /* MW 2 */ + 11163 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 11168 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11169 "00001000" // /* MW 9 */ + 11170 "11000100" // /* MW 8 */ + 11171 "00110011" // /* MW 7 */ + 11172 "01101000" // /* MW 6 */ + 11173 "00000000" // /* MW 5 */ + 11174 "00000001" // /* MW 4 */ + 11175 "00100000" // /* MW 3 */ + 11176 "00000111" // /* MW 2 */ + 11177 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 11178 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11179 "01011000" // /* MW 9 */ + 11180 "11111101" // /* MW 8 */ + 11181 "00000111" // /* MW 7 */ + 11182 "00001000" // /* MW 6 */ + 11183 "10000000" // /* MW 5 */ + 11184 "00000001" // /* MW 4 */ + 11185 "10000000" // /* MW 3 */ + 11186 "11100010" // /* MW 2 */ + 11187 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 11188 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11189 "00000001" // /* MW 9 */ + 11190 "10100000" // /* MW 8 */ + 11191 "00000111" // /* MW 7 */ + 11192 "10000000" // /* MW 6 */ + 11193 "00010001" // /* MW 5 */ + 11194 "00001010" // /* MW 4 */ + 11195 "00100000" // /* MW 3 */ + 11196 "10111110" // /* MW 2 */ + 11197 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 11198 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11199 "01001010" // /* MW 3 */ + 11200 "00000110" // /* MW 2 */ + 11201 "00000000" // /* MW 1 */ + 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11203 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11205 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11206 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11207 "00010111" // /* MW 3 */ + 11208 "00000010" // /* MW 2 */ + 11209 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11210 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11211 "00000000" // /* MW 3 */ + 11212 "00101000" // /* MW 2 */ + 11213 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11214 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11215 "00000101" // /* MW 3 */ + 11216 "00100010" // /* MW 2 */ + 11217 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11218 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11219 "00000001" // /* MW 5 */ + 11220 "00000000" // /* MW 4 */ + 11221 "00000000" // /* MW 3 */ + 11222 "11111000" // /* MW 2 */ + 11223 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11224 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11225 "00100111" // /* MW 3 */ + 11226 "01110111" // /* MW 2 */ + 11227 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11228 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11229 "10000010" // /* MW 3 */ + 11230 "00100001" // /* MW 2 */ + 11231 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11233 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_shared.h" 227 first +.src_ref 3 "elementwise_binary_shared.h" 232 8 first +.tail_call +.function_start + 11248 "10000100" // J #9808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 11249 "00000000" // /* MW 5 */ + 11250 "00000000" // /* MW 4 */ + 11251 "00101000" // /* MW 3 */ + 11252 "00010011" // /* MW 2 */ + 11253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11259 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 11263 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 11264 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11265 "00000001" // /* MW 5 */ + 11266 "00100001" // /* MW 4 */ + 11267 "00000000" // /* MW 3 */ + 11268 "00000000" // /* MW 2 */ + 11269 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11270 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11271 "11000000" // /* MW 3 */ + 11272 "01010000" // /* MW 2 */ + 11273 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11274 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11275 "10010000" // /* MW 3 */ + 11276 "01100000" // /* MW 2 */ + 11277 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 11278 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "00010001" // /* MW 3 */ + 11280 "00000100" // /* MW 2 */ + 11281 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 11282 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11283 "00010001" // /* MW 3 */ + 11284 "00010100" // /* MW 2 */ + 11285 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 11287 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 11296 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00101110" // /* MW 3 */ + 11298 "00011100" // /* MW 2 */ + 11299 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 11300 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11301 "00000001" // /* MW 5 */ + 11302 "00000000" // /* MW 4 */ + 11303 "00000000" // /* MW 3 */ + 11304 "00001000" // /* MW 2 */ + 11305 "00000000" // /* MW 1 */ + 11306 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11307 "00111101" // /* MW 3 */ + 11308 "11111100" // /* MW 2 */ + 11309 "00001111" // /* MW 1 */ + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11311 "00000000" // /* MW 1 */ + 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11313 "00000000" // /* MW 1 */ + 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11315 "00000000" // /* MW 1 */ + 11316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11317 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 11318 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11319 "00101001" // /* MW 3 */ + 11320 "00011100" // /* MW 2 */ + 11321 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 11322 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11323 "00101110" // /* MW 3 */ + 11324 "00011100" // /* MW 2 */ + 11325 "00000001" // /* MW 1 */ + 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11327 "00000000" // /* MW 1 */ + 11328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11329 "00000000" // /* MW 1 */ + 11330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11331 "00000000" // /* MW 1 */ + 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11333 "00000000" // /* MW 1 */ + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 11338 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11339 "00101001" // /* MW 3 */ + 11340 "00011100" // /* MW 2 */ + 11341 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 11342 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11343 "00101110" // /* MW 3 */ + 11344 "00000100" // /* MW 2 */ + 11345 "00000001" // /* MW 1 */ + 11346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11347 "00000000" // /* MW 1 */ + 11348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11349 "00000000" // /* MW 1 */ + 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11351 "00000000" // /* MW 1 */ + 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11353 "00000000" // /* MW 1 */ + 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11355 "00000000" // /* MW 1 */ + 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11357 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 11358 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11359 "00101001" // /* MW 3 */ + 11360 "00011100" // /* MW 2 */ + 11361 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 11362 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11363 "00101110" // /* MW 3 */ + 11364 "00010100" // /* MW 2 */ + 11365 "00000001" // /* MW 1 */ + 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11367 "00000000" // /* MW 1 */ + 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11369 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 11370 "00000100" // JL #11264 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11264 delay_slots=5 */ + 11371 "00000001" // /* MW 5 */ + 11372 "00000000" // /* MW 4 */ + 11373 "00000000" // /* MW 3 */ + 11374 "00010110" // /* MW 2 */ + 11375 "00000000" // /* MW 1 */ +.delay_slot + 11376 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "10011101" // /* MW 3 */ + 11378 "11111011" // /* MW 2 */ + 11379 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 11384 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "00101001" // /* MW 3 */ + 11386 "11011100" // /* MW 2 */ + 11387 "00001000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot + 11388 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "11000000" // /* MW 3 */ + 11390 "01100000" // /* MW 2 */ + 11391 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.return_address + 11392 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11393 "00111001" // /* MW 3 */ + 11394 "11111100" // /* MW 2 */ + 11395 "00000111" // /* MW 1 */ + 11396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11397 "00000000" // /* MW 1 */ + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11406 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11407 "10011001" // /* MW 3 */ + 11408 "11111011" // /* MW 2 */ + 11409 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11410 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11411 "00000000" // /* MW 3 */ + 11412 "00101000" // /* MW 2 */ + 11413 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11419 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11420 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11421 "00000001" // /* MW 3 */ + 11422 "00100000" // /* MW 2 */ + 11423 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "mul_impl.h" 134 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11424 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11425 "01110001" // /* MW 9 */ + 11426 "00000000" // /* MW 8 */ + 11427 "00000000" // /* MW 7 */ + 11428 "00000000" // /* MW 6 */ + 11429 "11111110" // /* MW 5 */ + 11430 "00111111" // /* MW 4 */ + 11431 "00110000" // /* MW 3 */ + 11432 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 11433 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 149 first +.src_ref 3 "elementwise_binary.h" 156 37 +.src_ref 3 "elementwise_binary.h" 168 8 first +.function_start + 11440 "10111010" // MOVA m0, #32; MOVXM ls, #11616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11441 "00010000" // /* MW 9 */ + 11442 "10110000" // /* MW 8 */ + 11443 "01111110" // /* MW 7 */ + 11444 "00001000" // /* MW 6 */ + 11445 "00000000" // /* MW 5 */ + 11446 "00000000" // /* MW 4 */ + 11447 "10000000" // /* MW 3 */ + 11448 "00000000" // /* MW 2 */ + 11449 "00000100" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 37 first +.src_ref 3 "elementwise_binary.h" 168 8 first + 11450 "10111010" // LDA r3, [p3], m0; MOVXM le, #11632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11451 "00010000" // /* MW 9 */ + 11452 "10111000" // /* MW 8 */ + 11453 "10111110" // /* MW 7 */ + 11454 "00001001" // /* MW 6 */ + 11455 "00000000" // /* MW 5 */ + 11456 "00000000" // /* MW 4 */ + 11457 "11010000" // /* MW 3 */ + 11458 "00001110" // /* MW 2 */ + 11459 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11460 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11461 "01011000" // /* MW 9 */ + 11462 "00111100" // /* MW 8 */ + 11463 "00001011" // /* MW 7 */ + 11464 "01001000" // /* MW 6 */ + 11465 "00010111" // /* MW 5 */ + 11466 "00111110" // /* MW 4 */ + 11467 "11010000" // /* MW 3 */ + 11468 "10010000" // /* MW 2 */ + 11469 "01100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11470 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11471 "00010000" // /* MW 9 */ + 11472 "00110100" // /* MW 8 */ + 11473 "00110010" // /* MW 7 */ + 11474 "11110010" // /* MW 6 */ + 11475 "00000001" // /* MW 5 */ + 11476 "00000000" // /* MW 4 */ + 11477 "11010000" // /* MW 3 */ + 11478 "10000000" // /* MW 2 */ + 11479 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11480 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11481 "01000010" // /* MW 3 */ + 11482 "00000100" // /* MW 2 */ + 11483 "00000100" // /* MW 1 */ + 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11485 "00000000" // /* MW 1 */ + 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11487 "00000000" // /* MW 1 */ + 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11489 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11490 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11491 "00011101" // /* MW 3 */ + 11492 "11000010" // /* MW 2 */ + 11493 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 168 8 +.src_ref 3 "elementwise_binary.h" 187 20 first + 11494 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11495 "11111001" // /* MW 5 */ + 11496 "11100001" // /* MW 4 */ + 11497 "10001010" // /* MW 3 */ + 11498 "00001110" // /* MW 2 */ + 11499 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11500 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11501 "01101000" // /* MW 5 */ + 11502 "01010000" // /* MW 4 */ + 11503 "01110000" // /* MW 3 */ + 11504 "00010011" // /* MW 2 */ + 11505 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11506 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11507 "10000000" // /* MW 7 */ + 11508 "10111010" // /* MW 6 */ + 11509 "11101000" // /* MW 5 */ + 11510 "01010000" // /* MW 4 */ + 11511 "01110000" // /* MW 3 */ + 11512 "00011011" // /* MW 2 */ + 11513 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11514 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11515 "01101000" // /* MW 5 */ + 11516 "01010000" // /* MW 4 */ + 11517 "01110000" // /* MW 3 */ + 11518 "00010011" // /* MW 2 */ + 11519 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11520 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11521 "11101000" // /* MW 5 */ + 11522 "01010000" // /* MW 4 */ + 11523 "01110000" // /* MW 3 */ + 11524 "00011011" // /* MW 2 */ + 11525 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11526 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11527 "10011011" // /* MW 3 */ + 11528 "00001000" // /* MW 2 */ + 11529 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11530 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11531 "01101000" // /* MW 5 */ + 11532 "01010000" // /* MW 4 */ + 11533 "01110000" // /* MW 3 */ + 11534 "00011011" // /* MW 2 */ + 11535 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11536 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11537 "11101000" // /* MW 5 */ + 11538 "01010000" // /* MW 4 */ + 11539 "01110000" // /* MW 3 */ + 11540 "00010011" // /* MW 2 */ + 11541 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11542 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11543 "01000001" // /* MW 9 */ + 11544 "11100010" // /* MW 8 */ + 11545 "00000000" // /* MW 7 */ + 11546 "00011101" // /* MW 6 */ + 11547 "00110100" // /* MW 5 */ + 11548 "00101000" // /* MW 4 */ + 11549 "01110000" // /* MW 3 */ + 11550 "00011011" // /* MW 2 */ + 11551 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11552 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11553 "01100001" // /* MW 9 */ + 11554 "11100000" // /* MW 8 */ + 11555 "00000001" // /* MW 7 */ + 11556 "00011101" // /* MW 6 */ + 11557 "01110100" // /* MW 5 */ + 11558 "00101000" // /* MW 4 */ + 11559 "01110000" // /* MW 3 */ + 11560 "00010011" // /* MW 2 */ + 11561 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11562 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11563 "01000001" // /* MW 9 */ + 11564 "11100010" // /* MW 8 */ + 11565 "00000000" // /* MW 7 */ + 11566 "00011101" // /* MW 6 */ + 11567 "00110100" // /* MW 5 */ + 11568 "00101000" // /* MW 4 */ + 11569 "01110000" // /* MW 3 */ + 11570 "00011011" // /* MW 2 */ + 11571 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11572 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "01100001" // /* MW 9 */ + 11574 "11100000" // /* MW 8 */ + 11575 "00000001" // /* MW 7 */ + 11576 "00011101" // /* MW 6 */ + 11577 "01110100" // /* MW 5 */ + 11578 "00101000" // /* MW 4 */ + 11579 "01110000" // /* MW 3 */ + 11580 "00010011" // /* MW 2 */ + 11581 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11582 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11583 "01000001" // /* MW 9 */ + 11584 "11100010" // /* MW 8 */ + 11585 "00000000" // /* MW 7 */ + 11586 "00011101" // /* MW 6 */ + 11587 "00110100" // /* MW 5 */ + 11588 "00101000" // /* MW 4 */ + 11589 "01110000" // /* MW 3 */ + 11590 "00011011" // /* MW 2 */ + 11591 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11592 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11593 "01100001" // /* MW 9 */ + 11594 "11100000" // /* MW 8 */ + 11595 "00000001" // /* MW 7 */ + 11596 "00011101" // /* MW 6 */ + 11597 "01110100" // /* MW 5 */ + 11598 "00101000" // /* MW 4 */ + 11599 "01110000" // /* MW 3 */ + 11600 "00010011" // /* MW 2 */ + 11601 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11602 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11603 "01000001" // /* MW 13 */ + 11604 "11100010" // /* MW 12 */ + 11605 "00000000" // /* MW 11 */ + 11606 "10001100" // /* MW 10 */ + 11607 "01110000" // /* MW 9 */ + 11608 "00001000" // /* MW 8 */ + 11609 "00000000" // /* MW 7 */ + 11610 "00000000" // /* MW 6 */ + 11611 "01101000" // /* MW 5 */ + 11612 "01010000" // /* MW 4 */ + 11613 "01110000" // /* MW 3 */ + 11614 "00011011" // /* MW 2 */ + 11615 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11616 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11617 "00000011" // /* MW 15 */ + 11618 "00001111" // /* MW 14 */ + 11619 "01111000" // /* MW 13 */ + 11620 "10100101" // /* MW 12 */ + 11621 "00000001" // /* MW 11 */ + 11622 "00000000" // /* MW 10 */ + 11623 "00000000" // /* MW 9 */ + 11624 "00000000" // /* MW 8 */ + 11625 "10100011" // /* MW 7 */ + 11626 "00011100" // /* MW 6 */ + 11627 "11101010" // /* MW 5 */ + 11628 "01010000" // /* MW 4 */ + 11629 "01110000" // /* MW 3 */ + 11630 "00010011" // /* MW 2 */ + 11631 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11632 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11633 "00010010" // /* MW 15 */ + 11634 "00000111" // /* MW 14 */ + 11635 "01111000" // /* MW 13 */ + 11636 "10100101" // /* MW 12 */ + 11637 "00000001" // /* MW 11 */ + 11638 "00000000" // /* MW 10 */ + 11639 "00000000" // /* MW 9 */ + 11640 "00000000" // /* MW 8 */ + 11641 "00100011" // /* MW 7 */ + 11642 "00011100" // /* MW 6 */ + 11643 "01101010" // /* MW 5 */ + 11644 "01010000" // /* MW 4 */ + 11645 "01110000" // /* MW 3 */ + 11646 "00011011" // /* MW 2 */ + 11647 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 11648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11649 "01100001" // /* MW 7 */ + 11650 "11100000" // /* MW 6 */ + 11651 "00000001" // /* MW 5 */ + 11652 "00000010" // /* MW 4 */ + 11653 "01100000" // /* MW 3 */ + 11654 "10010100" // /* MW 2 */ + 11655 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11657 "01000001" // /* MW 7 */ + 11658 "11100010" // /* MW 6 */ + 11659 "00000000" // /* MW 5 */ + 11660 "00000010" // /* MW 4 */ + 11661 "01100000" // /* MW 3 */ + 11662 "10000100" // /* MW 2 */ + 11663 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11665 "01100001" // /* MW 7 */ + 11666 "11100000" // /* MW 6 */ + 11667 "00000001" // /* MW 5 */ + 11668 "00000010" // /* MW 4 */ + 11669 "01100000" // /* MW 3 */ + 11670 "10010100" // /* MW 2 */ + 11671 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11672 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11673 "01000001" // /* MW 7 */ + 11674 "11100010" // /* MW 6 */ + 11675 "00000000" // /* MW 5 */ + 11676 "00000010" // /* MW 4 */ + 11677 "01100000" // /* MW 3 */ + 11678 "10000100" // /* MW 2 */ + 11679 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11680 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11681 "01100001" // /* MW 7 */ + 11682 "11100000" // /* MW 6 */ + 11683 "00000001" // /* MW 5 */ + 11684 "00000010" // /* MW 4 */ + 11685 "01100000" // /* MW 3 */ + 11686 "10010100" // /* MW 2 */ + 11687 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11688 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11689 "01000001" // /* MW 7 */ + 11690 "11100010" // /* MW 6 */ + 11691 "00000000" // /* MW 5 */ + 11692 "00000010" // /* MW 4 */ + 11693 "01100000" // /* MW 3 */ + 11694 "10000100" // /* MW 2 */ + 11695 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11696 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11697 "01100001" // /* MW 7 */ + 11698 "11100000" // /* MW 6 */ + 11699 "00000001" // /* MW 5 */ + 11700 "00000010" // /* MW 4 */ + 11701 "01100000" // /* MW 3 */ + 11702 "10010100" // /* MW 2 */ + 11703 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11704 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11705 "00100011" // /* MW 3 */ + 11706 "00011100" // /* MW 2 */ + 11707 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 172 4 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11708 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11709 "00000000" // /* MW 5 */ + 11710 "01010000" // /* MW 4 */ + 11711 "01100000" // /* MW 3 */ + 11712 "10010100" // /* MW 2 */ + 11713 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11714 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11715 "00100011" // /* MW 3 */ + 11716 "00011100" // /* MW 2 */ + 11717 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11718 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11719 "10100011" // /* MW 3 */ + 11720 "00011100" // /* MW 2 */ + 11721 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 11722 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11723 "00100011" // /* MW 3 */ + 11724 "00011100" // /* MW 2 */ + 11725 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 11726 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11727 "10100011" // /* MW 3 */ + 11728 "00011100" // /* MW 2 */ + 11729 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 11731 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 369 first +.src_ref 7 "superkernels.cpp" 374 6 +.function_start + 11744 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11745 "10000000" // /* MW 5 */ + 11746 "11001000" // /* MW 4 */ + 11747 "11001000" // /* MW 3 */ + 11748 "00000111" // /* MW 2 */ + 11749 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first + 11750 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11751 "11000001" // /* MW 5 */ + 11752 "10110101" // /* MW 4 */ + 11753 "11011000" // /* MW 3 */ + 11754 "11000010" // /* MW 2 */ + 11755 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 369 + 11756 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11757 "00000001" // /* MW 5 */ + 11758 "00000000" // /* MW 4 */ + 11759 "00000000" // /* MW 3 */ + 11760 "00001000" // /* MW 2 */ + 11761 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 11762 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11763 "01111001" // /* MW 9 */ + 11764 "01100000" // /* MW 8 */ + 11765 "11001010" // /* MW 7 */ + 11766 "10000001" // /* MW 6 */ + 11767 "00010100" // /* MW 5 */ + 11768 "00100011" // /* MW 4 */ + 11769 "10110000" // /* MW 3 */ + 11770 "00111010" // /* MW 2 */ + 11771 "11111111" // /* MW 1 */ + 11772 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11773 "01110000" // /* MW 7 */ + 11774 "11010000" // /* MW 6 */ + 11775 "00001011" // /* MW 5 */ + 11776 "00000000" // /* MW 4 */ + 11777 "10110000" // /* MW 3 */ + 11778 "10000011" // /* MW 2 */ + 11779 "11111101" // /* MW 1 */ + 11780 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11781 "00010101" // /* MW 3 */ + 11782 "11111100" // /* MW 2 */ + 11783 "00001111" // /* MW 1 */ + 11784 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11785 "00111101" // /* MW 3 */ + 11786 "11110000" // /* MW 2 */ + 11787 "00001111" // /* MW 1 */ + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first +.src_ref 7 "superkernels.cpp" 374 16 first + 11790 "10000100" // JNZ r16, #11936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11936 delay_slots=5 */ + 11791 "00000001" // /* MW 5 */ + 11792 "01000000" // /* MW 4 */ + 11793 "01010000" // /* MW 3 */ + 11794 "00010111" // /* MW 2 */ + 11795 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 30 first +.delay_slot + 11796 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11797 "11111011" // /* MW 3 */ + 11798 "01100011" // /* MW 2 */ + 11799 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11801 "10100000" // /* MW 5 */ + 11802 "11001000" // /* MW 4 */ + 11803 "11000100" // /* MW 3 */ + 11804 "00000111" // /* MW 2 */ + 11805 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11806 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11807 "01110000" // /* MW 7 */ + 11808 "01100000" // /* MW 6 */ + 11809 "00110111" // /* MW 5 */ + 11810 "00000001" // /* MW 4 */ + 11811 "00110000" // /* MW 3 */ + 11812 "11000110" // /* MW 2 */ + 11813 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 11814 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11815 "11000000" // /* MW 3 */ + 11816 "11010110" // /* MW 2 */ + 11817 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 7 "superkernels.cpp" 379 28 +.src_ref 7 "superkernels.cpp" 381 42 +.src_ref 7 "superkernels.cpp" 393 2 +.delay_slot + 11818 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11819 "00010001" // /* MW 9 */ + 11820 "11000000" // /* MW 8 */ + 11821 "10110010" // /* MW 7 */ + 11822 "11110011" // /* MW 6 */ + 11823 "00000001" // /* MW 5 */ + 11824 "00000000" // /* MW 4 */ + 11825 "10110000" // /* MW 3 */ + 11826 "10100011" // /* MW 2 */ + 11827 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11828 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11829 "00010001" // /* MW 9 */ + 11830 "00110100" // /* MW 8 */ + 11831 "00110010" // /* MW 7 */ + 11832 "11110001" // /* MW 6 */ + 11833 "00000001" // /* MW 5 */ + 11834 "00000000" // /* MW 4 */ + 11835 "01100000" // /* MW 3 */ + 11836 "10010001" // /* MW 2 */ + 11837 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11838 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11839 "00010000" // /* MW 9 */ + 11840 "00110010" // /* MW 8 */ + 11841 "00110010" // /* MW 7 */ + 11842 "11110001" // /* MW 6 */ + 11843 "00000001" // /* MW 5 */ + 11844 "00000000" // /* MW 4 */ + 11845 "11100000" // /* MW 3 */ + 11846 "11000000" // /* MW 2 */ + 11847 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11849 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11850 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 11851 "00000001" // /* MW 5 */ + 11852 "00000000" // /* MW 4 */ + 11853 "00010000" // /* MW 3 */ + 11854 "00010110" // /* MW 2 */ + 11855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11859 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11860 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11861 "00110001" // /* MW 3 */ + 11862 "00100000" // /* MW 2 */ + 11863 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11864 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11865 "00000101" // /* MW 3 */ + 11866 "00100000" // /* MW 2 */ + 11867 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11868 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11869 "00010001" // /* MW 3 */ + 11870 "00000110" // /* MW 2 */ + 11871 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 381 42 first +.return_address + 11872 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11873 "00010000" // /* MW 9 */ + 11874 "00101000" // /* MW 8 */ + 11875 "10110010" // /* MW 7 */ + 11876 "11110000" // /* MW 6 */ + 11877 "00000001" // /* MW 5 */ + 11878 "00000000" // /* MW 4 */ + 11879 "11010000" // /* MW 3 */ + 11880 "11000010" // /* MW 2 */ + 11881 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 390 48 + 11882 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11883 "00010000" // /* MW 9 */ + 11884 "00101010" // /* MW 8 */ + 11885 "10110010" // /* MW 7 */ + 11886 "11110001" // /* MW 6 */ + 11887 "00000001" // /* MW 5 */ + 11888 "00000000" // /* MW 4 */ + 11889 "11010000" // /* MW 3 */ + 11890 "11000110" // /* MW 2 */ + 11891 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 28 first +.src_ref 7 "superkernels.cpp" 382 16 +.src_ref 7 "superkernels.cpp" 391 48 + 11892 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11893 "00010000" // /* MW 9 */ + 11894 "00101110" // /* MW 8 */ + 11895 "10110010" // /* MW 7 */ + 11896 "11110000" // /* MW 6 */ + 11897 "00000001" // /* MW 5 */ + 11898 "00000000" // /* MW 4 */ + 11899 "01010000" // /* MW 3 */ + 11900 "11001011" // /* MW 2 */ + 11901 "11101010" // /* MW 1 */ + 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11903 "00000000" // /* MW 1 */ + 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11905 "00000000" // /* MW 1 */ + 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11907 "00000000" // /* MW 1 */ + 11908 "10000100" // J #11952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11952 delay_slots=5 */ + 11909 "00000000" // /* MW 5 */ + 11910 "00000000" // /* MW 4 */ + 11911 "01011000" // /* MW 3 */ + 11912 "00010111" // /* MW 2 */ + 11913 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 +.delay_slot + 11914 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11915 "11000000" // /* MW 5 */ + 11916 "11001000" // /* MW 4 */ + 11917 "11000100" // /* MW 3 */ + 11918 "00000111" // /* MW 2 */ + 11919 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 27 first +.delay_slot + 11920 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11921 "00001111" // /* MW 3 */ + 11922 "01100001" // /* MW 2 */ + 11923 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 first +.delay_slot + 11924 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "01010001" // /* MW 3 */ + 11926 "00000110" // /* MW 2 */ + 11927 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 first +.delay_slot + 11928 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "00010001" // /* MW 3 */ + 11930 "00000110" // /* MW 2 */ + 11931 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 382 16 first +.delay_slot + 11932 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11933 "00010001" // /* MW 3 */ + 11934 "00000110" // /* MW 2 */ + 11935 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 390 48 + 11936 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11937 "10101000" // /* MW 5 */ + 11938 "11001000" // /* MW 4 */ + 11939 "11000110" // /* MW 3 */ + 11940 "00000111" // /* MW 2 */ + 11941 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 + 11942 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11943 "00010000" // /* MW 9 */ + 11944 "00101110" // /* MW 8 */ + 11945 "10110010" // /* MW 7 */ + 11946 "11110000" // /* MW 6 */ + 11947 "00000001" // /* MW 5 */ + 11948 "00000000" // /* MW 4 */ + 11949 "11110000" // /* MW 3 */ + 11950 "00101100" // /* MW 2 */ + 11951 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 11952 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11953 "10000110" // /* MW 3 */ + 11954 "01100111" // /* MW 2 */ + 11955 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 11956 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11957 "00010000" // /* MW 9 */ + 11958 "00100000" // /* MW 8 */ + 11959 "00110010" // /* MW 7 */ + 11960 "11110001" // /* MW 6 */ + 11961 "00000001" // /* MW 5 */ + 11962 "00000000" // /* MW 4 */ + 11963 "11010000" // /* MW 3 */ + 11964 "11101110" // /* MW 2 */ + 11965 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11966 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11967 "00010110" // /* MW 3 */ + 11968 "11111110" // /* MW 2 */ + 11969 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11970 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11971 "00110110" // /* MW 3 */ + 11972 "11111110" // /* MW 2 */ + 11973 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first + 11974 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11975 "01010110" // /* MW 3 */ + 11976 "00000110" // /* MW 2 */ + 11977 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 11978 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11979 "01110110" // /* MW 3 */ + 11980 "01000110" // /* MW 2 */ + 11981 "00000000" // /* MW 1 */ + 11982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11983 "00000000" // /* MW 1 */ + 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11985 "00000000" // /* MW 1 */ + 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11987 "00000000" // /* MW 1 */ + 11988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11990 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11991 "00000010" // /* MW 3 */ + 11992 "01100001" // /* MW 2 */ + 11993 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 11994 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11995 "00001110" // /* MW 5 */ + 11996 "01000000" // /* MW 4 */ + 11997 "00111001" // /* MW 3 */ + 11998 "11000010" // /* MW 2 */ + 11999 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 + 12000 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12001 "00010001" // /* MW 3 */ + 12002 "00000110" // /* MW 2 */ + 12003 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 12004 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12005 "11111101" // /* MW 3 */ + 12006 "11100000" // /* MW 2 */ + 12007 "00010111" // /* MW 1 */ + 12008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12009 "00000000" // /* MW 1 */ + 12010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12011 "00000000" // /* MW 1 */ + 12012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12013 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12014 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12015 "00001000" // /* MW 3 */ + 12016 "11010011" // /* MW 2 */ + 12017 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 12018 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12019 "00000110" // /* MW 3 */ + 12020 "01100111" // /* MW 2 */ + 12021 "00011010" // /* MW 1 */ + 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12023 "00000000" // /* MW 1 */ + 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12025 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 12026 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12027 "01110110" // /* MW 3 */ + 12028 "11111111" // /* MW 2 */ + 12029 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 12030 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12031 "00110110" // /* MW 3 */ + 12032 "11111110" // /* MW 2 */ + 12033 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 12034 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12035 "01010110" // /* MW 3 */ + 12036 "11111110" // /* MW 2 */ + 12037 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 12038 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "01110110" // /* MW 3 */ + 12040 "01010110" // /* MW 2 */ + 12041 "00000010" // /* MW 1 */ + 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12043 "00000000" // /* MW 1 */ + 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12045 "00000000" // /* MW 1 */ + 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12047 "00000000" // /* MW 1 */ + 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12049 "00000000" // /* MW 1 */ + 12050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12051 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 12052 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12053 "00010010" // /* MW 3 */ + 12054 "10100011" // /* MW 2 */ + 12055 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 12056 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12057 "00110001" // /* MW 3 */ + 12058 "00000110" // /* MW 2 */ + 12059 "00001010" // /* MW 1 */ + 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12061 "00000000" // /* MW 1 */ + 12062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12063 "00000000" // /* MW 1 */ + 12064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12065 "00000000" // /* MW 1 */ + 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12067 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12068 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12069 "00001000" // /* MW 3 */ + 12070 "11010011" // /* MW 2 */ + 12071 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 +.src_ref 7 "superkernels.cpp" 391 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 12072 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12073 "01111001" // /* MW 9 */ + 12074 "01100000" // /* MW 8 */ + 12075 "11001110" // /* MW 7 */ + 12076 "00101001" // /* MW 6 */ + 12077 "00000000" // /* MW 5 */ + 12078 "00000001" // /* MW 4 */ + 12079 "01100000" // /* MW 3 */ + 12080 "00010001" // /* MW 2 */ + 12081 "11010001" // /* MW 1 */ + 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12083 "00000000" // /* MW 1 */ + 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12085 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 12086 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12087 "00011001" // /* MW 3 */ + 12088 "11101110" // /* MW 2 */ + 12089 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 48 first + 12090 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12091 "00111011" // /* MW 5 */ + 12092 "11011000" // /* MW 4 */ + 12093 "11011111" // /* MW 3 */ + 12094 "11000110" // /* MW 2 */ + 12095 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 first +.src_ref 7 "superkernels.cpp" 393 2 + 12096 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "10000001" // /* MW 5 */ + 12098 "11011101" // /* MW 4 */ + 12099 "11010110" // /* MW 3 */ + 12100 "11010010" // /* MW 2 */ + 12101 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12102 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12103 "01010110" // /* MW 3 */ + 12104 "01001110" // /* MW 2 */ + 12105 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12106 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00011110" // /* MW 3 */ + 12108 "01011101" // /* MW 2 */ + 12109 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12110 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "11000000" // /* MW 3 */ + 12112 "01100000" // /* MW 2 */ + 12113 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12115 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12116 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12117 "01110110" // /* MW 3 */ + 12118 "00000110" // /* MW 2 */ + 12119 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 393 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12122 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */ + 12123 "00000001" // /* MW 5 */ + 12124 "00000000" // /* MW 4 */ + 12125 "01011000" // /* MW 3 */ + 12126 "00010110" // /* MW 2 */ + 12127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12128 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12129 "11000000" // /* MW 3 */ + 12130 "11010100" // /* MW 2 */ + 12131 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12132 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12133 "00001101" // /* MW 3 */ + 12134 "01100011" // /* MW 2 */ + 12135 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 first +.delay_slot + 12136 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "00001101" // /* MW 3 */ + 12138 "00100001" // /* MW 2 */ + 12139 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 +.delay_slot + 12140 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12141 "01000001" // /* MW 3 */ + 12142 "01101001" // /* MW 2 */ + 12143 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12145 "00000000" // /* MW 15 */ + 12146 "00000000" // /* MW 14 */ + 12147 "10101000" // /* MW 13 */ + 12148 "11100010" // /* MW 12 */ + 12149 "00110100" // /* MW 11 */ + 12150 "00000000" // /* MW 10 */ + 12151 "00000000" // /* MW 9 */ + 12152 "00000000" // /* MW 8 */ + 12153 "01011011" // /* MW 7 */ + 12154 "00000001" // /* MW 6 */ + 12155 "00100000" // /* MW 5 */ + 12156 "00000000" // /* MW 4 */ + 12157 "11110000" // /* MW 3 */ + 12158 "00101100" // /* MW 2 */ + 12159 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 12160 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12161 "01111000" // /* MW 9 */ + 12162 "11010000" // /* MW 8 */ + 12163 "10110011" // /* MW 7 */ + 12164 "00101000" // /* MW 6 */ + 12165 "00000000" // /* MW 5 */ + 12166 "00000001" // /* MW 4 */ + 12167 "11010000" // /* MW 3 */ + 12168 "11000110" // /* MW 2 */ + 12169 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 + 12170 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12171 "11000000" // /* MW 5 */ + 12172 "11001000" // /* MW 4 */ + 12173 "11001100" // /* MW 3 */ + 12174 "00000111" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ + 12176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12177 "00000000" // /* MW 1 */ + 12178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12179 "00000000" // /* MW 1 */ + 12180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12181 "00000000" // /* MW 1 */ + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12186 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12187 "00001000" // /* MW 3 */ + 12188 "01010001" // /* MW 2 */ + 12189 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 12190 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12191 "00110110" // /* MW 3 */ + 12192 "11110110" // /* MW 2 */ + 12193 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 12194 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12195 "00011001" // /* MW 3 */ + 12196 "11101101" // /* MW 2 */ + 12197 "00000111" // /* MW 1 */ + 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12199 "00000000" // /* MW 1 */ + 12200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12201 "00000000" // /* MW 1 */ + 12202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12203 "00000000" // /* MW 1 */ + 12204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12205 "00000000" // /* MW 1 */ + 12206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12207 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 12208 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12209 "00010001" // /* MW 3 */ + 12210 "00100011" // /* MW 2 */ + 12211 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 12212 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12213 "01100011" // /* MW 5 */ + 12214 "11101100" // /* MW 4 */ + 12215 "11010011" // /* MW 3 */ + 12216 "11000110" // /* MW 2 */ + 12217 "01001010" // /* MW 1 */ + 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12219 "00000000" // /* MW 1 */ + 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12221 "00000000" // /* MW 1 */ + 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12223 "00000000" // /* MW 1 */ + 12224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12225 "00000000" // /* MW 1 */ + 12226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12227 "00000000" // /* MW 1 */ + 12228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12229 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12230 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12231 "00001000" // /* MW 3 */ + 12232 "01010001" // /* MW 2 */ + 12233 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 +.src_ref 7 "superkernels.cpp" 398 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 12234 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12235 "00010000" // /* MW 9 */ + 12236 "00100000" // /* MW 8 */ + 12237 "10110010" // /* MW 7 */ + 12238 "11110000" // /* MW 6 */ + 12239 "00000001" // /* MW 5 */ + 12240 "00000000" // /* MW 4 */ + 12241 "11010000" // /* MW 3 */ + 12242 "11001110" // /* MW 2 */ + 12243 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 first + 12244 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12245 "01010110" // /* MW 3 */ + 12246 "00000110" // /* MW 2 */ + 12247 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12248 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12249 "00110110" // /* MW 3 */ + 12250 "00000110" // /* MW 2 */ + 12251 "00000001" // /* MW 1 */ + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ + 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12257 "00000000" // /* MW 1 */ + 12258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12259 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 12260 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12261 "00110001" // /* MW 3 */ + 12262 "00100001" // /* MW 2 */ + 12263 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 12264 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12265 "00010001" // /* MW 3 */ + 12266 "11100110" // /* MW 2 */ + 12267 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 16 first + 12268 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12269 "00101000" // /* MW 3 */ + 12270 "01100001" // /* MW 2 */ + 12271 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12272 "10000100" // JNZ r16, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */ + 12273 "00000001" // /* MW 5 */ + 12274 "01000000" // /* MW 4 */ + 12275 "00001000" // /* MW 3 */ + 12276 "00011000" // /* MW 2 */ + 12277 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12285 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 + 12288 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12289 "00000001" // /* MW 3 */ + 12290 "00100000" // /* MW 2 */ + 12291 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 first + 12292 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12293 "11000001" // /* MW 11 */ + 12294 "00001000" // /* MW 10 */ + 12295 "10000011" // /* MW 9 */ + 12296 "00000000" // /* MW 8 */ + 12297 "00000000" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00100000" // /* MW 5 */ + 12300 "00000000" // /* MW 4 */ + 12301 "11110000" // /* MW 3 */ + 12302 "00101100" // /* MW 2 */ + 12303 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 400 + 12304 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12305 "00111001" // /* MW 3 */ + 12306 "11110000" // /* MW 2 */ + 12307 "00000111" // /* MW 1 */ + 12308 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12309 "11110001" // /* MW 3 */ + 12310 "11111101" // /* MW 2 */ + 12311 "00000111" // /* MW 1 */ + 12312 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12313 "10011001" // /* MW 3 */ + 12314 "11110111" // /* MW 2 */ + 12315 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12317 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12318 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12319 "11010001" // /* MW 3 */ + 12320 "11111001" // /* MW 2 */ + 12321 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12325 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12326 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12327 "00000000" // /* MW 3 */ + 12328 "00101000" // /* MW 2 */ + 12329 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12330 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12331 "00001011" // /* MW 3 */ + 12332 "10001110" // /* MW 2 */ + 12333 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 +.delay_slot + 12334 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12335 "00000001" // /* MW 5 */ + 12336 "00000000" // /* MW 4 */ + 12337 "00000000" // /* MW 3 */ + 12338 "11111000" // /* MW 2 */ + 12339 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12343 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 12345 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 2 "conv2d_dw_bf16_params.h" 211 first +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.function_start + 12352 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12353 "00010000" // /* MW 9 */ + 12354 "11100000" // /* MW 8 */ + 12355 "10110011" // /* MW 7 */ + 12356 "11110000" // /* MW 6 */ + 12357 "00000001" // /* MW 5 */ + 12358 "00000000" // /* MW 4 */ + 12359 "11010000" // /* MW 3 */ + 12360 "10000101" // /* MW 2 */ + 12361 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12362 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12363 "01011000" // /* MW 9 */ + 12364 "00000000" // /* MW 8 */ + 12365 "00001000" // /* MW 7 */ + 12366 "01001011" // /* MW 6 */ + 12367 "00000000" // /* MW 5 */ + 12368 "00000001" // /* MW 4 */ + 12369 "11010000" // /* MW 3 */ + 12370 "10000001" // /* MW 2 */ + 12371 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 211 + 12372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12373 "00000001" // /* MW 5 */ + 12374 "00000000" // /* MW 4 */ + 12375 "00000000" // /* MW 3 */ + 12376 "00001000" // /* MW 2 */ + 12377 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 + 12378 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12379 "00010001" // /* MW 9 */ + 12380 "11100000" // /* MW 8 */ + 12381 "10110011" // /* MW 7 */ + 12382 "11110011" // /* MW 6 */ + 12383 "00000001" // /* MW 5 */ + 12384 "00000000" // /* MW 4 */ + 12385 "10110000" // /* MW 3 */ + 12386 "11110011" // /* MW 2 */ + 12387 "11111110" // /* MW 1 */ + 12388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12389 "00111101" // /* MW 3 */ + 12390 "11111100" // /* MW 2 */ + 12391 "00001111" // /* MW 1 */ + 12392 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12393 "11110101" // /* MW 3 */ + 12394 "11111001" // /* MW 2 */ + 12395 "00001111" // /* MW 1 */ + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12398 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12399 "00101001" // /* MW 3 */ + 12400 "00011100" // /* MW 2 */ + 12401 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12402 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12403 "00001001" // /* MW 3 */ + 12404 "00011100" // /* MW 2 */ + 12405 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12406 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12407 "00101110" // /* MW 3 */ + 12408 "00000100" // /* MW 2 */ + 12409 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12410 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12411 "00001110" // /* MW 3 */ + 12412 "00010100" // /* MW 2 */ + 12413 "00000000" // /* MW 1 */ + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12415 "00000000" // /* MW 1 */ + 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12417 "00000000" // /* MW 1 */ + 12418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12419 "00000000" // /* MW 1 */ + 12420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12421 "00000000" // /* MW 1 */ + 12422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12423 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12424 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12425 "00101001" // /* MW 3 */ + 12426 "00000100" // /* MW 2 */ + 12427 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12428 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12429 "00001001" // /* MW 3 */ + 12430 "00010100" // /* MW 2 */ + 12431 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first + 12432 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12433 "00101010" // /* MW 3 */ + 12434 "01011110" // /* MW 2 */ + 12435 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 52 + 12436 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12437 "01001010" // /* MW 3 */ + 12438 "11101110" // /* MW 2 */ + 12439 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12440 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12441 "00101010" // /* MW 3 */ + 12442 "11101100" // /* MW 2 */ + 12443 "00000111" // /* MW 1 */ + 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12445 "00000000" // /* MW 1 */ + 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12447 "00000000" // /* MW 1 */ + 12448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12449 "00000000" // /* MW 1 */ + 12450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12451 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.no_stack_arguments + 12452 "00000100" // JL #15664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */ + 12453 "00000001" // /* MW 5 */ + 12454 "00000000" // /* MW 4 */ + 12455 "10011000" // /* MW 3 */ + 12456 "00011110" // /* MW 2 */ + 12457 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 38 +.delay_slot + 12458 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12459 "01000011" // /* MW 5 */ + 12460 "10111110" // /* MW 4 */ + 12461 "10111000" // /* MW 3 */ + 12462 "11001010" // /* MW 2 */ + 12463 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 +.delay_slot + 12464 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12465 "00010001" // /* MW 5 */ + 12466 "11000010" // /* MW 4 */ + 12467 "10110000" // /* MW 3 */ + 12468 "10000110" // /* MW 2 */ + 12469 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12470 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12471 "00010101" // /* MW 5 */ + 12472 "11101111" // /* MW 4 */ + 12473 "10110111" // /* MW 3 */ + 12474 "01000010" // /* MW 2 */ + 12475 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12476 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12477 "11110001" // /* MW 3 */ + 12478 "00100010" // /* MW 2 */ + 12479 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12480 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12481 "00000000" // /* MW 15 */ + 12482 "00000000" // /* MW 14 */ + 12483 "01111000" // /* MW 13 */ + 12484 "10100101" // /* MW 12 */ + 12485 "00000001" // /* MW 11 */ + 12486 "10010000" // /* MW 10 */ + 12487 "00001000" // /* MW 9 */ + 12488 "00011110" // /* MW 8 */ + 12489 "01011011" // /* MW 7 */ + 12490 "00000001" // /* MW 6 */ + 12491 "00100000" // /* MW 5 */ + 12492 "00000000" // /* MW 4 */ + 12493 "11110000" // /* MW 3 */ + 12494 "00101100" // /* MW 2 */ + 12495 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.return_address + 12496 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12497 "00000010" // /* MW 5 */ + 12498 "01000000" // /* MW 4 */ + 12499 "00100000" // /* MW 3 */ + 12500 "11010010" // /* MW 2 */ + 12501 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first + 12502 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12503 "01000011" // /* MW 5 */ + 12504 "01001000" // /* MW 4 */ + 12505 "01011000" // /* MW 3 */ + 12506 "11000101" // /* MW 2 */ + 12507 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 52 + 12508 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12509 "01101010" // /* MW 3 */ + 12510 "11101110" // /* MW 2 */ + 12511 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12512 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12513 "00110001" // /* MW 3 */ + 12514 "11101100" // /* MW 2 */ + 12515 "00000111" // /* MW 1 */ + 12516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12517 "00000000" // /* MW 1 */ + 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12519 "00000000" // /* MW 1 */ + 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12521 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first + 12522 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "01000110" // /* MW 3 */ + 12524 "11101001" // /* MW 2 */ + 12525 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 + 12526 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12527 "00001010" // /* MW 3 */ + 12528 "00110111" // /* MW 2 */ + 12529 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first + 12530 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12531 "01100011" // /* MW 5 */ + 12532 "11000110" // /* MW 4 */ + 12533 "10111000" // /* MW 3 */ + 12534 "01001110" // /* MW 2 */ + 12535 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.no_stack_arguments + 12536 "00111010" // ST r17, [sp, #-32]; JL #15664 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */ + 12537 "01000001" // /* MW 9 */ + 12538 "00000000" // /* MW 8 */ + 12539 "00000000" // /* MW 7 */ + 12540 "10100110" // /* MW 6 */ + 12541 "00000111" // /* MW 5 */ + 12542 "00000000" // /* MW 4 */ + 12543 "10110000" // /* MW 3 */ + 12544 "01000110" // /* MW 2 */ + 12545 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12546 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00100010" // /* MW 3 */ + 12548 "10101001" // /* MW 2 */ + 12549 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12550 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12551 "00001010" // /* MW 3 */ + 12552 "01110111" // /* MW 2 */ + 12553 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.delay_slot + 12554 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12555 "00010001" // /* MW 3 */ + 12556 "00100101" // /* MW 2 */ + 12557 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12558 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "01110000" // /* MW 3 */ + 12560 "00100110" // /* MW 2 */ + 12561 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 87 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12562 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12563 "01100000" // /* MW 13 */ + 12564 "00101011" // /* MW 12 */ + 12565 "00000000" // /* MW 11 */ + 12566 "00001001" // /* MW 10 */ + 12567 "10011000" // /* MW 9 */ + 12568 "00111101" // /* MW 8 */ + 12569 "00100010" // /* MW 7 */ + 12570 "01000001" // /* MW 6 */ + 12571 "00100100" // /* MW 5 */ + 12572 "00000000" // /* MW 4 */ + 12573 "11110000" // /* MW 3 */ + 12574 "00101100" // /* MW 2 */ + 12575 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 +.return_address + 12576 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12577 "01011000" // /* MW 9 */ + 12578 "01000010" // /* MW 8 */ + 12579 "00000000" // /* MW 7 */ + 12580 "11001000" // /* MW 6 */ + 12581 "00110111" // /* MW 5 */ + 12582 "00111111" // /* MW 4 */ + 12583 "00100000" // /* MW 3 */ + 12584 "00001110" // /* MW 2 */ + 12585 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12586 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12587 "01011000" // /* MW 9 */ + 12588 "11111100" // /* MW 8 */ + 12589 "00101001" // /* MW 7 */ + 12590 "00001000" // /* MW 6 */ + 12591 "10000000" // /* MW 5 */ + 12592 "00000001" // /* MW 4 */ + 12593 "00100000" // /* MW 3 */ + 12594 "11000010" // /* MW 2 */ + 12595 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 + 12596 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12597 "01011000" // /* MW 9 */ + 12598 "00000010" // /* MW 8 */ + 12599 "10001000" // /* MW 7 */ + 12600 "10001000" // /* MW 6 */ + 12601 "01100000" // /* MW 5 */ + 12602 "00000000" // /* MW 4 */ + 12603 "00100000" // /* MW 3 */ + 12604 "11011010" // /* MW 2 */ + 12605 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 + 12606 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12607 "01011000" // /* MW 9 */ + 12608 "00010111" // /* MW 8 */ + 12609 "10001000" // /* MW 7 */ + 12610 "00001011" // /* MW 6 */ + 12611 "01010001" // /* MW 5 */ + 12612 "00000000" // /* MW 4 */ + 12613 "01010000" // /* MW 3 */ + 12614 "01000101" // /* MW 2 */ + 12615 "11100001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 + 12616 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12617 "01011000" // /* MW 9 */ + 12618 "00100000" // /* MW 8 */ + 12619 "10000000" // /* MW 7 */ + 12620 "01001000" // /* MW 6 */ + 12621 "00100111" // /* MW 5 */ + 12622 "00111111" // /* MW 4 */ + 12623 "00100000" // /* MW 3 */ + 12624 "01010110" // /* MW 2 */ + 12625 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12626 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12627 "01011000" // /* MW 9 */ + 12628 "00000001" // /* MW 8 */ + 12629 "01001000" // /* MW 7 */ + 12630 "11001011" // /* MW 6 */ + 12631 "01110000" // /* MW 5 */ + 12632 "00000001" // /* MW 4 */ + 12633 "00100000" // /* MW 3 */ + 12634 "01111010" // /* MW 2 */ + 12635 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 + 12636 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12637 "01011000" // /* MW 9 */ + 12638 "11000000" // /* MW 8 */ + 12639 "11101111" // /* MW 7 */ + 12640 "00001011" // /* MW 6 */ + 12641 "11010000" // /* MW 5 */ + 12642 "00000101" // /* MW 4 */ + 12643 "10000000" // /* MW 3 */ + 12644 "11000000" // /* MW 2 */ + 12645 "11101001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12646 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12647 "00100001" // /* MW 3 */ + 12648 "00101000" // /* MW 2 */ + 12649 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12650 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12651 "00000110" // /* MW 3 */ + 12652 "11000111" // /* MW 2 */ + 12653 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 + 12654 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12655 "00000010" // /* MW 5 */ + 12656 "00110110" // /* MW 4 */ + 12657 "01010000" // /* MW 3 */ + 12658 "11110001" // /* MW 2 */ + 12659 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 + 12660 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12661 "11110101" // /* MW 5 */ + 12662 "00111111" // /* MW 4 */ + 12663 "01001011" // /* MW 3 */ + 12664 "00101000" // /* MW 2 */ + 12665 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12666 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12667 "00011101" // /* MW 5 */ + 12668 "00100000" // /* MW 4 */ + 12669 "11110001" // /* MW 3 */ + 12670 "11100001" // /* MW 2 */ + 12671 "01111000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12672 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12673 "01110000" // /* MW 3 */ + 12674 "00101000" // /* MW 2 */ + 12675 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 + 12676 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12677 "00000001" // /* MW 5 */ + 12678 "10100000" // /* MW 4 */ + 12679 "10010000" // /* MW 3 */ + 12680 "00000000" // /* MW 2 */ + 12681 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first + 12682 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12683 "00000001" // /* MW 5 */ + 12684 "10110100" // /* MW 4 */ + 12685 "10111101" // /* MW 3 */ + 12686 "11100111" // /* MW 2 */ + 12687 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first + 12688 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12689 "00000010" // /* MW 5 */ + 12690 "10100011" // /* MW 4 */ + 12691 "10110000" // /* MW 3 */ + 12692 "00001101" // /* MW 2 */ + 12693 "01111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 240 70 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first + 12694 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12695 "11111111" // /* MW 5 */ + 12696 "00110101" // /* MW 4 */ + 12697 "10110000" // /* MW 3 */ + 12698 "11001101" // /* MW 2 */ + 12699 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first + 12700 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12701 "00001111" // /* MW 3 */ + 12702 "11001101" // /* MW 2 */ + 12703 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first + 12704 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12705 "00011111" // /* MW 3 */ + 12706 "11011111" // /* MW 2 */ + 12707 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first +.src_ref 2 "conv2d_dw_bf16_params.h" 238 79 + 12708 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12709 "11111111" // /* MW 5 */ + 12710 "10110011" // /* MW 4 */ + 12711 "11111001" // /* MW 3 */ + 12712 "01101011" // /* MW 2 */ + 12713 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first + 12714 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12715 "00000111" // /* MW 3 */ + 12716 "00110111" // /* MW 2 */ + 12717 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first + 12718 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12719 "11011111" // /* MW 5 */ + 12720 "10010000" // /* MW 4 */ + 12721 "00110111" // /* MW 3 */ + 12722 "11010110" // /* MW 2 */ + 12723 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first + 12724 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12725 "01010010" // /* MW 3 */ + 12726 "00111000" // /* MW 2 */ + 12727 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first + 12728 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12729 "00101101" // /* MW 3 */ + 12730 "00100101" // /* MW 2 */ + 12731 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 + 12732 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12733 "00111111" // /* MW 5 */ + 12734 "11001000" // /* MW 4 */ + 12735 "00111000" // /* MW 3 */ + 12736 "01001010" // /* MW 2 */ + 12737 "11100101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first + 12738 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12739 "11111011" // /* MW 5 */ + 12740 "01110010" // /* MW 4 */ + 12741 "00111111" // /* MW 3 */ + 12742 "11110010" // /* MW 2 */ + 12743 "11111001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 47 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first + 12744 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12745 "00011111" // /* MW 5 */ + 12746 "01110000" // /* MW 4 */ + 12747 "00111001" // /* MW 3 */ + 12748 "11110010" // /* MW 2 */ + 12749 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first + 12750 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12751 "11111011" // /* MW 5 */ + 12752 "11001110" // /* MW 4 */ + 12753 "00111001" // /* MW 3 */ + 12754 "11001110" // /* MW 2 */ + 12755 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first + 12756 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12757 "11101010" // /* MW 5 */ + 12758 "10110011" // /* MW 4 */ + 12759 "10111001" // /* MW 3 */ + 12760 "00110101" // /* MW 2 */ + 12761 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first + 12762 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12763 "01011011" // /* MW 5 */ + 12764 "01111011" // /* MW 4 */ + 12765 "00111001" // /* MW 3 */ + 12766 "11111110" // /* MW 2 */ + 12767 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12768 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12769 "11100010" // /* MW 5 */ + 12770 "00110011" // /* MW 4 */ + 12771 "11111001" // /* MW 3 */ + 12772 "00100001" // /* MW 2 */ + 12773 "10010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first +.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first + 12774 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12775 "00000100" // /* MW 5 */ + 12776 "11110011" // /* MW 4 */ + 12777 "00111111" // /* MW 3 */ + 12778 "10000010" // /* MW 2 */ + 12779 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first + 12780 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12781 "01101101" // /* MW 3 */ + 12782 "11111111" // /* MW 2 */ + 12783 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 247 73 + 12784 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12785 "11111111" // /* MW 5 */ + 12786 "10111111" // /* MW 4 */ + 12787 "00111001" // /* MW 3 */ + 12788 "01100110" // /* MW 2 */ + 12789 "11110000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first + 12790 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12791 "11011011" // /* MW 5 */ + 12792 "11000110" // /* MW 4 */ + 12793 "00111000" // /* MW 3 */ + 12794 "10000110" // /* MW 2 */ + 12795 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first + 12796 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12797 "11111111" // /* MW 5 */ + 12798 "00110001" // /* MW 4 */ + 12799 "00111001" // /* MW 3 */ + 12800 "10100100" // /* MW 2 */ + 12801 "11000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12802 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12803 "11000011" // /* MW 5 */ + 12804 "11011011" // /* MW 4 */ + 12805 "00110011" // /* MW 3 */ + 12806 "11011010" // /* MW 2 */ + 12807 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12808 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12809 "01011011" // /* MW 5 */ + 12810 "01000011" // /* MW 4 */ + 12811 "00111000" // /* MW 3 */ + 12812 "11001010" // /* MW 2 */ + 12813 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12814 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12815 "01011011" // /* MW 5 */ + 12816 "11111100" // /* MW 4 */ + 12817 "00111001" // /* MW 3 */ + 12818 "10011110" // /* MW 2 */ + 12819 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12820 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12821 "11000001" // /* MW 5 */ + 12822 "11011010" // /* MW 4 */ + 12823 "00111110" // /* MW 3 */ + 12824 "11001110" // /* MW 2 */ + 12825 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first + 12826 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12827 "11110010" // /* MW 5 */ + 12828 "10111111" // /* MW 4 */ + 12829 "00011110" // /* MW 3 */ + 12830 "00100000" // /* MW 2 */ + 12831 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12832 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12833 "10100011" // /* MW 5 */ + 12834 "01000011" // /* MW 4 */ + 12835 "00111000" // /* MW 3 */ + 12836 "11011010" // /* MW 2 */ + 12837 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 + 12838 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12839 "01011001" // /* MW 9 */ + 12840 "11111111" // /* MW 8 */ + 12841 "00001111" // /* MW 7 */ + 12842 "01101110" // /* MW 6 */ + 12843 "01101101" // /* MW 5 */ + 12844 "00011111" // /* MW 4 */ + 12845 "00110000" // /* MW 3 */ + 12846 "11000010" // /* MW 2 */ + 12847 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first + 12848 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12849 "10000001" // /* MW 5 */ + 12850 "01101010" // /* MW 4 */ + 12851 "00111110" // /* MW 3 */ + 12852 "11001010" // /* MW 2 */ + 12853 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 253 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first + 12854 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12855 "11000011" // /* MW 5 */ + 12856 "01010010" // /* MW 4 */ + 12857 "00111010" // /* MW 3 */ + 12858 "11101010" // /* MW 2 */ + 12859 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 + 12860 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12861 "00001000" // /* MW 11 */ + 12862 "00010000" // /* MW 10 */ + 12863 "01101101" // /* MW 9 */ + 12864 "10110010" // /* MW 8 */ + 12865 "00001000" // /* MW 7 */ + 12866 "10101011" // /* MW 6 */ + 12867 "01110001" // /* MW 5 */ + 12868 "00011110" // /* MW 4 */ + 12869 "00000111" // /* MW 3 */ + 12870 "00010001" // /* MW 2 */ + 12871 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first + 12872 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12873 "01110001" // /* MW 3 */ + 12874 "00011110" // /* MW 2 */ + 12875 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first + 12876 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12877 "11111011" // /* MW 5 */ + 12878 "01010010" // /* MW 4 */ + 12879 "00111000" // /* MW 3 */ + 12880 "11000110" // /* MW 2 */ + 12881 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 + 12882 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12883 "10000011" // /* MW 5 */ + 12884 "01000010" // /* MW 4 */ + 12885 "00111100" // /* MW 3 */ + 12886 "11000010" // /* MW 2 */ + 12887 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first + 12888 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12889 "11111011" // /* MW 5 */ + 12890 "01010010" // /* MW 4 */ + 12891 "00111001" // /* MW 3 */ + 12892 "11000110" // /* MW 2 */ + 12893 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12894 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12895 "10000011" // /* MW 5 */ + 12896 "01000010" // /* MW 4 */ + 12897 "00111100" // /* MW 3 */ + 12898 "11000010" // /* MW 2 */ + 12899 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first + 12900 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12901 "01010001" // /* MW 3 */ + 12902 "00011110" // /* MW 2 */ + 12903 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first + 12904 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12905 "00110001" // /* MW 3 */ + 12906 "00011110" // /* MW 2 */ + 12907 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first + 12908 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12909 "00010001" // /* MW 3 */ + 12910 "00001010" // /* MW 2 */ + 12911 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first + 12912 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12913 "00001010" // /* MW 3 */ + 12914 "00000110" // /* MW 2 */ + 12915 "00000111" // /* MW 1 */ + 12916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12917 "00000000" // /* MW 1 */ + 12918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12919 "00000000" // /* MW 1 */ + 12920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12921 "00000000" // /* MW 1 */ + 12922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12923 "00000000" // /* MW 1 */ + 12924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12925 "00000000" // /* MW 1 */ + 12926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12927 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 58 + 12928 "10000100" // JZ r16, #12960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12960 delay_slots=5 */ + 12929 "00000001" // /* MW 5 */ + 12930 "00000000" // /* MW 4 */ + 12931 "01010000" // /* MW 3 */ + 12932 "00011001" // /* MW 2 */ + 12933 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12934 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12935 "01100000" // /* MW 3 */ + 12936 "00111011" // /* MW 2 */ + 12937 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12938 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12939 "00000000" // /* MW 5 */ + 12940 "10100000" // /* MW 4 */ + 12941 "00001001" // /* MW 3 */ + 12942 "01111111" // /* MW 2 */ + 12943 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12949 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12950 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12951 "00000001" // /* MW 9 */ + 12952 "00100110" // /* MW 8 */ + 12953 "00000000" // /* MW 7 */ + 12954 "00000000" // /* MW 6 */ + 12955 "01011011" // /* MW 5 */ + 12956 "00000001" // /* MW 4 */ + 12957 "11110000" // /* MW 3 */ + 12958 "00101100" // /* MW 2 */ + 12959 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 + 12960 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12961 "00010000" // /* MW 9 */ + 12962 "00110100" // /* MW 8 */ + 12963 "00110010" // /* MW 7 */ + 12964 "11110000" // /* MW 6 */ + 12965 "00000001" // /* MW 5 */ + 12966 "00000000" // /* MW 4 */ + 12967 "00100000" // /* MW 3 */ + 12968 "10000111" // /* MW 2 */ + 12969 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12970 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12971 "11100010" // /* MW 5 */ + 12972 "00000100" // /* MW 4 */ + 12973 "01010000" // /* MW 3 */ + 12974 "11000000" // /* MW 2 */ + 12975 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 + 12976 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12977 "11101001" // /* MW 5 */ + 12978 "00000010" // /* MW 4 */ + 12979 "00100001" // /* MW 3 */ + 12980 "10000011" // /* MW 2 */ + 12981 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12982 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12983 "00100101" // /* MW 5 */ + 12984 "00000001" // /* MW 4 */ + 12985 "00100000" // /* MW 3 */ + 12986 "00111110" // /* MW 2 */ + 12987 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first + 12988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12989 "00000001" // /* MW 5 */ + 12990 "00000000" // /* MW 4 */ + 12991 "00000000" // /* MW 3 */ + 12992 "11111000" // /* MW 2 */ + 12993 "11111111" // /* MW 1 */ + 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12995 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12997 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12998 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12999 "00010111" // /* MW 3 */ + 13000 "00000010" // /* MW 2 */ + 13001 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13002 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13003 "01000001" // /* MW 5 */ + 13004 "01110000" // /* MW 4 */ + 13005 "00001111" // /* MW 3 */ + 13006 "00000000" // /* MW 2 */ + 13007 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13008 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13009 "00010110" // /* MW 3 */ + 13010 "01000000" // /* MW 2 */ + 13011 "00001000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13012 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13013 "11000000" // /* MW 3 */ + 13014 "01100000" // /* MW 2 */ + 13015 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13016 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13017 "00000001" // /* MW 3 */ + 13018 "00000001" // /* MW 2 */ + 13019 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 13023 "00000000" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 2 "conv2d_dw_bf16.h" 199 first +.function_start + 13024 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13025 "11000000" // /* MW 3 */ + 13026 "01010110" // /* MW 2 */ + 13027 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 2 "conv2d_dw_bf16.h" 204 82 + 13028 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13029 "10101001" // /* MW 5 */ + 13030 "00000001" // /* MW 4 */ + 13031 "11011110" // /* MW 3 */ + 13032 "10010011" // /* MW 2 */ + 13033 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 1 "io_buffer_main.h" 125 25 + 13034 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13035 "00000010" // /* MW 5 */ + 13036 "11010001" // /* MW 4 */ + 13037 "11010110" // /* MW 3 */ + 13038 "10000011" // /* MW 2 */ + 13039 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 204 82 first + 13040 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13041 "10001010" // /* MW 3 */ + 13042 "11101000" // /* MW 2 */ + 13043 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 first + 13044 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13045 "01000110" // /* MW 3 */ + 13046 "11111101" // /* MW 2 */ + 13047 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13048 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13049 "00100110" // /* MW 3 */ + 13050 "00111101" // /* MW 2 */ + 13051 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13052 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13053 "01000110" // /* MW 3 */ + 13054 "11111111" // /* MW 2 */ + 13055 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13056 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13057 "00100110" // /* MW 3 */ + 13058 "00101111" // /* MW 2 */ + 13059 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13060 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13061 "00000110" // /* MW 3 */ + 13062 "00101101" // /* MW 2 */ + 13063 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 first + 13064 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13065 "01000110" // /* MW 3 */ + 13066 "11111100" // /* MW 2 */ + 13067 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13068 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13069 "00100110" // /* MW 3 */ + 13070 "00111100" // /* MW 2 */ + 13071 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13072 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13073 "01000110" // /* MW 3 */ + 13074 "11111110" // /* MW 2 */ + 13075 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13076 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13077 "00100110" // /* MW 3 */ + 13078 "00101110" // /* MW 2 */ + 13079 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13080 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13081 "00000110" // /* MW 3 */ + 13082 "00101100" // /* MW 2 */ + 13083 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 first + 13084 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13085 "11000110" // /* MW 3 */ + 13086 "11111100" // /* MW 2 */ + 13087 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13088 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13089 "10100110" // /* MW 3 */ + 13090 "00111100" // /* MW 2 */ + 13091 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13092 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13093 "11000110" // /* MW 3 */ + 13094 "11111110" // /* MW 2 */ + 13095 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13096 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13097 "10100110" // /* MW 3 */ + 13098 "00101110" // /* MW 2 */ + 13099 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13100 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13101 "10000110" // /* MW 3 */ + 13102 "00101100" // /* MW 2 */ + 13103 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 first + 13104 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13105 "11000110" // /* MW 3 */ + 13106 "11111111" // /* MW 2 */ + 13107 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 + 13108 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13109 "10100110" // /* MW 3 */ + 13110 "00101111" // /* MW 2 */ + 13111 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13112 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13113 "00010000" // /* MW 9 */ + 13114 "00110100" // /* MW 8 */ + 13115 "00110010" // /* MW 7 */ + 13116 "11110010" // /* MW 6 */ + 13117 "00000001" // /* MW 5 */ + 13118 "00000000" // /* MW 4 */ + 13119 "11010000" // /* MW 3 */ + 13120 "11110000" // /* MW 2 */ + 13121 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 13122 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13123 "10000001" // /* MW 5 */ + 13124 "11000101" // /* MW 4 */ + 13125 "01011000" // /* MW 3 */ + 13126 "10011000" // /* MW 2 */ + 13127 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13128 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13129 "00010000" // /* MW 3 */ + 13130 "00001111" // /* MW 2 */ + 13131 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 204 43 + 13132 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13133 "01011000" // /* MW 11 */ + 13134 "00000000" // /* MW 10 */ + 13135 "01100000" // /* MW 9 */ + 13136 "01101010" // /* MW 8 */ + 13137 "00100000" // /* MW 7 */ + 13138 "00000000" // /* MW 6 */ + 13139 "01101000" // /* MW 5 */ + 13140 "00111011" // /* MW 4 */ + 13141 "01110000" // /* MW 3 */ + 13142 "10000101" // /* MW 2 */ + 13143 "10000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 first +.src_ref 2 "conv2d_dw_bf16.h" 225 4 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13144 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 13145 "01100000" // /* MW 13 */ + 13146 "00001001" // /* MW 12 */ + 13147 "01100010" // /* MW 11 */ + 13148 "00001011" // /* MW 10 */ + 13149 "00010000" // /* MW 9 */ + 13150 "11100000" // /* MW 8 */ + 13151 "00101101" // /* MW 7 */ + 13152 "00000100" // /* MW 6 */ + 13153 "11101001" // /* MW 5 */ + 13154 "00111000" // /* MW 4 */ + 13155 "11010000" // /* MW 3 */ + 13156 "10111000" // /* MW 2 */ + 13157 "01111111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13158 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13159 "01110010" // /* MW 9 */ + 13160 "10010000" // /* MW 8 */ + 13161 "10000000" // /* MW 7 */ + 13162 "00000010" // /* MW 6 */ + 13163 "01001011" // /* MW 5 */ + 13164 "00001100" // /* MW 4 */ + 13165 "11010001" // /* MW 3 */ + 13166 "10110100" // /* MW 2 */ + 13167 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13168 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13169 "01111110" // /* MW 9 */ + 13170 "11000000" // /* MW 8 */ + 13171 "11100001" // /* MW 7 */ + 13172 "00000011" // /* MW 6 */ + 13173 "10010000" // /* MW 5 */ + 13174 "10101011" // /* MW 4 */ + 13175 "11010001" // /* MW 3 */ + 13176 "00110000" // /* MW 2 */ + 13177 "01101101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 13178 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13179 "01011110" // /* MW 9 */ + 13180 "10010000" // /* MW 8 */ + 13181 "00000111" // /* MW 7 */ + 13182 "00000010" // /* MW 6 */ + 13183 "11110100" // /* MW 5 */ + 13184 "11110000" // /* MW 4 */ + 13185 "11010001" // /* MW 3 */ + 13186 "00001010" // /* MW 2 */ + 13187 "01111001" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13188 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13189 "10000010" // /* MW 5 */ + 13190 "00000000" // /* MW 4 */ + 13191 "01010000" // /* MW 3 */ + 13192 "00011110" // /* MW 2 */ + 13193 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first + 13194 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13195 "00010000" // /* MW 11 */ + 13196 "11111000" // /* MW 10 */ + 13197 "01111001" // /* MW 9 */ + 13198 "00001100" // /* MW 8 */ + 13199 "00000000" // /* MW 7 */ + 13200 "00000000" // /* MW 6 */ + 13201 "01001011" // /* MW 5 */ + 13202 "00010000" // /* MW 4 */ + 13203 "11010110" // /* MW 3 */ + 13204 "11000000" // /* MW 2 */ + 13205 "01101001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 + 13206 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13207 "00010000" // /* MW 11 */ + 13208 "00101000" // /* MW 10 */ + 13209 "10111010" // /* MW 9 */ + 13210 "00001101" // /* MW 8 */ + 13211 "00000000" // /* MW 7 */ + 13212 "00000000" // /* MW 6 */ + 13213 "01001011" // /* MW 5 */ + 13214 "00010000" // /* MW 4 */ + 13215 "11010010" // /* MW 3 */ + 13216 "10010010" // /* MW 2 */ + 13217 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first + 13218 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13219 "00000101" // /* MW 5 */ + 13220 "01100001" // /* MW 4 */ + 13221 "10000100" // /* MW 3 */ + 13222 "00010110" // /* MW 2 */ + 13223 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 + 13224 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13225 "10001010" // /* MW 3 */ + 13226 "00000000" // /* MW 2 */ + 13227 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 244 4 + 13228 "10111010" // LDA r5, [p3]; MOVXM p3, #13456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13229 "00010000" // /* MW 9 */ + 13230 "01001000" // /* MW 8 */ + 13231 "10110010" // /* MW 7 */ + 13232 "00001101" // /* MW 6 */ + 13233 "00000000" // /* MW 5 */ + 13234 "00000000" // /* MW 4 */ + 13235 "11010000" // /* MW 3 */ + 13236 "10010110" // /* MW 2 */ + 13237 "01100000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first + 13238 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13239 "10101000" // /* MW 9 */ + 13240 "00000001" // /* MW 8 */ + 13241 "10001110" // /* MW 7 */ + 13242 "00001010" // /* MW 6 */ + 13243 "00010100" // /* MW 5 */ + 13244 "00000000" // /* MW 4 */ + 13245 "11110000" // /* MW 3 */ + 13246 "00101100" // /* MW 2 */ + 13247 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.src_ref 2 "conv2d_dw_bf16.h" 271 12 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 +.src_ref 2 "conv2d_dw_bf16.h" 273 12 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 +.src_ref 2 "conv2d_dw_bf16.h" 275 12 +.src_ref 2 "conv2d_dw_bf16.h" 276 12 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 + 13248 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13249 "00000000" // /* MW 15 */ + 13250 "00000000" // /* MW 14 */ + 13251 "01111000" // /* MW 13 */ + 13252 "10111001" // /* MW 12 */ + 13253 "00001110" // /* MW 11 */ + 13254 "00001000" // /* MW 10 */ + 13255 "00110110" // /* MW 9 */ + 13256 "00000000" // /* MW 8 */ + 13257 "01011011" // /* MW 7 */ + 13258 "00000001" // /* MW 6 */ + 13259 "00100000" // /* MW 5 */ + 13260 "00000000" // /* MW 4 */ + 13261 "00000000" // /* MW 3 */ + 13262 "10010001" // /* MW 2 */ + 13263 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13264 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13265 "01101010" // /* MW 15 */ + 13266 "01100011" // /* MW 14 */ + 13267 "10101100" // /* MW 13 */ + 13268 "00000011" // /* MW 12 */ + 13269 "00001110" // /* MW 11 */ + 13270 "00000010" // /* MW 10 */ + 13271 "11010100" // /* MW 9 */ + 13272 "00001101" // /* MW 8 */ + 13273 "01001011" // /* MW 7 */ + 13274 "00010000" // /* MW 6 */ + 13275 "00100000" // /* MW 5 */ + 13276 "00000000" // /* MW 4 */ + 13277 "11110000" // /* MW 3 */ + 13278 "00101100" // /* MW 2 */ + 13279 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13280 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13281 "00011010" // /* MW 15 */ + 13282 "01001000" // /* MW 14 */ + 13283 "11001100" // /* MW 13 */ + 13284 "00111111" // /* MW 12 */ + 13285 "10111001" // /* MW 11 */ + 13286 "11011010" // /* MW 10 */ + 13287 "00101111" // /* MW 9 */ + 13288 "00000100" // /* MW 8 */ + 13289 "01001011" // /* MW 7 */ + 13290 "00010000" // /* MW 6 */ + 13291 "00100101" // /* MW 5 */ + 13292 "00000000" // /* MW 4 */ + 13293 "11010000" // /* MW 3 */ + 13294 "10100011" // /* MW 2 */ + 13295 "01000000" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13297 "01101110" // /* MW 9 */ + 13298 "10000001" // /* MW 8 */ + 13299 "10000100" // /* MW 7 */ + 13300 "00000010" // /* MW 6 */ + 13301 "11110100" // /* MW 5 */ + 13302 "11110000" // /* MW 4 */ + 13303 "01110001" // /* MW 3 */ + 13304 "10110011" // /* MW 2 */ + 13305 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13307 "00000001" // /* MW 9 */ + 13308 "10001001" // /* MW 8 */ + 13309 "10001010" // /* MW 7 */ + 13310 "01000110" // /* MW 6 */ + 13311 "00001011" // /* MW 5 */ + 13312 "10011100" // /* MW 4 */ + 13313 "11101010" // /* MW 3 */ + 13314 "00111000" // /* MW 2 */ + 13315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13317 "00000001" // /* MW 9 */ + 13318 "00110101" // /* MW 8 */ + 13319 "10001001" // /* MW 7 */ + 13320 "11000110" // /* MW 6 */ + 13321 "10000110" // /* MW 5 */ + 13322 "00110000" // /* MW 4 */ + 13323 "01101010" // /* MW 3 */ + 13324 "10110001" // /* MW 2 */ + 13325 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13327 "00000110" // /* MW 3 */ + 13328 "10001001" // /* MW 2 */ + 13329 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13331 "10100001" // /* MW 7 */ + 13332 "01001000" // /* MW 6 */ + 13333 "10001100" // /* MW 5 */ + 13334 "11000110" // /* MW 4 */ + 13335 "10001110" // /* MW 3 */ + 13336 "10110000" // /* MW 2 */ + 13337 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13339 "10100001" // /* MW 7 */ + 13340 "00110110" // /* MW 6 */ + 13341 "10001010" // /* MW 5 */ + 13342 "01000110" // /* MW 4 */ + 13343 "00001111" // /* MW 3 */ + 13344 "10011100" // /* MW 2 */ + 13345 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13347 "00001110" // /* MW 3 */ + 13348 "10001001" // /* MW 2 */ + 13349 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13351 "11100001" // /* MW 7 */ + 13352 "10010010" // /* MW 6 */ + 13353 "10001011" // /* MW 5 */ + 13354 "01000110" // /* MW 4 */ + 13355 "00000011" // /* MW 3 */ + 13356 "00011100" // /* MW 2 */ + 13357 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13359 "11100001" // /* MW 7 */ + 13360 "01010110" // /* MW 6 */ + 13361 "10001000" // /* MW 5 */ + 13362 "01000110" // /* MW 4 */ + 13363 "00000111" // /* MW 3 */ + 13364 "00011100" // /* MW 2 */ + 13365 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13366 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13367 "01101110" // /* MW 9 */ + 13368 "01000001" // /* MW 8 */ + 13369 "00011000" // /* MW 7 */ + 13370 "00000001" // /* MW 6 */ + 13371 "00010000" // /* MW 5 */ + 13372 "00000000" // /* MW 4 */ + 13373 "11110000" // /* MW 3 */ + 13374 "00101100" // /* MW 2 */ + 13375 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13377 "01101010" // /* MW 15 */ + 13378 "01100011" // /* MW 14 */ + 13379 "01111100" // /* MW 13 */ + 13380 "10100101" // /* MW 12 */ + 13381 "00000001" // /* MW 11 */ + 13382 "00000000" // /* MW 10 */ + 13383 "00000000" // /* MW 9 */ + 13384 "00000000" // /* MW 8 */ + 13385 "01011011" // /* MW 7 */ + 13386 "00000001" // /* MW 6 */ + 13387 "00100000" // /* MW 5 */ + 13388 "00000000" // /* MW 4 */ + 13389 "11110000" // /* MW 3 */ + 13390 "00101100" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 13392 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13393 "00011010" // /* MW 15 */ + 13394 "01001000" // /* MW 14 */ + 13395 "01111100" // /* MW 13 */ + 13396 "10100101" // /* MW 12 */ + 13397 "00000001" // /* MW 11 */ + 13398 "00000000" // /* MW 10 */ + 13399 "00000000" // /* MW 9 */ + 13400 "00000000" // /* MW 8 */ + 13401 "01011011" // /* MW 7 */ + 13402 "00000001" // /* MW 6 */ + 13403 "00100000" // /* MW 5 */ + 13404 "00000000" // /* MW 4 */ + 13405 "11110000" // /* MW 3 */ + 13406 "00101100" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13408 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13409 "01101110" // /* MW 9 */ + 13410 "10000001" // /* MW 8 */ + 13411 "10000100" // /* MW 7 */ + 13412 "00000010" // /* MW 6 */ + 13413 "10010000" // /* MW 5 */ + 13414 "01110011" // /* MW 4 */ + 13415 "11110100" // /* MW 3 */ + 13416 "00001100" // /* MW 2 */ + 13417 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13418 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13419 "00000001" // /* MW 7 */ + 13420 "10001001" // /* MW 6 */ + 13421 "10001010" // /* MW 5 */ + 13422 "01000110" // /* MW 4 */ + 13423 "00001011" // /* MW 3 */ + 13424 "10011100" // /* MW 2 */ + 13425 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13426 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13427 "00000001" // /* MW 7 */ + 13428 "00110101" // /* MW 6 */ + 13429 "10001001" // /* MW 5 */ + 13430 "11000110" // /* MW 4 */ + 13431 "10000110" // /* MW 3 */ + 13432 "00110000" // /* MW 2 */ + 13433 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13434 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13435 "00000110" // /* MW 3 */ + 13436 "10001001" // /* MW 2 */ + 13437 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13438 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13439 "10100001" // /* MW 7 */ + 13440 "01001000" // /* MW 6 */ + 13441 "10001100" // /* MW 5 */ + 13442 "01000110" // /* MW 4 */ + 13443 "00001111" // /* MW 3 */ + 13444 "10011100" // /* MW 2 */ + 13445 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13446 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13447 "10100001" // /* MW 9 */ + 13448 "00110110" // /* MW 8 */ + 13449 "10001010" // /* MW 7 */ + 13450 "11000010" // /* MW 6 */ + 13451 "10001110" // /* MW 5 */ + 13452 "10110000" // /* MW 4 */ + 13453 "11110100" // /* MW 3 */ + 13454 "00101100" // /* MW 2 */ + 13455 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13456 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13457 "00011101" // /* MW 5 */ + 13458 "00010010" // /* MW 4 */ + 13459 "10001011" // /* MW 3 */ + 13460 "00011110" // /* MW 2 */ + 13461 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13462 "01011010" // MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13463 "11100001" // /* MW 9 */ + 13464 "10010010" // /* MW 8 */ + 13465 "10001011" // /* MW 7 */ + 13466 "00000010" // /* MW 6 */ + 13467 "01010100" // /* MW 5 */ + 13468 "10110111" // /* MW 4 */ + 13469 "00000001" // /* MW 3 */ + 13470 "00000000" // /* MW 2 */ + 13471 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13472 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13473 "11100001" // /* MW 11 */ + 13474 "01010110" // /* MW 10 */ + 13475 "10001000" // /* MW 9 */ + 13476 "00000010" // /* MW 8 */ + 13477 "01001111" // /* MW 7 */ + 13478 "10001111" // /* MW 6 */ + 13479 "00000001" // /* MW 5 */ + 13480 "00000000" // /* MW 4 */ + 13481 "01110000" // /* MW 3 */ + 13482 "10000101" // /* MW 2 */ + 13483 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13484 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13485 "01111111" // /* MW 3 */ + 13486 "01110010" // /* MW 2 */ + 13487 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13488 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13489 "10011011" // /* MW 3 */ + 13490 "00011101" // /* MW 2 */ + 13491 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13492 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13493 "01110100" // /* MW 3 */ + 13494 "00011100" // /* MW 2 */ + 13495 "00111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13496 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13497 "10110100" // /* MW 3 */ + 13498 "01011000" // /* MW 2 */ + 13499 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13500 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13501 "10010110" // /* MW 3 */ + 13502 "00010001" // /* MW 2 */ + 13503 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13504 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13505 "00010110" // /* MW 3 */ + 13506 "00010000" // /* MW 2 */ + 13507 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13508 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13509 "01101100" // /* MW 3 */ + 13510 "01010000" // /* MW 2 */ + 13511 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13512 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13513 "00010100" // /* MW 3 */ + 13514 "01010011" // /* MW 2 */ + 13515 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13516 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13517 "01110000" // /* MW 7 */ + 13518 "00110110" // /* MW 6 */ + 13519 "10101000" // /* MW 5 */ + 13520 "00000010" // /* MW 4 */ + 13521 "01100000" // /* MW 3 */ + 13522 "01000010" // /* MW 2 */ + 13523 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13524 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13525 "00000011" // /* MW 3 */ + 13526 "00011100" // /* MW 2 */ + 13527 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13528 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13529 "01110000" // /* MW 7 */ + 13530 "01000101" // /* MW 6 */ + 13531 "10000000" // /* MW 5 */ + 13532 "00000001" // /* MW 4 */ + 13533 "01100000" // /* MW 3 */ + 13534 "01010010" // /* MW 2 */ + 13535 "01000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13536 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13537 "01000001" // /* MW 7 */ + 13538 "01101101" // /* MW 6 */ + 13539 "10001100" // /* MW 5 */ + 13540 "01000110" // /* MW 4 */ + 13541 "00000111" // /* MW 3 */ + 13542 "00011100" // /* MW 2 */ + 13543 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13544 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13545 "01000001" // /* MW 7 */ + 13546 "00000011" // /* MW 6 */ + 13547 "10001001" // /* MW 5 */ + 13548 "11000110" // /* MW 4 */ + 13549 "10000010" // /* MW 3 */ + 13550 "00110000" // /* MW 2 */ + 13551 "00000010" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 13552 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13553 "01101110" // /* MW 9 */ + 13554 "10000001" // /* MW 8 */ + 13555 "10000100" // /* MW 7 */ + 13556 "00000010" // /* MW 6 */ + 13557 "11110100" // /* MW 5 */ + 13558 "11110000" // /* MW 4 */ + 13559 "01110001" // /* MW 3 */ + 13560 "10110011" // /* MW 2 */ + 13561 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13562 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13563 "00000001" // /* MW 9 */ + 13564 "10001001" // /* MW 8 */ + 13565 "10001010" // /* MW 7 */ + 13566 "01000110" // /* MW 6 */ + 13567 "00001011" // /* MW 5 */ + 13568 "10011100" // /* MW 4 */ + 13569 "11101010" // /* MW 3 */ + 13570 "00111000" // /* MW 2 */ + 13571 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13572 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13573 "00000001" // /* MW 9 */ + 13574 "00110101" // /* MW 8 */ + 13575 "10001001" // /* MW 7 */ + 13576 "11000110" // /* MW 6 */ + 13577 "10000110" // /* MW 5 */ + 13578 "00110000" // /* MW 4 */ + 13579 "01101010" // /* MW 3 */ + 13580 "10110001" // /* MW 2 */ + 13581 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13582 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13583 "00000110" // /* MW 3 */ + 13584 "10001001" // /* MW 2 */ + 13585 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13586 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13587 "10100001" // /* MW 7 */ + 13588 "01001000" // /* MW 6 */ + 13589 "10001100" // /* MW 5 */ + 13590 "11000110" // /* MW 4 */ + 13591 "10001110" // /* MW 3 */ + 13592 "10110000" // /* MW 2 */ + 13593 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13594 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13595 "10100001" // /* MW 7 */ + 13596 "00110110" // /* MW 6 */ + 13597 "10001010" // /* MW 5 */ + 13598 "01000110" // /* MW 4 */ + 13599 "00001111" // /* MW 3 */ + 13600 "10011100" // /* MW 2 */ + 13601 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13602 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13603 "00001110" // /* MW 3 */ + 13604 "10001001" // /* MW 2 */ + 13605 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13606 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13607 "11100001" // /* MW 7 */ + 13608 "10010010" // /* MW 6 */ + 13609 "10001011" // /* MW 5 */ + 13610 "01000110" // /* MW 4 */ + 13611 "00000011" // /* MW 3 */ + 13612 "00011100" // /* MW 2 */ + 13613 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13614 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13615 "11100001" // /* MW 7 */ + 13616 "01010110" // /* MW 6 */ + 13617 "10001000" // /* MW 5 */ + 13618 "01000110" // /* MW 4 */ + 13619 "00000111" // /* MW 3 */ + 13620 "00011100" // /* MW 2 */ + 13621 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13622 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13623 "00000101" // /* MW 5 */ + 13624 "01100001" // /* MW 4 */ + 13625 "11110100" // /* MW 3 */ + 13626 "00101100" // /* MW 2 */ + 13627 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13628 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13629 "01000001" // /* MW 3 */ + 13630 "01101101" // /* MW 2 */ + 13631 "10001100" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13632 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13633 "00011010" // /* MW 15 */ + 13634 "01001000" // /* MW 14 */ + 13635 "01111100" // /* MW 13 */ + 13636 "10100101" // /* MW 12 */ + 13637 "00000001" // /* MW 11 */ + 13638 "00000000" // /* MW 10 */ + 13639 "00000000" // /* MW 9 */ + 13640 "00000000" // /* MW 8 */ + 13641 "01011011" // /* MW 7 */ + 13642 "00000001" // /* MW 6 */ + 13643 "00100000" // /* MW 5 */ + 13644 "00000000" // /* MW 4 */ + 13645 "11110000" // /* MW 3 */ + 13646 "00101100" // /* MW 2 */ + 13647 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 244 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13648 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 13649 "01101000" // /* MW 11 */ + 13650 "10000001" // /* MW 10 */ + 13651 "10000100" // /* MW 9 */ + 13652 "00000010" // /* MW 8 */ + 13653 "00100111" // /* MW 7 */ + 13654 "00000100" // /* MW 6 */ + 13655 "00100000" // /* MW 5 */ + 13656 "11100111" // /* MW 4 */ + 13657 "11111000" // /* MW 3 */ + 13658 "00001100" // /* MW 2 */ + 13659 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13660 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13661 "00000001" // /* MW 7 */ + 13662 "10001001" // /* MW 6 */ + 13663 "10001010" // /* MW 5 */ + 13664 "01000110" // /* MW 4 */ + 13665 "00001011" // /* MW 3 */ + 13666 "10011100" // /* MW 2 */ + 13667 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13668 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13669 "00000001" // /* MW 7 */ + 13670 "00110101" // /* MW 6 */ + 13671 "10001001" // /* MW 5 */ + 13672 "11000110" // /* MW 4 */ + 13673 "10000110" // /* MW 3 */ + 13674 "00110000" // /* MW 2 */ + 13675 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13676 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13677 "00000110" // /* MW 3 */ + 13678 "10001001" // /* MW 2 */ + 13679 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13680 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13681 "10100001" // /* MW 7 */ + 13682 "01001000" // /* MW 6 */ + 13683 "10001100" // /* MW 5 */ + 13684 "01000110" // /* MW 4 */ + 13685 "00001111" // /* MW 3 */ + 13686 "10011100" // /* MW 2 */ + 13687 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13688 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13689 "10100001" // /* MW 7 */ + 13690 "00110110" // /* MW 6 */ + 13691 "10001010" // /* MW 5 */ + 13692 "11000110" // /* MW 4 */ + 13693 "10001110" // /* MW 3 */ + 13694 "10110000" // /* MW 2 */ + 13695 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13696 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13697 "00001110" // /* MW 3 */ + 13698 "10001001" // /* MW 2 */ + 13699 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13700 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13701 "11100001" // /* MW 3 */ + 13702 "10010010" // /* MW 2 */ + 13703 "10001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13704 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13705 "11100001" // /* MW 3 */ + 13706 "01010110" // /* MW 2 */ + 13707 "10001000" // /* MW 1 */ + 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13709 "00000000" // /* MW 1 */ + 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13711 "00000000" // /* MW 1 */ + 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13713 "00000000" // /* MW 1 */ + 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13715 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first + 13716 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13717 "10010110" // /* MW 3 */ + 13718 "00010001" // /* MW 2 */ + 13719 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 290 first + 13720 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13721 "00000000" // /* MW 5 */ + 13722 "01010000" // /* MW 4 */ + 13723 "11000000" // /* MW 3 */ + 13724 "00000010" // /* MW 2 */ + 13725 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13726 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13727 "01101100" // /* MW 3 */ + 13728 "01010000" // /* MW 2 */ + 13729 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.delay_slot + 13730 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13731 "00010100" // /* MW 3 */ + 13732 "01010011" // /* MW 2 */ + 13733 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13734 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13735 "01101100" // /* MW 3 */ + 13736 "01010000" // /* MW 2 */ + 13737 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.delay_slot + 13738 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13739 "00010011" // /* MW 3 */ + 13740 "10001010" // /* MW 2 */ + 13741 "00001010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 13742 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13743 "10010011" // /* MW 3 */ + 13744 "00111010" // /* MW 2 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 13745 "00001010" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 444 first +.src_ref 7 "superkernels.cpp" 449 6 +.function_start + 13760 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13761 "10000000" // /* MW 5 */ + 13762 "11001000" // /* MW 4 */ + 13763 "11001000" // /* MW 3 */ + 13764 "00000111" // /* MW 2 */ + 13765 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 first + 13766 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13767 "01000001" // /* MW 5 */ + 13768 "00101111" // /* MW 4 */ + 13769 "11010000" // /* MW 3 */ + 13770 "11000010" // /* MW 2 */ + 13771 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 444 + 13772 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13773 "00000001" // /* MW 5 */ + 13774 "00000000" // /* MW 4 */ + 13775 "00000000" // /* MW 3 */ + 13776 "00010000" // /* MW 2 */ + 13777 "00000000" // /* MW 1 */ + 13778 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13779 "01110000" // /* MW 7 */ + 13780 "01110000" // /* MW 6 */ + 13781 "00101101" // /* MW 5 */ + 13782 "00000010" // /* MW 4 */ + 13783 "10110000" // /* MW 3 */ + 13784 "00111010" // /* MW 2 */ + 13785 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 + 13786 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13787 "01110000" // /* MW 7 */ + 13788 "11110000" // /* MW 6 */ + 13789 "10101000" // /* MW 5 */ + 13790 "00000001" // /* MW 4 */ + 13791 "10110000" // /* MW 3 */ + 13792 "10110110" // /* MW 2 */ + 13793 "11111111" // /* MW 1 */ + 13794 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13795 "00011101" // /* MW 3 */ + 13796 "11101100" // /* MW 2 */ + 13797 "00001111" // /* MW 1 */ + 13798 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13799 "10011101" // /* MW 3 */ + 13800 "11110111" // /* MW 2 */ + 13801 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 + 13802 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13803 "01110000" // /* MW 7 */ + 13804 "01100000" // /* MW 6 */ + 13805 "11001010" // /* MW 5 */ + 13806 "00000001" // /* MW 4 */ + 13807 "10110000" // /* MW 3 */ + 13808 "00000010" // /* MW 2 */ + 13809 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 +.src_ref 7 "superkernels.cpp" 449 16 + 13810 "10000100" // JNZ r16, #13936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13936 delay_slots=5 */ + 13811 "00000001" // /* MW 5 */ + 13812 "01000000" // /* MW 4 */ + 13813 "00111000" // /* MW 3 */ + 13814 "00011011" // /* MW 2 */ + 13815 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 13816 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13817 "11000000" // /* MW 3 */ + 13818 "11010110" // /* MW 2 */ + 13819 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 22 first +.delay_slot + 13820 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13821 "10010000" // /* MW 3 */ + 13822 "01100010" // /* MW 2 */ + 13823 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 30 +.delay_slot + 13824 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13825 "11111011" // /* MW 3 */ + 13826 "01100011" // /* MW 2 */ + 13827 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13828 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13829 "10100000" // /* MW 5 */ + 13830 "11001000" // /* MW 4 */ + 13831 "11000110" // /* MW 3 */ + 13832 "00000111" // /* MW 2 */ + 13833 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13834 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13835 "00110001" // /* MW 3 */ + 13836 "00000110" // /* MW 2 */ + 13837 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13838 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13839 "00010001" // /* MW 9 */ + 13840 "00110100" // /* MW 8 */ + 13841 "10110010" // /* MW 7 */ + 13842 "11110000" // /* MW 6 */ + 13843 "00000001" // /* MW 5 */ + 13844 "00000000" // /* MW 4 */ + 13845 "01100000" // /* MW 3 */ + 13846 "10010001" // /* MW 2 */ + 13847 "11110000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13848 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13849 "00010000" // /* MW 11 */ + 13850 "00110010" // /* MW 10 */ + 13851 "10110010" // /* MW 9 */ + 13852 "11110000" // /* MW 8 */ + 13853 "00000001" // /* MW 7 */ + 13854 "00000000" // /* MW 6 */ + 13855 "10001011" // /* MW 5 */ + 13856 "10001000" // /* MW 4 */ + 13857 "11100000" // /* MW 3 */ + 13858 "11000000" // /* MW 2 */ + 13859 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13861 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 13862 "00000100" // JL #12352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12352 delay_slots=5 */ + 13863 "00000001" // /* MW 5 */ + 13864 "00000000" // /* MW 4 */ + 13865 "00100000" // /* MW 3 */ + 13866 "00011000" // /* MW 2 */ + 13867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13871 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13872 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13873 "00110001" // /* MW 3 */ + 13874 "00100000" // /* MW 2 */ + 13875 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 13876 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13877 "00000101" // /* MW 3 */ + 13878 "00100000" // /* MW 2 */ + 13879 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 13880 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13881 "01110000" // /* MW 7 */ + 13882 "10100101" // /* MW 6 */ + 13883 "00000001" // /* MW 5 */ + 13884 "00000000" // /* MW 4 */ + 13885 "00110000" // /* MW 3 */ + 13886 "11000010" // /* MW 2 */ + 13887 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 +.src_ref 7 "superkernels.cpp" 461 2 +.return_address + 13888 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13889 "00000000" // /* MW 7 */ + 13890 "10000010" // /* MW 6 */ + 13891 "00110011" // /* MW 5 */ + 13892 "00000001" // /* MW 4 */ + 13893 "01100000" // /* MW 3 */ + 13894 "10010001" // /* MW 2 */ + 13895 "00110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 17 first + 13896 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13897 "00111010" // /* MW 3 */ + 13898 "00000110" // /* MW 2 */ + 13899 "00000010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 +.src_ref 7 "superkernels.cpp" 453 15 first + 13900 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13901 "00010000" // /* MW 9 */ + 13902 "00110000" // /* MW 8 */ + 13903 "00110010" // /* MW 7 */ + 13904 "11110001" // /* MW 6 */ + 13905 "00000001" // /* MW 5 */ + 13906 "00000000" // /* MW 4 */ + 13907 "01010000" // /* MW 3 */ + 13908 "11000011" // /* MW 2 */ + 13909 "01000100" // /* MW 1 */ + 13910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13911 "00000000" // /* MW 1 */ + 13912 "10000100" // J #13952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13952 delay_slots=5 */ + 13913 "00000000" // /* MW 5 */ + 13914 "00000000" // /* MW 4 */ + 13915 "01000000" // /* MW 3 */ + 13916 "00011011" // /* MW 2 */ + 13917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 +.src_ref 7 "superkernels.cpp" 457 26 +.delay_slot + 13918 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13919 "10110000" // /* MW 5 */ + 13920 "11001000" // /* MW 4 */ + 13921 "11000110" // /* MW 3 */ + 13922 "00000111" // /* MW 2 */ + 13923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13927 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 first +.delay_slot + 13928 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13929 "00110001" // /* MW 3 */ + 13930 "00000110" // /* MW 2 */ + 13931 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 first +.delay_slot + 13932 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13933 "00010001" // /* MW 3 */ + 13934 "00000110" // /* MW 2 */ + 13935 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 7 "superkernels.cpp" 457 26 + 13936 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13937 "00000000" // /* MW 15 */ + 13938 "00000000" // /* MW 14 */ + 13939 "00010000" // /* MW 13 */ + 13940 "00101100" // /* MW 12 */ + 13941 "10110010" // /* MW 11 */ + 13942 "11110001" // /* MW 10 */ + 13943 "00000001" // /* MW 9 */ + 13944 "00000000" // /* MW 8 */ + 13945 "01011011" // /* MW 7 */ + 13946 "00000001" // /* MW 6 */ + 13947 "00100000" // /* MW 5 */ + 13948 "00000000" // /* MW 4 */ + 13949 "11110000" // /* MW 3 */ + 13950 "00101100" // /* MW 2 */ + 13951 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 218 49 first + 13952 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13953 "10000110" // /* MW 3 */ + 13954 "01100111" // /* MW 2 */ + 13955 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 +.src_ref 1 "io_buffer_main.h" 218 49 + 13956 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13957 "00010000" // /* MW 9 */ + 13958 "00101000" // /* MW 8 */ + 13959 "00110010" // /* MW 7 */ + 13960 "11110010" // /* MW 6 */ + 13961 "00000001" // /* MW 5 */ + 13962 "00000000" // /* MW 4 */ + 13963 "11010000" // /* MW 3 */ + 13964 "11101110" // /* MW 2 */ + 13965 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 13966 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13967 "00010110" // /* MW 3 */ + 13968 "11111110" // /* MW 2 */ + 13969 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 13970 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13971 "00110110" // /* MW 3 */ + 13972 "11111110" // /* MW 2 */ + 13973 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 13974 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13975 "01010110" // /* MW 3 */ + 13976 "01000110" // /* MW 2 */ + 13977 "00000010" // /* MW 1 */ + 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13979 "00000000" // /* MW 1 */ + 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13981 "00000000" // /* MW 1 */ + 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13983 "00000000" // /* MW 1 */ + 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13985 "00000000" // /* MW 1 */ + 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13987 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 13988 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13989 "00000010" // /* MW 3 */ + 13990 "01100001" // /* MW 2 */ + 13991 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 13992 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13993 "00010001" // /* MW 3 */ + 13994 "00000110" // /* MW 2 */ + 13995 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 13996 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13997 "11111101" // /* MW 3 */ + 13998 "11100000" // /* MW 2 */ + 13999 "00010111" // /* MW 1 */ + 14000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14001 "00000000" // /* MW 1 */ + 14002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14003 "00000000" // /* MW 1 */ + 14004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14005 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14006 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14007 "00001000" // /* MW 3 */ + 14008 "10010011" // /* MW 2 */ + 14009 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 +.src_ref 7 "superkernels.cpp" 459 47 +.src_ref 7 "superkernels.cpp" 464 6 +.src_ref 7 "superkernels.cpp" 465 16 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 14010 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14011 "00010000" // /* MW 9 */ + 14012 "00100000" // /* MW 8 */ + 14013 "10110010" // /* MW 7 */ + 14014 "11110011" // /* MW 6 */ + 14015 "00000001" // /* MW 5 */ + 14016 "00000000" // /* MW 4 */ + 14017 "00000000" // /* MW 3 */ + 14018 "00101111" // /* MW 2 */ + 14019 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 + 14020 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14021 "11000001" // /* MW 5 */ + 14022 "00101011" // /* MW 4 */ + 14023 "00101000" // /* MW 3 */ + 14024 "00000000" // /* MW 2 */ + 14025 "00000110" // /* MW 1 */ + 14026 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14027 "01011010" // /* MW 3 */ + 14028 "01101000" // /* MW 2 */ + 14029 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 14030 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14031 "10000001" // /* MW 5 */ + 14032 "00101001" // /* MW 4 */ + 14033 "00100111" // /* MW 3 */ + 14034 "11010011" // /* MW 2 */ + 14035 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 first + 14036 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14037 "00110110" // /* MW 3 */ + 14038 "00000110" // /* MW 2 */ + 14039 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 26 +.src_ref 7 "superkernels.cpp" 461 2 + 14040 "10111010" // LDA r16, [p3]; MOVXM p3, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14041 "00010000" // /* MW 9 */ + 14042 "11100000" // /* MW 8 */ + 14043 "10110011" // /* MW 7 */ + 14044 "11110001" // /* MW 6 */ + 14045 "00000001" // /* MW 5 */ + 14046 "00000000" // /* MW 4 */ + 14047 "11010000" // /* MW 3 */ + 14048 "11000010" // /* MW 2 */ + 14049 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 14050 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14051 "01010110" // /* MW 3 */ + 14052 "00000110" // /* MW 2 */ + 14053 "00000111" // /* MW 1 */ + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ + 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14057 "00000000" // /* MW 1 */ + 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14059 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 14060 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14061 "01110110" // /* MW 3 */ + 14062 "00000110" // /* MW 2 */ + 14063 "00000101" // /* MW 1 */ + 14064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 24 first + 14066 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14067 "00001111" // /* MW 3 */ + 14068 "01100001" // /* MW 2 */ + 14069 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 14070 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14071 "00000111" // /* MW 3 */ + 14072 "10100010" // /* MW 2 */ + 14073 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first + 14074 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14075 "11111101" // /* MW 3 */ + 14076 "00100000" // /* MW 2 */ + 14077 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 first +.no_stack_arguments + 14078 "00000100" // JL #13024 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */ + 14079 "00000001" // /* MW 5 */ + 14080 "00000000" // /* MW 4 */ + 14081 "01110000" // /* MW 3 */ + 14082 "00011001" // /* MW 2 */ + 14083 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first +.delay_slot + 14084 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14085 "00110001" // /* MW 3 */ + 14086 "00000110" // /* MW 2 */ + 14087 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first +.delay_slot + 14088 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14089 "11000001" // /* MW 3 */ + 14090 "01001001" // /* MW 2 */ + 14091 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 201 10 first +.delay_slot + 14092 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14093 "00100101" // /* MW 3 */ + 14094 "10110100" // /* MW 2 */ + 14095 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 first +.delay_slot + 14096 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14097 "00010101" // /* MW 3 */ + 14098 "10111011" // /* MW 2 */ + 14099 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 +.delay_slot + 14100 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14101 "11000001" // /* MW 11 */ + 14102 "10001010" // /* MW 10 */ + 14103 "11011111" // /* MW 9 */ + 14104 "00000011" // /* MW 8 */ + 14105 "00000000" // /* MW 7 */ + 14106 "00000000" // /* MW 6 */ + 14107 "00100000" // /* MW 5 */ + 14108 "00000000" // /* MW 4 */ + 14109 "11110000" // /* MW 3 */ + 14110 "00101100" // /* MW 2 */ + 14111 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 14112 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14113 "00001010" // /* MW 3 */ + 14114 "01100111" // /* MW 2 */ + 14115 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 14116 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14117 "00010110" // /* MW 3 */ + 14118 "00000110" // /* MW 2 */ + 14119 "00000010" // /* MW 1 */ + 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14121 "00000000" // /* MW 1 */ + 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14123 "00000000" // /* MW 1 */ + 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14125 "00000000" // /* MW 1 */ + 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14127 "00000000" // /* MW 1 */ + 14128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14129 "00000000" // /* MW 1 */ + 14130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14131 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 14132 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14133 "11111000" // /* MW 3 */ + 14134 "00010000" // /* MW 2 */ + 14135 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 14136 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14137 "00010000" // /* MW 9 */ + 14138 "00110000" // /* MW 8 */ + 14139 "10110010" // /* MW 7 */ + 14140 "11110000" // /* MW 6 */ + 14141 "00000001" // /* MW 5 */ + 14142 "00000000" // /* MW 4 */ + 14143 "11010000" // /* MW 3 */ + 14144 "11000010" // /* MW 2 */ + 14145 "01011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 first + 14146 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14147 "01010110" // /* MW 3 */ + 14148 "00000110" // /* MW 2 */ + 14149 "00000001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 14150 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14151 "00110110" // /* MW 3 */ + 14152 "00000110" // /* MW 2 */ + 14153 "00000111" // /* MW 1 */ + 14154 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14155 "10011001" // /* MW 3 */ + 14156 "11110100" // /* MW 2 */ + 14157 "00000111" // /* MW 1 */ + 14158 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14159 "11010001" // /* MW 3 */ + 14160 "11111001" // /* MW 2 */ + 14161 "00000111" // /* MW 1 */ + 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14163 "00000000" // /* MW 1 */ + 14164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14165 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 14166 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14167 "00000001" // /* MW 3 */ + 14168 "11100001" // /* MW 2 */ + 14169 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 14170 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14171 "00010001" // /* MW 3 */ + 14172 "11100110" // /* MW 2 */ + 14173 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 16 first + 14174 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14175 "00101000" // /* MW 3 */ + 14176 "01100001" // /* MW 2 */ + 14177 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 14178 "10000100" // JNZ r16, #14208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14208 delay_slots=5 */ + 14179 "00000001" // /* MW 5 */ + 14180 "01000000" // /* MW 4 */ + 14181 "11000000" // /* MW 3 */ + 14182 "00011011" // /* MW 2 */ + 14183 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 +.delay_slot + 14184 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14185 "00000001" // /* MW 3 */ + 14186 "00110000" // /* MW 2 */ + 14187 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14195 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 first + 14196 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14197 "11000001" // /* MW 11 */ + 14198 "10001000" // /* MW 10 */ + 14199 "10000011" // /* MW 9 */ + 14200 "00000011" // /* MW 8 */ + 14201 "00000000" // /* MW 7 */ + 14202 "00000000" // /* MW 6 */ + 14203 "00100000" // /* MW 5 */ + 14204 "00000000" // /* MW 4 */ + 14205 "11110000" // /* MW 3 */ + 14206 "00101100" // /* MW 2 */ + 14207 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 467 + 14208 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14209 "01000001" // /* MW 5 */ + 14210 "11101101" // /* MW 4 */ + 14211 "00101110" // /* MW 3 */ + 14212 "10110110" // /* MW 2 */ + 14213 "11111111" // /* MW 1 */ + 14214 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14215 "11110001" // /* MW 3 */ + 14216 "11110001" // /* MW 2 */ + 14217 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 first + 14218 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 14219 "00000000" // /* MW 3 */ + 14220 "00101000" // /* MW 2 */ + 14221 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 +.delay_slot + 14222 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14223 "00000001" // /* MW 5 */ + 14224 "00000000" // /* MW 4 */ + 14225 "00000000" // /* MW 3 */ + 14226 "11110000" // /* MW 2 */ + 14227 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14233 "00000000" // /* MW 1 */ +.delay_slot + 14234 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14235 "11000000" // /* MW 3 */ + 14236 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 14237 "00011111" // /* MW 1 */ +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0 +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.function superkernel_conv_eltbinary _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.src_ref 7 "superkernels.cpp" 578 +.src_ref 7 "superkernels.cpp" 578 first +.function_start + 14240 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14241 "00000001" // /* MW 5 */ + 14242 "00000000" // /* MW 4 */ + 14243 "00000000" // /* MW 3 */ + 14244 "00001000" // /* MW 2 */ + 14245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 + 14246 "00111010" // ST p7, [sp, #-8]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14247 "00010001" // /* MW 9 */ + 14248 "00100000" // /* MW 8 */ + 14249 "10110010" // /* MW 7 */ + 14250 "11110011" // /* MW 6 */ + 14251 "00000001" // /* MW 5 */ + 14252 "00000000" // /* MW 4 */ + 14253 "10110000" // /* MW 3 */ + 14254 "01110011" // /* MW 2 */ + 14255 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 first + 14256 "10111010" // LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14257 "01110010" // /* MW 9 */ + 14258 "01110000" // /* MW 8 */ + 14259 "00101101" // /* MW 7 */ + 14260 "10000010" // /* MW 6 */ + 14261 "00011101" // /* MW 5 */ + 14262 "11111111" // /* MW 4 */ + 14263 "11010111" // /* MW 3 */ + 14264 "11000010" // /* MW 2 */ + 14265 "11100000" // /* MW 1 */ + 14266 "10011000" // ST p4, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14267 "00011101" // /* MW 3 */ + 14268 "11110110" // /* MW 2 */ + 14269 "00001111" // /* MW 1 */ + 14270 "10011000" // ST p2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14271 "00011101" // /* MW 3 */ + 14272 "11110001" // /* MW 2 */ + 14273 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 7 "superkernels.cpp" 590 35 +.src_ref 7 "superkernels.cpp" 599 105 +.src_ref 7 "superkernels.cpp" 629 34 + 14274 "00000010" // ST lr, [sp, #-20]; MOV p7, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 14275 "01110000" // /* MW 7 */ + 14276 "01100000" // /* MW 6 */ + 14277 "10110011" // /* MW 5 */ + 14278 "00000011" // /* MW 4 */ + 14279 "10110000" // /* MW 3 */ + 14280 "10000111" // /* MW 2 */ + 14281 "11111101" // /* MW 1 */ + 14282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14283 "00000000" // /* MW 1 */ + 14284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14285 "00000000" // /* MW 1 */ + 14286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14287 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 +.src_ref 7 "superkernels.cpp" 583 16 + 14288 "10000100" // JNZ r16, #14688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14688 delay_slots=5 */ + 14289 "00000001" // /* MW 5 */ + 14290 "01000000" // /* MW 4 */ + 14291 "10110000" // /* MW 3 */ + 14292 "00011100" // /* MW 2 */ + 14293 "10000000" // /* MW 1 */ +.delay_slot + 14294 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14295 "00011101" // /* MW 3 */ + 14296 "11101000" // /* MW 2 */ + 14297 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 22 first +.delay_slot + 14298 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14299 "10010000" // /* MW 3 */ + 14300 "01100010" // /* MW 2 */ + 14301 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 30 +.delay_slot + 14302 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14303 "11111011" // /* MW 3 */ + 14304 "01100011" // /* MW 2 */ + 14305 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 11 +.delay_slot + 14306 "01000100" // MOVXM p6, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14307 "10100000" // /* MW 5 */ + 14308 "11001000" // /* MW 4 */ + 14309 "11001100" // /* MW 3 */ + 14310 "00000111" // /* MW 2 */ + 14311 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 11 +.delay_slot + 14312 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14313 "00110001" // /* MW 3 */ + 14314 "00000110" // /* MW 2 */ + 14315 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14316 "10111010" // MOVA r0, #1; MOVXM p6, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14317 "00010000" // /* MW 9 */ + 14318 "00110100" // /* MW 8 */ + 14319 "00110010" // /* MW 7 */ + 14320 "11110011" // /* MW 6 */ + 14321 "00000001" // /* MW 5 */ + 14322 "00000000" // /* MW 4 */ + 14323 "00000000" // /* MW 3 */ + 14324 "00100000" // /* MW 2 */ + 14325 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14326 "01110110" // ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14327 "00010000" // /* MW 11 */ + 14328 "00110010" // /* MW 10 */ + 14329 "00110010" // /* MW 9 */ + 14330 "11110000" // /* MW 8 */ + 14331 "00000001" // /* MW 7 */ + 14332 "00000000" // /* MW 6 */ + 14333 "10001011" // /* MW 5 */ + 14334 "10000100" // /* MW 4 */ + 14335 "11100110" // /* MW 3 */ + 14336 "11000000" // /* MW 2 */ + 14337 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 7 "superkernels.cpp" 587 4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14338 "10111010" // MOVA r1, #0; MOVXM p1, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14339 "00010000" // /* MW 9 */ + 14340 "00000000" // /* MW 8 */ + 14341 "10110011" // /* MW 7 */ + 14342 "11110000" // /* MW 6 */ + 14343 "00000001" // /* MW 5 */ + 14344 "00000000" // /* MW 4 */ + 14345 "00000000" // /* MW 3 */ + 14346 "00000001" // /* MW 2 */ + 14347 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 14348 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 14349 "00000001" // /* MW 5 */ + 14350 "00000000" // /* MW 4 */ + 14351 "01100000" // /* MW 3 */ + 14352 "00000101" // /* MW 2 */ + 14353 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14357 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14358 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14359 "00110001" // /* MW 3 */ + 14360 "00100000" // /* MW 2 */ + 14361 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 14362 "00101100" // NOPA; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14363 "00001010" // /* MW 5 */ + 14364 "01000000" // /* MW 4 */ + 14365 "11110000" // /* MW 3 */ + 14366 "00101100" // /* MW 2 */ + 14367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 14368 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14369 "00000000" // /* MW 15 */ + 14370 "00000000" // /* MW 14 */ + 14371 "01111000" // /* MW 13 */ + 14372 "01100000" // /* MW 12 */ + 14373 "00110111" // /* MW 11 */ + 14374 "00000000" // /* MW 10 */ + 14375 "00000000" // /* MW 9 */ + 14376 "10000000" // /* MW 8 */ + 14377 "00010001" // /* MW 7 */ + 14378 "00000110" // /* MW 6 */ + 14379 "00100000" // /* MW 5 */ + 14380 "00000000" // /* MW 4 */ + 14381 "11110000" // /* MW 3 */ + 14382 "00101100" // /* MW 2 */ + 14383 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 590 35 +.src_ref 7 "superkernels.cpp" 591 4 +.return_address + 14384 "01100100" // MOVX r16, #1; MOV dj0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14385 "00000001" // /* MW 5 */ + 14386 "00000001" // /* MW 4 */ + 14387 "10100001" // /* MW 3 */ + 14388 "00000000" // /* MW 2 */ + 14389 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 590 35 first + 14390 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14391 "01010110" // /* MW 3 */ + 14392 "00000010" // /* MW 2 */ + 14393 "00000111" // /* MW 1 */ + 14394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14395 "00000000" // /* MW 1 */ + 14396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14397 "00000000" // /* MW 1 */ + 14398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14399 "00000000" // /* MW 1 */ + 14400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14401 "00000000" // /* MW 1 */ + 14402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14403 "00000000" // /* MW 1 */ + 14404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14405 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 first + 14406 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14407 "00000111" // /* MW 3 */ + 14408 "10100001" // /* MW 2 */ + 14409 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 + 14410 "10000100" // JNZ r16, #14544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14544 delay_slots=5 */ + 14411 "00000001" // /* MW 5 */ + 14412 "01000000" // /* MW 4 */ + 14413 "01101000" // /* MW 3 */ + 14414 "00011100" // /* MW 2 */ + 14415 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 105 +.delay_slot + 14416 "11111000" // MOV r17, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14417 "11000000" // /* MW 3 */ + 14418 "01011110" // /* MW 2 */ + 14419 "00011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 105 first +.delay_slot + 14420 "00011000" // ADD.NC dc0, r17, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14421 "10010000" // /* MW 3 */ + 14422 "11001000" // /* MW 2 */ + 14423 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14427 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14429 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 first + 14430 "10000100" // JNZ r18, #14512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14512 delay_slots=5 */ + 14431 "00000001" // /* MW 5 */ + 14432 "01000000" // /* MW 4 */ + 14433 "01011000" // /* MW 3 */ + 14434 "00011100" // /* MW 2 */ + 14435 "10010000" // /* MW 1 */ +.delay_slot + 14436 "01000100" // MOVXM r16, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14437 "00000000" // /* MW 5 */ + 14438 "00101100" // /* MW 4 */ + 14439 "11001000" // /* MW 3 */ + 14440 "00000111" // /* MW 2 */ + 14441 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 27 +.delay_slot + 14442 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14443 "00000001" // /* MW 3 */ + 14444 "00100010" // /* MW 2 */ + 14445 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14451 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 first +.no_stack_arguments + 14452 "00111010" // ST p6, [sp, #-28]; JL #11136 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11136 delay_slots=5 */ + 14453 "01000001" // /* MW 9 */ + 14454 "00000000" // /* MW 8 */ + 14455 "00000000" // /* MW 7 */ + 14456 "01110000" // /* MW 6 */ + 14457 "00000101" // /* MW 5 */ + 14458 "00000000" // /* MW 4 */ + 14459 "10110000" // /* MW 3 */ + 14460 "11100011" // /* MW 2 */ + 14461 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 595 38 +.delay_slot + 14462 "01000100" // MOVXM p6, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14463 "10000000" // /* MW 5 */ + 14464 "11001010" // /* MW 4 */ + 14465 "11001100" // /* MW 3 */ + 14466 "00000111" // /* MW 2 */ + 14467 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 +.delay_slot + 14468 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14469 "10000000" // /* MW 5 */ + 14470 "11001010" // /* MW 4 */ + 14471 "11000000" // /* MW 3 */ + 14472 "00000111" // /* MW 2 */ + 14473 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 +.delay_slot + 14474 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14475 "10000000" // /* MW 3 */ + 14476 "01100001" // /* MW 2 */ + 14477 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14481 "00000000" // /* MW 15 */ + 14482 "00000000" // /* MW 14 */ + 14483 "01111000" // /* MW 13 */ + 14484 "10100101" // /* MW 12 */ + 14485 "00000001" // /* MW 11 */ + 14486 "00000000" // /* MW 10 */ + 14487 "00000000" // /* MW 9 */ + 14488 "00000000" // /* MW 8 */ + 14489 "01011011" // /* MW 7 */ + 14490 "00000001" // /* MW 6 */ + 14491 "00100000" // /* MW 5 */ + 14492 "00000000" // /* MW 4 */ + 14493 "11110000" // /* MW 3 */ + 14494 "00101100" // /* MW 2 */ + 14495 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 595 38 first +.return_address + 14496 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14497 "00010000" // /* MW 9 */ + 14498 "00000000" // /* MW 8 */ + 14499 "00001011" // /* MW 7 */ + 14500 "11110010" // /* MW 6 */ + 14501 "00000001" // /* MW 5 */ + 14502 "00000000" // /* MW 4 */ + 14503 "11010000" // /* MW 3 */ + 14504 "11000110" // /* MW 2 */ + 14505 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14506 "00111100" // LDA p6, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14507 "00100000" // /* MW 5 */ + 14508 "00000000" // /* MW 4 */ + 14509 "00100000" // /* MW 3 */ + 14510 "11100011" // /* MW 2 */ + 14511 "11111100" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272 + 14512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14513 "00000000" // /* MW 1 */ + 14514 "10000100" // J #14592 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=14592 delay_slots=5 */ + 14515 "00000000" // /* MW 5 */ + 14516 "00000000" // /* MW 4 */ + 14517 "10000000" // /* MW 3 */ + 14518 "00011100" // /* MW 2 */ + 14519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14527 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.delay_slot + 14528 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14529 "00000000" // /* MW 15 */ + 14530 "00000000" // /* MW 14 */ + 14531 "01111000" // /* MW 13 */ + 14532 "01100000" // /* MW 12 */ + 14533 "10110110" // /* MW 11 */ + 14534 "00000000" // /* MW 10 */ + 14535 "00000000" // /* MW 9 */ + 14536 "00000000" // /* MW 8 */ + 14537 "01011011" // /* MW 7 */ + 14538 "00000001" // /* MW 6 */ + 14539 "00100000" // /* MW 5 */ + 14540 "00000000" // /* MW 4 */ + 14541 "11110000" // /* MW 3 */ + 14542 "00101100" // /* MW 2 */ + 14543 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304 +.src_ref 7 "superkernels.cpp" 599 8 first +.no_stack_arguments + 14544 "00111010" // ST p6, [sp, #-28]; JL #11296 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 14545 "01000001" // /* MW 9 */ + 14546 "00000000" // /* MW 8 */ + 14547 "00000000" // /* MW 7 */ + 14548 "10000100" // /* MW 6 */ + 14549 "00000101" // /* MW 5 */ + 14550 "00000000" // /* MW 4 */ + 14551 "10110000" // /* MW 3 */ + 14552 "11100011" // /* MW 2 */ + 14553 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 600 38 +.delay_slot + 14554 "01000100" // MOVXM p6, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14555 "00000000" // /* MW 5 */ + 14556 "11001011" // /* MW 4 */ + 14557 "11001100" // /* MW 3 */ + 14558 "00000111" // /* MW 2 */ + 14559 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 8 +.delay_slot + 14560 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14561 "00000000" // /* MW 5 */ + 14562 "11001011" // /* MW 4 */ + 14563 "11000000" // /* MW 3 */ + 14564 "00000111" // /* MW 2 */ + 14565 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 8 +.delay_slot + 14566 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14567 "10000000" // /* MW 3 */ + 14568 "01100001" // /* MW 2 */ + 14569 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14572 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14573 "01100111" // /* MW 3 */ + 14574 "00000001" // /* MW 2 */ + 14575 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 600 38 first +.return_address + 14576 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14577 "00010000" // /* MW 9 */ + 14578 "00000000" // /* MW 8 */ + 14579 "00001011" // /* MW 7 */ + 14580 "11110010" // /* MW 6 */ + 14581 "00000001" // /* MW 5 */ + 14582 "00000000" // /* MW 4 */ + 14583 "11010000" // /* MW 3 */ + 14584 "11000110" // /* MW 2 */ + 14585 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14586 "00111100" // LDA p1, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14587 "00100000" // /* MW 5 */ + 14588 "00000000" // /* MW 4 */ + 14589 "00100000" // /* MW 3 */ + 14590 "10010011" // /* MW 2 */ + 14591 "11111100" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352 + 14592 "10011000" // ADD.NC p3, r16, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14593 "00000101" // /* MW 3 */ + 14594 "01101000" // /* MW 2 */ + 14595 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 35 first +.src_ref 7 "superkernels.cpp" 611 18 + 14596 "10111010" // LDA.u8 r19, [p3], #7; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14597 "00010000" // /* MW 9 */ + 14598 "00101000" // /* MW 8 */ + 14599 "00110010" // /* MW 7 */ + 14600 "11110011" // /* MW 6 */ + 14601 "00000001" // /* MW 5 */ + 14602 "00000000" // /* MW 4 */ + 14603 "01010000" // /* MW 3 */ + 14604 "11001101" // /* MW 2 */ + 14605 "01101111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 18 first + 14606 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14607 "01010110" // /* MW 3 */ + 14608 "00000110" // /* MW 2 */ + 14609 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 37 first + 14610 "10011000" // LDA.u16 r21, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14611 "10111010" // /* MW 3 */ + 14612 "00011110" // /* MW 2 */ + 14613 "00000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 73 + 14614 "10011000" // LDA.u16 r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14615 "00011010" // /* MW 3 */ + 14616 "00000110" // /* MW 2 */ + 14617 "00000011" // /* MW 1 */ + 14618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14619 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 110 + 14620 "10011000" // LDA.u16 r20, [p3, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14621 "10011010" // /* MW 3 */ + 14622 "00010110" // /* MW 2 */ + 14623 "00000011" // /* MW 1 */ + 14624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14625 "00000000" // /* MW 1 */ + 14626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14627 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 19 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 14628 "01000100" // MOVXM p0, #508996 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14629 "10001000" // /* MW 5 */ + 14630 "11001000" // /* MW 4 */ + 14631 "11000000" // /* MW 3 */ + 14632 "00000111" // /* MW 2 */ + 14633 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 57 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 14634 "10011000" // MUL r19, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14635 "01011111" // /* MW 3 */ + 14636 "11100111" // /* MW 2 */ + 14637 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 19 first +.src_ref 7 "superkernels.cpp" 611 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14638 "00111010" // ST r19, [p0]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14639 "00010001" // /* MW 9 */ + 14640 "00101110" // /* MW 8 */ + 14641 "00110010" // /* MW 7 */ + 14642 "11110001" // /* MW 6 */ + 14643 "00000001" // /* MW 5 */ + 14644 "00000000" // /* MW 4 */ + 14645 "00110000" // /* MW 3 */ + 14646 "11001110" // /* MW 2 */ + 14647 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 94 first + 14648 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14649 "00001111" // /* MW 3 */ + 14650 "11100001" // /* MW 2 */ + 14651 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 27 first + 14652 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14653 "00101111" // /* MW 3 */ + 14654 "01100011" // /* MW 2 */ + 14655 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 28 first + 14656 "10011000" // MUL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14657 "00001111" // /* MW 3 */ + 14658 "00100001" // /* MW 2 */ + 14659 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 13 +.src_ref 7 "superkernels.cpp" 611 16 first + 14660 "01110110" // NOPA; ST r17, [p2]; MOVXM p6, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14661 "00010000" // /* MW 11 */ + 14662 "00110000" // /* MW 10 */ + 14663 "00110010" // /* MW 9 */ + 14664 "11110011" // /* MW 8 */ + 14665 "00000001" // /* MW 7 */ + 14666 "10000000" // /* MW 6 */ + 14667 "00110001" // /* MW 5 */ + 14668 "00000110" // /* MW 4 */ + 14669 "11110010" // /* MW 3 */ + 14670 "00101100" // /* MW 2 */ + 14671 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 13 first + 14672 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14673 "00000000" // /* MW 15 */ + 14674 "00000000" // /* MW 14 */ + 14675 "01111000" // /* MW 13 */ + 14676 "10100101" // /* MW 12 */ + 14677 "00000001" // /* MW 11 */ + 14678 "00000000" // /* MW 10 */ + 14679 "00000000" // /* MW 9 */ + 14680 "10000000" // /* MW 8 */ + 14681 "00010001" // /* MW 7 */ + 14682 "00000110" // /* MW 6 */ + 14683 "00100110" // /* MW 5 */ + 14684 "00000000" // /* MW 4 */ + 14685 "11110000" // /* MW 3 */ + 14686 "00101100" // /* MW 2 */ + 14687 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448 +.src_ref 7 "superkernels.cpp" 614 12 + 14688 "01000100" // MOVXM p0, #509000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14689 "10010000" // /* MW 5 */ + 14690 "11001000" // /* MW 4 */ + 14691 "11000000" // /* MW 3 */ + 14692 "00000111" // /* MW 2 */ + 14693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.src_ref 7 "superkernels.cpp" 616 11 + 14694 "10111010" // LDA r16, [p0]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14695 "00010000" // /* MW 9 */ + 14696 "00100000" // /* MW 8 */ + 14697 "00110010" // /* MW 7 */ + 14698 "11110001" // /* MW 6 */ + 14699 "00000001" // /* MW 5 */ + 14700 "00000000" // /* MW 4 */ + 14701 "11010000" // /* MW 3 */ + 14702 "11000010" // /* MW 2 */ + 14703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 +.src_ref 7 "superkernels.cpp" 616 11 first + 14704 "10111010" // LDA r17, [p2]; MOVXM p6, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14705 "00010000" // /* MW 9 */ + 14706 "00100110" // /* MW 8 */ + 14707 "00110010" // /* MW 7 */ + 14708 "11110011" // /* MW 6 */ + 14709 "00000001" // /* MW 5 */ + 14710 "00000000" // /* MW 4 */ + 14711 "11010000" // /* MW 3 */ + 14712 "11000110" // /* MW 2 */ + 14713 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first + 14714 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14715 "01010110" // /* MW 3 */ + 14716 "00000110" // /* MW 2 */ + 14717 "00000110" // /* MW 1 */ + 14718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14719 "00000000" // /* MW 1 */ + 14720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14721 "00000000" // /* MW 1 */ + 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14723 "00000000" // /* MW 1 */ + 14724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14725 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 618 6 first +.src_ref 7 "superkernels.cpp" 618 17 first + 14726 "10000100" // JNZ r16, #14832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14832 delay_slots=5 */ + 14727 "00000001" // /* MW 5 */ + 14728 "01000000" // /* MW 4 */ + 14729 "11111000" // /* MW 3 */ + 14730 "00011100" // /* MW 2 */ + 14731 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.src_ref 7 "superkernels.cpp" 616 11 first +.delay_slot + 14732 "00100100" // ADD r17, r17, #1; ADD.NC r19, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14733 "00000001" // /* MW 5 */ + 14734 "10110000" // /* MW 4 */ + 14735 "11101001" // /* MW 3 */ + 14736 "01000000" // /* MW 2 */ + 14737 "10001100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first +.delay_slot + 14738 "00011000" // ADD r18, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14739 "00000111" // /* MW 3 */ + 14740 "10100100" // /* MW 2 */ + 14741 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 616 11 first +.delay_slot + 14742 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14743 "00110001" // /* MW 3 */ + 14744 "00000110" // /* MW 2 */ + 14745 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first +.delay_slot + 14746 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14747 "01010001" // /* MW 3 */ + 14748 "00000110" // /* MW 2 */ + 14749 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.delay_slot + 14750 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14751 "01110001" // /* MW 3 */ + 14752 "00000110" // /* MW 2 */ + 14753 "00001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 14754 "00011000" // LDA r17, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14755 "00110001" // /* MW 3 */ + 14756 "11110110" // /* MW 2 */ + 14757 "00000111" // /* MW 1 */ + 14758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14759 "00000000" // /* MW 1 */ + 14760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14761 "00000000" // /* MW 1 */ + 14762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14763 "00000000" // /* MW 1 */ + 14764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14765 "00000000" // /* MW 1 */ + 14766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14767 "00000000" // /* MW 1 */ + 14768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14769 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 14770 "00011000" // ADD.NC p6, r17, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14771 "10000110" // /* MW 3 */ + 14772 "01101000" // /* MW 2 */ + 14773 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 14774 "10011000" // LDA r27, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14775 "01110110" // /* MW 3 */ + 14776 "11111111" // /* MW 2 */ + 14777 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 14778 "10011000" // LDA r17, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14779 "00110110" // /* MW 3 */ + 14780 "11111110" // /* MW 2 */ + 14781 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 14782 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14783 "01010110" // /* MW 3 */ + 14784 "11111110" // /* MW 2 */ + 14785 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 14786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 14788 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14789 "00110110" // /* MW 3 */ + 14790 "01000110" // /* MW 2 */ + 14791 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14793 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14795 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14797 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14799 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14800 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14801 "00010010" // /* MW 3 */ + 14802 "10100011" // /* MW 2 */ + 14803 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.src_ref 1 "io_buffer_main.h" 395 8 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14804 "01011100" // ST r17, [p6]; MOVX r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14805 "11111010" // /* MW 5 */ + 14806 "11000001" // /* MW 4 */ + 14807 "00111111" // /* MW 3 */ + 14808 "11000110" // /* MW 2 */ + 14809 "11000000" // /* MW 1 */ + 14810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14811 "00000000" // /* MW 1 */ + 14812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14813 "00000000" // /* MW 1 */ + 14814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14815 "00000000" // /* MW 1 */ + 14816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14817 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14818 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 14819 "01100000" // /* MW 13 */ + 14820 "00101011" // /* MW 12 */ + 14821 "00000000" // /* MW 11 */ + 14822 "10101111" // /* MW 10 */ + 14823 "00110100" // /* MW 9 */ + 14824 "00000000" // /* MW 8 */ + 14825 "00001000" // /* MW 7 */ + 14826 "01010011" // /* MW 6 */ + 14827 "00100100" // /* MW 5 */ + 14828 "00000000" // /* MW 4 */ + 14829 "11110000" // /* MW 3 */ + 14830 "00101100" // /* MW 2 */ + 14831 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592 + 14832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14833 "00000000" // /* MW 1 */ + 14834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14835 "00000000" // /* MW 1 */ + 14836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14837 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.src_ref 1 "io_buffer_main.h" 125 25 + 14838 "00011000" // LDA p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14839 "00011001" // /* MW 3 */ + 14840 "11110101" // /* MW 2 */ + 14841 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14842 "00011000" // LDA p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14843 "00011001" // /* MW 3 */ + 14844 "11101000" // /* MW 2 */ + 14845 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 first +.no_stack_arguments + 14846 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 14847 "00000001" // /* MW 5 */ + 14848 "00000000" // /* MW 4 */ + 14849 "10111000" // /* MW 3 */ + 14850 "00001000" // /* MW 2 */ + 14851 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.delay_slot + 14852 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14853 "00000000" // /* MW 5 */ + 14854 "11001100" // /* MW 4 */ + 14855 "11000110" // /* MW 3 */ + 14856 "00000111" // /* MW 2 */ + 14857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 14864 "11100001" // NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14865 "00000000" // /* MW 15 */ + 14866 "00000000" // /* MW 14 */ + 14867 "01111000" // /* MW 13 */ + 14868 "10100101" // /* MW 12 */ + 14869 "00000001" // /* MW 11 */ + 14870 "00000000" // /* MW 10 */ + 14871 "00000000" // /* MW 9 */ + 14872 "00000000" // /* MW 8 */ + 14873 "10001011" // /* MW 7 */ + 14874 "10001000" // /* MW 6 */ + 14875 "00100110" // /* MW 5 */ + 14876 "00000000" // /* MW 4 */ + 14877 "11110000" // /* MW 3 */ + 14878 "00101100" // /* MW 2 */ + 14879 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 +.src_ref 1 "io_buffer_main.h" 218 49 +.return_address + 14880 "10111010" // LDA r16, [sp, #-16]; MOVXM p1, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14881 "00010000" // /* MW 9 */ + 14882 "00100100" // /* MW 8 */ + 14883 "10110010" // /* MW 7 */ + 14884 "11110000" // /* MW 6 */ + 14885 "00000001" // /* MW 5 */ + 14886 "00000000" // /* MW 4 */ + 14887 "00100000" // /* MW 3 */ + 14888 "01000010" // /* MW 2 */ + 14889 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 first +.src_ref 7 "superkernels.cpp" 623 20 + 14890 "10111010" // LDA r17, [p1]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14891 "00010000" // /* MW 9 */ + 14892 "00100010" // /* MW 8 */ + 14893 "10110010" // /* MW 7 */ + 14894 "11110000" // /* MW 6 */ + 14895 "00000001" // /* MW 5 */ + 14896 "00000000" // /* MW 4 */ + 14897 "11010000" // /* MW 3 */ + 14898 "11000110" // /* MW 2 */ + 14899 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 20 + 14900 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14901 "01010110" // /* MW 3 */ + 14902 "00000110" // /* MW 2 */ + 14903 "00000001" // /* MW 1 */ + 14904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14905 "00000000" // /* MW 1 */ + 14906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14907 "00000000" // /* MW 1 */ + 14908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14909 "00000000" // /* MW 1 */ + 14910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14911 "00000000" // /* MW 1 */ + 14912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14913 "00000000" // /* MW 1 */ + 14914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14915 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 17 + 14916 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14917 "00101000" // /* MW 3 */ + 14918 "01100011" // /* MW 2 */ + 14919 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 + 14920 "10000100" // JNZ r17, #15264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15264 delay_slots=5 */ + 14921 "00000001" // /* MW 5 */ + 14922 "01000000" // /* MW 4 */ + 14923 "11010000" // /* MW 3 */ + 14924 "00011101" // /* MW 2 */ + 14925 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14935 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 629 34 +.src_ref 1 "io_buffer_main.h" 218 49 first +.src_ref 1 "io_buffer_main.h" 395 8 + 14936 "10111010" // MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14937 "00001000" // /* MW 9 */ + 14938 "00000011" // /* MW 8 */ + 14939 "10110100" // /* MW 7 */ + 14940 "11101000" // /* MW 6 */ + 14941 "00010111" // /* MW 5 */ + 14942 "00111111" // /* MW 4 */ + 14943 "10000000" // /* MW 3 */ + 14944 "00000010" // /* MW 2 */ + 14945 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 52 +.src_ref 1 "io_buffer_main.h" 218 49 + 14946 "10111010" // LDA r27, [p1], #-4; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14947 "00010000" // /* MW 9 */ + 14948 "00101110" // /* MW 8 */ + 14949 "00110010" // /* MW 7 */ + 14950 "11110000" // /* MW 6 */ + 14951 "00000001" // /* MW 5 */ + 14952 "00000000" // /* MW 4 */ + 14953 "11010000" // /* MW 3 */ + 14954 "11101110" // /* MW 2 */ + 14955 "00111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 14956 "10011000" // LDA r18, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14957 "01010110" // /* MW 3 */ + 14958 "11111110" // /* MW 2 */ + 14959 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 14960 "10011000" // LDA r19, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14961 "01110110" // /* MW 3 */ + 14962 "11111110" // /* MW 2 */ + 14963 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 14964 "10011000" // LDA r20, [p1, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14965 "10010110" // /* MW 3 */ + 14966 "01010110" // /* MW 2 */ + 14967 "00000001" // /* MW 1 */ + 14968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14969 "00000000" // /* MW 1 */ + 14970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14971 "00000000" // /* MW 1 */ + 14972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14973 "00000000" // /* MW 1 */ + 14974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14975 "00000000" // /* MW 1 */ + 14976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14977 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 14978 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14979 "00100010" // /* MW 3 */ + 14980 "11100101" // /* MW 2 */ + 14981 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 +.src_ref 7 "superkernels.cpp" 630 3 +.src_ref 1 "io_buffer_main.h" 218 20 + 14982 "01011100" // ST r18, [p1]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14983 "00001010" // /* MW 5 */ + 14984 "01000000" // /* MW 4 */ + 14985 "00110000" // /* MW 3 */ + 14986 "11001010" // /* MW 2 */ + 14987 "00100000" // /* MW 1 */ + 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14989 "00000000" // /* MW 1 */ + 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14991 "00000000" // /* MW 1 */ + 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14993 "00000000" // /* MW 1 */ + 14994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14995 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14996 "00011000" // ACQ r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14997 "00011000" // /* MW 3 */ + 14998 "00010011" // /* MW 2 */ + 14999 "00010101" // /* MW 1 */ + 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15001 "00000000" // /* MW 1 */ + 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15003 "00000000" // /* MW 1 */ + 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15005 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 52 first + 15006 "10011000" // LDA r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15007 "01110110" // /* MW 3 */ + 15008 "00000110" // /* MW 2 */ + 15009 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 629 34 first + 15010 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15011 "01010110" // /* MW 3 */ + 15012 "00000010" // /* MW 2 */ + 15013 "00000111" // /* MW 1 */ + 15014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15015 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 15016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 15018 "10011000" // LDA p0, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15019 "00011110" // /* MW 3 */ + 15020 "01011100" // /* MW 2 */ + 15021 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15023 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15025 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 first +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 32 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15026 "10111010" // LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15027 "01111000" // /* MW 9 */ + 15028 "01100000" // /* MW 8 */ + 15029 "00110001" // /* MW 7 */ + 15030 "01101100" // /* MW 6 */ + 15031 "00111000" // /* MW 5 */ + 15032 "00100111" // /* MW 4 */ + 15033 "11010000" // /* MW 3 */ + 15034 "11000110" // /* MW 2 */ + 15035 "00101001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15036 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15037 "00000111" // /* MW 3 */ + 15038 "10100001" // /* MW 2 */ + 15039 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15040 "10000100" // JNZ r16, #15120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15120 delay_slots=5 */ + 15041 "00000001" // /* MW 5 */ + 15042 "01000000" // /* MW 4 */ + 15043 "10001000" // /* MW 3 */ + 15044 "00011101" // /* MW 2 */ + 15045 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15046 "00011000" // MOVS p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15047 "10001011" // /* MW 3 */ + 15048 "10000000" // /* MW 2 */ + 15049 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15055 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 first +.delay_slot + 15056 "00000010" // ST p1, [sp, #-16]; ADD.NC p1, r19, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15057 "10100000" // /* MW 7 */ + 15058 "11100010" // /* MW 6 */ + 15059 "10110100" // /* MW 5 */ + 15060 "00000000" // /* MW 4 */ + 15061 "10110000" // /* MW 3 */ + 15062 "00010011" // /* MW 2 */ + 15063 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 first + 15064 "10000100" // JNZ r18, #15152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15152 delay_slots=5 */ + 15065 "00000001" // /* MW 5 */ + 15066 "01000000" // /* MW 4 */ + 15067 "10011000" // /* MW 3 */ + 15068 "00011101" // /* MW 2 */ + 15069 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15079 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 first +.no_stack_arguments + 15080 "00000100" // JL #11248 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11248 delay_slots=5 */ + 15081 "00000001" // /* MW 5 */ + 15082 "00000000" // /* MW 4 */ + 15083 "11111000" // /* MW 3 */ + 15084 "00010101" // /* MW 2 */ + 15085 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 +.delay_slot + 15086 "01000100" // MOVXM p3, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15087 "10000000" // /* MW 5 */ + 15088 "11001010" // /* MW 4 */ + 15089 "11000110" // /* MW 3 */ + 15090 "00000111" // /* MW 2 */ + 15091 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15093 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15095 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15097 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 +.delay_slot + 15098 "11010100" // NOPA; MOV p2, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15099 "10000001" // /* MW 5 */ + 15100 "11000001" // /* MW 4 */ + 15101 "11110100" // /* MW 3 */ + 15102 "00101100" // /* MW 2 */ + 15103 "00000000" // /* MW 1 */ +.return_address + 15104 "10000100" // J #15152 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15152 delay_slots=5 */ + 15105 "00000000" // /* MW 5 */ + 15106 "00000000" // /* MW 4 */ + 15107 "10011000" // /* MW 3 */ + 15108 "00011101" // /* MW 2 */ + 15109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15111 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15119 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880 +.src_ref 7 "superkernels.cpp" 637 8 first +.no_stack_arguments + 15120 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */ + 15121 "00000001" // /* MW 5 */ + 15122 "00000000" // /* MW 4 */ + 15123 "01011000" // /* MW 3 */ + 15124 "00010110" // /* MW 2 */ + 15125 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 637 8 +.delay_slot + 15126 "01000100" // MOVXM p3, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15127 "00000000" // /* MW 5 */ + 15128 "11001011" // /* MW 4 */ + 15129 "11000110" // /* MW 3 */ + 15130 "00000111" // /* MW 2 */ + 15131 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 637 8 +.delay_slot + 15132 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15133 "11000000" // /* MW 3 */ + 15134 "01100000" // /* MW 2 */ + 15135 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15140 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 15141 "10000001" // /* MW 11 */ + 15142 "10101101" // /* MW 10 */ + 15143 "00000000" // /* MW 9 */ + 15144 "00000000" // /* MW 8 */ + 15145 "00000000" // /* MW 7 */ + 15146 "00000000" // /* MW 6 */ + 15147 "00100000" // /* MW 5 */ + 15148 "00000000" // /* MW 4 */ + 15149 "11110000" // /* MW 3 */ + 15150 "00101100" // /* MW 2 */ + 15151 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912 +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.return_address + 15152 "00011000" // LDA p1, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15153 "10011001" // /* MW 3 */ + 15154 "11110000" // /* MW 2 */ + 15155 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 + 15156 "00101100" // LDA p0, [sp, #-12]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15157 "00001010" // /* MW 5 */ + 15158 "01000100" // /* MW 4 */ + 15159 "00100000" // /* MW 3 */ + 15160 "10000011" // /* MW 2 */ + 15161 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 +.src_ref 1 "io_buffer_main.h" 324 32 first + 15162 "10111010" // LDA r16, [p7, #16]; MOVXM p7, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15163 "00010000" // /* MW 9 */ + 15164 "00100100" // /* MW 8 */ + 15165 "10110010" // /* MW 7 */ + 15166 "11110011" // /* MW 6 */ + 15167 "00000001" // /* MW 5 */ + 15168 "00000000" // /* MW 4 */ + 15169 "11010000" // /* MW 3 */ + 15170 "11000010" // /* MW 2 */ + 15171 "11101000" // /* MW 1 */ + 15172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15173 "00000000" // /* MW 1 */ + 15174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15175 "00000000" // /* MW 1 */ + 15176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15177 "00000000" // /* MW 1 */ + 15178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15179 "00000000" // /* MW 1 */ + 15180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15181 "00000000" // /* MW 1 */ + 15182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15183 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 15184 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15185 "00011000" // /* MW 3 */ + 15186 "00010001" // /* MW 2 */ + 15187 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 15188 "10011000" // LDA r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15189 "01010110" // /* MW 3 */ + 15190 "11110110" // /* MW 2 */ + 15191 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 15192 "10011000" // LDA r16, [p0, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15193 "00010110" // /* MW 3 */ + 15194 "01010110" // /* MW 2 */ + 15195 "00000000" // /* MW 1 */ + 15196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15197 "00000000" // /* MW 1 */ + 15198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15199 "00000000" // /* MW 1 */ + 15200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15201 "00000000" // /* MW 1 */ + 15202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15203 "00000000" // /* MW 1 */ + 15204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 15206 "10011000" // SUB r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15207 "00100001" // /* MW 3 */ + 15208 "01100101" // /* MW 2 */ + 15209 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 15210 "10011000" // ST r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15211 "01010001" // /* MW 3 */ + 15212 "11110110" // /* MW 2 */ + 15213 "00001001" // /* MW 1 */ + 15214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15215 "00000000" // /* MW 1 */ + 15216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15217 "00000000" // /* MW 1 */ + 15218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15219 "00000000" // /* MW 1 */ + 15220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15221 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 15222 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15223 "00011000" // /* MW 3 */ + 15224 "00010001" // /* MW 2 */ + 15225 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 15226 "10011000" // LDA r18, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15227 "01010110" // /* MW 3 */ + 15228 "11100110" // /* MW 2 */ + 15229 "00000110" // /* MW 1 */ + 15230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15231 "00000000" // /* MW 1 */ + 15232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15233 "00000000" // /* MW 1 */ + 15234 "10000100" // J #15280 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15280 delay_slots=5 */ + 15235 "00000000" // /* MW 5 */ + 15236 "00000000" // /* MW 4 */ + 15237 "11011000" // /* MW 3 */ + 15238 "00011101" // /* MW 2 */ + 15239 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15241 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15243 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 +.src_ref 7 "superkernels.cpp" 649 14 +.delay_slot + 15244 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15245 "00000001" // /* MW 3 */ + 15246 "00100000" // /* MW 2 */ + 15247 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 first +.src_ref 1 "io_buffer_main.h" 327 32 +.delay_slot + 15248 "01011100" // ST r16, [p7]; SUB r17, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15249 "01000011" // /* MW 5 */ + 15250 "11000110" // /* MW 4 */ + 15251 "00111000" // /* MW 3 */ + 15252 "11000010" // /* MW 2 */ + 15253 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 first +.delay_slot + 15254 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15255 "00000000" // /* MW 9 */ + 15256 "00000000" // /* MW 8 */ + 15257 "00000000" // /* MW 7 */ + 15258 "10000000" // /* MW 6 */ + 15259 "00110001" // /* MW 5 */ + 15260 "11100110" // /* MW 4 */ + 15261 "11110110" // /* MW 3 */ + 15262 "00101100" // /* MW 2 */ + 15263 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024 +.src_ref 7 "superkernels.cpp" 649 14 + 15264 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 15265 "00000000" // /* MW 15 */ + 15266 "00000000" // /* MW 14 */ + 15267 "01111000" // /* MW 13 */ + 15268 "10100101" // /* MW 12 */ + 15269 "00000001" // /* MW 11 */ + 15270 "00001000" // /* MW 10 */ + 15271 "00000000" // /* MW 9 */ + 15272 "00000001" // /* MW 8 */ + 15273 "01011011" // /* MW 7 */ + 15274 "00000001" // /* MW 6 */ + 15275 "00100000" // /* MW 5 */ + 15276 "00000000" // /* MW 4 */ + 15277 "11110000" // /* MW 3 */ + 15278 "00101100" // /* MW 2 */ + 15279 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040 +.src_ref 7 "superkernels.cpp" 648 19 +.src_ref 7 "superkernels.cpp" 651 + 15280 "10111010" // LDA lr, [sp, #-20]; MOVXM p7, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15281 "00010000" // /* MW 9 */ + 15282 "00110000" // /* MW 8 */ + 15283 "10110010" // /* MW 7 */ + 15284 "11110011" // /* MW 6 */ + 15285 "00000001" // /* MW 5 */ + 15286 "00000000" // /* MW 4 */ + 15287 "00100000" // /* MW 3 */ + 15288 "10000111" // /* MW 2 */ + 15289 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 +.src_ref 7 "superkernels.cpp" 648 19 first +.src_ref 7 "superkernels.cpp" 649 14 + 15290 "10111010" // LDA r18, [p7]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15291 "00010000" // /* MW 9 */ + 15292 "00100000" // /* MW 8 */ + 15293 "00110010" // /* MW 7 */ + 15294 "11110011" // /* MW 6 */ + 15295 "00000001" // /* MW 5 */ + 15296 "00000000" // /* MW 4 */ + 15297 "11010000" // /* MW 3 */ + 15298 "11001010" // /* MW 2 */ + 15299 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 + 15300 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15301 "00110110" // /* MW 3 */ + 15302 "00000110" // /* MW 2 */ + 15303 "00000110" // /* MW 1 */ + 15304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15305 "00000000" // /* MW 1 */ + 15306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15307 "00000000" // /* MW 1 */ + 15308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15309 "00000000" // /* MW 1 */ + 15310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15311 "00000000" // /* MW 1 */ + 15312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15313 "00000000" // /* MW 1 */ + 15314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15315 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 16 + 15316 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15317 "00101000" // /* MW 3 */ + 15318 "01100011" // /* MW 2 */ + 15319 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 + 15320 "10000100" // JNZ r17, #15344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15344 delay_slots=5 */ + 15321 "00000001" // /* MW 5 */ + 15322 "01000000" // /* MW 4 */ + 15323 "11111000" // /* MW 3 */ + 15324 "00011101" // /* MW 2 */ + 15325 "10001000" // /* MW 1 */ +.delay_slot + 15326 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15327 "10011001" // /* MW 3 */ + 15328 "11111011" // /* MW 2 */ + 15329 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15337 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 649 14 first + 15338 "00001100" // NOPA; ST r16, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15339 "00100011" // /* MW 5 */ + 15340 "00001100" // /* MW 4 */ + 15341 "11111100" // /* MW 3 */ + 15342 "00101100" // /* MW 2 */ + 15343 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104 + 15344 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15345 "00011001" // /* MW 3 */ + 15346 "11111111" // /* MW 2 */ + 15347 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 651 first + 15348 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15349 "00000000" // /* MW 3 */ + 15350 "00101000" // /* MW 2 */ + 15351 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 651 +.delay_slot + 15352 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15353 "00000001" // /* MW 5 */ + 15354 "00000000" // /* MW 4 */ + 15355 "00000000" // /* MW 3 */ + 15356 "11111000" // /* MW 2 */ + 15357 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0 + 15365 "00000000" // /* MW 1 */ +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function _b896_wrapper _Z13_b896_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 21 first +.src_ref 0 "0_0_reloadable5.cc" 23 79 +.function_start + 15376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15377 "11000000" // /* MW 3 */ + 15378 "01100000" // /* MW 2 */ + 15379 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 23 79 first + 15380 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15381 "00011110" // /* MW 3 */ + 15382 "00011100" // /* MW 2 */ + 15383 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 24 79 first + 15384 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15385 "10011110" // /* MW 3 */ + 15386 "00101100" // /* MW 2 */ + 15387 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 26 81 first + 15388 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15389 "10011110" // /* MW 3 */ + 15390 "11110101" // /* MW 2 */ + 15391 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 25 47 first + 15392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15393 "00011110" // /* MW 3 */ + 15394 "00000101" // /* MW 2 */ + 15395 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 22 4 first +.tail_call + 15396 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */ + 15397 "00000000" // /* MW 5 */ + 15398 "00000000" // /* MW 4 */ + 15399 "01110000" // /* MW 3 */ + 15400 "00001101" // /* MW 2 */ + 15401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + 15411 "00000000" // /* MW 1 */ +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function _b901_wrapper _Z13_b901_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 30 first +.src_ref 0 "0_0_reloadable5.cc" 32 79 +.function_start + 15424 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15425 "11000000" // /* MW 3 */ + 15426 "01100000" // /* MW 2 */ + 15427 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 32 79 first + 15428 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15429 "00011110" // /* MW 3 */ + 15430 "00101100" // /* MW 2 */ + 15431 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 34 81 first + 15432 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15433 "00011110" // /* MW 3 */ + 15434 "11110101" // /* MW 2 */ + 15435 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 33 47 first + 15436 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15437 "10011110" // /* MW 3 */ + 15438 "00000100" // /* MW 2 */ + 15439 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 31 4 first +.tail_call + 15440 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */ + 15441 "00000000" // /* MW 5 */ + 15442 "00000000" // /* MW 4 */ + 15443 "00011000" // /* MW 3 */ + 15444 "00010000" // /* MW 2 */ + 15445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 + 15455 "00000000" // /* MW 1 */ +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function _b906_wrapper _Z13_b906_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 38 first +.src_ref 0 "0_0_reloadable5.cc" 40 79 +.function_start + 15456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15457 "11000000" // /* MW 3 */ + 15458 "01100000" // /* MW 2 */ + 15459 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 40 79 first + 15460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15461 "00011110" // /* MW 3 */ + 15462 "00101100" // /* MW 2 */ + 15463 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 42 81 first + 15464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15465 "00011110" // /* MW 3 */ + 15466 "11110101" // /* MW 2 */ + 15467 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 41 47 first + 15468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15469 "10011110" // /* MW 3 */ + 15470 "00000100" // /* MW 2 */ + 15471 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 39 4 first +.tail_call + 15472 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */ + 15473 "00000000" // /* MW 5 */ + 15474 "00000000" // /* MW 4 */ + 15475 "11001000" // /* MW 3 */ + 15476 "00010001" // /* MW 2 */ + 15477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 + 15487 "00000000" // /* MW 1 */ +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function _b881_wrapper _Z13_b881_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 46 first +.src_ref 0 "0_0_reloadable5.cc" 48 79 +.function_start + 15488 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15489 "11000000" // /* MW 3 */ + 15490 "01100000" // /* MW 2 */ + 15491 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 48 79 first + 15492 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15493 "00011110" // /* MW 3 */ + 15494 "00101100" // /* MW 2 */ + 15495 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 50 81 first + 15496 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15497 "00011110" // /* MW 3 */ + 15498 "11110101" // /* MW 2 */ + 15499 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 49 47 first + 15500 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15501 "10011110" // /* MW 3 */ + 15502 "00000100" // /* MW 2 */ + 15503 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 47 4 first +.tail_call + 15504 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */ + 15505 "00000000" // /* MW 5 */ + 15506 "00000000" // /* MW 4 */ + 15507 "10001000" // /* MW 3 */ + 15508 "00010100" // /* MW 2 */ + 15509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 + 15519 "00000000" // /* MW 1 */ +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function _b891_wrapper _Z13_b891_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 54 first +.src_ref 0 "0_0_reloadable5.cc" 56 79 +.function_start + 15520 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15521 "11000000" // /* MW 3 */ + 15522 "01100000" // /* MW 2 */ + 15523 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 56 79 first + 15524 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15525 "00011110" // /* MW 3 */ + 15526 "00111100" // /* MW 2 */ + 15527 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 57 47 first + 15528 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15529 "10011110" // /* MW 3 */ + 15530 "11101100" // /* MW 2 */ + 15531 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 59 81 first + 15532 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15533 "10011110" // /* MW 3 */ + 15534 "00010101" // /* MW 2 */ + 15535 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 58 80 first + 15536 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15537 "00011110" // /* MW 3 */ + 15538 "00000101" // /* MW 2 */ + 15539 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 55 4 first +.tail_call + 15540 "10000100" // J #11744 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 15541 "00000000" // /* MW 5 */ + 15542 "00000000" // /* MW 4 */ + 15543 "11110000" // /* MW 3 */ + 15544 "00010110" // /* MW 2 */ + 15545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15551 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15553 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + 15555 "00000000" // /* MW 1 */ +.label __Z13_b924_wrapperPPv___func_begin0 +.label _Z13_b924_wrapperPPv +.function _b924_wrapper _Z13_b924_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 63 first +.src_ref 0 "0_0_reloadable5.cc" 65 79 +.function_start + 15568 "11111000" // MOV p3, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15569 "11000000" // /* MW 3 */ + 15570 "01100000" // /* MW 2 */ + 15571 "00011011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 65 79 first + 15572 "10011000" // LDA p0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15573 "00011110" // /* MW 3 */ + 15574 "00011100" // /* MW 2 */ + 15575 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 66 79 first + 15576 "10011000" // LDA p1, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15577 "10011110" // /* MW 3 */ + 15578 "00011100" // /* MW 2 */ + 15579 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 67 80 first + 15580 "10011000" // LDA p2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15581 "00011110" // /* MW 3 */ + 15582 "00101101" // /* MW 2 */ + 15583 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 69 81 first + 15584 "10011000" // LDA p4, [p3, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15585 "00011110" // /* MW 3 */ + 15586 "11110110" // /* MW 2 */ + 15587 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 68 47 first + 15588 "10011000" // LDA p3, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15589 "10011110" // /* MW 3 */ + 15590 "00000101" // /* MW 2 */ + 15591 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 64 4 first +.tail_call + 15592 "10000100" // J #14240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=14240 delay_slots=5 */ + 15593 "00000000" // /* MW 5 */ + 15594 "00000000" // /* MW 4 */ + 15595 "11010000" // /* MW 3 */ + 15596 "00011011" // /* MW 2 */ + 15597 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b924_wrapperPPv__end +.label __Z13_b924_wrapperPPv___func_end0 + 15607 "00000000" // /* MW 1 */ +.label __Z13_b919_wrapperPPv___func_begin0 +.label _Z13_b919_wrapperPPv +.function _b919_wrapper _Z13_b919_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 73 first +.src_ref 0 "0_0_reloadable5.cc" 75 79 +.function_start + 15616 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15617 "11000000" // /* MW 3 */ + 15618 "01100000" // /* MW 2 */ + 15619 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 75 79 first + 15620 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15621 "00011110" // /* MW 3 */ + 15622 "00011100" // /* MW 2 */ + 15623 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 76 79 first + 15624 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15625 "10011110" // /* MW 3 */ + 15626 "00101100" // /* MW 2 */ + 15627 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 78 81 first + 15628 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15629 "10011110" // /* MW 3 */ + 15630 "11110101" // /* MW 2 */ + 15631 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 77 47 first + 15632 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15633 "00011110" // /* MW 3 */ + 15634 "00000101" // /* MW 2 */ + 15635 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 74 4 first +.tail_call + 15636 "10000100" // J #13760 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13760 delay_slots=5 */ + 15637 "00000000" // /* MW 5 */ + 15638 "00000000" // /* MW 4 */ + 15639 "11100000" // /* MW 3 */ + 15640 "00011010" // /* MW 2 */ + 15641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b919_wrapperPPv__end +.label __Z13_b919_wrapperPPv___func_end0 + 15651 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 115 4 first +.function_start + 15664 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15665 "01000001" // /* MW 5 */ + 15666 "10100000" // /* MW 4 */ + 15667 "00101111" // /* MW 3 */ + 15668 "11000000" // /* MW 2 */ + 15669 "00000000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15670 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15671 "00011100" // /* MW 3 */ + 15672 "11000110" // /* MW 2 */ + 15673 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15674 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15675 "00011100" // /* MW 3 */ + 15676 "11000110" // /* MW 2 */ + 15677 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15678 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15679 "00011100" // /* MW 3 */ + 15680 "11000110" // /* MW 2 */ + 15681 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15682 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15683 "00011100" // /* MW 3 */ + 15684 "11000110" // /* MW 2 */ + 15685 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15686 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15687 "00011100" // /* MW 3 */ + 15688 "11000110" // /* MW 2 */ + 15689 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15690 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15691 "00011100" // /* MW 3 */ + 15692 "11000110" // /* MW 2 */ + 15693 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15694 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15695 "00011100" // /* MW 3 */ + 15696 "11000110" // /* MW 2 */ + 15697 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15698 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15699 "00011100" // /* MW 3 */ + 15700 "11000110" // /* MW 2 */ + 15701 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15702 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15703 "00011100" // /* MW 3 */ + 15704 "11000110" // /* MW 2 */ + 15705 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15706 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15707 "00011100" // /* MW 3 */ + 15708 "11000110" // /* MW 2 */ + 15709 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15710 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15711 "00011100" // /* MW 3 */ + 15712 "11000110" // /* MW 2 */ + 15713 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15714 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15715 "00011100" // /* MW 3 */ + 15716 "11000110" // /* MW 2 */ + 15717 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15718 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15719 "00011100" // /* MW 3 */ + 15720 "11000110" // /* MW 2 */ + 15721 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15722 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15723 "00011100" // /* MW 3 */ + 15724 "11000110" // /* MW 2 */ + 15725 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15726 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15727 "00011100" // /* MW 3 */ + 15728 "11000110" // /* MW 2 */ + 15729 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15730 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15731 "00011100" // /* MW 3 */ + 15732 "11000110" // /* MW 2 */ + 15733 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15735 "00011100" // /* MW 3 */ + 15736 "11000110" // /* MW 2 */ + 15737 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15739 "00011100" // /* MW 3 */ + 15740 "11000110" // /* MW 2 */ + 15741 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15743 "00011100" // /* MW 3 */ + 15744 "11000110" // /* MW 2 */ + 15745 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15747 "00011100" // /* MW 3 */ + 15748 "11000110" // /* MW 2 */ + 15749 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15751 "00011100" // /* MW 3 */ + 15752 "11000110" // /* MW 2 */ + 15753 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15755 "00011100" // /* MW 3 */ + 15756 "11000110" // /* MW 2 */ + 15757 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15759 "00011100" // /* MW 3 */ + 15760 "11000110" // /* MW 2 */ + 15761 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15763 "00011100" // /* MW 3 */ + 15764 "11000110" // /* MW 2 */ + 15765 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15767 "00011100" // /* MW 3 */ + 15768 "11000110" // /* MW 2 */ + 15769 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15771 "00011100" // /* MW 3 */ + 15772 "11000110" // /* MW 2 */ + 15773 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15775 "00011100" // /* MW 3 */ + 15776 "11000110" // /* MW 2 */ + 15777 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15779 "00011100" // /* MW 3 */ + 15780 "11000110" // /* MW 2 */ + 15781 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 119 first + 15782 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15783 "00000000" // /* MW 3 */ + 15784 "00101000" // /* MW 2 */ + 15785 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 first +.delay_slot + 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15787 "00011100" // /* MW 3 */ + 15788 "11000110" // /* MW 2 */ + 15789 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15791 "00011100" // /* MW 3 */ + 15792 "11000110" // /* MW 2 */ + 15793 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15795 "00011100" // /* MW 3 */ + 15796 "11000110" // /* MW 2 */ + 15797 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15799 "00011100" // /* MW 3 */ + 15800 "11000110" // /* MW 2 */ + 15801 "00010000" // /* MW 1 */ +.delay_slot + 15802 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15803 "10100000" // /* MW 3 */ + 15804 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 15805 "00011000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.lst new file mode 100644 index 0000000000000000000000000000000000000000..4a0bb9c3b02d8c2df3b5faeb6f4b950508fce7fd --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.lst @@ -0,0 +1,5518 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2352 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2352 0x00 0xc6 0xd1 0x21 0x41 0xd4 LDA r17, [p0]; MOV r2, r1 + 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2364 0xfe 0xf3 0xb0 0x00 0x2b 0xd0 0x70 0x02 ST p7, [sp, #-12]; MOV r1, r15 + 2372 0xff 0x87 0xb0 0x01 0xe8 0x90 0x70 0x02 ST lr, [sp, #-4]; MOV r15, r2 + 2380 0xff 0x06 0xb7 0xc1 0xe0 0x5c ST r1, [sp, #-8]; NEZ r16, r15 + 2386 0x1e 0x98 0x20 0xf8 MOV r26, r16 + 2390 0x00 0x00 NOPX + 2392 0x1f 0x68 0x82 0x18 ADD.NC p7, r17, #4 + 2396 0x07 0x1e 0x36 0x98 LDA r17, [p7], #4 + 2400 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2404 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2408 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2412 0x00 0x00 NOPX + 2414 0x00 0x00 NOPX + 2416 0x00 0x00 NOPX + 2418 0x00 0x00 NOPX + 2420 0x00 0x00 NOPX + 2422 0x00 0x00 NOPX + 2424 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27 + 2428 0x0f 0xd6 0x31 0x98 ST r17, [p7, #-12] + 2432 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 2436 0x00 0x00 NOPX + 2438 0x00 0x00 NOPX + 2440 0x00 0x00 NOPX + 2442 0x14 0x97 0x18 0x18 ACQ.COND r18, r17, r26 + 2446 0x10 0x24 0x09 0x18 MOVX r18, #2 + 2450 0x14 0x29 0x2d 0x98 LSHL r20, r16, r18 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2454 0x18 0x8a 0x20 0xf8 MOV dj0, r20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2458 0x00 0x4e 0xdf 0xd8 0x8b 0x0c LDA r19, [p0, dj0]; ST dj0, [sp, #-20] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2464 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2466 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2468 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2470 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2472 0x10 0x26 0x05 0x18 MOVX r19, #1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2476 0x14 0xf4 0xfc 0x98 LTU r26, r19, r15 + 2480 0xfe 0x6a 0xb0 0x03 0xb4 0xc1 0x00 0x02 ST r26, [sp, #-16]; ADD.NC p7, r19, #4 + 2488 0x07 0x1e 0x76 0x98 LDA r19, [p7], #4 + 2492 0x07 0x3e 0xb6 0x98 LDA r21, [p7], #12 + 2496 0x07 0xee 0x96 0x98 LDA r20, [p7], #-8 + 2500 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2504 0x00 0x00 NOPX + 2506 0x00 0x00 NOPX + 2508 0x00 0x00 NOPX + 2510 0x00 0x00 NOPX + 2512 0x00 0x00 NOPX + 2514 0x00 0x00 NOPX + 2516 0x14 0xe7 0x52 0x18 SEL.EQZ r19, r19, r21, r27 + 2520 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2524 0x00 0x00 NOPX + 2526 0x00 0x00 NOPX + 2528 0x00 0x00 NOPX + 2530 0x00 0x00 NOPX + 2532 0x15 0x17 0x18 0x18 ACQ.COND r20, r17, r26 + 2536 0x10 0x23 0x2d 0x98 LSHL r17, r0, r18 + 2540 0x18 0x88 0xa0 0xf8 MOV dj0, r17 + 2544 0x00 0x07 0xce 0xc9 0x00 0x44 MOVXM p7, #509056 + 2550 0xe0 0x13 0xdf 0xd4 0x2b 0x0c LDA p1, [p7, dj0]; ST r16, [sp, #-24] + 2556 0x00 0x00 NOPX + 2558 0x00 0x00 NOPX + 2560 0x00 0x00 NOPX + 2562 0x00 0x00 NOPX + 2564 0x00 0x00 NOPX + 2566 0x00 0x00 NOPX +.no_stack_arguments + 2568 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2572 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2580 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2582 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 2592 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 2598 0x07 0xeb 0x51 0x18 LDA r26, [sp, #-24] + 2602 0x07 0xec 0x41 0x18 LDA dj0, [sp, #-20] + 2606 0x07 0xf0 0x29 0x18 LDA el0, [sp, #-16] + 2610 0x00 0x00 NOPX + 2612 0x00 0x00 NOPX + 2614 0x00 0x00 NOPX + 2616 0x19 0x68 0x88 0x18 ADD.NC p1, r17, #16 + 2620 0x01 0x06 0x36 0x98 LDA r17, [p1] + 2624 0x00 0x00 NOPX + 2626 0x00 0x00 NOPX + 2628 0x00 0x00 NOPX + 2630 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2632 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2634 0x1e 0xa0 0x1c 0xf8 MOV r26, el0 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2638 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2642 0x3e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p1, #-4]; MOV r27, r15 + 2648 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0] + 2652 0x00 0x00 NOPX + 2654 0x00 0x00 NOPX + 2656 0x00 0x00 NOPX + 2658 0x00 0x00 NOPX + 2660 0x00 0x00 NOPX + 2662 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2666 0x8c 0x66 0x40 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 + 2672 0x00 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p1, #-4] + 2678 0x00 0x00 NOPX + 2680 0x00 0x00 NOPX + 2682 0x00 0x00 NOPX + 2684 0x00 0x00 NOPX + 2686 0x00 0x00 NOPX + 2688 0x00 0x00 NOPX + 2690 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2694 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 2698 0x00 0xf6 0x36 0x98 LDA r17, [p0, #-4] + 2702 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] + 2706 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] + 2710 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 2716 0x00 0x00 NOPX + 2718 0x00 0x00 NOPX + 2720 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 2724 0x1e 0xe0 0x1c 0xf8 MOV r27, el0 +.delay_slot + 2728 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot + 2732 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 2736 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4] +.delay_slot +.swstall delay_slot + 2740 0x00 0x00 NOPX +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 2752 +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function_start + 2752 0x03 0x85 0xd0 0x00 0x40 0x88 0x49 0x60 0x78 0xba LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 + 2762 0x03 0x81 0xd0 0x3e 0x57 0xe9 0x30 0x82 0x48 0xba LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 + 2772 0xff 0x81 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r1, #-4; PADDXM [sp], #64 + 2782 0x01 0x86 0x07 0xfd 0xb5 0x81 0x00 0x28 0x00 0x10 0x58 0x76 MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 + 2794 0x00 0x63 0x07 0xf9 0xd5 0xbf 0x57 0xaa 0x88 0x0f 0x58 0x76 MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 + 2806 0xfe 0xbe 0xb0 0x60 0x02 0x5c ST r15, [sp, #-12]; MOVX r24, #0 + 2812 0x00 0x00 NOPX + 2814 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2818 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2822 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 2826 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 2830 0x00 0x00 NOPX + 2832 0x00 0x00 NOPX + 2834 0x00 0x00 NOPX + 2836 0x00 0x00 NOPX + 2838 0x00 0x00 NOPX + 2840 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2844 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2848 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 2852 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 2856 0x00 0x00 NOPX + 2858 0x00 0x00 NOPX + 2860 0x00 0x00 NOPX + 2862 0x00 0x00 NOPX + 2864 0x00 0x00 NOPX + 2866 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2870 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2874 0x00 0x04 0x0e 0x98 LDA eh0, [p0] + 2878 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4] + 2882 0x00 0x00 NOPX + 2884 0x00 0x00 NOPX + 2886 0x00 0x00 NOPX + 2888 0x00 0x00 NOPX + 2890 0x00 0x00 NOPX + 2892 0x09 0x04 0x09 0x98 ST eh0, [p1] + 2896 0x09 0x14 0x29 0x98 ST el0, [p1, #4] + 2900 0x02 0xdd 0xaa 0x98 LDA.u8 r13, [p2], #-3 + 2904 0x02 0x1e 0x2a 0x98 LDA.u8 r17, [p2], #1 + 2908 0x02 0xbd 0xca 0x98 LDA.u8 r14, [p2], #-5 + 2912 0x02 0xfd 0xfa 0x98 LDA.u16 r15, [p2], #-2 + 2916 0x02 0x0a 0x6a 0x98 LDA.u8 r19, [p2], m0 + 2920 0x02 0xac 0xea 0x98 LDA.u8 r7, [p2], #-6 + 2924 0x00 0x00 NOPX + 2926 0x13 0x42 0x1d 0x98 LSHL r1, r13, r1 + 2930 0x0c 0x20 0xf9 0x31 0x01 0x24 EQ r16, r1, r16; ADD.NC r18, r17, #1 + 2936 0x14 0xa4 0x5d 0x98 LSHL r18, r18, r5 + 2940 0x13 0xf6 0x47 0x98 EQ r27, r15, r4 + 2944 0xc1 0x4a 0x40 0xb7 0x39 0xe4 SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 + 2950 0x14 0x7b 0x22 0x18 SEL.EQZ r29, r17, r18, r27 + 2954 0x11 0xcc 0x67 0x98 EQ r6, r7, r6 + 2958 0x11 0xb7 0x04 0x98 AND r27, r6, r16 + 2962 0x7b 0xeb 0xbc 0xbb 0x41 0xe4 LSHL r15, r15, r21; MOV r25, r27 + 2968 0xfd 0xbe 0xb3 0x9b 0x04 0x5c ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 + 2974 0xc8 0x05 0xf8 0x40 0x01 0x84 JNZ r25, #3056 +.delay_slot + 2980 0x11 0xb6 0x47 0x98 EQ r27, r6, r4 +.delay_slot + 2984 0x13 0x71 0x44 0x98 AND r24, r13, r20 +.delay_slot + 2988 0x14 0xfc 0x5d 0x98 LSHL r30, r19, r5 +.delay_slot + 2992 0x16 0xe8 0x4d 0x98 LSHL r20, r27, r4 +.delay_slot + 2996 0x11 0x8c 0x32 0x18 SEL.EQZ r6, r6, r3, r27 + 3000 0xd8 0x05 0xf8 0x40 0x01 0x84 JNZ r27, #3056 +.delay_slot +.swstall delay_slot + 3006 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3008 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3010 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3012 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3014 0x00 0x00 NOPX + 3016 0x00 0x2f 0x00 0x00 0x01 0x84 0x00 0x00 0x20 0xba MOVA r15, #1; J #3104 +.delay_slot + 3026 0x00 0x1a 0x00 0x3e 0x57 0xab 0x88 0x0c 0x58 0xba MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 +.delay_slot + 3036 0x05 0x42 0x21 0x20 0x41 0x64 MOVX r21, #4; MOV r2, #16 +.delay_slot + 3042 0x10 0x1a 0x0d 0x18 MOVX r13, #3 +.delay_slot + 3046 0x10 0x0e 0x3d 0x18 MOVX r7, #15 +.delay_slot + 3050 0x00 0x2c 0xff 0x91 0xe2 0x2c NOPA; MOVX r4, #-4 +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 3056 0x02 0x02 0x81 0x82 0x0b 0x01 0x50 0x88 0x8f 0xfc 0x58 0x76 MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 + 3068 0x20 0x18 0xe0 0x01 0xa0 0x0b 0x88 0x0c 0x58 0xba ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 + 3078 0x02 0x02 0x00 0x3e 0x57 0xa9 0xe8 0x01 0x58 0xba MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 + 3088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x71 0xe9 0xa8 0x03 0x58 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 + 3104 0x5d 0xc5 0x50 0x1b 0xb3 0x3c 0x00 0x3c 0x58 0xba LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 + 3114 0x41 0x05 0x50 0x03 0x2d 0x12 0x87 0xcd 0x58 0xba LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 + 3124 0x00 0x57 0x00 0x3b 0xda 0x91 0x80 0x37 0x58 0xba MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 + 3134 0x01 0x03 0x00 0x2b 0xb0 0x3d 0x07 0xbc 0x58 0xba MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 + 3144 0x40 0x10 0x00 0x1f 0x6c 0x6c 0x80 0x70 0x58 0xba MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 + 3154 0xb5 0x92 0x08 0x1e 0x5d 0x64 EXTEND.u8 r22, r22; MOV m4, #-105 + 3160 0xfe 0x5a 0xb0 0x2d 0x61 0x6f 0x80 0x31 0x59 0x3a ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 + 3170 0xf7 0xba 0x3c 0x1f 0x05 0x64 SUB r30, r30, r29; MOV m6, #-63 + 3176 0x13 0xc2 0x11 0x98 SUB r1, r15, r1 + 3180 0x8f 0xc3 0xf0 0xa0 0x1d 0x64 MUL r31, r17, r1; MOV r1, #7 + 3186 0x16 0xa3 0x21 0x98 SUB r17, r26, r18 + 3190 0x17 0xfe 0x1d 0x98 LSHL r31, r31, r1 + 3194 0x55 0x7e 0x30 0x3b 0xf1 0xee 0x80 0x57 0x59 0x3a ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 + 3204 0x4d 0x55 0x50 0x2f 0x30 0x3d 0x87 0xb2 0x58 0xba LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3214 0xfd 0x4e 0xb9 0xcc 0x7b 0x5c ST r19, [sp, #-24]; LSHL r19, r19, r3 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3220 0x49 0x54 0xe0 0x3f 0x6b 0x2d 0x00 0xf6 0x58 0xba ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3230 0x45 0x43 0x50 0x27 0x38 0x10 0x87 0x50 0x58 0xba LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3240 0x15 0xfe 0x67 0x98 EQ r31, r23, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3244 0x10 0xe0 0x67 0x98 EQ r16, r3, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3248 0x17 0xf7 0x05 0x98 OR r27, r31, r16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3252 0x11 0xeb 0x54 0x98 AND r21, r7, r21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3256 0xf7 0xa3 0xd8 0xa0 0x61 0x64 ASHL r30, r30, r17; MOV r17, #24 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3262 0xfc 0x42 0xb0 0x1f 0x29 0x6f 0xcf 0x80 0x49 0x3a ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 + 3272 0x43 0xea 0x3f 0x46 0x3b 0x5c ST r26, [p2], #4; LSHL r17, r30, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3278 0x51 0x6a 0x30 0x02 0x00 0xa8 0x50 0x02 ST r26, [p2], m4; MOV m4, #168 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3286 0x5d 0x49 0x57 0xe7 0xf5 0xa7 0xb0 0x2c 0x0d 0xce 0x78 0x76 LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3298 0x16 0xe3 0x15 0x98 OR r17, r27, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3302 0x16 0xb7 0x81 0x98 SUB r27, r26, r24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3306 0x14 0xb0 0x90 0x18 EXTEND.u8 r24, r18 + 3310 0x00 0x00 NOPX + 3312 0x00 0x00 NOPX + 3314 0x00 0x00 NOPX + 3316 0x13 0xe5 0x21 0x98 SUB r18, r15, r18 + 3320 0x0a 0xca 0x51 0x98 ST r18, [p2], m6 + 3324 0x02 0xaa 0x4a 0x98 LDA.u8 r18, [p2], m5 + 3328 0x00 0x00 NOPX + 3330 0x00 0x00 NOPX + 3332 0x00 0x00 NOPX + 3334 0x00 0x00 NOPX + 3336 0x00 0x00 NOPX + 3338 0x00 0x00 NOPX + 3340 0x14 0xa4 0xe1 0x98 SUB r18, r18, r14 + 3344 0x14 0xa5 0xbe 0x98 ASHL r18, r18, r27 + 3348 0x14 0xa4 0x2d 0x98 LSHL r18, r18, r2 + 3352 0x00 0x01 0x0d 0xa0 0x00 0x44 MOVXM r27, #65536 + 3358 0x16 0xe5 0x20 0x98 ADD r18, r27, r18 + 3362 0x00 0xff 0x0d 0xa0 0x00 0x44 MOVXM r27, #16711680 + 3368 0xde 0xe4 0x99 0x3f 0xc1 0x64 AND r27, r27, r18; MOV r18, #-16 + 3374 0xde 0xe2 0xb8 0xbf 0xe1 0x64 OR r27, r27, r17; MOV r17, #-8 + 3380 0x43 0xee 0x39 0xce 0x3b 0x5c ST r27, [p2], #4; LSHL r19, r19, r17 + 3386 0x16 0xb5 0x31 0x98 SUB r26, r26, r19 + 3390 0x15 0x29 0xad 0x98 LSHL r20, r20, r26 + 3394 0x13 0xb5 0x65 0x98 OR r26, r14, r22 + 3398 0x4d 0x6a 0x3f 0x69 0x20 0x5c ST r26, [p2], m3; EXTEND.u8 r26, r30 + 3404 0x49 0x65 0x50 0x37 0x49 0x6f 0xce 0xa8 0xa8 0xba LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3414 0xb5 0xa3 0xb8 0xa3 0xf9 0x64 LSHL r22, r22, r17; MOV r17, #254 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3420 0x45 0x64 0xed 0x6b 0x1f 0x2c ST.s8 r25, [p2], m1; MUL r26, r26, r24 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3426 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3428 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3430 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3432 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3434 0x86 0x5f 0xbd 0xb5 0xca 0xa4 LSHL r25, r16, r15; ADD.NC r27, r21, r25 + 3440 0xf8 0x06 0xf8 0x40 0x01 0x84 JNZ r31, #3568 +.delay_slot + 3446 0x9d 0x41 0xed 0xbb 0xf2 0xa4 ADD r21, r19, #3; ADD.NC r27, r27, r30 +.delay_slot + 3452 0x16 0xeb 0x5d 0x98 LSHL r21, r27, r21 +.delay_slot + 3456 0x16 0x63 0x14 0x98 AND r17, r25, r17 +.delay_slot + 3460 0x51 0x46 0x30 0x0d 0xbe 0x3e 0x28 0x01 0x59 0x3a ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 +.delay_slot + 3470 0x18 0x9b 0x9c 0xf8 MOV el1, r27 + 3474 0x07 0xe3 0x91 0x18 LDA r28, [sp, #-32] + 3478 0x00 0x00 NOPX + 3480 0x00 0x00 NOPX + 3482 0x00 0x00 NOPX + 3484 0x00 0x00 NOPX + 3486 0x00 0x00 NOPX + 3488 0x00 0x00 NOPX + 3490 0xe0 0x06 0xf8 0x40 0x01 0x84 JNZ r28, #3568 +.delay_slot +.swstall delay_slot + 3496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3502 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3504 0x00 0x00 NOPX + 3506 0x00 0xbc 0x00 0x01 0x10 0x8b 0x28 0x40 0x58 0xba MOVA r28, #5; MOVX r17, #4; MOV r25, #64 + 3516 0x14 0x7e 0xd2 0x18 SEL.EQZ r31, r17, r13, r27 + 3520 0x16 0x76 0x67 0x98 EQ r27, r25, r6 + 3524 0xff 0x38 0x4f 0xa0 0x01 0x64 SEL.EQZ r28, r31, r28, r27; MOV r31, #0 + 3530 0x10 0x32 0x50 0x18 EXTEND.s8 r25, r0 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3534 0x16 0x7d 0xef 0x98 MUL r30, r25, r30 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3538 0xce 0xe3 0x5d 0xc4 0x39 0xe4 LT r27, r25, r17; MOV r27, el1 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3544 0x13 0xe3 0x82 0x18 SEL.EQZ r17, r15, r24, r27 + 3548 0x14 0x63 0xef 0x98 MUL r17, r17, r30 + 3552 0x17 0xf9 0xc1 0x98 SUB r28, r31, r28 + 3556 0x14 0x63 0xce 0x98 ASHL r17, r17, r28 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3560 0x00 0x23 0x14 0x81 0x00 0x00 0x1c 0x22 EXTEND.u8 r17, r17; NOPV +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3568 0x00 0x19 0x00 0x3f 0xc7 0xeb 0x70 0x0e 0x78 0xba MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3578 0x16 0x7f 0xc2 0x18 SEL.EQZ r31, r25, r28, r27 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3582 0xfd 0x6e 0x20 0x21 0x04 0x83 0x4f 0x74 0xa8 0xba LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3592 0x09 0x1e 0x00 0x29 0x44 0x83 0xa8 0x09 0x58 0xba MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 0x17 0x73 0xe2 0x18 SEL.EQZ r25, r29, r30, r27 + 3606 0x15 0xf9 0x88 0x98 NE r28, r23, r24 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3610 0x17 0x7b 0x3d 0x98 LSHL r29, r29, r19 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3614 0xfd 0xde 0x20 0x00 0x00 0x03 0x0a 0x04 0x10 0xba LDA r23, [sp, #-20]; MOVXM r24, #1032 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3624 0xcc 0xe7 0xbf 0x3a 0xff 0x24 LSHL r19, r25, r19; ADD.NC r30, r26, #-1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3630 0x14 0xcf 0xe6 0x18 MAC r7, r7, r19, r30 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3634 0xb4 0xd2 0x0b 0xa8 0x29 0x64 EXTEND.u8 r19, r22; MOV r23, #522 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3640 0xbd 0xb0 0x4d 0x21 0x01 0x64 SEL.EQZ r22, r23, r24, r27; MOV r26, #64 + 3646 0x31 0xb5 0x1d 0xc2 0x39 0xe4 NE r6, r6, r26; MOV r27, eh0 + 3652 0x11 0xcf 0x24 0x98 AND r7, r7, r18 + 3656 0xbd 0xde 0x4d 0xa6 0x41 0xe4 SEL.EQZ r23, r23, r15, r27; MOV r27, r6 + 3662 0x29 0x08 0x49 0x20 0x7d 0x64 SEL.EQZ r4, r5, r4, r27; MOV r18, #31 + 3668 0x15 0xef 0x24 0x98 AND r23, r23, r18 + 3672 0xdc 0x1e 0x00 0x20 0x42 0x6e 0x4f 0x70 0x58 0xba MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 + 3682 0x17 0xbd 0x22 0x18 SEL.EQZ r30, r30, r18, r27 + 3686 0x12 0x05 0x00 0x2f 0xa9 0xfe 0x09 0x20 0x58 0xba MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 + 3696 0x14 0x20 0x52 0x18 SEL.EQZ r16, r16, r5, r27 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3700 0x8e 0x09 0xfd 0xbd 0xf2 0xa4 MUL r24, r17, r4; ADD.NC r27, r29, r30 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3706 0x84 0x3f 0xbd 0xc4 0x39 0xe4 LSHL r16, r16, r31; MOV r27, el1 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3712 0xfb 0xee 0xb7 0x6b 0x5f 0x5c ST r27, [sp, #-36]; MUL r26, r14, r26 + 3718 0x10 0x84 0x32 0x18 SEL.EQZ r2, r2, r3, r27 + 3722 0x13 0x7e 0x0c 0x98 LTU r31, r13, r0 + 3726 0x15 0x31 0x8f 0x98 MUL r24, r20, r24 + 3730 0x17 0xf7 0xc5 0x98 OR r27, r31, r28 + 3734 0x10 0x03 0x07 0xee 0x95 0xb7 0xc0 0xee 0x89 0x00 0x58 0x76 MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 + 3746 0x1d 0x28 0x40 0xb7 0x39 0xe4 SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 + 3752 0x00 0x00 0x0f 0xac 0x0c 0x44 MOVXM r31, #1542 + 3758 0xfd 0x12 0xb0 0x1f 0xb0 0x3c 0x89 0x3f 0xc9 0x3a ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 + 3768 0xed 0x8c 0x82 0x1c 0x91 0xad 0xff 0x92 0xcc 0x7f 0xc8 0x76 MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 + 3780 0x4d 0x5a 0x30 0x2b 0x57 0xef 0x70 0x8e 0x79 0x3a ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 + 3790 0x02 0xd9 0x02 0x1f 0x51 0xa9 0x4e 0x0e 0x00 0x58 0x58 0x76 MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3802 0x51 0x62 0x3f 0xe3 0x24 0x5c ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3808 0xfc 0x6e 0x22 0xef 0x91 0xab 0xce 0x0f 0x69 0x90 0x78 0x76 LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3820 0x22 0xf3 0x00 0x29 0xce 0x12 0x8c 0xff 0xc8 0xba MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3830 0xf7 0x1d 0x00 0x3b 0xea 0x73 0x70 0x8e 0x78 0xba MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3840 0xfc 0xee 0x2e 0xca 0x44 0x2c LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3846 0x04 0x1f 0x00 0x3f 0x39 0x93 0x69 0x90 0x78 0xba MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3856 0x17 0xc4 0x22 0x18 SEL.EQZ r2, r31, r2, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3860 0x10 0xeb 0x51 0x98 SUB r21, r3, r21 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3864 0x47 0x8e 0x30 0x04 0x27 0x90 0x6f 0xc0 0x59 0x3a ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3874 0x17 0x38 0x32 0x18 SEL.EQZ r28, r28, r3, r27 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 3878 0x15 0xfe 0x7f 0x98 MUL r31, r23, r7 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 3882 0xfb 0xc6 0x2e 0x0c 0x64 0x2c LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3888 0x5d 0x8e 0x30 0x35 0x29 0x7c 0x80 0x28 0x59 0x3a ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3898 0x45 0x56 0x31 0x0d 0xe4 0x5c ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3904 0xfe 0x02 0x20 0x06 0x38 0xfe 0xa9 0xfc 0xa8 0xba LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3914 0xfd 0xc6 0x22 0x1f 0x11 0x80 0x05 0x06 0x06 0x7a LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3924 0x4f 0xce 0x30 0x00 0x00 0x3e 0x6f 0xf8 0x11 0x3a ST r19, [p2], #28; MOVXM r19, #65520 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3934 0x43 0xd2 0x30 0x3f 0x49 0xa4 0x4b 0xbf 0xc9 0x3a ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3944 0x43 0xc6 0x31 0x56 0x9c 0x5c ST r17, [p2], #4; MSC r21, r21, r2, r20 + 3950 0x43 0x8a 0x3f 0x7a 0x81 0x5c ST r2, [p2], #4; ADD r30, r30, r20 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 3956 0x43 0xfa 0x38 0x73 0xe3 0x5c ST r30, [p2], #4; SUB r28, r16, r31 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 3962 0x43 0xd6 0x30 0x2d 0xf8 0x30 0x60 0x00 0x59 0x3a ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3972 0x43 0x8c 0x30 0x3e 0x20 0x7e 0x2c 0x7f 0xc9 0x3a ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 + 3982 0x43 0x8c 0x38 0xb8 0x0c 0x5c ST dc0, [p2], #4; MAC r14, r14, r17, r0 + 3988 0x43 0xda 0x30 0x27 0x01 0x24 0x48 0x00 0x59 0x3a ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 + 3998 0x43 0xf2 0x30 0x05 0x1f 0x8f 0x70 0x0e 0x79 0x3a ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 + 4008 0x43 0x92 0x3b 0xb9 0xdf 0x5c ST r4, [p2], #4; MUL r14, r23, r14 + 4014 0x43 0xc6 0x30 0x21 0x0f 0x8c 0x08 0x06 0x59 0x3a ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4024 0x09 0x00 0x02 0x1e 0x11 0x9b 0x00 0x13 0x69 0x90 0x78 0x76 MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4036 0xfd 0x16 0x20 0x14 0xa4 0x2c LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4042 0x11 0x63 0xaf 0x98 MUL r17, r5, r26 + 4046 0x43 0x8c 0x30 0x07 0x08 0x6d 0x07 0xc8 0x59 0x3a ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 + 4056 0xfc 0x63 0x02 0x48 0x61 0xa0 0xf7 0xed 0xa8 0xc1 0xc8 0x76 MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 + 4068 0x41 0x0a 0x36 0xc0 0x7b 0x5c ST r2, [p2], m0; LSHL r16, r13, r3 + 4074 0x43 0xda 0x38 0x8e 0x41 0x5c ST r22, [p2], #4; ADD r3, r17, r18 + 4080 0x43 0xca 0x38 0xc8 0x9c 0x5c ST r18, [p2], #4; MSC r18, r18, r17, r4 + 4086 0x43 0x92 0x32 0x94 0xdb 0x5c ST r4, [p2], #4; LSHL r5, r5, r6 + 4092 0x43 0x8e 0x30 0x1a 0x38 0x04 0x0f 0xfd 0x59 0x3a ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 + 4102 0x10 0xc0 0x0e 0x98 ASHL r0, r3, r0 + 4106 0x43 0xca 0x37 0x10 0x1f 0x5c ST r18, [p2], #4; MUL r4, r14, r0 + 4112 0x43 0x8c 0x30 0x0c 0x3b 0x5c ST dc0, [p2], #4; LSHL r3, r0, r1 + 4118 0xff 0xb6 0x22 0x1c 0x61 0x80 0x03 0xc6 0x31 0xfa LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 + 4128 0xff 0x3a 0x22 0x1c 0x91 0xba 0x70 0x30 0x28 0x3f 0xc8 0x76 LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 + 4140 0xfe 0xbe 0x22 0x1c 0x31 0x80 0x01 0x41 0xaf 0xfa LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 + 4150 0x43 0x8e 0x30 0x50 0x00 0x5c ST r3, [p2], #4; RET lr +.delay_slot + 4156 0x0a 0x5c 0xf1 0x98 ST r7, [p2], #20 +.delay_slot + 4160 0x0a 0x1c 0x11 0x98 ST r0, [p2], #4 +.delay_slot + 4164 0x0a 0x1c 0x51 0x98 ST r2, [p2], #4 +.delay_slot + 4168 0x0a 0x04 0x51 0x98 ST r2, [p2] +.delay_slot + 4172 0x42 0x8a 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r2, [p2, #4]; PADDXM [sp], #-64 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + +.text_segment PM 4192 +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function_start + 4192 0x00 0x03 0x82 0x84 0x8b 0x01 0x80 0x08 0x0a 0x60 0x78 0x76 MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 + 4204 0x00 0x06 0x88 0x28 0x28 0x34 0x01 0x36 0x00 0x21 0x20 0x09 0x60 0x7e MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 + 4218 0x63 0x94 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA dn1, [p3], #4; MOVXM p4, #509032 + 4228 0x63 0x90 0xd0 0x00 0x00 0x04 0x78 0x78 0x10 0xba LDA m1, [p3], #4; MOVXM ls, #4336 + 4238 0x60 0x80 0xd0 0x00 0x00 0x05 0xb8 0x90 0x10 0xba LDA m0, [p3]; MOVXM le, #4384 + 4248 0x7a 0x82 0xd1 0x00 0x01 0x54 LDA r0, [p3, #-12]; MOV dj0, #0 + 4254 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 4258 0x00 0x00 NOPX + 4260 0x00 0x00 NOPX + 4262 0x00 0x0a 0x80 0x85 0x01 0xf4 VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 + 4268 0x3e 0x30 0x14 0x18 VLDB.POP.512.2D x0, [p0, lf0, r24, d1] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4272 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4276 0x00 0x0a 0x8a 0xe0 0xfd 0x34 VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4282 0xc6 0x02 0x80 0xf5 0x00 0x1c VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4288 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4292 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4294 0x00 0x2c 0xf0 0x00 0x54 0x00 0x01 0xa5 0x7e 0xba NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4304 0x00 0x2c 0xfc 0x60 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4336 0x00 0x2c 0xf8 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4352 0x00 0x2c 0xf0 0x00 0xad 0x80 0x03 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4368 0x00 0x2c 0xfc 0x60 0x29 0x00 0x03 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4384 0x00 0x2c 0xf0 0x00 0x23 0x00 0x03 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4400 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 0x0d 0x80 0x03 0x18 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 0x20 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4414 0x18 0x81 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x0 + 4418 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 4422 0xb0 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 + 4430 0x20 0x00 0x60 0x00 0x40 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 + 4438 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 4442 0xb0 0x00 0x60 0x50 0x00 0x5c VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr +.delay_slot + 4448 0x09 0x00 0x03 0x18 VST.FLUSH.512.CONV [p2, sf, r26] +.delay_slot + 4452 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] +.delay_slot +.swstall delay_slot + 4456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4460 0x00 0x00 NOPX +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + +.text_segment PM 4464 +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function_start + 4464 0xf5 0xe0 0x86 0x3f 0x20 0x00 0x80 0x00 0x00 0x0e 0x91 0x11 0x60 0x7e MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 + 4478 0x00 0x73 0x07 0xf1 0x95 0xbf 0xc5 0x0a 0x2b 0x60 0x78 0x76 MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 + 4490 0x00 0x19 0x07 0xda 0x35 0x81 0x10 0x29 0x34 0x47 0x08 0x76 MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 + 4502 0x40 0xca 0xd7 0xf5 0x35 0x80 0x40 0x03 0xa8 0x00 0x10 0x76 LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 + 4514 0x0b 0x18 0x87 0xfd 0xd5 0x80 0x7f 0xff 0xef 0xff 0x90 0x76 MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 + 4526 0x00 0xb4 0x07 0xe1 0xb5 0x81 0x61 0x0a 0x07 0xec 0x58 0x76 MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 + 4538 0x01 0x95 0x07 0xed 0xf5 0x87 0x77 0xca 0x87 0xc4 0x58 0x76 MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 + 4550 0xff 0x73 0xb0 0x03 0x80 0x40 0x50 0x02 ST p7, [sp, #-8]; MOV m7, #64 + 4558 0x0f 0xe4 0x3d 0x98 ST lr, [sp, #-28] + 4562 0x00 0x00 NOPX + 4564 0x17 0x59 0x20 0x98 ADD r12, r29, r18 + 4568 0x41 0x32 0x36 0x77 0x9b 0x5c ST r12, [p2], m0; LSHL r29, r12, r28 + 4574 0x5b 0xf9 0x5e 0xf2 0x2f 0x2c LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 + 4580 0x02 0xc9 0x2a 0x98 LDA.u8 r9, [p2], m6 + 4584 0x00 0x00 NOPX + 4586 0x00 0x00 NOPX + 4588 0x00 0x00 NOPX + 4590 0x00 0x00 NOPX + 4592 0x00 0x00 NOPX + 4594 0x17 0x77 0xec 0x98 LTU r27, r29, r30 + 4598 0x16 0x5d 0x32 0x18 SEL.EQZ r14, r25, r19, r27 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4602 0x17 0xf6 0xcc 0x98 LTU r27, r31, r12 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4606 0x51 0x70 0xee 0xb7 0xcf 0x2c ST.s8 r28, [p2], m4; EQ r13, r29, r30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4612 0x13 0x7f 0x1d 0x98 LSHL r31, r13, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4616 0x16 0x58 0xe2 0x18 SEL.EQZ r12, r25, r14, r27 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4620 0x17 0xf9 0xc5 0x98 OR r28, r31, r28 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4624 0x8e 0xfd 0x9e 0x3c 0x62 0xa4 LTU r27, r17, r30; ADD.NC r28, r28, r12 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4630 0x16 0x79 0xc2 0x18 SEL.EQZ r28, r25, r28, r27 + 4634 0x14 0x7f 0xcc 0x98 LTU r31, r17, r28 + 4638 0x55 0x7e 0x3e 0xf7 0xd1 0x5c ST r31, [p2], m5; NE r29, r29, r30 + 4644 0x5d 0x79 0x54 0xb2 0x31 0x2c LDA.u8 r30, [p2], m7; NE r12, r9, r17 + 4650 0x00 0x00 NOPX + 4652 0x00 0x00 NOPX + 4654 0x00 0x00 NOPX + 4656 0x00 0x00 NOPX + 4658 0x00 0x00 NOPX + 4660 0x00 0x00 NOPX + 4662 0xf5 0xad 0x1f 0xbe 0xfc 0x24 NE r22, r30, r22; ADD.NC r31, r30, #-4 + 4668 0x60 0x09 0x40 0x40 0x01 0x84 JNZ r12, #4736 +.delay_slot + 4674 0x17 0x93 0x48 0x98 NE r9, r30, r20 +.delay_slot + 4678 0x17 0xfe 0x90 0x18 EXTEND.u8 r31, r31 +.delay_slot + 4682 0x12 0x6d 0x64 0x98 AND r22, r9, r22 +.delay_slot + 4686 0x17 0xef 0x7c 0x98 LTU r23, r31, r23 +.delay_slot + 4690 0x15 0xe1 0x64 0x98 AND r16, r23, r22 + 4694 0xe8 0x09 0x40 0x40 0x01 0x84 JNZ r29, #4736 +.delay_slot + 4700 0x0f 0xeb 0x1d 0x98 ST p6, [sp, #-24] +.delay_slot +.swstall delay_slot + 4704 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4706 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4708 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4710 0x00 0x00 NOPX + 4712 0x00 0x3b 0x00 0x00 0x02 0x56 0x00 0x00 0x20 0xba MOVA r27, #1; J #4784 +.delay_slot + 4722 0x18 0x19 0x9c 0xf8 MOV el0, r25 +.delay_slot + 4726 0x10 0x26 0x05 0x18 MOVX r19, #1 +.delay_slot +.swstall delay_slot + 4730 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4732 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4734 0x00 0x00 NOPX +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 4736 0x00 0x95 0x07 0xeb 0x1d 0xab 0xbf 0x3c 0x0c 0xce 0x78 0x76 MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 + 4748 0x17 0xab 0x5d 0x98 LSHL r21, r30, r21 + 4752 0x15 0x6b 0x92 0x18 SEL.EQZ r21, r21, r25, r27 + 4756 0x14 0xf7 0xe7 0x98 EQ r27, r19, r30 + 4760 0xac 0xf2 0x4d 0xb0 0x41 0xe4 SEL.EQZ r19, r21, r25, r27; MOV r27, r16 + 4766 0x16 0x67 0x32 0x18 SEL.EQZ r19, r25, r19, r27 + 4770 0x17 0x29 0x44 0x98 AND r20, r28, r20 + 4774 0x15 0x36 0xf0 0x18 NEZ r27, r20 + 4778 0x00 0x2c 0xf9 0xcf 0x8b 0x2c NOPA; OR r19, r19, r28 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 + 4784 0x01 0x90 0x82 0x6f 0x71 0xba 0x02 0x5c 0x10 0x00 0x60 0x76 MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 +.delay_slot + 4796 0x02 0x8a 0x67 0x18 ST.s8 r19, [p2], m4 +.delay_slot +.swstall delay_slot + 4800 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4802 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4804 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4806 0x00 0x00 NOPX + 4808 0x00 0xff 0xfa 0x3f 0xfe 0x44 MOVXM r20, #16777215 + 4814 0x14 0xa5 0x44 0x98 AND r18, r18, r20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4818 0x00 0x2c 0xf6 0xec 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r18, [p3, #28]; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4832 0x51 0xd2 0xd0 0x27 0x44 0x82 0xcf 0xfd 0x58 0xba LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4842 0x00 0x52 0x00 0x29 0x5f 0xfa 0x00 0x24 0x58 0xba MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4852 0x51 0x5a 0xd7 0xd0 0x2d 0xab 0x6b 0x26 0x07 0xcc 0x58 0x76 LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4864 0x51 0x5e 0xd7 0xde 0xd5 0xbf 0x37 0xea 0x00 0xc4 0x58 0x76 LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 + 4876 0x02 0xff 0xb6 0x98 LDA r29, [p2], #-4 + 4880 0x02 0x8b 0xf6 0x98 LDA r31, [p2], m4 + 4884 0x01 0x06 0xb6 0x98 LDA r21, [p1] + 4888 0x00 0xd2 0xda 0x26 0x5b 0x2c LDA r20, [p0]; LSHL r9, r20, r18 + 4894 0x04 0x07 0xd6 0x98 LDA r30, [p4] + 4898 0x15 0xad 0x2d 0x98 LSHL r22, r22, r18 + 4902 0x00 0x00 NOPX + 4904 0x17 0x67 0x3e 0x98 ASHL r19, r29, r19 + 4908 0x17 0xe3 0x18 0x98 NE r17, r31, r17 + 4912 0x88 0x09 0xd0 0x40 0x01 0x84 JNZ r17, #5024 +.delay_slot + 4918 0xbd 0xa5 0xba 0xb5 0xb2 0xa4 LSHL r22, r23, r18; ADD.NC r21, r21, r22 +.delay_slot + 4924 0x9d 0x65 0xb0 0x95 0xb2 0xa4 LSHL r21, r19, r18; ADD.NC dn0, r21, r22 +.delay_slot + 4930 0xfa 0x84 0xb0 0x01 0xca 0x68 0xa0 0x02 ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 +.delay_slot + 4938 0x1b 0xd0 0x80 0xf8 MOV r15, dn0 +.delay_slot + 4942 0x1e 0x6a 0xf9 0x58 ADD.NC p6, r21, r30 + 4946 0x00 0x07 0xce 0xc8 0xd0 0x44 MOVXM p7, #509032 + 4952 0xe0 0xc4 0x50 0xb4 0x80 0x2c LDA.s8 r17, [p7]; MOVX vaddSign0, #1 + 4958 0x00 0x00 NOPX + 4960 0xff 0x7f 0x0a 0x20 0x00 0x44 MOVXM r20, #-8454144 + 4966 0x18 0x02 0x91 0x78 VINSERT.32 x0, x0, #0, r20 + 4970 0x1d 0x15 0xe0 0xf8 MOV r20, sp +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4974 0x1f 0x6a 0x5f 0x18 ADD.NC p7, r20, #-66 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4978 0xe0 0xc6 0xe0 0x01 0x25 0xd4 ST.s16 r17, [p7]; VMOV bmll0, x0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4984 0x14 0x7a 0x80 0x18 MOVX crRnd, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4988 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4992 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4994 0x1c 0x41 0x01 0xb8 VEXTRACT.16 r17, x0, #0, vaddSign0 + 4998 0x00 0x00 NOPX + 5000 0x00 0x00 NOPX + 5002 0x07 0x06 0x32 0x98 LDA.s16 r17, [p7] + 5006 0x00 0x00 NOPX + 5008 0x00 0x00 NOPX + 5010 0x00 0x00 NOPX + 5012 0x00 0x00 NOPX + 5014 0x00 0x00 NOPX + 5016 0x00 0x00 NOPX + 5018 0x00 0x2c 0xff 0xa4 0x6b 0x0c NOPA; ST r17, [sp, #-48] +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5024 0x0b 0x90 0x81 0x8e 0x0b 0x00 0x01 0xf1 0xb2 0x34 0x10 0x76 MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5036 0x51 0x45 0x50 0x84 0x8b 0x33 0x19 0x92 0x68 0x0b 0x58 0x76 LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5048 0x61 0x96 0x00 0x39 0xb9 0x65 0xaa 0x60 0x78 0xba MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5058 0x03 0x06 0x67 0x18 ST.s8 r19, [p3] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5062 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5064 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5070 0x14 0x6b 0x2d 0x98 LSHL r21, r17, r18 +.delay_slot + 5074 0x1f 0x6a 0xf9 0x58 ADD.NC p7, r21, r30 +.delay_slot + 5078 0x16 0x63 0x11 0x98 SUB r17, r25, r17 +.delay_slot + 5082 0x8c 0x65 0xba 0x2c 0x35 0x64 LSHL r17, r17, r18; MOV r20, #781 +.delay_slot + 5088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2c 0x9a 0x11 0x8b 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV +.return_address + 5104 0x07 0xd4 0x99 0x18 LDA p1, [sp, #-44] +.no_stack_arguments + 5108 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192 +.delay_slot +.swstall delay_slot + 5114 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5116 0x00 0x00 NOPX +.delay_slot + 5118 0x1b 0x56 0x90 0x18 ADD.NC r13, r13, #32 +.delay_slot + 5122 0x1a 0x66 0xa0 0xf8 MOV p2, r13 +.delay_slot + 5126 0x00 0x2c 0xf0 0x8f 0x0b 0x00 0x00 0x00 0x00 0x7a NOPA; MOVS p0, r15; NOPX +.return_address + 5136 0xd6 0x9a 0x80 0x01 0x37 0xea 0x33 0x63 0x08 0xba MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 + 5146 0x83 0x84 0xd0 0x34 0x62 0x2c LDA dn0, [p4], #4; MOVX r13, #12 + 5152 0x04 0x1c 0x46 0x98 LDA dj0, [p4], #4 + 5156 0x04 0x1e 0x26 0x98 LDA dn4, [p4], #4 + 5160 0x04 0x1e 0x46 0x98 LDA dj4, [p4], #4 + 5164 0x04 0x1c 0x06 0x98 LDA m0, [p4], #4 + 5168 0x04 0x1c 0x66 0x98 LDA dc0, [p4], #4 + 5172 0x04 0x1e 0x66 0x98 LDA dc4, [p4], #4 + 5176 0x04 0x1e 0xd6 0x98 LDA r22, [p4], #4 + 5180 0x04 0x1e 0x36 0x98 LDA r17, [p4], #4 + 5184 0x04 0x1f 0x96 0x98 LDA r28, [p4], #4 + 5188 0x04 0x1e 0xb6 0x98 LDA r21, [p4], #4 + 5192 0x04 0x1e 0xf6 0x98 LDA r23, [p4], #4 + 5196 0x04 0x1d 0x9e 0x98 LDA p3, [p4], #4 + 5200 0x04 0x1d 0x26 0x98 LDA dn2, [p4], #4 + 5204 0x04 0x1c 0xa6 0x98 LDA dn1, [p4], #4 + 5208 0x04 0x1c 0xc6 0x98 LDA dj1, [p4], #4 + 5212 0x04 0x1e 0xa6 0x98 LDA dn5, [p4], #4 + 5216 0x04 0x1f 0xd6 0x98 LDA r30, [p4], #4 + 5220 0x04 0x1f 0xb6 0x98 LDA r29, [p4], #4 + 5224 0x04 0x1c 0xe6 0x98 LDA dc1, [p4], #4 + 5228 0x04 0xc2 0x4a 0x98 LDA.u8 r18, [p4, dj6] + 5232 0x07 0xd2 0x91 0x18 LDA r20, [sp, #-48] + 5236 0x04 0x04 0x56 0x98 LDA r2, [p4] + 5240 0x00 0x00 NOPX + 5242 0x00 0x00 NOPX + 5244 0x00 0x00 NOPX + 5246 0x00 0x00 NOPX + 5248 0x14 0xe7 0x2c 0x98 LTU r19, r19, r18 + 5252 0x98 0x0c 0x10 0x40 0x01 0x84 JNZ r19, #6176 +.delay_slot + 5258 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.delay_slot + 5264 0x02 0x05 0xa7 0x18 ST.s8 r13, [p2] +.delay_slot + 5268 0x1c 0xd1 0x72 0xf8 VBCST.16 x9, r20 +.delay_slot +.swstall delay_slot + 5272 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5274 0x00 0x00 NOPX + 5276 0xfb 0x43 0x20 0x1b 0xb9 0x3f 0x80 0x84 0x58 0xba LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 + 5286 0x00 0x13 0x00 0x3d 0x20 0x0a 0x00 0x3c 0x58 0xba MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 + 5296 0xf8 0x14 0x80 0x01 0xa0 0x0b 0xe4 0xd0 0x78 0xba MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 + 5306 0xef 0x98 0x82 0x1c 0x4b 0x1b 0xd4 0x01 0xa7 0xc0 0x78 0x76 MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5318 0xfa 0x96 0x26 0x1c 0x4b 0x01 0xf7 0x89 0xe8 0x07 0x58 0x76 LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5330 0xfb 0xca 0x20 0x00 0x00 0x05 0x32 0xa0 0x10 0xba LDA r18, [sp, #-36]; MOVXM p2, #5440 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5340 0xfc 0x36 0x20 0x34 0x69 0x12 0x8b 0x0c 0x58 0xba LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 + 5350 0xfc 0x87 0x29 0xd7 0x20 0x01 0x90 0x0b 0x08 0x00 0x58 0xb6 LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 + 5362 0x04 0x88 0x16 0x98 LDA r0, [p4], m4 + 5366 0x04 0xab 0x26 0x98 LDA dn6, [p4], m5 + 5370 0x04 0x2f 0x76 0x98 LDA r27, [p4], #8 + 5374 0x04 0x1e 0x86 0x98 LDA m5, [p4], #4 + 5378 0x04 0x8a 0xc6 0x98 LDA dj5, [p4], m4 + 5382 0x04 0x9e 0x06 0x98 LDA m4, [p4], #-28 + 5386 0x04 0x1c 0x36 0x98 LDA r1, [p4], #4 + 5390 0x99 0x02 0xdd 0x06 0x02 0x94 LDA r0, [p4], m6; ADD.NC dj6, r6, r0 + 5396 0x04 0x14 0x76 0x98 LDA r3, [p4, #4] + 5400 0x04 0x04 0x96 0x98 LDA r4, [p4] + 5404 0x19 0xda 0x00 0xf8 MOV r7, m5 + 5408 0x1a 0x83 0x99 0x58 ADD.NC dj2, r7, r6 + 5412 0x1c 0x1b 0x00 0xf8 MOV r16, dj5 + 5416 0x1a 0x0d 0x99 0x58 ADD.NC m2, r27, r6 + 5420 0x1e 0x03 0xe0 0x18 ADD.NC m6, r7, #-64 + 5424 0x18 0xff 0xee 0x10 0xc0 0x24 ADD r3, r3, #-1; ADD.NC m7, r16, #-64 + 5430 0x00 0x2c 0xf0 0x00 0x10 0x00 0x82 0x80 0x7e 0xba NOPA; NOPB; MOV m1, dj2 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.loop_nesting 1 + 5440 0xc3 0x85 0x71 0x85 0x0b 0x04 0xe7 0xec 0x33 0x90 0x78 0x76 VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 + 5452 0x22 0x81 0x78 0x28 0x2b 0x0e 0x4b 0x02 0x33 0x98 0xa0 0xf6 VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 + 5464 0xa0 0x39 0x78 0x28 0x2f 0x5a 0x4b 0x03 0xc6 0x80 0x70 0xf6 VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 + 5476 0xd9 0x0d 0x74 0x03 0x2b 0x53 0x0b 0x01 0x82 0x00 0x70 0xf6 VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 + 5488 0x71 0x41 0x74 0x12 0xd4 0x01 0xc0 0x00 0x5e 0xba VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 + 5498 0xc3 0x95 0x78 0x28 0x28 0x00 0x00 0x05 0xbb 0x40 0x10 0xb6 VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 + 5510 0xdd 0x1d 0x78 0x28 0x28 0x00 0x00 0x04 0x7b 0x28 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 + 5522 0x80 0xb5 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] + 5528 0xc3 0xa5 0x78 0x22 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] + 5534 0xd9 0x2d 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5540 0x22 0x81 0x78 0x28 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5546 0x83 0xbd 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5552 0x80 0xcd 0x74 0x11 0x14 0x02 0x9a 0xc3 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5562 0x80 0xc5 0x78 0x28 0x2c 0x98 0x8b 0x01 0x9a 0xc1 0xe0 0xf6 VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5574 0x02 0x81 0x73 0x00 0x54 0x1d 0x48 0x14 0xe9 0x4a VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5584 0xcf 0x35 0x76 0x94 0x96 0x00 0x00 0x5c 0x58 0x07 0x49 0x2c 0xe9 0x6e VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5598 0x82 0xbd 0x7a 0x38 0x96 0x00 0x00 0x4c 0x90 0x3e 0x4a 0x55 0x09 0x6e VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5612 0x71 0x01 0x74 0x98 0x96 0x00 0x00 0x54 0x90 0x1e 0xf8 0x60 0x3d 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5626 0x22 0x81 0x70 0x04 0xf9 0x64 0x3d 0x62 VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5634 0xa0 0x09 0x70 0x04 0xfa 0x88 0x3d 0x62 VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5642 0x71 0x01 0x70 0x04 0x4b 0x6d 0x09 0x62 VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5650 0x22 0x81 0x74 0x01 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5656 0x3c 0x11 0x14 0x18 VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5660 0xa0 0x09 0x78 0x28 0x2d 0x72 0x7d 0x82 0xfb 0x8c 0x3d 0x66 VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5672 0x71 0x01 0x74 0x14 0x14 0x1d 0xa0 0x06 0x29 0x4a VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5682 0x22 0x81 0x74 0x01 0x28 0x00 0x00 0x58 0xaa 0x0f 0xa2 0x46 0x09 0x4e VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5696 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5712 0xa0 0x09 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x01 0x49 0x01 0xed 0x1b 0x50 0x4b VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5728 0x71 0x01 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x00 0x31 0x4b VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 0x22 0x81 0x74 0x01 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x12 0x30 0x4b VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5760 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.loop_nesting 1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5776 0xa0 0x09 0x7c 0xbc 0x96 0x00 0x00 0x54 0x90 0x1e 0xa3 0x6a 0x09 0x6e VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5790 0x71 0x01 0x7e 0x1c 0x96 0x00 0x00 0x7c 0x38 0x07 0xa0 0x06 0x29 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5804 0x61 0x91 0x61 0x55 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5814 0x6a 0xc1 0x61 0x92 0x07 0xc4 0xa1 0x2a 0x29 0x4a MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5824 0xeb 0x81 0x62 0x92 0x03 0xc4 0xa3 0x6a 0x09 0x4a MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5834 0xb3 0x91 0x6f 0x57 0x22 0x8f 0x00 0xe6 0xa0 0x06 0x29 0x66 PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5846 0x93 0x91 0x62 0x06 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 0x02 0x92 0x03 0xc6 0xa1 0x2a 0x29 0x62 VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5864 0x01 0x92 0x07 0xc6 0xa3 0x6a 0x09 0x62 VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 + 5872 0x1f 0x8b 0x00 0xf8 MOV dj7, dj5 + 5876 0x03 0x0b 0xa0 0xe6 0xa1 0x2a 0x29 0x62 MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 + 5884 0x03 0x88 0xa0 0xe6 0xa0 0x06 0x29 0x62 MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 + 5892 0x00 0xf7 0x23 0x05 0x00 0xe6 0xa3 0x6a 0x09 0x4a PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 + 5902 0x71 0x89 0x6e 0xd7 0x25 0x82 0xa0 0xe6 0xa2 0x46 0x09 0x66 PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 + 5914 0x62 0x89 0x60 0x03 0xc5 0x80 0x70 0x02 MOVS dc3, dc5; MOV dj7, dj5 + 5922 0xa0 0x41 0x60 0x01 0x81 0x00 0x70 0x02 MOVS dc5, r2; MOV m3, m1 + 5930 0xb2 0x12 0xc0 0x00 0x87 0x50 0x70 0x02 VCONV.bf16.fp32 x11, cml1; MOV m1, r29 + 5938 0xa2 0x02 0xc0 0x02 0xc7 0x90 0x70 0x02 VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 + 5946 0x13 0x91 0x61 0x3b 0x90 0x01 0xc8 0x60 0x76 0xba PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 + 5956 0x62 0x0a 0xc0 0x00 0x83 0x00 0x70 0x02 VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 + 5964 0x52 0x22 0xc0 0x57 0x20 0x24 0x02 0xfa 0x00 0x00 0x60 0x36 PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 +.delay_slot + 5976 0x72 0x1a 0xc0 0x00 0xa9 0x60 0x70 0x02 VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 +.delay_slot + 5984 0x82 0x32 0xc0 0x03 0xa7 0xc0 0x70 0x02 VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 +.delay_slot + 5992 0x12 0x3a 0xc5 0x2b 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 +.delay_slot + 6002 0x22 0x2a 0xc0 0x02 0xc2 0x80 0x70 0x02 VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 +.delay_slot + 6010 0xe1 0x89 0x60 0x00 0x4d 0xc0 0x70 0x02 MOVS dc7, dc3; MOV r2, dc5 + 6018 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 6022 0x1b 0xbc 0xec 0xf8 VMAX_LT.bf16 x7, r16, x7, x9 + 6026 0x3c 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 + 6034 0xa2 0xba 0x60 0x01 0xda 0x76 0x70 0x02 VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 + 6042 0x20 0xd2 0x60 0x00 0x02 0xfe 0x00 0x00 0x21 0x3a VST x10, [p1]; J #6128 +.delay_slot + 6052 0x22 0xba 0x60 0x02 0xa2 0x76 0x70 0x02 VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 +.delay_slot + 6060 0x1b 0x8c 0xec 0xf8 VMAX_LT.bf16 x7, r16, x1, x9 +.delay_slot + 6064 0x00 0xd2 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 +.delay_slot + 6072 0x02 0xba 0x60 0x00 0x8a 0x76 0x70 0x02 VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 +.delay_slot + 6080 0x00 0x2c 0xf0 0x00 0x24 0xa2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 + 6096 0x09 0xe0 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p1, dj7] + 6100 0x0d 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p5, #64] + 6104 0x09 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p1] + 6108 0x09 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p1, #64] + 6112 0x08 0x06 0x13 0x18 VST x8, [p0] + 6116 0x08 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p0, #64] + 6120 0x94 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 + 6128 0xe2 0x92 0x6f 0x57 0x20 0x06 0x35 0x01 0x40 0x00 0x58 0x36 PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 +.delay_slot + 6140 0x1b 0x44 0x80 0xf8 MOV dn3, dn2 +.delay_slot + 6144 0x1a 0x49 0xa0 0xf8 MOV dn2, r19 +.delay_slot + 6148 0xeb 0x72 0x05 0x1e 0x01 0xf4 PADDB.3D [p7], d2; MOV dj2, dj7 +.delay_slot + 6154 0x1a 0x4e 0x80 0xf8 MOV dn2, dn7 +.delay_slot +.swstall delay_slot + 6158 0x00 0x00 NOPX +.loop_nesting 0 + 6160 0x00 0x0d 0x58 0x00 0x00 0x84 J #6832 +.delay_slot +.swstall delay_slot + 6166 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6168 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6170 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6172 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6174 0x00 0x00 NOPX +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 + 6176 0xfb 0x7e 0x22 0x0c 0x8b 0x04 0xe1 0x08 0xb3 0x90 0x78 0x76 LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 + 6188 0x07 0x90 0x82 0x56 0x0b 0x1b 0xd4 0x03 0x62 0x40 0x78 0x76 MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 + 6200 0x07 0x94 0x00 0x19 0x31 0x89 0x05 0xd0 0x78 0xba MOVA r20, #60; MOVX r19, #780; MOV m2, r23 + 6210 0xef 0x98 0x86 0x5c 0x0b 0x01 0x20 0xca 0xc7 0x90 0x78 0x76 MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 + 6222 0xfa 0x83 0x25 0x02 0x0b 0x01 0x90 0x08 0x87 0x50 0x78 0x76 LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 + 6234 0xfb 0xd6 0x20 0x01 0x80 0x0b 0x45 0x50 0x78 0xba LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 + 6244 0xfc 0x36 0x20 0x00 0x00 0x05 0x34 0x58 0x10 0xba LDA r13, [sp, #-32]; MOVXM p2, #6320 + 6254 0xfc 0x87 0x26 0xdf 0x72 0x94 LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 + 6260 0x03 0x1d 0xc6 0x98 LDA dj3, [p3], #4 + 6264 0x03 0x8a 0x06 0x98 LDA m4, [p3], m4 + 6268 0x03 0x9e 0x86 0x98 LDA m5, [p3], #-28 + 6272 0x03 0x1e 0xd6 0x98 LDA r22, [p3], #4 + 6276 0x03 0xca 0xf6 0x98 LDA r23, [p3], m6 + 6280 0x03 0x17 0xb6 0x98 LDA r29, [p3, #4] + 6284 0x03 0x07 0x96 0x98 LDA r28, [p3] + 6288 0x00 0x00 NOPX + 6290 0x1f 0x98 0x00 0xf8 MOV r30, m4 + 6294 0x1e 0x07 0x00 0xf8 MOV m6, dj3 + 6298 0x1f 0xdc 0x00 0xf8 MOV r31, m6 + 6302 0x1b 0x0f 0xe0 0x18 ADD.NC m3, r31, #-64 + 6306 0xef 0x7f 0xee 0x1e 0xc0 0x24 ADD r29, r29, #-1; ADD.NC m7, r30, #-64 + 6312 0x00 0x2b 0x60 0x03 0xc7 0x90 0x70 0x02 NOPS; MOV dj7, r30 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.loop_nesting 1 + 6320 0xc3 0x85 0x7a 0x28 0x28 0x00 0x00 0x8f 0x96 0x02 0x71 0x81 0x60 0x7e VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 + 6334 0xcd 0x0d 0x7a 0x28 0x28 0x00 0x00 0x05 0xbc 0xc8 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 + 6346 0x02 0x81 0x76 0x05 0x28 0x05 0xe9 0x6e 0xbf 0x3f 0x48 0xb6 VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 + 6358 0x55 0x59 0x73 0x01 0x14 0x01 0x47 0x90 0x7e 0xba VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 + 6368 0xc3 0x95 0x76 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] + 6374 0xdd 0x1d 0x7a 0x21 0xa8 0x3c VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] + 6380 0xc3 0xa5 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] + 6386 0xcd 0x2d 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] + 6392 0xc3 0xb5 0x76 0x00 0xa8 0x3c VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] + 6398 0xdd 0x3d 0x76 0x03 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] + 6404 0x68 0x45 0x76 0x03 0xa8 0x3c VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] + 6410 0x68 0x4d 0x75 0x12 0x14 0x01 0x69 0x2d 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 + 6420 0x02 0x81 0x75 0x14 0x14 0x02 0xa9 0x2f 0xee 0xba VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 + 6430 0x55 0x01 0x7a 0x28 0x2a 0x11 0xdb 0xc2 0x48 0x0b 0x69 0x66 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 + 6442 0x02 0x81 0x75 0x11 0xdf 0xc2 0x49 0x35 0x69 0x4a VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 + 6452 0x4a 0x49 0x69 0x48 VMAC.f dm2, dm2, ex4, ex11, r9 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6456 0x4b 0x75 0x69 0x48 VMAC.f dm3, dm3, ex10, ex11, r9 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6460 0x06 0x00 0xaa 0x8b 0x5f 0xc6 0xa1 0x84 0x3d 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6470 0x03 0x01 0x94 0x00 0xa0 0x80 0x3d 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6478 0x03 0x01 0xd4 0x00 0xa2 0x88 0x3d 0x62 VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6486 0x55 0x01 0x75 0x12 0x14 0x1d 0xa3 0x8c 0x3d 0x4a VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6496 0xa2 0x82 0x82 0x16 0xb7 0xb4 VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6502 0x0a 0x28 0x2a 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x4a VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6512 0x06 0x00 0xa9 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6522 0x03 0x01 0x94 0x00 0x9b 0x68 0x09 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6530 0x02 0x81 0x76 0x03 0xa8 0x00 0x00 0x00 0x05 0x6c 0x9a 0x46 0x09 0x6e VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6544 0x55 0x01 0x7a 0x24 0x28 0x01 0x5b 0x00 0x00 0x01 0x45 0xaf 0xe8 0x00 0x00 0xe1 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV +.loop_nesting 1 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6560 0x07 0x0c 0xff 0x97 0x25 0x9c 0x8b 0x00 0x85 0xad 0xe0 0xf6 PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6572 0x93 0x91 0x6f 0x17 0x22 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x66 PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6584 0x73 0x91 0x6f 0x97 0x21 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x66 PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6596 0x02 0x88 0xa0 0xe6 0x9b 0x68 0x09 0x62 MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6604 0x02 0xb7 0x20 0x9b 0x80 0xe6 0x9a 0x46 0x09 0x4a PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 + 6614 0x19 0x0b 0x5b 0xd8 VSHUFFLE ex2, ex1, ex6, r22 + 6618 0x1a 0x8b 0x5f 0xd8 VSHUFFLE ex5, ex1, ex6, r23 + 6622 0x01 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x62 VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 + 6630 0x02 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x62 VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 + 6638 0x9a 0x46 0x09 0x48 VMAC.f dm2, dm2, ex3, ex0, r19 + 6642 0x9b 0x68 0x09 0x48 VMAC.f dm3, dm3, ex4, ex0, r19 + 6646 0x00 0x00 NOPX + 6648 0x00 0x00 NOPX + 6650 0x0d 0x10 0x16 0x18 VCONV.bf16.fp32 x10, cml0 + 6654 0x0d 0x90 0x96 0x18 VCONV.bf16.fp32 x11, cml1 + 6658 0x12 0x1a 0xc0 0x2a 0x03 0x4e 0x00 0x00 0x61 0x3a VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 +.delay_slot + 6668 0x0b 0x10 0x56 0x18 VCONV.bf16.fp32 x6, cmh0 +.delay_slot + 6672 0x09 0x11 0x96 0x18 VCONV.bf16.fp32 x2, cml3 +.delay_slot + 6676 0x0b 0x91 0xd6 0x18 VCONV.bf16.fp32 x7, cmh3 +.delay_slot + 6680 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2 +.delay_slot + 6684 0x0c 0x11 0x56 0x18 VCONV.bf16.fp32 x8, cmh2 + 6688 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 6692 0x18 0x8c 0xec 0xf8 VMAX_LT.bf16 x1, r16, x1, x9 + 6696 0xac 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 + 6704 0x82 0x8a 0x60 0x00 0x5a 0x76 0x70 0x02 VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 + 6712 0xa0 0xd2 0x60 0x00 0x03 0x52 0x00 0x00 0x21 0x3a VST x10, [p5]; J #6800 +.delay_slot + 6722 0xa2 0x8a 0x60 0x02 0x8a 0x76 0x70 0x02 VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 +.delay_slot + 6730 0x18 0xbc 0xec 0xf8 VMAX_LT.bf16 x1, r16, x7, x9 +.delay_slot + 6734 0x6c 0x52 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 +.delay_slot + 6742 0x00 0x2c 0xf7 0x14 0x53 0x02 0x22 0x76 0x72 0xba NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 +.delay_slot + 6752 0x00 0x2c 0xf0 0x00 0x24 0xe2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 + 6768 0x0d 0x60 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p5, dj3] + 6772 0x0c 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p4, #64] + 6776 0x0d 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p5] + 6780 0x0d 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p5, #64] + 6784 0x0b 0x61 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p3, dj3] + 6788 0x0f 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p7, #64] + 6792 0x9c 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 + 6800 0x62 0xc2 0x6e 0xf5 0x40 0x5c VST x8, [p3, #64]; JNZD r29, r29, p2 +.delay_slot + 6806 0x3f 0x8b 0x90 0x18 PADDB [p7], m4 +.delay_slot +.swstall delay_slot + 6810 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6812 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6814 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6816 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6832 0x07 0xed 0xf1 0x18 LDA r15, [sp, #-20] + 6836 0x07 0xf1 0x91 0x18 LDA r12, [sp, #-16] + 6840 0x07 0xf5 0x31 0x18 LDA r9, [sp, #-12] + 6844 0x07 0xeb 0x19 0x18 LDA p6, [sp, #-24] + 6848 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 6852 0x07 0xfd 0xd1 0x18 LDA r14, [sp, #-4] + 6856 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 6860 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 6866 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6868 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6870 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6872 0x00 0x00 NOPX +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + +.text_segment PM 6880 +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 6880 0x00 0x20 0x00 0x00 0x01 0xf2 0x32 0x20 0x10 0xba MOVA r0, #1; MOVXM p4, #508992 + 6890 0x80 0xc2 0xd0 0x00 0x10 0x08 0x4b 0xd0 0x78 0xba LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 + 6900 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6906 0x0f 0xf0 0x55 0x98 ST r2, [sp, #-16] + 6910 0x00 0x00 NOPX + 6912 0x00 0x00 NOPX + 6914 0x00 0x00 NOPX + 6916 0x00 0x00 NOPX + 6918 0x80 0x0d 0xd8 0x40 0x01 0x84 JNZ r16, #7088 +.delay_slot + 6924 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 6928 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4] +.delay_slot + 6932 0x0f 0xed 0x9d 0x98 ST p3, [sp, #-20] +.delay_slot + 6936 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 6940 0x00 0x07 0xc7 0xac 0x00 0x44 MOVXM r15, #509440 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6946 0xd0 0x91 0x60 0x00 0x01 0xf3 0xb2 0x34 0x11 0x3a MOVS p6, p1; MOVXM p7, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6956 0xe0 0xc0 0xe1 0x8f 0x0b 0x00 0x01 0xf3 0xb2 0x32 0x10 0x76 ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6968 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6970 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6972 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6978 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6980 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 6984 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 6988 0xe0 0xc2 0x30 0x03 0xb0 0x60 0x70 0x02 ST r16, [p7]; MOV p7, p0 +.delay_slot + 6996 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x32 0x60 0x70 0xf6 NOPA; NOPB; NOPS; MOV p0, p2 +.return_address + 7008 0x1a 0x67 0x85 0x98 ADD.NC p2, r15, #11 + 7012 0x4f 0xc1 0x50 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA.u8 r16, [p2], #7; MOVXM p1, #508996 + 7022 0x43 0xcf 0x50 0x00 0x01 0xf0 0x32 0x30 0x10 0xba LDA.u16 r19, [p2], #2; MOVXM p0, #509024 + 7032 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 7036 0x00 0x00 NOPX + 7038 0x02 0x16 0x5a 0x98 LDA.u16 r18, [p2, #2] + 7042 0x00 0x00 NOPX + 7044 0x00 0x00 NOPX + 7046 0x20 0xc2 0x30 0x00 0xb6 0x60 0x70 0x02 ST r16, [p1]; MOV p1, p6 + 7054 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16 + 7058 0x00 0x00 NOPX + 7060 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 7064 0x00 0x00 NOPX + 7066 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16 + 7070 0x00 0x00 NOPX + 7072 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 7088 0xfd 0xbe 0x20 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r15, [sp, #-20]; MOVXM p6, #509000 + 7098 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x26 0x10 0xba LDA r16, [p6]; MOVXM p2, #509004 + 7108 0x40 0xc6 0xd0 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba LDA r17, [p2]; MOVXM p7, #508992 + 7118 0x07 0x06 0x56 0x98 LDA r18, [p7] + 7122 0x00 0x00 NOPX + 7124 0x00 0x00 NOPX + 7126 0x00 0x00 NOPX + 7128 0x00 0x00 NOPX + 7130 0x80 0x0e 0x18 0x40 0x01 0x84 JNZ r16, #7216 +.delay_slot + 7136 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 7140 0x40 0xc6 0x39 0x44 0x0e 0x5c ST r17, [p2]; ADD r17, r18, #1 +.delay_slot + 7146 0x14 0x26 0x07 0x18 ADD r19, r16, #1 +.delay_slot + 7150 0x0e 0x06 0x71 0x98 ST r19, [p6] +.delay_slot + 7154 0x0f 0x06 0x31 0x98 ST r17, [p7] + 7158 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 7162 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 7166 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 7170 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7174 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7176 0x02 0x46 0x16 0x98 LDA r16, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7180 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7182 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7184 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7186 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7188 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 0x0a 0x06 0x11 0x98 ST r16, [p2] + 7196 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 7200 0x00 0x00 NOPX + 7202 0x00 0x00 NOPX + 7204 0x00 0x00 NOPX + 7206 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17 +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.no_stack_arguments + 7216 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464 +.delay_slot + 7222 0x00 0x07 0xc6 0xcc 0x00 0x44 MOVXM p3, #509440 +.delay_slot +.swstall delay_slot + 7228 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7230 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7232 0x00 0x00 NOPX +.delay_slot + 7234 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x53 0x3d 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p2, r15; NOPV +.return_address + 7248 0xc0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r16, [p6]; MOVXM p1, #508996 + 7258 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7262 0x07 0xf0 0x11 0x18 LDA r0, [sp, #-16] + 7266 0x00 0x00 NOPX + 7268 0x00 0x00 NOPX + 7270 0x00 0x00 NOPX + 7272 0x00 0x00 NOPX + 7274 0x00 0x00 NOPX + 7276 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 7280 0x80 0x0e 0x60 0x40 0x01 0x84 JNZ r16, #7360 +.delay_slot + 7286 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 7290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7294 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7296 0x00 0x00 NOPX + 7298 0x04 0x00 0xa2 0xcf 0x14 0x24 MOVX r16, #1; ADD.NC p1, r15, #20 + 7304 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7308 0x00 0x00 NOPX + 7310 0x00 0x00 NOPX + 7312 0x00 0x00 NOPX + 7314 0x00 0x00 NOPX + 7316 0x00 0x00 NOPX + 7318 0x00 0x00 NOPX + 7320 0x14 0x51 0x08 0x18 REL r17, r16 + 7324 0x3c 0xc6 0xdc 0x0e 0x23 0x0c LDA r17, [p1, #-8]; ST r24, [p6] + 7330 0x00 0x00 NOPX + 7332 0x00 0x00 NOPX + 7334 0x00 0x00 NOPX + 7336 0x00 0x00 NOPX + 7338 0x00 0x00 NOPX + 7340 0x00 0x00 NOPX + 7342 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 7346 0x00 0x2c 0xf3 0xcc 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p1, #-8]; NOPM; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 + 7360 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p6, #509024 + 7370 0x06 0x06 0x36 0x98 LDA r17, [p6] + 7374 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8] + 7378 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 7382 0x00 0x00 NOPX + 7384 0x00 0x00 NOPX + 7386 0x00 0x00 NOPX + 7388 0x00 0x00 NOPX + 7390 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 7394 0x80 0x0e 0x80 0x40 0x01 0x84 JNZ r16, #7424 +.delay_slot +.swstall delay_slot + 7400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7404 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7406 0x00 0x00 NOPX +.delay_slot + 7408 0x1b 0xd0 0x20 0xf8 MOV r15, r0 + 7412 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7424 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] + 7428 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7432 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7442 0x00 0x00 NOPX +.delay_slot + 7444 0x0f 0x84 0x8b 0x18 MOVS p7, p1 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 7456 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function_start + 7456 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64 +.delay_slot + 7462 0x18 0x50 0xc0 0xf8 MOV r1, p0 +.delay_slot + 7466 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32 +.delay_slot + 7470 0x08 0x04 0x11 0x98 ST r0, [p0] +.delay_slot + 7474 0x08 0x14 0x11 0x98 ST r0, [p0, #4] +.delay_slot +.swstall delay_slot + 7478 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 7488 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 7488 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7492 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7498 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] + 7502 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4] + 7506 0x00 0x00 NOPX + 7508 0x00 0x00 NOPX + 7510 0x00 0x00 NOPX + 7512 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7516 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7520 0x00 0x00 NOPX + 7522 0x00 0x00 NOPX + 7524 0x00 0x00 NOPX + 7526 0x00 0x00 NOPX + 7528 0x00 0x00 NOPX + 7530 0x00 0x00 NOPX + 7532 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7536 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 7540 0x00 0x00 NOPX + 7542 0x00 0x00 NOPX + 7544 0x00 0x00 NOPX + 7546 0x00 0x00 NOPX + 7548 0x00 0x00 NOPX + 7550 0x00 0x00 NOPX + 7552 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7556 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 7560 0x00 0x00 NOPX + 7562 0x00 0x00 NOPX +.no_stack_arguments + 7564 0x00 0x0e 0x90 0x00 0x01 0x04 JL #7456 +.delay_slot +.swstall delay_slot + 7570 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7572 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7574 0x00 0x00 NOPX +.delay_slot + 7576 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 7580 0x1b 0xd0 0xc0 0xf8 MOV r15, p0 +.return_address + 7584 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 7594 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 7604 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 7614 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 7618 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7620 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7622 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7626 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7630 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7634 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7640 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7644 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 7648 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 7664 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function_start + 7664 0x02 0x80 0x80 0x00 0x10 0xc8 0x08 0x60 0x78 0xba MOVA m0, #20; MOVX r1, #6; MOV r0, p0 + 7674 0x00 0x00 0xa0 0xc0 0x0c 0x24 MOVX r0, #1; ADD.NC p0, r0, #12 + 7680 0x00 0x08 0x4a 0x98 LDA.u8 r2, [p0], m0 + 7684 0x00 0x00 NOPX + 7686 0x00 0x00 NOPX + 7688 0x00 0x00 NOPX + 7690 0x00 0x00 NOPX + 7692 0x00 0x00 NOPX + 7694 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7698 0x10 0x80 0x08 0x98 NE r0, r2, r0 +.delay_slot + 7702 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1 +.delay_slot + 7706 0x02 0x82 0x31 0x0d 0xe0 0x5c ST r0, [p0, #4]; NEZ r3, r2 +.delay_slot + 7712 0x10 0xc4 0x1d 0x98 LSHL r2, r3, r1 +.delay_slot + 7716 0x08 0x04 0x51 0x98 ST r2, [p0] +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 7728 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 7728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7734 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 7738 0x00 0x0e 0xa0 0x00 0x01 0x04 JL #7488 +.delay_slot + 7744 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 7748 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 7752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7754 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7756 0x00 0x01 0x67 0x98 NOPA +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7760 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7764 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7768 0x00 0x0e 0xf8 0x00 0x00 0x84 J #7664 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7774 0x18 0x6e 0xc0 0xf8 MOV p0, p7 +.delay_slot + 7778 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7786 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7788 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 7792 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function_start + 7792 0x67 0x82 0xd0 0x00 0x51 0x54 LDA r0, [p3], #12; MOV m0, #20 + 7798 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3 + 7804 0x00 0x00 NOPX + 7806 0x00 0x00 NOPX + 7808 0x00 0x00 NOPX + 7810 0x00 0x00 NOPX + 7812 0x00 0x00 NOPX + 7814 0x00 0x00 NOPX + 7816 0x08 0x0f 0x60 0x40 0x01 0x84 JNZ r1, #7872 +.delay_slot + 7822 0x17 0xc4 0xe9 0x18 MOVX r2, #-6 +.delay_slot + 7826 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2 +.delay_slot +.swstall delay_slot + 7830 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7832 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7834 0x00 0x00 NOPX + 7836 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0] + 7840 0x00 0x00 NOPX + 7842 0x00 0x00 NOPX + 7844 0x00 0x00 NOPX + 7846 0x00 0x0f 0x70 0x00 0x00 0x84 J #7904 +.delay_slot +.swstall delay_slot + 7852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7854 0x00 0x00 NOPX +.delay_slot + 7856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot +.swstall delay_slot + 7860 0x00 0x00 NOPX +.delay_slot + 7862 0x00 0x2c 0xf0 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p0]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 + 7872 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1] + 7876 0x00 0x00 NOPX + 7878 0x00 0x00 NOPX + 7880 0x00 0x00 NOPX + 7882 0x00 0x00 NOPX + 7884 0x00 0x00 NOPX + 7886 0x00 0x00 NOPX + 7888 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 + 7892 0x00 0x00 NOPX + 7894 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 + 7904 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 + 7914 0x62 0x90 0xd0 0x00 0x00 0x04 0x7f 0xa8 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #8016 + 7924 0x00 0x00 0x16 0xfe 0xe0 0x44 MOVXM le, #8048 + 7930 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032 + 7936 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 7940 0x00 0x00 NOPX + 7942 0x00 0x00 NOPX + 7944 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 + 7948 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1 + 7952 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7956 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7960 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7966 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7974 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7978 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7986 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7990 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7998 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8002 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8016 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8020 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8032 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8048 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8064 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8066 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8074 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8076 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8084 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8086 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr +.delay_slot +.swstall delay_slot + 8092 0x00 0x00 NOPX +.delay_slot + 8094 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64 +.delay_slot +.swstall delay_slot + 8098 0x00 0x00 NOPX +.delay_slot + 8100 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64 +.delay_slot +.swstall delay_slot + 8104 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 8112 +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function_start + 8112 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 8118 0xff 0x87 0xb0 0x02 0x0a 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p2 + 8126 0x50 0x91 0x60 0x01 0xb4 0x03 0x00 0x02 MOVS p2, p1; ADD.NC p3, r16, #12 + 8134 0x65 0xed 0x58 0x21 0x81 0xd4 LDA.u8 r27, [p3], #2; MOV r16, p0 + 8140 0x73 0xca 0x58 0xab 0xc1 0xd4 LDA.s16 r18, [p3], #-14; MOV r17, sp + 8146 0x18 0x68 0xc0 0x18 ADD.NC p0, r17, #-128 + 8150 0x08 0x07 0x2b 0x18 VST sfh, [p0] + 8154 0x00 0x06 0x57 0x18 ST.s16 r18, [p0] + 8158 0x00 0x00 NOPX + 8160 0x00 0x00 NOPX +.no_stack_arguments + 8162 0x00 0x0f 0x38 0x00 0x01 0x04 JL #7792 +.delay_slot + 8168 0x1c 0x50 0xc0 0xf8 MOV r17, p0 +.delay_slot +.swstall delay_slot + 8172 0x00 0x00 NOPX +.delay_slot + 8174 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27 +.delay_slot + 8178 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18 +.delay_slot + 8184 0x00 0x2b 0x60 0x00 0x34 0x10 0x70 0x02 NOPS; MOV p0, r16 +.return_address + 8192 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 8196 0x00 0x00 NOPX + 8198 0x00 0x00 NOPX + 8200 0x00 0x00 NOPX + 8202 0x00 0x00 NOPX + 8204 0x00 0x00 NOPX + 8206 0x00 0x00 NOPX + 8208 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8212 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 8218 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8220 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8222 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8224 0x00 0x00 NOPX +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 8240 +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 8240 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 8246 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 8252 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 8258 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 8266 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 8276 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 8280 0x00 0x00 NOPX + 8282 0x00 0x00 NOPX + 8284 0x80 0x10 0x80 0x40 0x01 0x84 JNZ r16, #8448 +.delay_slot + 8290 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 8294 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 8298 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 8302 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 8310 0x00 0x07 0xc0 0xc9 0x80 0x44 MOVXM p0, #509120 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8316 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8322 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8332 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8334 0x00 0x0f 0x18 0x00 0x01 0x04 JL #7728 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8340 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8342 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8344 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 8348 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 8352 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 8368 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 8374 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r16, [p2]; MOVXM p2, #509120 + 8384 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p2, #509120 + 8394 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 + 8404 0x00 0x00 NOPX + 8406 0x00 0x00 NOPX + 8408 0x00 0x10 0x88 0x00 0x00 0x84 J #8464 +.delay_slot + 8414 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024 +.delay_slot +.swstall delay_slot + 8420 0x00 0x00 NOPX +.delay_slot + 8422 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 8426 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 8432 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 8448 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 8464 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 8472 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 8482 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 8486 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 8490 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 8494 0x00 0x00 NOPX + 8496 0x00 0x00 NOPX + 8498 0x00 0x00 NOPX + 8500 0x00 0x00 NOPX + 8502 0x00 0x00 NOPX + 8504 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 8508 0x0f 0x06 0x11 0x98 ST r16, [p7] + 8512 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 8516 0x00 0x00 NOPX + 8518 0x00 0x00 NOPX + 8520 0x00 0x00 NOPX + 8522 0x14 0x93 0x08 0x18 ACQ r18, r16 + 8526 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 8532 0x00 0x00 NOPX + 8534 0x00 0x00 NOPX + 8536 0x00 0x06 0x36 0x98 LDA r17, [p0] + 8540 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 8546 0x01 0x06 0x76 0x98 LDA r19, [p1] + 8550 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 8554 0x00 0x00 NOPX +.no_stack_arguments + 8556 0x00 0x0f 0xd8 0x00 0x01 0x04 JL #8112 +.delay_slot +.swstall delay_slot + 8562 0x00 0x00 NOPX +.delay_slot + 8564 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 8568 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 8572 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 8576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 8592 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 8602 0x10 0x20 0x05 0x18 MOVX r16, #1 + 8606 0x00 0x00 NOPX + 8608 0x00 0x00 NOPX + 8610 0x00 0x00 NOPX + 8612 0x00 0x00 NOPX + 8614 0x00 0x00 NOPX + 8616 0x14 0x51 0x08 0x18 REL r17, r16 + 8620 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024 + 8630 0x06 0x06 0x36 0x98 LDA r17, [p6] + 8634 0x02 0x06 0x56 0x98 LDA r18, [p2] + 8638 0x00 0x00 NOPX + 8640 0x00 0x00 NOPX + 8642 0x00 0x00 NOPX + 8644 0x00 0x00 NOPX + 8646 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 8650 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 8654 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 8658 0x80 0x10 0xf8 0x40 0x01 0x84 JNZ r16, #8688 +.delay_slot +.swstall delay_slot + 8664 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8666 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8668 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8670 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8672 0x00 0x00 NOPX + 8674 0x10 0x20 0x01 0x18 MOVX r16, #0 + 8678 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 8688 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 8692 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 8696 0x00 0x00 NOPX + 8698 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8700 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8702 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8706 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8708 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8712 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 8716 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 8722 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8724 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8726 0x00 0x00 NOPX +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 8736 +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function_start + 8736 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8740 0x00 0x00 NOPX + 8742 0x00 0x00 NOPX + 8744 0x00 0x00 NOPX + 8746 0x00 0x00 NOPX + 8748 0x00 0x00 NOPX + 8750 0x00 0x00 NOPX + 8752 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 8756 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8760 0x00 0x00 NOPX + 8762 0x00 0x00 NOPX + 8764 0x00 0x00 NOPX + 8766 0x00 0x00 NOPX + 8768 0x00 0x00 NOPX + 8770 0x00 0x00 NOPX + 8772 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 8776 0x01 0x6c 0x2e 0x98 LDA el0, [p1], #24 + 8780 0x01 0x04 0x12 0x98 LDA.s16 r0, [p1] + 8784 0x00 0x00 NOPX + 8786 0x00 0x00 NOPX + 8788 0x00 0x00 NOPX + 8790 0x00 0x00 NOPX + 8792 0x00 0x00 NOPX + 8794 0x08 0x6c 0x29 0x98 ST el0, [p0], #24 + 8798 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] + 8802 0x00 0x00 NOPX + 8804 0x00 0x00 NOPX + 8806 0x00 0x00 NOPX + 8808 0x00 0x00 NOPX + 8810 0x00 0x00 NOPX + 8812 0x00 0x00 NOPX + 8814 0x01 0x24 0x12 0x98 LDA.s16 r0, [p1, #4] + 8818 0x00 0x14 0x17 0x18 ST.s16 r0, [p0, #2] + 8822 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 8826 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8828 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8830 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8832 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8834 0x00 0x00 NOPX +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + +.text_segment PM 8848 +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function_start + 8848 0xfb 0xc2 0x80 0x3a 0x68 0x00 0x00 0x08 0x79 0x88 0x10 0xb6 MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 + 8860 0xff 0x51 0x00 0x39 0x68 0x00 0x00 0x09 0xb9 0xa0 0x10 0xb6 MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 + 8872 0x18 0x14 0xc0 0xf8 MOV r0, p2 + 8876 0x1a 0x60 0x10 0x18 ADD.NC p2, r0, #32 + 8880 0x02 0x1c 0x52 0x98 LDA.s16 r2, [p2], #2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8884 0x02 0x00 0x16 0x98 LDA r0, [p2, dj0] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8888 0x40 0x86 0x50 0x3a 0x68 0x3c LDA.s16 r1, [p2]; VLDB x4, [p0], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8894 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8898 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8900 0x38 0x1c 0xb4 0x18 VLDB x2, [p0], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 0x18 0x09 0x72 0xf8 VBCST.16 x0, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8908 0x00 0x3a 0x68 0x01 0x18 0xed 0x50 0x36 0x78 0x3a VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8918 0x1d 0x78 0xfe 0x98 ADD.NC lc, r17, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8922 0x18 0x85 0x72 0xf8 VBCST.16 x1, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8926 0x19 0xa8 0xac 0xf8 VMIN_GE.bf16 x3, r16, x5, x1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 0x00 0x2c 0xf0 0x39 0x68 0x00 0x00 0x31 0x06 0xcf 0x00 0x2b 0x60 0x7e NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8944 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8976 0x00 0x2c 0xf0 0x3a 0x69 0x1d 0xd3 0x00 0x00 0x00 0xd4 0x56 0x78 0x00 0x00 0xe1 NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8992 0x00 0x2c 0xf0 0x39 0x68 0x01 0x5b 0x00 0x00 0x01 0x88 0x36 0x78 0x00 0x00 0xe1 NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9008 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9040 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9048 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0 + 9052 0x23 0x9a 0x60 0x01 0xd8 0x56 0x70 0x02 VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 + 9060 0x05 0x00 0x05 0x40 0xd9 0xe4 RET lr; VMAX_LT.bf16 x5, r16, x4, x0 +.delay_slot + 9066 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 +.delay_slot + 9074 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0 +.delay_slot + 9078 0x1b 0xb0 0xac 0xf8 VMIN_GE.bf16 x7, r16, x6, x1 +.delay_slot + 9082 0x09 0x1c 0xd3 0x18 VST x3, [p1], #64 +.delay_slot + 9086 0x09 0x1d 0xd3 0x18 VST x7, [p1], #64 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + +.text_segment PM 9104 +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 9104 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 9110 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 9116 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9122 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 9130 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 9140 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 9144 0x00 0x00 NOPX + 9146 0x00 0x00 NOPX + 9148 0x80 0x12 0x30 0x40 0x01 0x84 JNZ r16, #9312 +.delay_slot + 9154 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 9158 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 9162 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 9166 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 9174 0x00 0x07 0xc0 0xcb 0x80 0x44 MOVXM p0, #509376 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9180 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9186 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9196 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9198 0x00 0x11 0x10 0x00 0x01 0x04 JL #8736 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9204 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9206 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 9212 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 9216 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 9232 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 9238 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0xe0 0x10 0xba LDA r16, [p2]; MOVXM p2, #509376 + 9248 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0xe0 0x10 0xba LDA r17, [p2]; MOVXM p2, #509376 + 9258 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 + 9268 0x00 0x00 NOPX + 9270 0x00 0x00 NOPX + 9272 0x00 0x12 0x38 0x00 0x00 0x84 J #9328 +.delay_slot + 9278 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024 +.delay_slot +.swstall delay_slot + 9284 0x00 0x00 NOPX +.delay_slot + 9286 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 9290 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 9296 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 9312 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 9328 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 9336 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 9346 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 9350 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 9354 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 9358 0x00 0x00 NOPX + 9360 0x00 0x00 NOPX + 9362 0x00 0x00 NOPX + 9364 0x00 0x00 NOPX + 9366 0x00 0x00 NOPX + 9368 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 9372 0x0f 0x06 0x11 0x98 ST r16, [p7] + 9376 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 9380 0x00 0x00 NOPX + 9382 0x00 0x00 NOPX + 9384 0x00 0x00 NOPX + 9386 0x14 0x93 0x08 0x18 ACQ r18, r16 + 9390 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 9396 0x00 0x00 NOPX + 9398 0x00 0x00 NOPX + 9400 0x00 0x06 0x36 0x98 LDA r17, [p0] + 9404 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 9410 0x01 0x06 0x76 0x98 LDA r19, [p1] + 9414 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 9418 0x00 0x00 NOPX +.no_stack_arguments + 9420 0x00 0x11 0x48 0x00 0x01 0x04 JL #8848 +.delay_slot +.swstall delay_slot + 9426 0x00 0x00 NOPX +.delay_slot + 9428 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 9432 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 9436 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 9440 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 9456 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 9466 0x10 0x20 0x05 0x18 MOVX r16, #1 + 9470 0x00 0x00 NOPX + 9472 0x00 0x00 NOPX + 9474 0x00 0x00 NOPX + 9476 0x00 0x00 NOPX + 9478 0x00 0x00 NOPX + 9480 0x14 0x51 0x08 0x18 REL r17, r16 + 9484 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024 + 9494 0x06 0x06 0x36 0x98 LDA r17, [p6] + 9498 0x02 0x06 0x56 0x98 LDA r18, [p2] + 9502 0x00 0x00 NOPX + 9504 0x00 0x00 NOPX + 9506 0x00 0x00 NOPX + 9508 0x00 0x00 NOPX + 9510 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 9514 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 9518 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 9522 0x80 0x12 0xa8 0x40 0x01 0x84 JNZ r16, #9552 +.delay_slot +.swstall delay_slot + 9528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9530 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9534 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9536 0x00 0x00 NOPX + 9538 0x10 0x20 0x01 0x18 MOVX r16, #0 + 9542 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 9552 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 9556 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 9560 0x00 0x00 NOPX + 9562 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9564 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9566 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9570 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9572 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9576 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 9580 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 9586 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9588 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9590 0x00 0x00 NOPX +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 9600 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function_start + 9600 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 + 9610 0x17 0x80 0x01 0x18 MOVX r0, #-128 + 9614 0x00 0x00 NOPX + 9616 0x00 0x00 NOPX + 9618 0x00 0x00 NOPX + 9620 0x00 0x00 NOPX + 9622 0x00 0x00 NOPX + 9624 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9628 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9632 0x00 0x00 NOPX + 9634 0x00 0x00 NOPX + 9636 0x00 0x00 NOPX + 9638 0x00 0x00 NOPX + 9640 0x00 0x00 NOPX + 9642 0x00 0x00 NOPX + 9644 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9648 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 9652 0x00 0x00 NOPX + 9654 0x00 0x00 NOPX + 9656 0x00 0x00 NOPX + 9658 0x00 0x00 NOPX + 9660 0x00 0x00 NOPX + 9662 0x00 0x00 NOPX + 9664 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9668 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 9672 0x00 0x00 NOPX + 9674 0x00 0x00 NOPX + 9676 0x00 0x00 NOPX + 9678 0x00 0x00 NOPX + 9680 0x00 0x00 NOPX + 9682 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9684 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9688 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9692 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9696 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9702 0x10 0xc2 0x14 0x98 AND r1, r3, r1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9706 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9710 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 9714 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 9728 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 9728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9734 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 9738 0x00 0x12 0xc0 0x00 0x01 0x04 JL #9600 +.delay_slot + 9744 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] +.delay_slot + 9748 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 9752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9754 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9756 0x00 0x01 0x67 0x98 NOPA +.return_address + 9760 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 9764 0x00 0x00 NOPX + 9766 0x00 0x00 NOPX + 9768 0x00 0x00 NOPX + 9770 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9772 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9774 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9778 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9782 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9786 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9788 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9792 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 9808 +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function_start + 9808 0x18 0x16 0xc0 0xf8 MOV r0, p3 + 9812 0x1b 0x60 0x07 0x18 ADD.NC p3, r0, #14 + 9816 0x03 0x1c 0x52 0x98 LDA.s16 r2, [p3], #2 + 9820 0x03 0x04 0x96 0x98 LDA r4, [p3] + 9824 0x00 0x00 NOPX + 9826 0x00 0x00 NOPX + 9828 0x00 0x00 NOPX + 9830 0x00 0x00 NOPX + 9832 0x10 0x06 0x09 0x18 MOVX r3, #2 + 9836 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 9842 0x10 0xc6 0x4c 0x98 LTU r3, r3, r4 + 9846 0x00 0x01 0x00 0x06 0x04 0xe2 0x10 0x00 0x60 0xba MOVA r1, #0; JNZ r3, #10000 +.delay_slot + 9856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot + 9860 0x18 0x5e 0xc0 0xf8 MOV r1, p7 +.delay_slot + 9864 0x1f 0x65 0xe0 0xf8 MOV p7, sp +.delay_slot + 9868 0xff 0xf2 0x0a 0xdd 0x81 0xf4 PADDB [p7], #-64; MOV p5, p7 +.delay_slot + 9874 0x0f 0x04 0x13 0x18 VST x0, [p7] + 9878 0x01 0x82 0x84 0x80 0x0b 0x00 0x04 0xb9 0x72 0xba MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 + 9888 0x80 0x01 0x54 0x01 0x01 0x54 LDA.u8 r0, [p4, dj0]; MOV m2, #64 + 9894 0x00 0x00 NOPX + 9896 0x00 0x00 NOPX + 9898 0x00 0x00 NOPX + 9900 0x00 0x00 NOPX + 9902 0x00 0x00 NOPX + 9904 0x00 0x00 NOPX + 9906 0x00 0x13 0x70 0x40 0x01 0x84 JNZ r0, #9952 +.delay_slot + 9912 0x18 0x00 0x00 0xb8 MOV m0, #0 +.delay_slot + 9916 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032 +.delay_slot +.swstall delay_slot + 9922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9926 0x00 0x00 NOPX + 9928 0x00 0x04 0x80 0x00 0x04 0xde 0x00 0x00 0x20 0xba MOVA m1, #0; J #9968 +.delay_slot +.swstall delay_slot + 9938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9940 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9942 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9944 0x00 0x00 NOPX +.delay_slot + 9946 0x00 0x2c 0xf0 0x08 0x26 0x0c NOPA; VST x0, [p0] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 9952 0x19 0x00 0x80 0xb8 MOV m1, #64 + 9956 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0 +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 9968 0x00 0x13 0xc8 0x00 0x00 0x84 J #10128 +.delay_slot + 9974 0x13 0x91 0x60 0x03 0xb0 0x60 0x70 0x02 MOVS p0, p7; MOV p7, p0 +.delay_slot +.swstall delay_slot + 9982 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9984 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9986 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9988 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 + 10000 0x10 0x04 0x0d 0x18 MOVX r2, #3 + 10004 0x10 0x84 0x47 0x98 EQ r2, r2, r4 + 10008 0x10 0x13 0xa0 0x40 0x01 0x84 JNZ r2, #10048 +.delay_slot + 10014 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216 +.delay_slot + 10020 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032 +.delay_slot +.swstall delay_slot + 10026 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10028 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10030 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10032 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10048 0x80 0x80 0x50 0x02 0xd2 0x00 0x47 0xbe 0x58 0xba LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10058 0x18 0x00 0x80 0xb8 MOV m0, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10062 0x19 0x00 0x00 0xb8 MOV m1, #0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10066 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10070 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10072 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10076 0xa0 0x02 0xe2 0x01 0x25 0xd4 ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10082 0x10 0x3a 0x80 0x18 MOVX crRnd, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10086 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10090 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10092 0x18 0x01 0x01 0xb8 VEXTRACT.16 r0, x0, #0, vaddSign0 + 10096 0x00 0x00 NOPX + 10098 0x00 0x00 NOPX + 10100 0x05 0x00 0x12 0x98 LDA.s16 r0, [p5, dj0] + 10104 0x00 0x00 NOPX + 10106 0x00 0x00 NOPX + 10108 0x00 0x00 NOPX + 10110 0x00 0x00 NOPX + 10112 0x00 0x00 NOPX + 10114 0x00 0x00 NOPX + 10116 0x18 0x01 0x72 0xf8 VBCST.16 x0, r0 + 10120 0x00 0x00 NOPX + 10122 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 + 10128 0x78 0x8a 0xde 0x50 0xe8 0x00 0x00 0x08 0x7c 0x00 0x10 0xb6 LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10140 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x09 0xbc 0x18 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10152 0x01 0x05 0x7e 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 0x80 0x90 0x52 0x90 0x68 0x3c LDA.s8 r4, [p4]; VLDB x0, [p1], m2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10166 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10168 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10172 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10174 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10178 0x05 0x71 0x7e 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10186 0x29 0x03 0x7e 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p7], m1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x09 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10240 0x29 0x03 0x7e 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10304 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10310 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10314 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10316 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10320 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10322 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10326 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 10330 0x1f 0x60 0xa0 0xf8 MOV p7, r1 +.delay_slot +.swstall delay_slot + 10334 0x00 0x00 NOPX +.delay_slot + 10336 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 10340 0x00 0x00 NOPX +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + +.text_segment PM 10352 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function_start + 10352 0x01 0x82 0x83 0x88 0x8b 0x00 0x60 0xf0 0x72 0xba MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr + 10362 0x40 0x01 0x54 0xc5 0x81 0xd4 LDA.u8 r0, [p2, dj0]; MOV p2, p1 + 10368 0x00 0x00 NOPX + 10370 0x00 0x00 NOPX + 10372 0x00 0x00 NOPX + 10374 0x00 0x00 NOPX + 10376 0x00 0x00 NOPX + 10378 0x00 0x00 NOPX + 10380 0x00 0x14 0x68 0x00 0x01 0x84 JZ r0, #10448 +.delay_slot + 10386 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 10392 0x18 0x55 0xe0 0xf8 MOV r1, sp +.delay_slot + 10396 0x19 0x60 0xe0 0x18 ADD.NC p1, r1, #-64 +.delay_slot + 10400 0x09 0x07 0x2b 0x18 VST sfh, [p1] +.delay_slot +.swstall delay_slot + 10404 0x00 0x00 NOPX +.no_stack_arguments + 10406 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808 +.delay_slot +.swstall delay_slot + 10412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10416 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10418 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10420 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 10432 0x00 0x14 0x78 0x00 0x00 0x84 J #10480 +.delay_slot +.swstall delay_slot + 10438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10442 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10444 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10446 0x00 0x00 NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.no_stack_arguments + 10448 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808 +.delay_slot + 10454 0x10 0x91 0x60 0x00 0xb0 0x60 0x70 0x02 MOVS p0, p1; MOV p1, p0 +.delay_slot +.swstall delay_slot + 10462 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10464 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10468 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.return_address + 10480 0x1f 0x71 0x80 0xf8 MOV lr, dc0 + 10484 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10488 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 10494 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10500 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 10512 +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 10512 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 10518 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 10524 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 10530 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 10538 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 10548 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 10552 0x00 0x00 NOPX + 10554 0x00 0x00 NOPX + 10556 0x80 0x14 0xf0 0x40 0x01 0x84 JNZ r16, #10720 +.delay_slot + 10562 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 10566 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 10570 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 10574 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 10582 0x00 0x07 0xc0 0xca 0x00 0x44 MOVXM p0, #509184 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10588 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10594 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10604 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10606 0x00 0x13 0x00 0x00 0x01 0x04 JL #9728 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10612 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10614 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10616 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 10620 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 10624 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 10640 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 10646 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #509184 + 10656 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #509184 + 10666 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 + 10676 0x00 0x00 NOPX + 10678 0x00 0x00 NOPX + 10680 0x00 0x14 0xf8 0x00 0x00 0x84 J #10736 +.delay_slot + 10686 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024 +.delay_slot +.swstall delay_slot + 10692 0x00 0x00 NOPX +.delay_slot + 10694 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 10698 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 10704 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 10720 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 10736 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 10744 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 10754 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 10758 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 10762 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 10766 0x00 0x00 NOPX + 10768 0x00 0x00 NOPX + 10770 0x00 0x00 NOPX + 10772 0x00 0x00 NOPX + 10774 0x00 0x00 NOPX + 10776 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 10780 0x0f 0x06 0x11 0x98 ST r16, [p7] + 10784 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 10788 0x00 0x00 NOPX + 10790 0x00 0x00 NOPX + 10792 0x00 0x00 NOPX + 10794 0x14 0x93 0x08 0x18 ACQ r18, r16 + 10798 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 10804 0x00 0x00 NOPX + 10806 0x00 0x00 NOPX + 10808 0x00 0x06 0x36 0x98 LDA r17, [p0] + 10812 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 10818 0x01 0x06 0x76 0x98 LDA r19, [p1] + 10822 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 10826 0x00 0x00 NOPX +.no_stack_arguments + 10828 0x00 0x14 0x38 0x00 0x01 0x04 JL #10352 +.delay_slot +.swstall delay_slot + 10834 0x00 0x00 NOPX +.delay_slot + 10836 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 10840 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 10844 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 10848 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 10864 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 10874 0x10 0x20 0x05 0x18 MOVX r16, #1 + 10878 0x00 0x00 NOPX + 10880 0x00 0x00 NOPX + 10882 0x00 0x00 NOPX + 10884 0x00 0x00 NOPX + 10886 0x00 0x00 NOPX + 10888 0x14 0x51 0x08 0x18 REL r17, r16 + 10892 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024 + 10902 0x06 0x06 0x36 0x98 LDA r17, [p6] + 10906 0x02 0x06 0x56 0x98 LDA r18, [p2] + 10910 0x00 0x00 NOPX + 10912 0x00 0x00 NOPX + 10914 0x00 0x00 NOPX + 10916 0x00 0x00 NOPX + 10918 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 10922 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 10926 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 10930 0x80 0x15 0x68 0x40 0x01 0x84 JNZ r16, #10960 +.delay_slot +.swstall delay_slot + 10936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10940 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10942 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10944 0x00 0x00 NOPX + 10946 0x10 0x20 0x01 0x18 MOVX r16, #0 + 10950 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 10960 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 10964 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 10968 0x00 0x00 NOPX + 10970 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10972 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10974 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10978 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10980 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10984 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 10988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 10994 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10996 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10998 0x00 0x00 NOPX +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 11008 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function_start + 11008 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 + 11018 0x17 0x80 0x01 0x18 MOVX r0, #-128 + 11022 0x00 0x00 NOPX + 11024 0x00 0x00 NOPX + 11026 0x00 0x00 NOPX + 11028 0x00 0x00 NOPX + 11030 0x00 0x00 NOPX + 11032 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11036 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11040 0x00 0x00 NOPX + 11042 0x00 0x00 NOPX + 11044 0x00 0x00 NOPX + 11046 0x00 0x00 NOPX + 11048 0x00 0x00 NOPX + 11050 0x00 0x00 NOPX + 11052 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11056 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 11060 0x00 0x00 NOPX + 11062 0x00 0x00 NOPX + 11064 0x00 0x00 NOPX + 11066 0x00 0x00 NOPX + 11068 0x00 0x00 NOPX + 11070 0x00 0x00 NOPX + 11072 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11076 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 11080 0x00 0x00 NOPX + 11082 0x00 0x00 NOPX + 11084 0x00 0x00 NOPX + 11086 0x00 0x00 NOPX + 11088 0x00 0x00 NOPX + 11090 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11092 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11096 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11100 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11104 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11110 0x10 0xc2 0x14 0x98 AND r1, r3, r1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11114 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11118 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 11122 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 11136 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 11136 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11142 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 11146 0x00 0x15 0x80 0x00 0x01 0x04 JL #11008 +.delay_slot + 11152 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.delay_slot + 11156 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] +.delay_slot + 11160 0x1b 0xd0 0xc0 0xf8 MOV r15, p0 +.delay_slot +.swstall delay_slot + 11164 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11166 0x00 0x00 NOPX +.return_address + 11168 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 11178 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 11188 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 11198 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 11202 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11204 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11206 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11210 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11214 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11218 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11224 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11228 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 11232 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 11248 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.tail_call +.function_start + 11248 0x00 0x13 0x28 0x00 0x00 0x84 J #9808 +.delay_slot +.swstall delay_slot + 11254 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11256 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11258 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11260 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11262 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function_start + 11264 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64 +.delay_slot + 11270 0x18 0x50 0xc0 0xf8 MOV r1, p0 +.delay_slot + 11274 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32 +.delay_slot + 11278 0x08 0x04 0x11 0x98 ST r0, [p0] +.delay_slot + 11282 0x08 0x14 0x11 0x98 ST r0, [p0, #4] +.delay_slot +.swstall delay_slot + 11286 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 11296 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 11296 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11300 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11306 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 11310 0x00 0x00 NOPX + 11312 0x00 0x00 NOPX + 11314 0x00 0x00 NOPX + 11316 0x00 0x00 NOPX + 11318 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11322 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11326 0x00 0x00 NOPX + 11328 0x00 0x00 NOPX + 11330 0x00 0x00 NOPX + 11332 0x00 0x00 NOPX + 11334 0x00 0x00 NOPX + 11336 0x00 0x00 NOPX + 11338 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11342 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 11346 0x00 0x00 NOPX + 11348 0x00 0x00 NOPX + 11350 0x00 0x00 NOPX + 11352 0x00 0x00 NOPX + 11354 0x00 0x00 NOPX + 11356 0x00 0x00 NOPX + 11358 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11362 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 11366 0x00 0x00 NOPX + 11368 0x00 0x00 NOPX +.no_stack_arguments + 11370 0x00 0x16 0x00 0x00 0x01 0x04 JL #11264 +.delay_slot + 11376 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 11380 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11382 0x00 0x00 NOPX +.delay_slot + 11384 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 11388 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.return_address + 11392 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 11396 0x00 0x00 NOPX + 11398 0x00 0x00 NOPX + 11400 0x00 0x00 NOPX + 11402 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11404 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11406 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11410 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11416 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11418 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11420 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11424 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 11440 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function_start + 11440 0x04 0x00 0x80 0x00 0x00 0x08 0x7e 0xb0 0x10 0xba MOVA m0, #32; MOVXM ls, #11616 + 11450 0x61 0x0e 0xd0 0x00 0x00 0x09 0xbe 0xb8 0x10 0xba LDA r3, [p3], m0; MOVXM le, #11632 + 11460 0x60 0x90 0xd0 0x3e 0x17 0x48 0x0b 0x3c 0x58 0xba LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 + 11470 0x62 0x80 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m0, [p3, #4]; MOVXM p4, #509032 + 11480 0x04 0x04 0x42 0x98 LDA.s8 r2, [p4] + 11484 0x00 0x00 NOPX + 11486 0x00 0x00 NOPX + 11488 0x00 0x00 NOPX + 11490 0x10 0xc2 0x1d 0x98 LSHL r1, r3, r1 + 11494 0x05 0x0e 0x8a 0xe1 0xf9 0x34 VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11500 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11506 0x21 0x1b 0x70 0x50 0xe8 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11514 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11520 0x21 0x1b 0x70 0x50 0xe8 0x3c VLDA x3, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11526 0x01 0x08 0x9b 0x98 VLDA x2, [p1], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11530 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11536 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11542 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11552 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11562 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11572 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11582 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11592 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11602 0x21 0x1b 0x70 0x50 0x68 0x00 0x00 0x08 0x70 0x8c 0x00 0xe2 0x41 0x6e VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11616 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11632 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11648 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11656 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11664 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11672 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11680 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11688 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11696 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11704 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11708 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11714 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11718 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 11722 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 11726 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 11730 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 11744 +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 11744 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992 + 11750 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 11756 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11762 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 11772 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 11780 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 11784 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 11788 0x00 0x00 NOPX + 11790 0x80 0x17 0x50 0x40 0x01 0x84 JNZ r16, #11936 +.delay_slot + 11796 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 11800 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 +.delay_slot + 11806 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 11814 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 11818 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0xc0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509312 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11828 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x34 0x11 0x3a MOVS p0, p7; MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11838 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11848 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11850 0x00 0x16 0x10 0x00 0x01 0x04 JL #11296 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11856 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11858 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11860 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 11864 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 11868 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 11872 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x28 0x10 0xba LDA r16, [p7]; MOVXM p1, #509008 + 11882 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb2 0x2a 0x10 0xba LDA r17, [p1]; MOVXM p3, #509012 + 11892 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 + 11902 0x00 0x00 NOPX + 11904 0x00 0x00 NOPX + 11906 0x00 0x00 NOPX + 11908 0x00 0x17 0x58 0x00 0x00 0x84 J #11952 +.delay_slot + 11914 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024 +.delay_slot + 11920 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 11924 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 11928 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 11932 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 11936 0x00 0x07 0xc6 0xc8 0xa8 0x44 MOVXM p3, #509012 + 11942 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba NOPA; MOVXM p1, #509020 +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 11952 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 11956 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508992 + 11966 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 11970 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 11974 0x02 0x06 0x56 0x98 LDA r18, [p2] + 11978 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 11982 0x00 0x00 NOPX + 11984 0x00 0x00 NOPX + 11986 0x00 0x00 NOPX + 11988 0x00 0x00 NOPX + 11990 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 11994 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 12000 0x0a 0x06 0x11 0x98 ST r16, [p2] + 12004 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 12008 0x00 0x00 NOPX + 12010 0x00 0x00 NOPX + 12012 0x00 0x00 NOPX + 12014 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 12018 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 12022 0x00 0x00 NOPX + 12024 0x00 0x00 NOPX + 12026 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 12030 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 12034 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 12038 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 12042 0x00 0x00 NOPX + 12044 0x00 0x00 NOPX + 12046 0x00 0x00 NOPX + 12048 0x00 0x00 NOPX + 12050 0x00 0x00 NOPX + 12052 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 12056 0x0a 0x06 0x31 0x98 ST r17, [p2] + 12060 0x00 0x00 NOPX + 12062 0x00 0x00 NOPX + 12064 0x00 0x00 NOPX + 12066 0x00 0x00 NOPX + 12068 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 12072 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 12082 0x00 0x00 NOPX + 12084 0x00 0x00 NOPX + 12086 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 12090 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 12096 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12102 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12106 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12110 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12114 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12116 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12120 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12122 0x00 0x16 0x58 0x00 0x01 0x04 JL #11440 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12128 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 12132 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 12136 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 12140 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 12144 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 12160 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 12170 0x00 0x07 0xcc 0xc8 0xc0 0x44 MOVXM p6, #509024 + 12176 0x00 0x00 NOPX + 12178 0x00 0x00 NOPX + 12180 0x00 0x00 NOPX + 12182 0x00 0x00 NOPX + 12184 0x00 0x00 NOPX + 12186 0x14 0x51 0x08 0x18 REL r17, r16 + 12190 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 12194 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 12198 0x00 0x00 NOPX + 12200 0x00 0x00 NOPX + 12202 0x00 0x00 NOPX + 12204 0x00 0x00 NOPX + 12206 0x00 0x00 NOPX + 12208 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 12212 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 12218 0x00 0x00 NOPX + 12220 0x00 0x00 NOPX + 12222 0x00 0x00 NOPX + 12224 0x00 0x00 NOPX + 12226 0x00 0x00 NOPX + 12228 0x00 0x00 NOPX + 12230 0x14 0x51 0x08 0x18 REL r17, r16 + 12234 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb2 0x20 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508992 + 12244 0x06 0x06 0x56 0x98 LDA r18, [p6] + 12248 0x01 0x06 0x36 0x98 LDA r17, [p1] + 12252 0x00 0x00 NOPX + 12254 0x00 0x00 NOPX + 12256 0x00 0x00 NOPX + 12258 0x00 0x00 NOPX + 12260 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 12264 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 12268 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 12272 0x80 0x18 0x08 0x40 0x01 0x84 JNZ r16, #12304 +.delay_slot +.swstall delay_slot + 12278 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12280 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12282 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12284 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12286 0x00 0x00 NOPX + 12288 0x10 0x20 0x01 0x18 MOVX r16, #0 + 12292 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 12304 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 12308 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 12312 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12316 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12318 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12322 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12324 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12326 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12330 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 12334 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 12340 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12342 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12344 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 12352 +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function_start + 12352 0x03 0x85 0xd0 0x00 0x01 0xf0 0xb3 0xe0 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #509888 + 12362 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 + 12372 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 12378 0xfe 0xf3 0xb0 0x00 0x01 0xf3 0xb3 0xe0 0x11 0x3a ST p7, [sp, #-12]; MOVXM p7, #509888 + 12388 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 12392 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8] + 12396 0x00 0x00 NOPX + 12398 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 12402 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 12406 0x00 0x04 0x2e 0x98 LDA el0, [p0] + 12410 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4] + 12414 0x00 0x00 NOPX + 12416 0x00 0x00 NOPX + 12418 0x00 0x00 NOPX + 12420 0x00 0x00 NOPX + 12422 0x00 0x00 NOPX + 12424 0x09 0x04 0x29 0x98 ST el0, [p1] + 12428 0x09 0x14 0x09 0x98 ST eh0, [p1, #4] + 12432 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5 + 12436 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2 + 12440 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2 + 12444 0x00 0x00 NOPX + 12446 0x00 0x00 NOPX + 12448 0x00 0x00 NOPX + 12450 0x00 0x00 NOPX +.no_stack_arguments + 12452 0x00 0x1e 0x98 0x00 0x01 0x04 JL #15664 +.delay_slot + 12458 0xfc 0xca 0xb8 0xbe 0x43 0x5c ST r18, [sp, #-28]; SUB r15, r17, r18 +.delay_slot + 12464 0xfd 0x86 0xb0 0xc2 0x11 0x5c ST r1, [sp, #-20]; NE r16, r1, r16 +.delay_slot + 12470 0xfe 0x42 0xb7 0xef 0x15 0x5c ST r16, [sp, #-16]; LT r27, r15, r24 +.delay_slot + 12476 0x16 0x22 0xf1 0x98 SUB r17, r24, r15 +.delay_slot + 12480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x1e 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV +.return_address + 12496 0xfd 0xd2 0x20 0x40 0x02 0x2c LDA r20, [sp, #-20]; MOVX r16, #0 + 12502 0xe7 0xc5 0x58 0x48 0x43 0x2c LDA.u8 r17, [p7], #3; SUB r18, r16, r2 + 12508 0x07 0xee 0x6a 0x98 LDA.u8 r19, [p7], #-2 + 12512 0x07 0xec 0x31 0x18 LDA r1, [sp, #-20] + 12516 0x00 0x00 NOPX + 12518 0x00 0x00 NOPX + 12520 0x00 0x00 NOPX + 12522 0x13 0xe9 0x46 0x98 XOR r20, r15, r20 + 12526 0x15 0x37 0x0a 0x98 LT r27, r20, r16 + 12530 0xfd 0x4e 0xb8 0xc6 0x63 0x5c ST r19, [sp, #-24]; SUB r17, r17, r19 +.no_stack_arguments + 12536 0xfc 0x46 0xb0 0x00 0x07 0xa6 0x00 0x00 0x41 0x3a ST r17, [sp, #-32]; JL #15664 +.delay_slot + 12546 0x10 0xa9 0x22 0x18 SEL.EQZ r20, r2, r18, r27 +.delay_slot + 12550 0x14 0x77 0x0a 0x98 LT r27, r17, r16 +.delay_slot + 12554 0x14 0x25 0x11 0x98 SUB r18, r16, r17 +.delay_slot + 12558 0x15 0x26 0x70 0x18 EXTEND.s16 r19, r20 +.delay_slot + 12562 0x00 0x2c 0xf0 0x00 0x24 0x41 0x22 0x3d 0x98 0x09 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 +.return_address + 12576 0xfc 0x0e 0x20 0x3f 0x37 0xc8 0x00 0x42 0x58 0xba LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 + 12586 0xfd 0xc2 0x20 0x01 0x80 0x08 0x29 0xfc 0x58 0xba LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 + 12596 0xfc 0xda 0x20 0x00 0x60 0x88 0x88 0x02 0x58 0xba LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 + 12606 0xe1 0x45 0x50 0x00 0x51 0x0b 0x88 0x17 0x58 0xba LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 + 12616 0xfd 0x56 0x20 0x3f 0x27 0x48 0x80 0x20 0x58 0xba LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 + 12626 0xfe 0x7a 0x20 0x01 0x70 0xcb 0x48 0x01 0x58 0xba LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 + 12636 0xe9 0xc0 0x80 0x05 0xd0 0x0b 0xef 0xc0 0x58 0xba MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 + 12646 0x16 0x28 0x21 0x98 SUB r20, r24, r2 + 12650 0x10 0xc7 0x06 0x98 XOR r3, r3, r16 + 12654 0x1e 0xf1 0x50 0x36 0x02 0x24 LT r27, r3, r24; ADD.NC r0, r22, #2 + 12660 0x15 0x28 0x4b 0x3f 0xf5 0x64 SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 + 12666 0x78 0xe1 0xf1 0x20 0x1d 0x64 MUL r3, r15, r16; MOV r2, #7 + 12672 0x15 0x28 0x70 0x18 EXTEND.s16 r20, r20 + 12676 0x08 0x00 0x90 0xa0 0x01 0x24 AND r0, r1, r0; ADD.NC r1, r0, #1 + 12682 0x0c 0xe7 0xbd 0xb4 0x01 0x24 LSHL r19, r1, r19; ADD.NC r27, r20, #1 + 12688 0x7d 0x0d 0xb0 0xa3 0x02 0xa4 LSHL r20, r15, r6; ADD.NC r1, r3, r0 + 12694 0x09 0xcd 0xb0 0x35 0xff 0x24 LSHL r7, r1, r6; ADD.NC r0, r21, #-1 + 12700 0x16 0xcd 0x0f 0x98 MUL r6, r27, r16 + 12704 0x13 0xdf 0x1f 0x98 MUL r15, r15, r17 + 12708 0x9d 0x6b 0xf9 0xb3 0xff 0x24 MUL r21, r19, r21; ADD.NC r19, r19, #-1 + 12714 0x11 0x37 0x07 0x98 EQ r27, r4, r16 + 12718 0xff 0xd6 0x37 0x90 0xdf 0x5c ST r21, [p7], #-4; MUL r4, r15, r6 + 12724 0x17 0x38 0x52 0x18 SEL.EQZ r28, r28, r5, r27 + 12728 0x11 0x25 0x2d 0x98 LSHL r18, r4, r18 + 12732 0xe5 0x4a 0x38 0xc8 0x3f 0x5c ST r18, [p7], m1; MUL r18, r17, r1 + 12738 0xf9 0xf2 0x3f 0x72 0xfb 0x5c ST r28, [p7], #-16; LSHL r28, r30, r23 + 12744 0xed 0xf2 0x39 0x70 0x1f 0x5c ST r28, [p7], #24; MUL r28, r18, r0 + 12750 0xe3 0xce 0x39 0xce 0xfb 0x5c ST r19, [p7], #4; LSHL r19, r19, r23 + 12756 0xe7 0x35 0xb9 0xb3 0xea 0xa4 LSHL r28, r28, r26; ADD.NC r19, r19, r29 + 12762 0xe3 0xfe 0x39 0x7b 0x5b 0x5c ST r31, [p7], #4; LSHL r30, r18, r26 + 12768 0x94 0x21 0xf9 0x33 0xe2 0xa4 MUL r16, r18, r16; ADD.NC r18, r19, r28 + 12774 0xe3 0x82 0x3f 0xf3 0x04 0x5c ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 + 12780 0x10 0xff 0x6d 0x98 LSHL r31, r3, r22 + 12784 0xf0 0x66 0x39 0xbf 0xff 0x24 SUB r1, r30, r19; ADD.NC r19, r31, #-1 + 12790 0xe3 0x86 0x38 0xc6 0xdb 0x5c ST r1, [p7], #4; LSHL r17, r17, r22 + 12796 0xc5 0xa4 0x39 0x31 0xff 0x24 SUB r22, r24, r18; ADD.NC r18, r17, #-1 + 12802 0xe3 0xda 0x33 0xdb 0xc3 0x5c ST r22, [p7], #4; SUB r22, r7, r30 + 12808 0xe3 0xca 0x38 0x43 0x5b 0x5c ST r18, [p7], #4; LSHL r16, r16, r26 + 12814 0xe3 0x9e 0x39 0xfc 0x5b 0x5c ST r7, [p7], #4; LSHL r31, r19, r2 + 12820 0xe3 0xce 0x3e 0xda 0xc1 0x5c ST r19, [p7], #4; ADD r22, r29, r22 + 12826 0x3c 0x20 0x1e 0xbf 0xf2 0xa4 ADD r16, r7, r16; ADD.NC r29, r31, r30 + 12832 0xe3 0xda 0x38 0x43 0xa3 0x5c ST r22, [p7], #4; SUB r16, r16, r29 + 12838 0xe3 0xc2 0x30 0x1f 0x6d 0x6e 0x0f 0xff 0x59 0x3a ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 + 12848 0xe3 0xca 0x3e 0x6a 0x81 0x5c ST r18, [p7], #4; ADD r26, r28, r20 + 12854 0xe3 0xea 0x3a 0x52 0xc3 0x5c ST r26, [p7], #4; SUB r20, r20, r22 + 12860 0x08 0x11 0x07 0x1e 0x71 0xab 0x08 0xb2 0x6d 0x10 0x08 0x76 MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 + 12872 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4 + 12876 0xe3 0xc6 0x38 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r16, r23 + 12882 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20 + 12888 0xe3 0xc6 0x39 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r18, r23 + 12894 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20 + 12900 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 + 12904 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4 + 12908 0x0f 0x0a 0x11 0x98 ST r16, [p7], m0 + 12912 0x07 0x06 0x0a 0x98 LDA.u8 r16, [p7] + 12916 0x00 0x00 NOPX + 12918 0x00 0x00 NOPX + 12920 0x00 0x00 NOPX + 12922 0x00 0x00 NOPX + 12924 0x00 0x00 NOPX + 12926 0x00 0x00 NOPX + 12928 0x80 0x19 0x50 0x00 0x01 0x84 JZ r16, #12960 +.delay_slot + 12934 0x19 0x3b 0x60 0xf8 MOV vaddSign0, crMCDEn +.delay_slot + 12938 0xff 0x7f 0x09 0xa0 0x00 0x44 MOVXM r19, #-8454144 +.delay_slot +.swstall delay_slot + 12944 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12946 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12948 0x00 0x00 NOPX + 12950 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0x26 0x01 0x7a NOPA; NOPS; MOVX r19, #0 +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 + 12960 0xff 0x87 0x20 0x00 0x01 0xf0 0x32 0x34 0x10 0xba LDA lr, [sp, #-4]; MOVXM p0, #509032 + 12970 0x00 0xc0 0x50 0x04 0xe2 0xd4 LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 + 12976 0xfe 0x83 0x21 0x02 0xe9 0x54 LDA p0, [sp, #-12]; MOV dj0, #186 + 12982 0xff 0x3e 0x20 0x01 0x25 0xd4 LDA r15, [sp, #-8]; VMOV bmll0, x0 + 12988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 12994 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12996 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12998 0x07 0x02 0x17 0x18 ST.s16 r16, [p7, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13002 0x05 0x00 0x0f 0x70 0x41 0xe4 RET lr; MOV crRnd, r16 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13008 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13012 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13016 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0 +.delay_slot +.swstall delay_slot + 13020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13022 0x00 0x00 NOPX +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function_start + 13024 0x1c 0x56 0xc0 0xf8 MOV r17, p3 + 13028 0x20 0x93 0xde 0x01 0xa9 0x54 LDA p1, [p1]; MOV m7, #106 + 13034 0x00 0x83 0xd6 0xd1 0x02 0x14 LDA p0, [p0]; ADD.NC p3, r17, #2 + 13040 0x03 0xe8 0x8a 0x98 LDA.u8 r4, [p3], m7 + 13044 0x03 0xfd 0x46 0x98 LDA dj2, [p3], #-4 + 13048 0x03 0x3d 0x26 0x98 LDA dn2, [p3], #12 + 13052 0x03 0xff 0x46 0x98 LDA dj6, [p3], #-4 + 13056 0x03 0x2f 0x26 0x98 LDA dn6, [p3], #8 + 13060 0x03 0x2d 0x06 0x98 LDA m2, [p3], #8 + 13064 0x03 0xfc 0x46 0x98 LDA dj0, [p3], #-4 + 13068 0x03 0x3c 0x26 0x98 LDA dn0, [p3], #12 + 13072 0x03 0xfe 0x46 0x98 LDA dj4, [p3], #-4 + 13076 0x03 0x2e 0x26 0x98 LDA dn4, [p3], #8 + 13080 0x03 0x2c 0x06 0x98 LDA m0, [p3], #8 + 13084 0x03 0xfc 0xc6 0x98 LDA dj1, [p3], #-4 + 13088 0x03 0x3c 0xa6 0x98 LDA dn1, [p3], #12 + 13092 0x03 0xfe 0xc6 0x98 LDA dj5, [p3], #-4 + 13096 0x03 0x2e 0xa6 0x98 LDA dn5, [p3], #8 + 13100 0x03 0x2c 0x86 0x98 LDA m1, [p3], #8 + 13104 0x03 0xff 0xc6 0x98 LDA dj7, [p3], #-4 + 13108 0x03 0x2f 0xa6 0x98 LDA dn7, [p3], #8 + 13112 0x65 0xf0 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m7, [p3], #8; MOVXM p4, #509032 + 13122 0x80 0x98 0x58 0xc5 0x81 0xd4 LDA.s8 r6, [p4]; MOV p4, p1 + 13128 0x1b 0x0f 0x10 0xb8 MOV m3, #-120 + 13132 0x80 0x85 0x70 0x3b 0x68 0x00 0x20 0x6a 0x60 0x00 0x58 0xb6 VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 + 13144 0x7f 0xb8 0xd0 0x38 0xe9 0x04 0x2d 0xe0 0x10 0x0b 0x62 0x09 0x60 0x7e LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 + 13158 0x65 0xb4 0xd1 0x0c 0x4b 0x02 0x80 0x90 0x72 0xba LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 + 13168 0x6d 0x30 0xd1 0xab 0x90 0x03 0xe1 0xc0 0x7e 0xba LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 + 13178 0x79 0x0a 0xd1 0xf0 0xf4 0x02 0x07 0x90 0x5e 0xba LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 + 13188 0x71 0x1e 0x50 0x00 0x82 0x2c LDA.s16 r7, [p3], m4; MOVX r0, #16 + 13194 0x69 0xc0 0xd6 0x10 0x4b 0x00 0x00 0x0c 0x79 0xf8 0x10 0x76 LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 + 13206 0x72 0x92 0xd2 0x10 0x4b 0x00 0x00 0x0d 0xba 0x28 0x10 0x76 LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 + 13218 0x0b 0x16 0x84 0x61 0x05 0xb4 VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 + 13224 0x1b 0x00 0x8a 0xf8 VMOV cml3, cml0 + 13228 0x60 0x96 0xd0 0x00 0x00 0x0d 0xb2 0x48 0x10 0xba LDA r5, [p3]; MOVXM p3, #13456 + 13238 0x00 0x2c 0xf0 0x00 0x14 0x0a 0x8e 0x01 0xa8 0xba NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 + 13248 0x07 0x91 0x00 0x00 0x20 0x01 0x5b 0x00 0x36 0x08 0x0e 0xb9 0x78 0x00 0x00 0xe1 MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV + 13264 0x00 0x2c 0xf0 0x00 0x20 0x10 0x4b 0x0d 0xd4 0x02 0x0e 0x03 0xac 0x63 0x6a 0x0b NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13280 0x40 0xa3 0xd0 0x00 0x25 0x10 0x4b 0x04 0x2f 0xda 0xb9 0x3f 0xcc 0x48 0x1a 0x0b LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13296 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13306 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13316 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13326 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13330 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13338 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13346 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 13350 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 + 13358 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 + 13366 0x00 0x2c 0xf0 0x00 0x10 0x01 0x18 0x41 0x6e 0xba NOPA; NOPB; VSHIFT x4, x6, x1, r0 + 13376 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x63 0x6a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 13392 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13408 0x03 0x0c 0xf4 0x73 0x90 0x02 0x84 0x81 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13418 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13426 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13434 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13438 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13446 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x8a 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432 +.loop_nesting 1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13456 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13462 0x00 0x00 0x01 0xb7 0x54 0x02 0x8b 0x92 0xe1 0x5a MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13472 0x80 0x85 0x70 0x00 0x01 0x8f 0x4f 0x02 0x88 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13484 0x1d 0x72 0x7f 0x98 ADD.NC lc, r4, #-1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13488 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13492 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13496 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13500 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13504 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13508 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13512 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13516 0x51 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13524 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13528 0x47 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p2], d1; VMOV cml3, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13536 0x04 0x1c 0x07 0x46 0x8c 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13544 0x02 0x30 0x82 0xc6 0x89 0x03 0x41 0x62 VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt + 13552 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13562 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13572 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13582 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13586 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13594 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13602 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 13606 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 + 13614 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 + 13622 0x00 0x2c 0xf4 0x61 0x05 0x94 NOPA; VSHIFT x4, x6, x1, r0 + 13628 0x8c 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r17 +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608 +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 +.loop_nesting 1 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13648 0x03 0x0c 0xf8 0xe7 0x20 0x04 0x27 0x02 0x84 0x81 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13660 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13668 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13676 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13680 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13688 0x04 0xb0 0x8e 0xc6 0x8a 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 +.loop_nesting 0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13696 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 13700 0x8b 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r17 + 13704 0x88 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r17 + 13708 0x00 0x00 NOPX + 13710 0x00 0x00 NOPX + 13712 0x00 0x00 NOPX + 13714 0x00 0x00 NOPX + 13716 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 + 13720 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr +.delay_slot + 13726 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.delay_slot + 13730 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5 +.delay_slot + 13734 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0 +.delay_slot + 13738 0x0a 0x8a 0x13 0x18 VST x8, [p2], m4 +.delay_slot + 13742 0x0a 0x3a 0x93 0x18 VST.3D x10, [p2], d1 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 13760 +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 13760 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992 + 13766 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15 + 13772 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 13778 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID + 13786 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr + 13794 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20] + 13798 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 13802 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2 + 13810 0x80 0x1b 0x38 0x40 0x01 0x84 JNZ r16, #13936 +.delay_slot + 13816 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 13820 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 13824 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 13828 0x00 0x07 0xc6 0xc8 0xa0 0x44 MOVXM p3, #509008 +.delay_slot + 13834 0x0b 0x06 0x31 0x98 ST r17, [p3] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13838 0xf0 0x91 0x60 0x00 0x01 0xf0 0xb2 0x34 0x11 0x3a MOVS p7, p1; MOVXM p1, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13848 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x01 0xf0 0xb2 0x32 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13860 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 13862 0x00 0x18 0x20 0x00 0x01 0x04 JL #12352 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13868 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13870 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13872 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 13876 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 13880 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM +.return_address + 13888 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8 + 13896 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 13900 0x44 0xc3 0x50 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 + 13910 0x00 0x00 NOPX + 13912 0x00 0x1b 0x40 0x00 0x00 0x84 J #13952 +.delay_slot + 13918 0x00 0x07 0xc6 0xc8 0xb0 0x44 MOVXM p3, #509016 +.delay_slot +.swstall delay_slot + 13924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13926 0x00 0x00 NOPX +.delay_slot + 13928 0x0b 0x06 0x31 0x98 ST r17, [p3] +.delay_slot + 13932 0x0a 0x06 0x11 0x98 ST r16, [p2] +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 + 13936 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0xb2 0x2c 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 + 13952 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 13956 0x5f 0xee 0xd0 0x00 0x01 0xf2 0x32 0x28 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #509008 + 13966 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 13970 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 13974 0x02 0x46 0x56 0x98 LDA r18, [p2, #16] + 13978 0x00 0x00 NOPX + 13980 0x00 0x00 NOPX + 13982 0x00 0x00 NOPX + 13984 0x00 0x00 NOPX + 13986 0x00 0x00 NOPX + 13988 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 13992 0x0a 0x06 0x11 0x98 ST r16, [p2] + 13996 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 14000 0x00 0x00 NOPX + 14002 0x00 0x00 NOPX + 14004 0x00 0x00 NOPX + 14006 0x14 0x93 0x08 0x18 ACQ r18, r16 + 14010 0x00 0x2f 0x00 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba MOVA r15, #1; MOVXM p7, #508992 + 14020 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp + 14026 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76 + 14030 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2 + 14036 0x04 0x06 0x36 0x98 LDA r17, [p4] + 14040 0x60 0xc2 0xd0 0x00 0x01 0xf1 0xb3 0xe0 0x10 0xba LDA r16, [p3]; MOVXM p3, #509888 + 14050 0x07 0x06 0x56 0x98 LDA r18, [p7] + 14054 0x00 0x00 NOPX + 14056 0x00 0x00 NOPX + 14058 0x00 0x00 NOPX + 14060 0x05 0x06 0x76 0x98 LDA r19, [p5] + 14064 0x00 0x00 NOPX + 14066 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 14070 0x14 0xa2 0x07 0x18 ADD r17, r18, #1 + 14074 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15 +.no_stack_arguments + 14078 0x00 0x19 0x70 0x00 0x01 0x04 JL #13024 +.delay_slot + 14084 0x0f 0x06 0x31 0x98 ST r17, [p7] +.delay_slot + 14088 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16 +.delay_slot + 14092 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76] +.delay_slot + 14096 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72] +.delay_slot + 14100 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX +.return_address + 14112 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20 + 14116 0x02 0x06 0x16 0x98 LDA r16, [p2] + 14120 0x00 0x00 NOPX + 14122 0x00 0x00 NOPX + 14124 0x00 0x00 NOPX + 14126 0x00 0x00 NOPX + 14128 0x00 0x00 NOPX + 14130 0x00 0x00 NOPX + 14132 0x14 0x10 0xf8 0x18 REL r16, r15 + 14136 0x5c 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #509024 + 14146 0x01 0x06 0x56 0x98 LDA r18, [p1] + 14150 0x07 0x06 0x36 0x98 LDA r17, [p7] + 14154 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12] + 14158 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] + 14162 0x00 0x00 NOPX + 14164 0x00 0x00 NOPX + 14166 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 14170 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8] + 14174 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 14178 0x80 0x1b 0xc0 0x40 0x01 0x84 JNZ r16, #14208 +.delay_slot + 14184 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 14188 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14190 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14192 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14194 0x00 0x00 NOPX + 14196 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 14208 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13 + 14214 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] + 14218 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 14222 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 14228 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14230 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14232 0x00 0x00 NOPX +.delay_slot + 14234 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 14240 +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0 +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.function_start + 14240 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 14246 0xff 0x73 0xb0 0x00 0x01 0xf3 0xb2 0x20 0x11 0x3a ST p7, [sp, #-8]; MOVXM p7, #508992 + 14256 0xe0 0xc2 0xd7 0xff 0x1d 0x82 0x2d 0x70 0x72 0xba LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID + 14266 0x0f 0xf6 0x1d 0x98 ST p4, [sp, #-12] + 14270 0x0f 0xf1 0x1d 0x98 ST p2, [sp, #-16] + 14274 0xfd 0x87 0xb0 0x03 0xb3 0x60 0x70 0x02 ST lr, [sp, #-20]; MOV p7, p3 + 14282 0x00 0x00 NOPX + 14284 0x00 0x00 NOPX + 14286 0x00 0x00 NOPX + 14288 0x80 0x1c 0xb0 0x40 0x01 0x84 JNZ r16, #14688 +.delay_slot + 14294 0x0f 0xe8 0x1d 0x98 ST p0, [sp, #-24] +.delay_slot + 14298 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 14302 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 14306 0x00 0x07 0xcc 0xc8 0xa0 0x44 MOVXM p6, #509008 +.delay_slot + 14312 0x0e 0x06 0x31 0x98 ST r17, [p6] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14316 0x00 0x20 0x00 0x00 0x01 0xf3 0x32 0x34 0x10 0xba MOVA r0, #1; MOVXM p6, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14326 0xc0 0xc0 0xe6 0x84 0x8b 0x00 0x01 0xf0 0x32 0x32 0x10 0x76 ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14338 0x00 0x01 0x00 0x00 0x01 0xf0 0xb3 0x00 0x10 0xba MOVA r1, #0; MOVXM p1, #509440 +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 14348 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14354 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14356 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14358 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 14362 0x00 0x2c 0xf0 0x40 0x0a 0x2c NOPA; MOVX r16, #1 +.delay_slot + 14368 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV +.return_address + 14384 0x04 0x00 0xa1 0x01 0x01 0x64 MOVX r16, #1; MOV dj0, #64 + 14390 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0] + 14394 0x00 0x00 NOPX + 14396 0x00 0x00 NOPX + 14398 0x00 0x00 NOPX + 14400 0x00 0x00 NOPX + 14402 0x00 0x00 NOPX + 14404 0x00 0x00 NOPX + 14406 0x14 0xa1 0x07 0x98 EQ r16, r18, r16 + 14410 0x80 0x1c 0x68 0x40 0x01 0x84 JNZ r16, #14544 +.delay_slot + 14416 0x1c 0x5e 0xc0 0xf8 MOV r17, p7 +.delay_slot + 14420 0x18 0xc8 0x90 0x18 ADD.NC dc0, r17, #32 +.delay_slot +.swstall delay_slot + 14424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14426 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14428 0x00 0x00 NOPX + 14430 0x90 0x1c 0x58 0x40 0x01 0x84 JNZ r18, #14512 +.delay_slot + 14436 0x00 0x07 0xc8 0x2c 0x00 0x44 MOVXM r16, #509440 +.delay_slot + 14442 0x10 0x22 0x01 0x18 MOVX r17, #0 +.delay_slot +.swstall delay_slot + 14446 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14448 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14450 0x00 0x00 NOPX +.no_stack_arguments + 14452 0xfc 0xe3 0xb0 0x00 0x05 0x70 0x00 0x00 0x41 0x3a ST p6, [sp, #-28]; JL #11136 +.delay_slot + 14462 0x00 0x07 0xcc 0xca 0x80 0x44 MOVXM p6, #509248 +.delay_slot + 14468 0x00 0x07 0xc0 0xca 0x80 0x44 MOVXM p0, #509248 +.delay_slot + 14474 0x19 0x61 0x80 0xf8 MOV p1, dc0 +.delay_slot +.swstall delay_slot + 14478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.return_address + 14496 0xc0 0xc6 0xd0 0x00 0x01 0xf2 0x0b 0x00 0x10 0xba LDA r17, [p6]; MOVXM r16, #509440 + 14506 0xfc 0xe3 0x20 0x00 0x20 0x3c LDA p6, [sp, #-28]; NOPB +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272 + 14512 0x00 0x00 NOPX + 14514 0x00 0x1c 0x80 0x00 0x00 0x84 J #14592 +.delay_slot +.swstall delay_slot + 14520 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14524 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14526 0x00 0x00 NOPX +.delay_slot + 14528 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0xb6 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304 +.no_stack_arguments + 14544 0xfc 0xe3 0xb0 0x00 0x05 0x84 0x00 0x00 0x41 0x3a ST p6, [sp, #-28]; JL #11296 +.delay_slot + 14554 0x00 0x07 0xcc 0xcb 0x00 0x44 MOVXM p6, #509312 +.delay_slot + 14560 0x00 0x07 0xc0 0xcb 0x00 0x44 MOVXM p0, #509312 +.delay_slot + 14566 0x19 0x61 0x80 0xf8 MOV p1, dc0 +.delay_slot +.swstall delay_slot + 14570 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14572 0x00 0x01 0x67 0x98 NOPA +.return_address + 14576 0xc0 0xc6 0xd0 0x00 0x01 0xf2 0x0b 0x00 0x10 0xba LDA r17, [p6]; MOVXM r16, #509440 + 14586 0xfc 0x93 0x20 0x00 0x20 0x3c LDA p1, [sp, #-28]; NOPB +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352 + 14592 0x1b 0x68 0x05 0x98 ADD.NC p3, r16, #11 + 14596 0x6f 0xcd 0x50 0x00 0x01 0xf3 0x32 0x28 0x10 0xba LDA.u8 r19, [p3], #7; MOVXM p6, #509008 + 14606 0x06 0x06 0x56 0x98 LDA r18, [p6] + 14610 0x03 0x1e 0xba 0x98 LDA.u16 r21, [p3], #2 + 14614 0x03 0x06 0x1a 0x98 LDA.u16 r16, [p3] + 14618 0x00 0x00 NOPX + 14620 0x03 0x16 0x9a 0x98 LDA.u16 r20, [p3, #2] + 14624 0x00 0x00 NOPX + 14626 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 14628 0x00 0x07 0xc0 0xc8 0x88 0x44 MOVXM p0, #508996 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 14634 0x14 0xe7 0x5f 0x98 MUL r19, r19, r21 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14638 0x00 0xce 0x30 0x00 0x01 0xf1 0x32 0x2e 0x11 0x3a ST r19, [p0]; MOVXM p2, #509020 + 14648 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16 + 14652 0x14 0x63 0x2f 0x98 MUL r17, r17, r18 + 14656 0x15 0x21 0x0f 0x98 MUL r16, r20, r16 + 14660 0x00 0x2c 0xf2 0x06 0x31 0x80 0x01 0xf3 0x32 0x30 0x10 0x76 NOPA; ST r17, [p2]; MOVXM p6, #509024 + 14672 0x00 0x2c 0xf0 0x00 0x26 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448 + 14688 0x00 0x07 0xc0 0xc8 0x90 0x44 MOVXM p0, #509000 + 14694 0x00 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r16, [p0]; MOVXM p2, #508992 + 14704 0x40 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x26 0x10 0xba LDA r17, [p2]; MOVXM p6, #509004 + 14714 0x06 0x06 0x56 0x98 LDA r18, [p6] + 14718 0x00 0x00 NOPX + 14720 0x00 0x00 NOPX + 14722 0x00 0x00 NOPX + 14724 0x00 0x00 NOPX + 14726 0x80 0x1c 0xf8 0x40 0x01 0x84 JNZ r16, #14832 +.delay_slot + 14732 0x8c 0x40 0xe9 0xb0 0x01 0x24 ADD r17, r17, #1; ADD.NC r19, r16, #1 +.delay_slot + 14738 0x14 0xa4 0x07 0x18 ADD r18, r18, #1 +.delay_slot + 14742 0x0a 0x06 0x31 0x98 ST r17, [p2] +.delay_slot + 14746 0x0e 0x06 0x51 0x98 ST r18, [p6] +.delay_slot + 14750 0x08 0x06 0x71 0x98 ST r19, [p0] + 14754 0x07 0xf6 0x31 0x18 LDA r17, [sp, #-12] + 14758 0x00 0x00 NOPX + 14760 0x00 0x00 NOPX + 14762 0x00 0x00 NOPX + 14764 0x00 0x00 NOPX + 14766 0x00 0x00 NOPX + 14768 0x00 0x00 NOPX + 14770 0x1e 0x68 0x86 0x18 ADD.NC p6, r17, #12 + 14774 0x06 0xff 0x76 0x98 LDA r27, [p6], #-4 + 14778 0x06 0xfe 0x36 0x98 LDA r17, [p6], #-4 + 14782 0x06 0xfe 0x56 0x98 LDA r18, [p6], #-4 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 14786 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 14788 0x06 0x46 0x36 0x98 LDA r17, [p6, #16] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14792 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14794 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14796 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14798 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14800 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14804 0xc0 0xc6 0x3f 0xc1 0xfa 0x5c ST r17, [p6]; MOVX r16, #-1 + 14810 0x00 0x00 NOPX + 14812 0x00 0x00 NOPX + 14814 0x00 0x00 NOPX + 14816 0x00 0x00 NOPX + 14818 0x00 0x2c 0xf0 0x00 0x24 0x53 0x08 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; ACQ r17, r16; NOPM +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592 + 14832 0x00 0x00 NOPX + 14834 0x00 0x00 NOPX + 14836 0x00 0x00 NOPX + 14838 0x07 0xf5 0x19 0x18 LDA p2, [sp, #-12] + 14842 0x07 0xe8 0x19 0x18 LDA p0, [sp, #-24] +.no_stack_arguments + 14846 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464 +.delay_slot + 14852 0x00 0x07 0xc6 0xcc 0x00 0x44 MOVXM p3, #509440 +.delay_slot +.swstall delay_slot + 14858 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14860 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14862 0x00 0x00 NOPX +.delay_slot + 14864 0x00 0x2c 0xf0 0x00 0x26 0x88 0x8b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV +.return_address + 14880 0xfe 0x42 0x20 0x00 0x01 0xf0 0xb2 0x24 0x10 0xba LDA r16, [sp, #-16]; MOVXM p1, #509000 + 14890 0x20 0xc6 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r17, [p1]; MOVXM p1, #508996 + 14900 0x01 0x06 0x56 0x98 LDA r18, [p1] + 14904 0x00 0x00 NOPX + 14906 0x00 0x00 NOPX + 14908 0x00 0x00 NOPX + 14910 0x00 0x00 NOPX + 14912 0x00 0x00 NOPX + 14914 0x00 0x00 NOPX + 14916 0x14 0x63 0x28 0x98 NE r17, r17, r18 + 14920 0x88 0x1d 0xd0 0x40 0x01 0x84 JNZ r17, #15264 +.delay_slot +.swstall delay_slot + 14926 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14928 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14930 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14932 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14934 0x00 0x00 NOPX + 14936 0x08 0x02 0x80 0x3f 0x17 0xe8 0xb4 0x03 0x08 0xba MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 + 14946 0x3f 0xee 0xd0 0x00 0x01 0xf0 0x32 0x2e 0x10 0xba LDA r27, [p1], #-4; MOVXM p0, #509020 + 14956 0x01 0xfe 0x56 0x98 LDA r18, [p1], #-4 + 14960 0x01 0xfe 0x76 0x98 LDA r19, [p1], #-4 + 14964 0x01 0x56 0x96 0x98 LDA r20, [p1, #20] + 14968 0x00 0x00 NOPX + 14970 0x00 0x00 NOPX + 14972 0x00 0x00 NOPX + 14974 0x00 0x00 NOPX + 14976 0x00 0x00 NOPX + 14978 0x14 0xe5 0x22 0x18 SEL.EQZ r18, r19, r18, r27 + 14982 0x20 0xca 0x30 0x40 0x0a 0x5c ST r18, [p1]; MOVX r16, #1 + 14988 0x00 0x00 NOPX + 14990 0x00 0x00 NOPX + 14992 0x00 0x00 NOPX + 14994 0x00 0x00 NOPX + 14996 0x15 0x13 0x18 0x18 ACQ r20, r17 + 15000 0x00 0x00 NOPX + 15002 0x00 0x00 NOPX + 15004 0x00 0x00 NOPX + 15006 0x00 0x06 0x76 0x98 LDA r19, [p0] + 15010 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0] + 15014 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 15016 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.noswbrkpt + 15018 0x06 0x5c 0x1e 0x98 LDA p0, [p6], #20 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15022 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15024 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15026 0x29 0xc6 0xd0 0x27 0x38 0x6c 0x31 0x60 0x78 0xba LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15036 0x14 0xa1 0x07 0x98 EQ r16, r18, r16 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15040 0x80 0x1d 0x88 0x40 0x01 0x84 JNZ r16, #15120 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15046 0x0f 0x80 0x8b 0x18 MOVS p7, p0 +.delay_slot +.swstall delay_slot + 15050 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15052 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15054 0x00 0x00 NOPX +.delay_slot + 15056 0xfe 0x13 0xb0 0x00 0xb4 0xe2 0xa0 0x02 ST p1, [sp, #-16]; ADD.NC p1, r19, r17 + 15064 0x90 0x1d 0x98 0x40 0x01 0x84 JNZ r18, #15152 +.delay_slot +.swstall delay_slot + 15070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15072 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15074 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15076 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15078 0x00 0x00 NOPX +.no_stack_arguments + 15080 0x00 0x15 0xf8 0x00 0x01 0x04 JL #11248 +.delay_slot + 15086 0x00 0x07 0xc6 0xca 0x80 0x44 MOVXM p3, #509248 +.delay_slot +.swstall delay_slot + 15092 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15094 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15096 0x00 0x00 NOPX +.delay_slot + 15098 0x00 0x2c 0xf4 0xc1 0x81 0xd4 NOPA; MOV p2, p0 +.return_address + 15104 0x00 0x1d 0x98 0x00 0x00 0x84 J #15152 +.delay_slot +.swstall delay_slot + 15110 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15112 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15114 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15116 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15118 0x00 0x00 NOPX +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880 +.no_stack_arguments + 15120 0x00 0x16 0x58 0x00 0x01 0x04 JL #11440 +.delay_slot + 15126 0x00 0x07 0xc6 0xcb 0x00 0x44 MOVXM p3, #509312 +.delay_slot + 15132 0x1a 0x60 0xc0 0xf8 MOV p2, p0 +.delay_slot +.swstall delay_slot + 15136 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15138 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15140 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912 +.return_address + 15152 0x07 0xf0 0x99 0x18 LDA p1, [sp, #-16] + 15156 0xfe 0x83 0x20 0x44 0x0a 0x2c LDA p0, [sp, #-12]; MOVX r17, #1 + 15162 0xe8 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x24 0x10 0xba LDA r16, [p7, #16]; MOVXM p7, #509000 + 15172 0x00 0x00 NOPX + 15174 0x00 0x00 NOPX + 15176 0x00 0x00 NOPX + 15178 0x00 0x00 NOPX + 15180 0x00 0x00 NOPX + 15182 0x00 0x00 NOPX + 15184 0x14 0x11 0x18 0x18 REL r16, r17 + 15188 0x01 0xf6 0x56 0x98 LDA r18, [p1, #-4] + 15192 0x00 0x56 0x16 0x98 LDA r16, [p0, #20] + 15196 0x00 0x00 NOPX + 15198 0x00 0x00 NOPX + 15200 0x00 0x00 NOPX + 15202 0x00 0x00 NOPX + 15204 0x00 0x00 NOPX + 15206 0x14 0x65 0x21 0x98 SUB r18, r17, r18 + 15210 0x09 0xf6 0x51 0x98 ST r18, [p1, #-4] + 15214 0x00 0x00 NOPX + 15216 0x00 0x00 NOPX + 15218 0x00 0x00 NOPX + 15220 0x00 0x00 NOPX + 15222 0x14 0x11 0x18 0x18 REL r16, r17 + 15226 0x06 0xe6 0x56 0x98 LDA r18, [p6, #-8] + 15230 0x00 0x00 NOPX + 15232 0x00 0x00 NOPX + 15234 0x00 0x1d 0xd8 0x00 0x00 0x84 J #15280 +.delay_slot +.swstall delay_slot + 15240 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15242 0x00 0x00 NOPX +.delay_slot + 15244 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot + 15248 0xe0 0xc2 0x38 0xc6 0x43 0x5c ST r16, [p7]; SUB r17, r17, r18 +.delay_slot + 15254 0x00 0x2c 0xf6 0xe6 0x31 0x80 0x00 0x00 0x00 0x7a NOPA; ST r17, [p6, #-8]; NOPX +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024 + 15264 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040 + 15280 0xfd 0x87 0x20 0x00 0x01 0xf3 0xb2 0x30 0x10 0xba LDA lr, [sp, #-20]; MOVXM p7, #509024 + 15290 0xe0 0xca 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r18, [p7]; MOVXM p6, #508992 + 15300 0x06 0x06 0x36 0x98 LDA r17, [p6] + 15304 0x00 0x00 NOPX + 15306 0x00 0x00 NOPX + 15308 0x00 0x00 NOPX + 15310 0x00 0x00 NOPX + 15312 0x00 0x00 NOPX + 15314 0x00 0x00 NOPX + 15316 0x14 0x63 0x28 0x98 NE r17, r17, r18 + 15320 0x88 0x1d 0xf8 0x40 0x01 0x84 JNZ r17, #15344 +.delay_slot + 15326 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 15330 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15332 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15334 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15336 0x00 0x00 NOPX + 15338 0x00 0x2c 0xfc 0x0c 0x23 0x0c NOPA; ST r16, [p6] +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104 + 15344 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] + 15348 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 15352 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 15358 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15360 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15362 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15364 0x00 0x00 NOPX +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0 + +.text_segment PM 15376 +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function_start + 15376 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 15380 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 15384 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 15388 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 15392 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 15396 0x00 0x0d 0x70 0x00 0x00 0x84 J #6880 +.delay_slot +.swstall delay_slot + 15402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15404 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15406 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15410 0x00 0x00 NOPX +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + +.text_segment PM 15424 +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function_start + 15424 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 15428 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 15432 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 15436 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 15440 0x00 0x10 0x18 0x00 0x00 0x84 J #8240 +.delay_slot +.swstall delay_slot + 15446 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15448 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15450 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15452 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15454 0x00 0x00 NOPX +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function_start + 15456 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 15460 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 15464 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 15468 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 15472 0x00 0x11 0xc8 0x00 0x00 0x84 J #9104 +.delay_slot +.swstall delay_slot + 15478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15480 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15484 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15486 0x00 0x00 NOPX +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function_start + 15488 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 15492 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 15496 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 15500 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 15504 0x00 0x14 0x88 0x00 0x00 0x84 J #10512 +.delay_slot +.swstall delay_slot + 15510 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15512 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15514 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15516 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15518 0x00 0x00 NOPX +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function_start + 15520 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 15524 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 15528 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 15532 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 15536 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 15540 0x00 0x16 0xf0 0x00 0x00 0x84 J #11744 +.delay_slot +.swstall delay_slot + 15546 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15548 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15550 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15552 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15554 0x00 0x00 NOPX +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + +.text_segment PM 15568 +.label __Z13_b924_wrapperPPv___func_begin0 +.label _Z13_b924_wrapperPPv +.function_start + 15568 0x1b 0x60 0xc0 0xf8 MOV p3, p0 + 15572 0x03 0x1c 0x1e 0x98 LDA p0, [p3], #4 + 15576 0x03 0x1c 0x9e 0x98 LDA p1, [p3], #4 + 15580 0x03 0x2d 0x1e 0x98 LDA p2, [p3], #8 + 15584 0x03 0xf6 0x1e 0x98 LDA p4, [p3, #-4] + 15588 0x03 0x05 0x9e 0x98 LDA p3, [p3] +.tail_call + 15592 0x00 0x1b 0xd0 0x00 0x00 0x84 J #14240 +.delay_slot +.swstall delay_slot + 15598 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15600 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15602 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15604 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15606 0x00 0x00 NOPX +.label _Z13_b924_wrapperPPv__end +.label __Z13_b924_wrapperPPv___func_end0 + +.text_segment PM 15616 +.label __Z13_b919_wrapperPPv___func_begin0 +.label _Z13_b919_wrapperPPv +.function_start + 15616 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 15620 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 15624 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 15628 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 15632 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 15636 0x00 0x1a 0xe0 0x00 0x00 0x84 J #13760 +.delay_slot +.swstall delay_slot + 15642 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15644 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15646 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15648 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 15650 0x00 0x00 NOPX +.label _Z13_b919_wrapperPPv__end +.label __Z13_b919_wrapperPPv___func_end0 + +.text_segment PM 15664 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 15664 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 15670 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15674 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15678 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15682 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15686 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15690 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15694 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15698 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15702 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15706 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15710 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15714 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15718 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15722 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15726 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15730 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15734 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15738 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15742 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15746 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15750 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15754 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15758 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15762 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15766 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15770 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15774 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15778 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 15782 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 15786 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15790 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15794 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15798 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 15802 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.bss_segment DMb 508992 32 + +.data_segment DMb 509024 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 509028 4 + +.bss_segment DMb 509032 1 + +.rodata_segment DMb 509056 +.label _ZL20g_uniformKernelFuncs + 0x10 + 0x3c + 0x0 + 0x0 + 0x40 + 0x3c + 0x0 + 0x0 + 0x60 + 0x3c + 0x0 + 0x0 + 0x80 + 0x3c + 0x0 + 0x0 + 0xa0 + 0x3c + 0x0 + 0x0 + 0xd0 + 0x3c + 0x0 + 0x0 + 0x0 + 0x3d + 0x0 + 0x0 + +.bss_segment DMb 509120 1024 + +.stack DM_stack 506560 508928 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.map new file mode 100644 index 0000000000000000000000000000000000000000..b11a3b333f5cabeaaee231f81abbc9a33f2e051a --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.map @@ -0,0 +1,324 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:21 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 2368 + + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 3461 + + 0x00000000..0x0007babf ( 506560 items) : Reserved + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + 0x0007c400..0x0007c43f ( 64 items) : Reserved + 0x0007c440..0x0007c443 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c444..0x0007c447 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL14num_depth_iter (Data, Local, .bss.DMb.4) + 0x0007c448..0x0007c44b ( 4 items) : ../Release/0_0_reloadable5.o::_ZL10depth_iter (Data, Local, .bss.DMb.4) + 0x0007c44c..0x0007c44f ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11total_iters (Data, Local, .bss.DMb.4) + 0x0007c450..0x0007c453 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c454..0x0007c457 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c458..0x0007c45b ( 4 items) : ../Release/0_0_reloadable5.o::_ZL10ifmsv_size (Data, Local, .bss.DMb.4) + 0x0007c45c..0x0007c45f ( 4 items) : ../Release/0_0_reloadable5.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4) + 0x0007c460..0x0007c463 ( 4 items) : ../Release/0_0_reloadable5.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c464..0x0007c467 ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c468..0x0007c468 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c480..0x0007c49b ( 28 items) : ../Release/0_0_reloadable5.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z13_b896_wrapperPPv + _Z13_b901_wrapperPPv + _Z13_b906_wrapperPPv + _Z13_b881_wrapperPPv + _Z13_b891_wrapperPPv + _Z13_b924_wrapperPPv + _Z13_b919_wrapperPPv + + 0x0007c4c0..0x0007c4ff ( 64 items) : ../Release/0_0_reloadable5.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable5.o::mul1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c540..0x0007c57f ( 64 items) : ../Release/0_0_reloadable5.o::add1d_params (Data, Global, .bss.DMb.64) + 0x0007c580..0x0007c5bf ( 64 items) : ../Release/0_0_reloadable5.o::mul1d_params (Data, Global, .bss.DMb.64) + 0x0007c5c0..0x0007c5ff ( 64 items) : ../Release/0_0_reloadable5.o::clip1d_params (Data, Global, .bss.DMb.64) + 0x0007c600..0x0007c7bf ( 448 items) : ../Release/0_0_reloadable5.o::conv2d_params (Data, Global, .bss.DMb.64) + 0x0007c7c0..0x0007c8bf ( 256 items) : ../Release/0_0_reloadable5.o::conv2d_dw_params (Data, Global, .bss.DMb.64) + 0x0007ccc0..0x000fffff ( 537408 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 13150 + + 0x00000000..0x0000092f ( 2352 items) : Reserved + 0x00000930..0x00000ab5 ( 390 items) : ../Release/0_0_reloadable5.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000ac0..0x00001055 ( 1430 items) : ../Release/0_0_reloadable5.o::_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (Function, Weak, .text) (stack frame size = 64) + 0x00001060..0x0000116d ( 270 items) : ../Release/0_0_reloadable5.o::_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001170..0x00001ad9 ( 2410 items) : ../Release/0_0_reloadable5.o::_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (Function, Weak, .text) (stack frame size = 128) + + Called functions : _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001ae0..0x00001d17 ( 568 items) : ../Release/0_0_reloadable5.o::_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + + Referenced symbols: _ZL9curr_iter + conv2d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL14num_depth_iter + _ZL8num_iter + _ZL10depth_iter + _ZL11total_iters + + 0x00001d20..0x00001d37 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + 0x00001d40..0x00001de1 ( 162 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + + 0x00001df0..0x00001e27 ( 56 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + 0x00001e30..0x00001e6d ( 62 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + + 0x00001e70..0x00001fa9 ( 314 items) : ../Release/0_0_reloadable5.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001fb0..0x00002021 ( 114 items) : ../Release/0_0_reloadable5.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + 0x00002030..0x00002217 ( 488 items) : ../Release/0_0_reloadable5.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002220..0x00002283 ( 100 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002290..0x00002381 ( 242 items) : ../Release/0_0_reloadable5.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + 0x00002390..0x00002577 ( 488 items) : ../Release/0_0_reloadable5.o::_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + clip1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002580..0x000025f3 ( 116 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002600..0x00002649 ( 74 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + + 0x00002650..0x00002865 ( 534 items) : ../Release/0_0_reloadable5.o::_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (Function, Local, .text) (stack frame size = 128) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002870..0x00002905 ( 150 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + 0x00002910..0x00002af7 ( 488 items) : ../Release/0_0_reloadable5.o::_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002b00..0x00002b73 ( 116 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002b80..0x00002be1 ( 98 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + + 0x00002bf0..0x00002bff ( 16 items) : ../Release/0_0_reloadable5.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + 0x00002c00..0x00002c17 ( 24 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + 0x00002c20..0x00002ca9 ( 138 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + + 0x00002cb0..0x00002dd3 ( 292 items) : ../Release/0_0_reloadable5.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002de0..0x00003039 ( 602 items) : ../Release/0_0_reloadable5.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00003040..0x000032df ( 672 items) : ../Release/0_0_reloadable5.o::_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh (Function, Local, .text) (stack frame size = 64) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: conv2d_dw_params + _ZN12me_primitive11control_rndE + + 0x000032e0..0x000035b1 ( 722 items) : ../Release/0_0_reloadable5.o::_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x000035c0..0x0000379d ( 478 items) : ../Release/0_0_reloadable5.o::_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128) + + Called functions : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL8num_iter + _ZL10ifmsv_size + conv2d_dw_params + + 0x000037a0..0x00003c05 ( 1126 items) : ../Release/0_0_reloadable5.o::_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + conv2d_params + add1d_params + mul1d_params + _ZL14num_depth_iter + _ZL11ifm2_offset + _ZL8num_iter + _ZL10depth_iter + _ZL11total_iters + + 0x00003c10..0x00003c33 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b896_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003c40..0x00003c5f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b901_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003c60..0x00003c7f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b906_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003c80..0x00003c9f ( 32 items) : ../Release/0_0_reloadable5.o::_Z13_b881_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003ca0..0x00003cc3 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b891_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00003cd0..0x00003cf7 ( 40 items) : ../Release/0_0_reloadable5.o::_Z13_b924_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE + + 0x00003d00..0x00003d23 ( 36 items) : ../Release/0_0_reloadable5.o::_Z13_b919_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003d30..0x00003dbd ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x3dbe + _pc_start = 0x930 + _sp_end_DM_stack = 0x7c400 + _sp_start_DM_stack = 0x7bac0 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 2368 + ---------- ---------- + 2368 Total + +Section summary for memory 'DMb': + + .bss .data .rodata File + ---------- ---------- ---------- ---------- + 1056 4 28 ../Release/0_0_reloadable5.o + 5 0 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- ---------- + 1061 4 28 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 13008 ../Release/0_0_reloadable5.o + 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- + 13150 Total + +File summary: + +../Release/0_0_reloadable5.o + DMb 1088 + PM 13008 + +me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + PM 142 + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.sdr new file mode 100644 index 0000000000000000000000000000000000000000..029eac6b3129d1ccada1bf5bd7decb96296f96f7 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.sdr @@ -0,0 +1,129 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:21 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable5 ../Release/0_0_reloadable5.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable5.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork4008 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol _ZN12me_primitive11control_satE 0x0007c464 +_symbol _ZN12me_primitive11control_rndE 0x0007c468 +_symbol add1d_attribute_broadcasting_params 0x0007c4c0 +_symbol mul1d_attribute_broadcasting_params 0x0007c500 +_symbol add1d_params 0x0007c540 +_symbol mul1d_params 0x0007c580 +_symbol clip1d_params 0x0007c5c0 +_symbol conv2d_params 0x0007c600 +_symbol conv2d_dw_params 0x0007c7c0 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x00000930 +_symbol _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh 0x00000ac0 +_symbol _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams 0x00001060 +_symbol _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params 0x00001170 +_symbol _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001ae0 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00001d20 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001d40 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00001df0 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001e30 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00001e70 +_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00001fb0 +_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002030 +_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x00002220 +_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00002290 +_symbol _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002390 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv 0x00002580 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002600 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E 0x00002870 +_symbol _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002910 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv 0x00002b00 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00002b80 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00002bf0 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x00002c00 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002c20 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x00002cb0 +_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00002de0 +_symbol _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params 0x000032e0 +_symbol _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x000035c0 +_symbol _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE 0x000037a0 +_symbol _Z13_b896_wrapperPPv 0x00003c10 +_symbol _Z13_b901_wrapperPPv 0x00003c40 +_symbol _Z13_b906_wrapperPPv 0x00003c60 +_symbol _Z13_b881_wrapperPPv 0x00003c80 +_symbol _Z13_b891_wrapperPPv 0x00003ca0 +_symbol _Z13_b924_wrapperPPv 0x00003cd0 +_symbol _Z13_b919_wrapperPPv 0x00003d00 +_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x00003d30 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.srv new file mode 100644 index 0000000000000000000000000000000000000000..cc24263e196c609ab062129e37812e382b48d43f --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.srv @@ -0,0 +1,19187 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:49:22 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable5 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable5.cc" 94 first +.src_ref 0 "0_0_reloadable5.cc" 96 60 first +.src_ref 0 "0_0_reloadable5.cc" 96 110 +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.function_start + 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00100001" // /* MW 4 */ + 2355 "11010001" // /* MW 3 */ + 2356 "11000110" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 94 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ + 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "11010000" // /* MW 6 */ + 2367 "00101011" // /* MW 5 */ + 2368 "00000000" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "11110011" // /* MW 2 */ + 2371 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 96 110 +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2373 "01110000" // /* MW 7 */ + 2374 "10010000" // /* MW 6 */ + 2375 "11101000" // /* MW 5 */ + 2376 "00000001" // /* MW 4 */ + 2377 "10110000" // /* MW 3 */ + 2378 "10000111" // /* MW 2 */ + 2379 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 96 110 first + 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2381 "11100000" // /* MW 5 */ + 2382 "11000001" // /* MW 4 */ + 2383 "10110111" // /* MW 3 */ + 2384 "00000110" // /* MW 2 */ + 2385 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2387 "00100000" // /* MW 3 */ + 2388 "10011000" // /* MW 2 */ + 2389 "00011110" // /* MW 1 */ + 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2391 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2393 "10000010" // /* MW 3 */ + 2394 "01101000" // /* MW 2 */ + 2395 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2397 "00110110" // /* MW 3 */ + 2398 "00011110" // /* MW 2 */ + 2399 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2401 "01110110" // /* MW 3 */ + 2402 "00111110" // /* MW 2 */ + 2403 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2405 "01010110" // /* MW 3 */ + 2406 "11101110" // /* MW 2 */ + 2407 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2409 "01110110" // /* MW 3 */ + 2410 "00000111" // /* MW 2 */ + 2411 "00000111" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ + 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2423 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2425 "00110010" // /* MW 3 */ + 2426 "01100011" // /* MW 2 */ + 2427 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2429 "00110001" // /* MW 3 */ + 2430 "11010110" // /* MW 2 */ + 2431 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 + 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2433 "11111101" // /* MW 3 */ + 2434 "11100010" // /* MW 2 */ + 2435 "00010111" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ + 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2441 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2443 "00011000" // /* MW 3 */ + 2444 "10010111" // /* MW 2 */ + 2445 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2447 "00001001" // /* MW 3 */ + 2448 "00100100" // /* MW 2 */ + 2449 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 first + 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00101101" // /* MW 3 */ + 2452 "00101001" // /* MW 2 */ + 2453 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "00100000" // /* MW 3 */ + 2456 "10001010" // /* MW 2 */ + 2457 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 60 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2459 "10001011" // /* MW 5 */ + 2460 "11011000" // /* MW 4 */ + 2461 "11011111" // /* MW 3 */ + 2462 "01001110" // /* MW 2 */ + 2463 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2465 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2467 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2469 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2471 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2473 "00000101" // /* MW 3 */ + 2474 "00100110" // /* MW 2 */ + 2475 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 98 110 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2477 "11111100" // /* MW 3 */ + 2478 "11110100" // /* MW 2 */ + 2479 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2481 "00000000" // /* MW 7 */ + 2482 "11000001" // /* MW 6 */ + 2483 "10110100" // /* MW 5 */ + 2484 "00000011" // /* MW 4 */ + 2485 "10110000" // /* MW 3 */ + 2486 "01101010" // /* MW 2 */ + 2487 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2489 "01110110" // /* MW 3 */ + 2490 "00011110" // /* MW 2 */ + 2491 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2493 "10110110" // /* MW 3 */ + 2494 "00111110" // /* MW 2 */ + 2495 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2497 "10010110" // /* MW 3 */ + 2498 "11101110" // /* MW 2 */ + 2499 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2501 "01110110" // /* MW 3 */ + 2502 "00000111" // /* MW 2 */ + 2503 "00000111" // /* MW 1 */ + 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2505 "00000000" // /* MW 1 */ + 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2507 "00000000" // /* MW 1 */ + 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2509 "00000000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2517 "01010010" // /* MW 3 */ + 2518 "11100111" // /* MW 2 */ + 2519 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "01110001" // /* MW 3 */ + 2522 "11010110" // /* MW 2 */ + 2523 "00001111" // /* MW 1 */ + 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2525 "00000000" // /* MW 1 */ + 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2527 "00000000" // /* MW 1 */ + 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2529 "00000000" // /* MW 1 */ + 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2531 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2533 "00011000" // /* MW 3 */ + 2534 "00010111" // /* MW 2 */ + 2535 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 first + 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2537 "00101101" // /* MW 3 */ + 2538 "00100011" // /* MW 2 */ + 2539 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "10100000" // /* MW 3 */ + 2542 "10001000" // /* MW 2 */ + 2543 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000000" // /* MW 5 */ + 2546 "11001001" // /* MW 4 */ + 2547 "11001110" // /* MW 3 */ + 2548 "00000111" // /* MW 2 */ + 2549 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 7 + 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2551 "00101011" // /* MW 5 */ + 2552 "11010100" // /* MW 4 */ + 2553 "11011111" // /* MW 3 */ + 2554 "00010011" // /* MW 2 */ + 2555 "11100000" // /* MW 1 */ + 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2557 "00000000" // /* MW 1 */ + 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2559 "00000000" // /* MW 1 */ + 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2561 "00000000" // /* MW 1 */ + 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2563 "00000000" // /* MW 1 */ + 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2565 "00000000" // /* MW 1 */ + 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2567 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 102 4 +.no_stack_arguments + 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2569 "01000000" // /* MW 3 */ + 2570 "00110000" // /* MW 2 */ + 2571 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 105 60 +.src_ref 0 "0_0_reloadable5.cc" 107 60 +.delay_slot + 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "11000000" // /* MW 3 */ + 2574 "01100000" // /* MW 2 */ + 2575 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2583 "01111110" // /* MW 9 */ + 2584 "10100101" // /* MW 8 */ + 2585 "00000001" // /* MW 7 */ + 2586 "00000000" // /* MW 6 */ + 2587 "00010000" // /* MW 5 */ + 2588 "00000000" // /* MW 4 */ + 2589 "11110000" // /* MW 3 */ + 2590 "00101100" // /* MW 2 */ + 2591 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 105 60 first +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.return_address + 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2593 "00001010" // /* MW 5 */ + 2594 "01000000" // /* MW 4 */ + 2595 "11010000" // /* MW 3 */ + 2596 "11000110" // /* MW 2 */ + 2597 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2599 "01010001" // /* MW 3 */ + 2600 "11101011" // /* MW 2 */ + 2601 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 + 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2603 "01000001" // /* MW 3 */ + 2604 "11101100" // /* MW 2 */ + 2605 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2607 "00101001" // /* MW 3 */ + 2608 "11110000" // /* MW 2 */ + 2609 "00000111" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ + 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2613 "00000000" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "10001000" // /* MW 3 */ + 2618 "01101000" // /* MW 2 */ + 2619 "00011001" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00110110" // /* MW 3 */ + 2622 "00000110" // /* MW 2 */ + 2623 "00000001" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ + 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2629 "00000000" // /* MW 1 */ + 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "00011100" // /* MW 3 */ + 2636 "10100000" // /* MW 2 */ + 2637 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "00001000" // /* MW 3 */ + 2640 "01010101" // /* MW 2 */ + 2641 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2643 "01000001" // /* MW 5 */ + 2644 "10101111" // /* MW 4 */ + 2645 "11011101" // /* MW 3 */ + 2646 "11000110" // /* MW 2 */ + 2647 "00111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 107 60 first + 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2649 "01010110" // /* MW 3 */ + 2650 "00000010" // /* MW 2 */ + 2651 "00000111" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ + 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2655 "00000000" // /* MW 1 */ + 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2657 "00000000" // /* MW 1 */ + 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2659 "00000000" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2663 "00010001" // /* MW 3 */ + 2664 "00100111" // /* MW 2 */ + 2665 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2667 "00010000" // /* MW 5 */ + 2668 "11010010" // /* MW 4 */ + 2669 "01000000" // /* MW 3 */ + 2670 "01100110" // /* MW 2 */ + 2671 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2673 "01100011" // /* MW 5 */ + 2674 "11101100" // /* MW 4 */ + 2675 "11010011" // /* MW 3 */ + 2676 "11000110" // /* MW 2 */ + 2677 "00000000" // /* MW 1 */ + 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2679 "00000000" // /* MW 1 */ + 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2681 "00000000" // /* MW 1 */ + 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2683 "00000000" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2691 "00001000" // /* MW 3 */ + 2692 "01010101" // /* MW 2 */ + 2693 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 + 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2695 "00111001" // /* MW 3 */ + 2696 "11111100" // /* MW 2 */ + 2697 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2699 "00110110" // /* MW 3 */ + 2700 "11110110" // /* MW 2 */ + 2701 "00000000" // /* MW 1 */ + 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "10011001" // /* MW 3 */ + 2704 "11110111" // /* MW 2 */ + 2705 "00000111" // /* MW 1 */ + 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11110001" // /* MW 3 */ + 2708 "11111001" // /* MW 2 */ + 2709 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 first + 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2711 "00000001" // /* MW 5 */ + 2712 "00000000" // /* MW 4 */ + 2713 "00000000" // /* MW 3 */ + 2714 "11111000" // /* MW 2 */ + 2715 "11111111" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ + 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2719 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 110 + 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2721 "00000000" // /* MW 3 */ + 2722 "00101000" // /* MW 2 */ + 2723 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "00011100" // /* MW 3 */ + 2726 "11100000" // /* MW 2 */ + 2727 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "00010001" // /* MW 3 */ + 2730 "00100001" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2733 "00000010" // /* MW 3 */ + 2734 "01100001" // /* MW 2 */ + 2735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2737 "00010001" // /* MW 3 */ + 2738 "11110110" // /* MW 2 */ + 2739 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2741 "00000000" // /* MW 1 */ +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 432 first +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.function_start + 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2753 "01111000" // /* MW 9 */ + 2754 "01100000" // /* MW 8 */ + 2755 "01001001" // /* MW 7 */ + 2756 "10001000" // /* MW 6 */ + 2757 "01000000" // /* MW 5 */ + 2758 "00000000" // /* MW 4 */ + 2759 "11010000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2763 "01001000" // /* MW 9 */ + 2764 "10000010" // /* MW 8 */ + 2765 "00110000" // /* MW 7 */ + 2766 "11101001" // /* MW 6 */ + 2767 "01010111" // /* MW 5 */ + 2768 "00111110" // /* MW 4 */ + 2769 "11010000" // /* MW 3 */ + 2770 "10000001" // /* MW 2 */ + 2771 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 432 +.src_ref 2 "conv2d_bf16_params.h" 444 52 + 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2773 "01110000" // /* MW 9 */ + 2774 "00000000" // /* MW 8 */ + 2775 "00000000" // /* MW 7 */ + 2776 "00000000" // /* MW 6 */ + 2777 "00000010" // /* MW 5 */ + 2778 "00000000" // /* MW 4 */ + 2779 "00000000" // /* MW 3 */ + 2780 "10000001" // /* MW 2 */ + 2781 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 30 +.src_ref 2 "conv2d_bf16_params.h" 458 30 + 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2783 "01011000" // /* MW 11 */ + 2784 "00010000" // /* MW 10 */ + 2785 "00000000" // /* MW 9 */ + 2786 "00101000" // /* MW 8 */ + 2787 "00000000" // /* MW 7 */ + 2788 "10000001" // /* MW 6 */ + 2789 "10110101" // /* MW 5 */ + 2790 "11111101" // /* MW 4 */ + 2791 "00000111" // /* MW 3 */ + 2792 "10000110" // /* MW 2 */ + 2793 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2795 "01011000" // /* MW 11 */ + 2796 "00001111" // /* MW 10 */ + 2797 "10001000" // /* MW 9 */ + 2798 "10101010" // /* MW 8 */ + 2799 "01010111" // /* MW 7 */ + 2800 "10111111" // /* MW 6 */ + 2801 "11010101" // /* MW 5 */ + 2802 "11111001" // /* MW 4 */ + 2803 "00000111" // /* MW 3 */ + 2804 "01100011" // /* MW 2 */ + 2805 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "00000010" // /* MW 5 */ + 2808 "01100000" // /* MW 4 */ + 2809 "10110000" // /* MW 3 */ + 2810 "10111110" // /* MW 2 */ + 2811 "11111110" // /* MW 1 */ + 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2815 "00101001" // /* MW 3 */ + 2816 "00011100" // /* MW 2 */ + 2817 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2819 "00001001" // /* MW 3 */ + 2820 "00011100" // /* MW 2 */ + 2821 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00101110" // /* MW 3 */ + 2824 "00011100" // /* MW 2 */ + 2825 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "00001110" // /* MW 3 */ + 2828 "00011100" // /* MW 2 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2841 "00101001" // /* MW 3 */ + 2842 "00011100" // /* MW 2 */ + 2843 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "00001001" // /* MW 3 */ + 2846 "00011100" // /* MW 2 */ + 2847 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00101110" // /* MW 3 */ + 2850 "00011100" // /* MW 2 */ + 2851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00001110" // /* MW 3 */ + 2854 "00011100" // /* MW 2 */ + 2855 "00000000" // /* MW 1 */ + 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2857 "00000000" // /* MW 1 */ + 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2859 "00000000" // /* MW 1 */ + 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2861 "00000000" // /* MW 1 */ + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2867 "00101001" // /* MW 3 */ + 2868 "00011100" // /* MW 2 */ + 2869 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001001" // /* MW 3 */ + 2872 "00011100" // /* MW 2 */ + 2873 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00001110" // /* MW 3 */ + 2876 "00000100" // /* MW 2 */ + 2877 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101110" // /* MW 3 */ + 2880 "00010100" // /* MW 2 */ + 2881 "00000000" // /* MW 1 */ + 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2883 "00000000" // /* MW 1 */ + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2893 "00001001" // /* MW 3 */ + 2894 "00000100" // /* MW 2 */ + 2895 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2897 "00101001" // /* MW 3 */ + 2898 "00010100" // /* MW 2 */ + 2899 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 40 first + 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "10101010" // /* MW 3 */ + 2902 "11011101" // /* MW 2 */ + 2903 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 447 34 first + 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2905 "00101010" // /* MW 3 */ + 2906 "00011110" // /* MW 2 */ + 2907 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 448 34 first + 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2909 "11001010" // /* MW 3 */ + 2910 "10111101" // /* MW 2 */ + 2911 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "11111010" // /* MW 3 */ + 2914 "11111101" // /* MW 2 */ + 2915 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first + 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "01101010" // /* MW 3 */ + 2918 "00001010" // /* MW 2 */ + 2919 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 20 first + 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2921 "11101010" // /* MW 3 */ + 2922 "10101100" // /* MW 2 */ + 2923 "00000010" // /* MW 1 */ + 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2925 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first + 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2927 "00011101" // /* MW 3 */ + 2928 "01000010" // /* MW 2 */ + 2929 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first +.src_ref 2 "conv2d_bf16_params.h" 462 7 first + 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2931 "00000001" // /* MW 5 */ + 2932 "00110001" // /* MW 4 */ + 2933 "11111001" // /* MW 3 */ + 2934 "00100000" // /* MW 2 */ + 2935 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "01011101" // /* MW 3 */ + 2938 "10100100" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2941 "01000111" // /* MW 3 */ + 2942 "11110110" // /* MW 2 */ + 2943 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2945 "00111001" // /* MW 5 */ + 2946 "10110111" // /* MW 4 */ + 2947 "01000000" // /* MW 3 */ + 2948 "01001010" // /* MW 2 */ + 2949 "11000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2951 "00100010" // /* MW 3 */ + 2952 "01111011" // /* MW 2 */ + 2953 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first + 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2955 "01100111" // /* MW 3 */ + 2956 "11001100" // /* MW 2 */ + 2957 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 + 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00000100" // /* MW 3 */ + 2960 "10110111" // /* MW 2 */ + 2961 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 first + 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2963 "01000001" // /* MW 5 */ + 2964 "10111011" // /* MW 4 */ + 2965 "10111100" // /* MW 3 */ + 2966 "11101011" // /* MW 2 */ + 2967 "01111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first + 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2969 "00000100" // /* MW 5 */ + 2970 "10011011" // /* MW 4 */ + 2971 "10110011" // /* MW 3 */ + 2972 "10111110" // /* MW 2 */ + 2973 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first + 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 2975 "00000001" // /* MW 5 */ + 2976 "01000000" // /* MW 4 */ + 2977 "11111000" // /* MW 3 */ + 2978 "00000101" // /* MW 2 */ + 2979 "11001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first +.delay_slot + 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2981 "01000111" // /* MW 3 */ + 2982 "10110110" // /* MW 2 */ + 2983 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first +.delay_slot + 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2985 "01000100" // /* MW 3 */ + 2986 "01110001" // /* MW 2 */ + 2987 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.delay_slot + 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "01011101" // /* MW 3 */ + 2990 "11111100" // /* MW 2 */ + 2991 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 first +.delay_slot + 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2993 "01001101" // /* MW 3 */ + 2994 "11101000" // /* MW 2 */ + 2995 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.delay_slot + 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2997 "00110010" // /* MW 3 */ + 2998 "10001100" // /* MW 2 */ + 2999 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 + 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 3001 "00000001" // /* MW 5 */ + 3002 "01000000" // /* MW 4 */ + 3003 "11111000" // /* MW 3 */ + 3004 "00000101" // /* MW 2 */ + 3005 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */ + 3017 "00100000" // /* MW 9 */ + 3018 "00000000" // /* MW 8 */ + 3019 "00000000" // /* MW 7 */ + 3020 "10000100" // /* MW 6 */ + 3021 "00000001" // /* MW 5 */ + 3022 "00000000" // /* MW 4 */ + 3023 "00000000" // /* MW 3 */ + 3024 "00101111" // /* MW 2 */ + 3025 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3027 "01011000" // /* MW 9 */ + 3028 "00001100" // /* MW 8 */ + 3029 "10001000" // /* MW 7 */ + 3030 "10101011" // /* MW 6 */ + 3031 "01010111" // /* MW 5 */ + 3032 "00111110" // /* MW 4 */ + 3033 "00000000" // /* MW 3 */ + 3034 "00011010" // /* MW 2 */ + 3035 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3037 "01000001" // /* MW 5 */ + 3038 "00100000" // /* MW 4 */ + 3039 "00100001" // /* MW 3 */ + 3040 "01000010" // /* MW 2 */ + 3041 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.delay_slot + 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "00001101" // /* MW 3 */ + 3044 "00011010" // /* MW 2 */ + 3045 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.delay_slot + 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3047 "00111101" // /* MW 3 */ + 3048 "00001110" // /* MW 2 */ + 3049 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3051 "11100010" // /* MW 5 */ + 3052 "10010001" // /* MW 4 */ + 3053 "11111111" // /* MW 3 */ + 3054 "00101100" // /* MW 2 */ + 3055 "00000000" // /* MW 1 */ +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3057 "01011000" // /* MW 11 */ + 3058 "11111100" // /* MW 10 */ + 3059 "10001111" // /* MW 9 */ + 3060 "10001000" // /* MW 8 */ + 3061 "01010000" // /* MW 7 */ + 3062 "00000001" // /* MW 6 */ + 3063 "00001011" // /* MW 5 */ + 3064 "10000010" // /* MW 4 */ + 3065 "10000001" // /* MW 3 */ + 3066 "00000010" // /* MW 2 */ + 3067 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3069 "01011000" // /* MW 9 */ + 3070 "00001100" // /* MW 8 */ + 3071 "10001000" // /* MW 7 */ + 3072 "00001011" // /* MW 6 */ + 3073 "10100000" // /* MW 5 */ + 3074 "00000001" // /* MW 4 */ + 3075 "11100000" // /* MW 3 */ + 3076 "00011000" // /* MW 2 */ + 3077 "00100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3079 "01011000" // /* MW 9 */ + 3080 "00000001" // /* MW 8 */ + 3081 "11101000" // /* MW 7 */ + 3082 "10101001" // /* MW 6 */ + 3083 "01010111" // /* MW 5 */ + 3084 "00111110" // /* MW 4 */ + 3085 "00000000" // /* MW 3 */ + 3086 "00000010" // /* MW 2 */ + 3087 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 87 + 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3089 "00000000" // /* MW 15 */ + 3090 "00000000" // /* MW 14 */ + 3091 "01011000" // /* MW 13 */ + 3092 "00000011" // /* MW 12 */ + 3093 "10101000" // /* MW 11 */ + 3094 "11101001" // /* MW 10 */ + 3095 "01110001" // /* MW 9 */ + 3096 "00000000" // /* MW 8 */ + 3097 "01011011" // /* MW 7 */ + 3098 "00000001" // /* MW 6 */ + 3099 "00100000" // /* MW 5 */ + 3100 "00000000" // /* MW 4 */ + 3101 "11110000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.src_ref 2 "conv2d_bf16_params.h" 495 68 first +.src_ref 2 "conv2d_bf16_params.h" 495 112 +.src_ref 2 "conv2d_bf16_params.h" 682 38 + 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00111100" // /* MW 8 */ + 3107 "00000000" // /* MW 7 */ + 3108 "00111100" // /* MW 6 */ + 3109 "10110011" // /* MW 5 */ + 3110 "00011011" // /* MW 4 */ + 3111 "01010000" // /* MW 3 */ + 3112 "11000101" // /* MW 2 */ + 3113 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 first +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 495 112 + 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01011000" // /* MW 9 */ + 3116 "11001101" // /* MW 8 */ + 3117 "10000111" // /* MW 7 */ + 3118 "00010010" // /* MW 6 */ + 3119 "00101101" // /* MW 5 */ + 3120 "00000011" // /* MW 4 */ + 3121 "01010000" // /* MW 3 */ + 3122 "00000101" // /* MW 2 */ + 3123 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 first +.src_ref 2 "conv2d_bf16_params.h" 496 68 +.src_ref 2 "conv2d_bf16_params.h" 504 35 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 578 47 + 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3125 "01011000" // /* MW 9 */ + 3126 "00110111" // /* MW 8 */ + 3127 "10000000" // /* MW 7 */ + 3128 "10010001" // /* MW 6 */ + 3129 "11011010" // /* MW 5 */ + 3130 "00111011" // /* MW 4 */ + 3131 "00000000" // /* MW 3 */ + 3132 "01010111" // /* MW 2 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.src_ref 2 "conv2d_bf16_params.h" 504 45 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 +.src_ref 2 "conv2d_bf16_params.h" 519 42 +.src_ref 2 "conv2d_bf16_params.h" 700 34 + 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3135 "01011000" // /* MW 9 */ + 3136 "10111100" // /* MW 8 */ + 3137 "00000111" // /* MW 7 */ + 3138 "00111101" // /* MW 6 */ + 3139 "10110000" // /* MW 5 */ + 3140 "00101011" // /* MW 4 */ + 3141 "00000000" // /* MW 3 */ + 3142 "00000011" // /* MW 2 */ + 3143 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 492 25 first +.src_ref 2 "conv2d_bf16_params.h" 497 46 +.src_ref 2 "conv2d_bf16_params.h" 509 50 + 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3145 "01011000" // /* MW 9 */ + 3146 "01110000" // /* MW 8 */ + 3147 "10000000" // /* MW 7 */ + 3148 "01101100" // /* MW 6 */ + 3149 "01101100" // /* MW 5 */ + 3150 "00011111" // /* MW 4 */ + 3151 "00000000" // /* MW 3 */ + 3152 "00010000" // /* MW 2 */ + 3153 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 520 34 first + 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3155 "01011101" // /* MW 5 */ + 3156 "00011110" // /* MW 4 */ + 3157 "00001000" // /* MW 3 */ + 3158 "10010010" // /* MW 2 */ + 3159 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 +.src_ref 2 "conv2d_bf16_params.h" 520 48 + 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3161 "01011001" // /* MW 9 */ + 3162 "00110001" // /* MW 8 */ + 3163 "10000000" // /* MW 7 */ + 3164 "01101111" // /* MW 6 */ + 3165 "01100001" // /* MW 5 */ + 3166 "00101101" // /* MW 4 */ + 3167 "10110000" // /* MW 3 */ + 3168 "01011010" // /* MW 2 */ + 3169 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 +.src_ref 2 "conv2d_bf16_params.h" 507 42 first + 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3171 "00000101" // /* MW 5 */ + 3172 "00011111" // /* MW 4 */ + 3173 "00111100" // /* MW 3 */ + 3174 "10111010" // /* MW 2 */ + 3175 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 99 first + 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3177 "00010001" // /* MW 3 */ + 3178 "11000010" // /* MW 2 */ + 3179 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 610 64 +.src_ref 2 "conv2d_bf16_params.h" 709 96 + 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3181 "00011101" // /* MW 5 */ + 3182 "10100000" // /* MW 4 */ + 3183 "11110000" // /* MW 3 */ + 3184 "11000011" // /* MW 2 */ + 3185 "10001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first + 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3187 "00100001" // /* MW 3 */ + 3188 "10100011" // /* MW 2 */ + 3189 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 first + 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00011101" // /* MW 3 */ + 3192 "11111110" // /* MW 2 */ + 3193 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 506 48 +.src_ref 2 "conv2d_bf16_params.h" 519 42 first + 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3195 "01011001" // /* MW 9 */ + 3196 "01010111" // /* MW 8 */ + 3197 "10000000" // /* MW 7 */ + 3198 "11101110" // /* MW 6 */ + 3199 "11110001" // /* MW 5 */ + 3200 "00111011" // /* MW 4 */ + 3201 "00110000" // /* MW 3 */ + 3202 "01111110" // /* MW 2 */ + 3203 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 68 first +.src_ref 2 "conv2d_bf16_params.h" 504 35 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 + 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3205 "01011000" // /* MW 9 */ + 3206 "10110010" // /* MW 8 */ + 3207 "10000111" // /* MW 7 */ + 3208 "00111101" // /* MW 6 */ + 3209 "00110000" // /* MW 5 */ + 3210 "00101111" // /* MW 4 */ + 3211 "01010000" // /* MW 3 */ + 3212 "01010101" // /* MW 2 */ + 3213 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3215 "01111011" // /* MW 5 */ + 3216 "11001100" // /* MW 4 */ + 3217 "10111001" // /* MW 3 */ + 3218 "01001110" // /* MW 2 */ + 3219 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 first +.src_ref 2 "conv2d_bf16_params.h" 520 19 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3221 "01011000" // /* MW 9 */ + 3222 "11110110" // /* MW 8 */ + 3223 "00000000" // /* MW 7 */ + 3224 "00101101" // /* MW 6 */ + 3225 "01101011" // /* MW 5 */ + 3226 "00111111" // /* MW 4 */ + 3227 "11100000" // /* MW 3 */ + 3228 "01010100" // /* MW 2 */ + 3229 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 46 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3231 "01011000" // /* MW 9 */ + 3232 "01010000" // /* MW 8 */ + 3233 "10000111" // /* MW 7 */ + 3234 "00010000" // /* MW 6 */ + 3235 "00111000" // /* MW 5 */ + 3236 "00100111" // /* MW 4 */ + 3237 "01010000" // /* MW 3 */ + 3238 "01000011" // /* MW 2 */ + 3239 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3241 "01100111" // /* MW 3 */ + 3242 "11111110" // /* MW 2 */ + 3243 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "01100111" // /* MW 3 */ + 3246 "11100000" // /* MW 2 */ + 3247 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "00000101" // /* MW 3 */ + 3250 "11110111" // /* MW 2 */ + 3251 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3253 "01010100" // /* MW 3 */ + 3254 "11101011" // /* MW 2 */ + 3255 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3257 "01100001" // /* MW 5 */ + 3258 "10100000" // /* MW 4 */ + 3259 "11011000" // /* MW 3 */ + 3260 "10100011" // /* MW 2 */ + 3261 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 first +.src_ref 2 "conv2d_bf16_params.h" 507 34 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3263 "01001001" // /* MW 9 */ + 3264 "10000000" // /* MW 8 */ + 3265 "11001111" // /* MW 7 */ + 3266 "01101111" // /* MW 6 */ + 3267 "00101001" // /* MW 5 */ + 3268 "00011111" // /* MW 4 */ + 3269 "10110000" // /* MW 3 */ + 3270 "01000010" // /* MW 2 */ + 3271 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 first + 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3273 "00111011" // /* MW 5 */ + 3274 "01000110" // /* MW 4 */ + 3275 "00111111" // /* MW 3 */ + 3276 "11101010" // /* MW 2 */ + 3277 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3279 "01010000" // /* MW 7 */ + 3280 "10101000" // /* MW 6 */ + 3281 "00000000" // /* MW 5 */ + 3282 "00000010" // /* MW 4 */ + 3283 "00110000" // /* MW 3 */ + 3284 "01101010" // /* MW 2 */ + 3285 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 first +.src_ref 2 "conv2d_bf16_params.h" 509 19 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3287 "01111000" // /* MW 11 */ + 3288 "11001110" // /* MW 10 */ + 3289 "00001101" // /* MW 9 */ + 3290 "00101100" // /* MW 8 */ + 3291 "10110000" // /* MW 7 */ + 3292 "10100111" // /* MW 6 */ + 3293 "11110101" // /* MW 5 */ + 3294 "11100111" // /* MW 4 */ + 3295 "01010111" // /* MW 3 */ + 3296 "01001001" // /* MW 2 */ + 3297 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 19 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3299 "00010101" // /* MW 3 */ + 3300 "11100011" // /* MW 2 */ + 3301 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3303 "10000001" // /* MW 3 */ + 3304 "10110111" // /* MW 2 */ + 3305 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 47 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3307 "10010000" // /* MW 3 */ + 3308 "10110000" // /* MW 2 */ + 3309 "00010100" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 57 first + 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3317 "00100001" // /* MW 3 */ + 3318 "11100101" // /* MW 2 */ + 3319 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 + 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3321 "01010001" // /* MW 3 */ + 3322 "11001010" // /* MW 2 */ + 3323 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 48 first + 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3325 "01001010" // /* MW 3 */ + 3326 "10101010" // /* MW 2 */ + 3327 "00000010" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 62 + 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3341 "11100001" // /* MW 3 */ + 3342 "10100100" // /* MW 2 */ + 3343 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 + 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3345 "10111110" // /* MW 3 */ + 3346 "10100101" // /* MW 2 */ + 3347 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 first + 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3349 "00101101" // /* MW 3 */ + 3350 "10100100" // /* MW 2 */ + 3351 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3353 "00000000" // /* MW 5 */ + 3354 "10100000" // /* MW 4 */ + 3355 "00001101" // /* MW 3 */ + 3356 "00000001" // /* MW 2 */ + 3357 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3359 "00100000" // /* MW 3 */ + 3360 "11100101" // /* MW 2 */ + 3361 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3363 "00000000" // /* MW 5 */ + 3364 "10100000" // /* MW 4 */ + 3365 "00001101" // /* MW 3 */ + 3366 "11111111" // /* MW 2 */ + 3367 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 642 99 + 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3369 "11000001" // /* MW 5 */ + 3370 "00111111" // /* MW 4 */ + 3371 "10011001" // /* MW 3 */ + 3372 "11100100" // /* MW 2 */ + 3373 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 19 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "11100001" // /* MW 5 */ + 3376 "10111111" // /* MW 4 */ + 3377 "10111000" // /* MW 3 */ + 3378 "11100010" // /* MW 2 */ + 3379 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 512 64 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 first + 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3381 "00111011" // /* MW 5 */ + 3382 "11001110" // /* MW 4 */ + 3383 "00111001" // /* MW 3 */ + 3384 "11101110" // /* MW 2 */ + 3385 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3387 "00110001" // /* MW 3 */ + 3388 "10110101" // /* MW 2 */ + 3389 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3391 "10101101" // /* MW 3 */ + 3392 "00101001" // /* MW 2 */ + 3393 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 520 19 first + 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3395 "01100101" // /* MW 3 */ + 3396 "10110101" // /* MW 2 */ + 3397 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 36 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 first + 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3399 "00100000" // /* MW 5 */ + 3400 "01101001" // /* MW 4 */ + 3401 "00111111" // /* MW 3 */ + 3402 "01101010" // /* MW 2 */ + 3403 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 65 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 first +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3405 "10101000" // /* MW 9 */ + 3406 "10101000" // /* MW 8 */ + 3407 "11001110" // /* MW 7 */ + 3408 "01101111" // /* MW 6 */ + 3409 "01001001" // /* MW 5 */ + 3410 "00110111" // /* MW 4 */ + 3411 "01010000" // /* MW 3 */ + 3412 "01100101" // /* MW 2 */ + 3413 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3415 "11111001" // /* MW 5 */ + 3416 "10100011" // /* MW 4 */ + 3417 "10111000" // /* MW 3 */ + 3418 "10100011" // /* MW 2 */ + 3419 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 45 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3421 "00011111" // /* MW 5 */ + 3422 "01101011" // /* MW 4 */ + 3423 "11101101" // /* MW 3 */ + 3424 "01100100" // /* MW 2 */ + 3425 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3427 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3429 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 48 first +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3435 "11001010" // /* MW 5 */ + 3436 "10110101" // /* MW 4 */ + 3437 "10111101" // /* MW 3 */ + 3438 "01011111" // /* MW 2 */ + 3439 "10000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3441 "00000001" // /* MW 5 */ + 3442 "01000000" // /* MW 4 */ + 3443 "11111000" // /* MW 3 */ + 3444 "00000110" // /* MW 2 */ + 3445 "11111000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 76 first +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3447 "11110010" // /* MW 5 */ + 3448 "10111011" // /* MW 4 */ + 3449 "11101101" // /* MW 3 */ + 3450 "01000001" // /* MW 2 */ + 3451 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3453 "01011101" // /* MW 3 */ + 3454 "11101011" // /* MW 2 */ + 3455 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 first +.delay_slot + 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3457 "00010100" // /* MW 3 */ + 3458 "01100011" // /* MW 2 */ + 3459 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.src_ref 2 "conv2d_bf16_params.h" 539 139 first +.src_ref 2 "conv2d_bf16_params.h" 555 59 +.src_ref 2 "conv2d_bf16_params.h" 559 59 +.src_ref 2 "conv2d_bf16_params.h" 700 17 +.delay_slot + 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3461 "01011001" // /* MW 9 */ + 3462 "00000001" // /* MW 8 */ + 3463 "00101000" // /* MW 7 */ + 3464 "00111110" // /* MW 6 */ + 3465 "10111110" // /* MW 5 */ + 3466 "00001101" // /* MW 4 */ + 3467 "00110000" // /* MW 3 */ + 3468 "01000110" // /* MW 2 */ + 3469 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3471 "10011100" // /* MW 3 */ + 3472 "10011011" // /* MW 2 */ + 3473 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3475 "10010001" // /* MW 3 */ + 3476 "11100011" // /* MW 2 */ + 3477 "00000111" // /* MW 1 */ + 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3479 "00000000" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3491 "00000001" // /* MW 5 */ + 3492 "01000000" // /* MW 4 */ + 3493 "11111000" // /* MW 3 */ + 3494 "00000110" // /* MW 2 */ + 3495 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3507 "01011000" // /* MW 9 */ + 3508 "01000000" // /* MW 8 */ + 3509 "00101000" // /* MW 7 */ + 3510 "10001011" // /* MW 6 */ + 3511 "00010000" // /* MW 5 */ + 3512 "00000001" // /* MW 4 */ + 3513 "00000000" // /* MW 3 */ + 3514 "10111100" // /* MW 2 */ + 3515 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "11010010" // /* MW 3 */ + 3518 "01111110" // /* MW 2 */ + 3519 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3521 "01100111" // /* MW 3 */ + 3522 "01110110" // /* MW 2 */ + 3523 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3525 "00000001" // /* MW 5 */ + 3526 "10100000" // /* MW 4 */ + 3527 "01001111" // /* MW 3 */ + 3528 "00111000" // /* MW 2 */ + 3529 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 46 + 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3531 "01010000" // /* MW 3 */ + 3532 "00110010" // /* MW 2 */ + 3533 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 44 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3535 "11101111" // /* MW 3 */ + 3536 "01111101" // /* MW 2 */ + 3537 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3539 "00111001" // /* MW 5 */ + 3540 "11000100" // /* MW 4 */ + 3541 "01011101" // /* MW 3 */ + 3542 "11100011" // /* MW 2 */ + 3543 "11001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3545 "10000010" // /* MW 3 */ + 3546 "11100011" // /* MW 2 */ + 3547 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 79 + 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3549 "11101111" // /* MW 3 */ + 3550 "01100011" // /* MW 2 */ + 3551 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "11000001" // /* MW 3 */ + 3554 "11111001" // /* MW 2 */ + 3555 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3557 "11001110" // /* MW 3 */ + 3558 "01100011" // /* MW 2 */ + 3559 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 55 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3561 "00011100" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00000000" // /* MW 5 */ + 3564 "10000001" // /* MW 4 */ + 3565 "00010100" // /* MW 3 */ + 3566 "00100011" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01111000" // /* MW 9 */ + 3570 "00001110" // /* MW 8 */ + 3571 "01110000" // /* MW 7 */ + 3572 "11101011" // /* MW 6 */ + 3573 "11000111" // /* MW 5 */ + 3574 "00111111" // /* MW 4 */ + 3575 "00000000" // /* MW 3 */ + 3576 "00011001" // /* MW 2 */ + 3577 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3579 "11000010" // /* MW 3 */ + 3580 "01111111" // /* MW 2 */ + 3581 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 34 first +.src_ref 2 "conv2d_bf16_params.h" 641 32 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3583 "10101000" // /* MW 9 */ + 3584 "01110100" // /* MW 8 */ + 3585 "01001111" // /* MW 7 */ + 3586 "10000011" // /* MW 6 */ + 3587 "00000100" // /* MW 5 */ + 3588 "00100001" // /* MW 4 */ + 3589 "00100000" // /* MW 3 */ + 3590 "01101110" // /* MW 2 */ + 3591 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 61 first +.src_ref 2 "conv2d_bf16_params.h" 640 16 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3593 "01011000" // /* MW 9 */ + 3594 "00001001" // /* MW 8 */ + 3595 "10101000" // /* MW 7 */ + 3596 "10000011" // /* MW 6 */ + 3597 "01000100" // /* MW 5 */ + 3598 "00101001" // /* MW 4 */ + 3599 "00000000" // /* MW 3 */ + 3600 "00011110" // /* MW 2 */ + 3601 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3603 "11100010" // /* MW 3 */ + 3604 "01110011" // /* MW 2 */ + 3605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 47 first + 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3607 "10001000" // /* MW 3 */ + 3608 "11111001" // /* MW 2 */ + 3609 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 640 16 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00111101" // /* MW 3 */ + 3612 "01111011" // /* MW 2 */ + 3613 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3615 "00010000" // /* MW 9 */ + 3616 "00000100" // /* MW 8 */ + 3617 "00001010" // /* MW 7 */ + 3618 "00000011" // /* MW 6 */ + 3619 "00000000" // /* MW 5 */ + 3620 "00000000" // /* MW 4 */ + 3621 "00100000" // /* MW 3 */ + 3622 "11011110" // /* MW 2 */ + 3623 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 641 44 first +.src_ref 2 "conv2d_bf16_params.h" 642 45 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "11111111" // /* MW 5 */ + 3626 "00111010" // /* MW 4 */ + 3627 "10111111" // /* MW 3 */ + 3628 "11100111" // /* MW 2 */ + 3629 "11001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "11100110" // /* MW 3 */ + 3632 "11001111" // /* MW 2 */ + 3633 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 55 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3635 "00101001" // /* MW 5 */ + 3636 "10101000" // /* MW 4 */ + 3637 "00001011" // /* MW 3 */ + 3638 "11010010" // /* MW 2 */ + 3639 "10110100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3641 "00000001" // /* MW 5 */ + 3642 "00100001" // /* MW 4 */ + 3643 "01001101" // /* MW 3 */ + 3644 "10110000" // /* MW 2 */ + 3645 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3647 "00111001" // /* MW 5 */ + 3648 "11000010" // /* MW 4 */ + 3649 "00011101" // /* MW 3 */ + 3650 "10110101" // /* MW 2 */ + 3651 "00110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 99 first + 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3653 "00100100" // /* MW 3 */ + 3654 "11001111" // /* MW 2 */ + 3655 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3657 "01000001" // /* MW 5 */ + 3658 "10100110" // /* MW 4 */ + 3659 "01001101" // /* MW 3 */ + 3660 "11011110" // /* MW 2 */ + 3661 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3663 "01111101" // /* MW 5 */ + 3664 "00100000" // /* MW 4 */ + 3665 "01001001" // /* MW 3 */ + 3666 "00001000" // /* MW 2 */ + 3667 "00101001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 first + 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3669 "00100100" // /* MW 3 */ + 3670 "11101111" // /* MW 2 */ + 3671 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 15 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3673 "01011000" // /* MW 9 */ + 3674 "01110000" // /* MW 8 */ + 3675 "01001111" // /* MW 7 */ + 3676 "01101110" // /* MW 6 */ + 3677 "01000010" // /* MW 5 */ + 3678 "00100000" // /* MW 4 */ + 3679 "00000000" // /* MW 3 */ + 3680 "00011110" // /* MW 2 */ + 3681 "11011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3683 "00100010" // /* MW 3 */ + 3684 "10111101" // /* MW 2 */ + 3685 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 85 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3687 "01011000" // /* MW 9 */ + 3688 "00100000" // /* MW 8 */ + 3689 "00001001" // /* MW 7 */ + 3690 "11111110" // /* MW 6 */ + 3691 "10101001" // /* MW 5 */ + 3692 "00101111" // /* MW 4 */ + 3693 "00000000" // /* MW 3 */ + 3694 "00000101" // /* MW 2 */ + 3695 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3697 "01010010" // /* MW 3 */ + 3698 "00100000" // /* MW 2 */ + 3699 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 559 59 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3701 "11110010" // /* MW 5 */ + 3702 "10111101" // /* MW 4 */ + 3703 "11111101" // /* MW 3 */ + 3704 "00001001" // /* MW 2 */ + 3705 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 41 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3707 "00111001" // /* MW 5 */ + 3708 "11000100" // /* MW 4 */ + 3709 "10111101" // /* MW 3 */ + 3710 "00111111" // /* MW 2 */ + 3711 "10000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 117 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3713 "01011111" // /* MW 5 */ + 3714 "01101011" // /* MW 4 */ + 3715 "10110111" // /* MW 3 */ + 3716 "11101110" // /* MW 2 */ + 3717 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 700 34 first + 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3719 "00110010" // /* MW 3 */ + 3720 "10000100" // /* MW 2 */ + 3721 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 52 first + 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3723 "00001100" // /* MW 3 */ + 3724 "01111110" // /* MW 2 */ + 3725 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 92 first + 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3727 "10001111" // /* MW 3 */ + 3728 "00110001" // /* MW 2 */ + 3729 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 36 first + 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3731 "11000101" // /* MW 3 */ + 3732 "11110111" // /* MW 2 */ + 3733 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 64 first +.src_ref 2 "conv2d_bf16_params.h" 611 47 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 629 82 + 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3735 "01011000" // /* MW 11 */ + 3736 "00000000" // /* MW 10 */ + 3737 "10001001" // /* MW 9 */ + 3738 "11101110" // /* MW 8 */ + 3739 "11000000" // /* MW 7 */ + 3740 "10110111" // /* MW 6 */ + 3741 "10010101" // /* MW 5 */ + 3742 "11101110" // /* MW 4 */ + 3743 "00000111" // /* MW 3 */ + 3744 "00000011" // /* MW 2 */ + 3745 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 649 41 + 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3747 "00111001" // /* MW 5 */ + 3748 "10110111" // /* MW 4 */ + 3749 "01000000" // /* MW 3 */ + 3750 "00101000" // /* MW 2 */ + 3751 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3753 "00001100" // /* MW 5 */ + 3754 "10101100" // /* MW 4 */ + 3755 "00001111" // /* MW 3 */ + 3756 "00000000" // /* MW 2 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 60 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first + 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3759 "11001001" // /* MW 9 */ + 3760 "00111111" // /* MW 8 */ + 3761 "10001001" // /* MW 7 */ + 3762 "00111100" // /* MW 6 */ + 3763 "10110000" // /* MW 5 */ + 3764 "00011111" // /* MW 4 */ + 3765 "10110000" // /* MW 3 */ + 3766 "00010010" // /* MW 2 */ + 3767 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 53 +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 555 59 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3769 "11001000" // /* MW 11 */ + 3770 "01111111" // /* MW 10 */ + 3771 "11001100" // /* MW 9 */ + 3772 "10010010" // /* MW 8 */ + 3773 "11111111" // /* MW 7 */ + 3774 "10101101" // /* MW 6 */ + 3775 "10010001" // /* MW 5 */ + 3776 "00011100" // /* MW 4 */ + 3777 "10000010" // /* MW 3 */ + 3778 "10001100" // /* MW 2 */ + 3779 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 240 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3781 "01111001" // /* MW 9 */ + 3782 "10001110" // /* MW 8 */ + 3783 "01110000" // /* MW 7 */ + 3784 "11101111" // /* MW 6 */ + 3785 "01010111" // /* MW 5 */ + 3786 "00101011" // /* MW 4 */ + 3787 "00110000" // /* MW 3 */ + 3788 "01011010" // /* MW 2 */ + 3789 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 53 first +.src_ref 2 "conv2d_bf16_params.h" 559 53 +.src_ref 2 "conv2d_bf16_params.h" 621 140 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3791 "01011000" // /* MW 11 */ + 3792 "01011000" // /* MW 10 */ + 3793 "00000000" // /* MW 9 */ + 3794 "00001110" // /* MW 8 */ + 3795 "01001110" // /* MW 7 */ + 3796 "10101001" // /* MW 6 */ + 3797 "01010001" // /* MW 5 */ + 3798 "00011111" // /* MW 4 */ + 3799 "00000010" // /* MW 3 */ + 3800 "11011001" // /* MW 2 */ + 3801 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 53 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3803 "00100100" // /* MW 5 */ + 3804 "11100011" // /* MW 4 */ + 3805 "00111111" // /* MW 3 */ + 3806 "01100010" // /* MW 2 */ + 3807 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 47 first +.src_ref 2 "conv2d_bf16_params.h" 621 222 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3809 "01111000" // /* MW 11 */ + 3810 "10010000" // /* MW 10 */ + 3811 "01101001" // /* MW 9 */ + 3812 "00001111" // /* MW 8 */ + 3813 "11001110" // /* MW 7 */ + 3814 "10101011" // /* MW 6 */ + 3815 "10010001" // /* MW 5 */ + 3816 "11101111" // /* MW 4 */ + 3817 "00100010" // /* MW 3 */ + 3818 "01101110" // /* MW 2 */ + 3819 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 661 61 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3821 "11001000" // /* MW 9 */ + 3822 "11111111" // /* MW 8 */ + 3823 "10001100" // /* MW 7 */ + 3824 "00010010" // /* MW 6 */ + 3825 "11001110" // /* MW 5 */ + 3826 "00101001" // /* MW 4 */ + 3827 "00000000" // /* MW 3 */ + 3828 "11110011" // /* MW 2 */ + 3829 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 710 60 +.src_ref 2 "conv2d_bf16_params.h" 710 65 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3831 "01111000" // /* MW 9 */ + 3832 "10001110" // /* MW 8 */ + 3833 "01110000" // /* MW 7 */ + 3834 "01110011" // /* MW 6 */ + 3835 "11101010" // /* MW 5 */ + 3836 "00111011" // /* MW 4 */ + 3837 "00000000" // /* MW 3 */ + 3838 "00011101" // /* MW 2 */ + 3839 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000100" // /* MW 5 */ + 3842 "11001010" // /* MW 4 */ + 3843 "00101110" // /* MW 3 */ + 3844 "11101110" // /* MW 2 */ + 3845 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 649 41 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111000" // /* MW 9 */ + 3848 "10010000" // /* MW 8 */ + 3849 "01101001" // /* MW 7 */ + 3850 "10010011" // /* MW 6 */ + 3851 "00111001" // /* MW 5 */ + 3852 "00111111" // /* MW 4 */ + 3853 "00000000" // /* MW 3 */ + 3854 "00011111" // /* MW 2 */ + 3855 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00100010" // /* MW 3 */ + 3858 "11000100" // /* MW 2 */ + 3859 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 82 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3861 "01010001" // /* MW 3 */ + 3862 "11101011" // /* MW 2 */ + 3863 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 611 47 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3865 "01011001" // /* MW 9 */ + 3866 "11000000" // /* MW 8 */ + 3867 "01101111" // /* MW 7 */ + 3868 "10010000" // /* MW 6 */ + 3869 "00100111" // /* MW 5 */ + 3870 "00000100" // /* MW 4 */ + 3871 "00110000" // /* MW 3 */ + 3872 "10001110" // /* MW 2 */ + 3873 "01000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3875 "00110010" // /* MW 3 */ + 3876 "00111000" // /* MW 2 */ + 3877 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 643 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3879 "01111111" // /* MW 3 */ + 3880 "11111110" // /* MW 2 */ + 3881 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3883 "01100100" // /* MW 5 */ + 3884 "00001100" // /* MW 4 */ + 3885 "00101110" // /* MW 3 */ + 3886 "11000110" // /* MW 2 */ + 3887 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 47 first +.src_ref 2 "conv2d_bf16_params.h" 629 45 +.src_ref 2 "conv2d_bf16_params.h" 684 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3889 "01011001" // /* MW 9 */ + 3890 "00101000" // /* MW 8 */ + 3891 "10000000" // /* MW 7 */ + 3892 "01111100" // /* MW 6 */ + 3893 "00101001" // /* MW 5 */ + 3894 "00110101" // /* MW 4 */ + 3895 "00110000" // /* MW 3 */ + 3896 "10001110" // /* MW 2 */ + 3897 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 45 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3899 "11100100" // /* MW 5 */ + 3900 "00001101" // /* MW 4 */ + 3901 "00110001" // /* MW 3 */ + 3902 "01010110" // /* MW 2 */ + 3903 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 644 22 +.src_ref 2 "conv2d_bf16_params.h" 700 17 first +.src_ref 2 "conv2d_bf16_params.h" 705 50 +.src_ref 2 "conv2d_bf16_params.h" 705 61 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "10101000" // /* MW 9 */ + 3906 "11111100" // /* MW 8 */ + 3907 "10101001" // /* MW 7 */ + 3908 "11111110" // /* MW 6 */ + 3909 "00111000" // /* MW 5 */ + 3910 "00000110" // /* MW 4 */ + 3911 "00100000" // /* MW 3 */ + 3912 "00000010" // /* MW 2 */ + 3913 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 38 first +.src_ref 2 "conv2d_bf16_params.h" 700 111 +.src_ref 2 "conv2d_bf16_params.h" 700 149 +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "00000110" // /* MW 9 */ + 3916 "00000110" // /* MW 8 */ + 3917 "00000101" // /* MW 7 */ + 3918 "10000000" // /* MW 6 */ + 3919 "00010001" // /* MW 5 */ + 3920 "00011111" // /* MW 4 */ + 3921 "00100010" // /* MW 3 */ + 3922 "11000110" // /* MW 2 */ + 3923 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 +.src_ref 2 "conv2d_bf16_params.h" 649 38 first +.src_ref 2 "conv2d_bf16_params.h" 674 24 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3925 "00010001" // /* MW 9 */ + 3926 "11111000" // /* MW 8 */ + 3927 "01101111" // /* MW 7 */ + 3928 "00111110" // /* MW 6 */ + 3929 "00000000" // /* MW 5 */ + 3930 "00000000" // /* MW 4 */ + 3931 "00110000" // /* MW 3 */ + 3932 "11001110" // /* MW 2 */ + 3933 "01001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 first +.src_ref 2 "conv2d_bf16_params.h" 662 61 +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3935 "11001001" // /* MW 9 */ + 3936 "10111111" // /* MW 8 */ + 3937 "01001011" // /* MW 7 */ + 3938 "10100100" // /* MW 6 */ + 3939 "01001001" // /* MW 5 */ + 3940 "00111111" // /* MW 4 */ + 3941 "00110000" // /* MW 3 */ + 3942 "11010010" // /* MW 2 */ + 3943 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 663 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3945 "10011100" // /* MW 5 */ + 3946 "01010110" // /* MW 4 */ + 3947 "00110001" // /* MW 3 */ + 3948 "11000110" // /* MW 2 */ + 3949 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first + 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3951 "10000001" // /* MW 5 */ + 3952 "01111010" // /* MW 4 */ + 3953 "00111111" // /* MW 3 */ + 3954 "10001010" // /* MW 2 */ + 3955 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3957 "11100011" // /* MW 5 */ + 3958 "01110011" // /* MW 4 */ + 3959 "00111000" // /* MW 3 */ + 3960 "11111010" // /* MW 2 */ + 3961 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3963 "01011001" // /* MW 9 */ + 3964 "00000000" // /* MW 8 */ + 3965 "01100000" // /* MW 7 */ + 3966 "00110000" // /* MW 6 */ + 3967 "11111000" // /* MW 5 */ + 3968 "00101101" // /* MW 4 */ + 3969 "00110000" // /* MW 3 */ + 3970 "11010110" // /* MW 2 */ + 3971 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3973 "11001001" // /* MW 9 */ + 3974 "01111111" // /* MW 8 */ + 3975 "00101100" // /* MW 7 */ + 3976 "01111110" // /* MW 6 */ + 3977 "00100000" // /* MW 5 */ + 3978 "00111110" // /* MW 4 */ + 3979 "00110000" // /* MW 3 */ + 3980 "10001100" // /* MW 2 */ + 3981 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 705 50 first +.src_ref 2 "conv2d_bf16_params.h" 705 61 first + 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3983 "00001100" // /* MW 5 */ + 3984 "10111000" // /* MW 4 */ + 3985 "00111000" // /* MW 3 */ + 3986 "10001100" // /* MW 2 */ + 3987 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 +.src_ref 2 "conv2d_bf16_params.h" 674 24 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 682 38 +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.src_ref 2 "conv2d_bf16_params.h" 720 50 + 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3989 "01011001" // /* MW 9 */ + 3990 "00000000" // /* MW 8 */ + 3991 "01001000" // /* MW 7 */ + 3992 "00100100" // /* MW 6 */ + 3993 "00000001" // /* MW 5 */ + 3994 "00100111" // /* MW 4 */ + 3995 "00110000" // /* MW 3 */ + 3996 "11011010" // /* MW 2 */ + 3997 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3999 "01111001" // /* MW 9 */ + 4000 "00001110" // /* MW 8 */ + 4001 "01110000" // /* MW 7 */ + 4002 "10001111" // /* MW 6 */ + 4003 "00011111" // /* MW 5 */ + 4004 "00000101" // /* MW 4 */ + 4005 "00110000" // /* MW 3 */ + 4006 "11110010" // /* MW 2 */ + 4007 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 707 61 first + 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4009 "11011111" // /* MW 5 */ + 4010 "10111001" // /* MW 4 */ + 4011 "00111011" // /* MW 3 */ + 4012 "10010010" // /* MW 2 */ + 4013 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 674 22 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4015 "01011001" // /* MW 9 */ + 4016 "00000110" // /* MW 8 */ + 4017 "00001000" // /* MW 7 */ + 4018 "10001100" // /* MW 6 */ + 4019 "00001111" // /* MW 5 */ + 4020 "00100001" // /* MW 4 */ + 4021 "00110000" // /* MW 3 */ + 4022 "11000110" // /* MW 2 */ + 4023 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4025 "01111000" // /* MW 11 */ + 4026 "10010000" // /* MW 10 */ + 4027 "01101001" // /* MW 9 */ + 4028 "00010011" // /* MW 8 */ + 4029 "00000000" // /* MW 7 */ + 4030 "10011011" // /* MW 6 */ + 4031 "00010001" // /* MW 5 */ + 4032 "00011110" // /* MW 4 */ + 4033 "00000010" // /* MW 3 */ + 4034 "00000000" // /* MW 2 */ + 4035 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4037 "10100100" // /* MW 5 */ + 4038 "00010100" // /* MW 4 */ + 4039 "00100000" // /* MW 3 */ + 4040 "00010110" // /* MW 2 */ + 4041 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 691 56 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4043 "10101111" // /* MW 3 */ + 4044 "01100011" // /* MW 2 */ + 4045 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 709 71 first + 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4047 "01011001" // /* MW 9 */ + 4048 "11001000" // /* MW 8 */ + 4049 "00000111" // /* MW 7 */ + 4050 "01101101" // /* MW 6 */ + 4051 "00001000" // /* MW 5 */ + 4052 "00000111" // /* MW 4 */ + 4053 "00110000" // /* MW 3 */ + 4054 "10001100" // /* MW 2 */ + 4055 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 706 23 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4057 "11001000" // /* MW 11 */ + 4058 "11000001" // /* MW 10 */ + 4059 "10101000" // /* MW 9 */ + 4060 "11101101" // /* MW 8 */ + 4061 "11110111" // /* MW 7 */ + 4062 "10100000" // /* MW 6 */ + 4063 "01100001" // /* MW 5 */ + 4064 "01001000" // /* MW 4 */ + 4065 "00000010" // /* MW 3 */ + 4066 "01100011" // /* MW 2 */ + 4067 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 682 38 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4069 "01111011" // /* MW 5 */ + 4070 "11000000" // /* MW 4 */ + 4071 "00110110" // /* MW 3 */ + 4072 "00001010" // /* MW 2 */ + 4073 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 first + 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4075 "01000001" // /* MW 5 */ + 4076 "10001110" // /* MW 4 */ + 4077 "00111000" // /* MW 3 */ + 4078 "11011010" // /* MW 2 */ + 4079 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 + 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4081 "10011100" // /* MW 5 */ + 4082 "11001000" // /* MW 4 */ + 4083 "00111000" // /* MW 3 */ + 4084 "11001010" // /* MW 2 */ + 4085 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4087 "11011011" // /* MW 5 */ + 4088 "10010100" // /* MW 4 */ + 4089 "00110010" // /* MW 3 */ + 4090 "10010010" // /* MW 2 */ + 4091 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 706 28 first + 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4093 "01011001" // /* MW 9 */ + 4094 "11111101" // /* MW 8 */ + 4095 "00001111" // /* MW 7 */ + 4096 "00000100" // /* MW 6 */ + 4097 "00111000" // /* MW 5 */ + 4098 "00011010" // /* MW 4 */ + 4099 "00110000" // /* MW 3 */ + 4100 "10001110" // /* MW 2 */ + 4101 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4103 "00001110" // /* MW 3 */ + 4104 "11000000" // /* MW 2 */ + 4105 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 707 66 first + 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4107 "00011111" // /* MW 5 */ + 4108 "00010000" // /* MW 4 */ + 4109 "00110111" // /* MW 3 */ + 4110 "11001010" // /* MW 2 */ + 4111 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 709 96 first + 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4113 "00111011" // /* MW 5 */ + 4114 "00001100" // /* MW 4 */ + 4115 "00110000" // /* MW 3 */ + 4116 "10001100" // /* MW 2 */ + 4117 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 709 90 + 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4119 "00110001" // /* MW 9 */ + 4120 "11000110" // /* MW 8 */ + 4121 "00000011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "01100001" // /* MW 5 */ + 4124 "00011100" // /* MW 4 */ + 4125 "00100010" // /* MW 3 */ + 4126 "10110110" // /* MW 2 */ + 4127 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 707 50 first +.src_ref 2 "conv2d_bf16_params.h" 708 59 +.src_ref 2 "conv2d_bf16_params.h" 710 60 first +.src_ref 2 "conv2d_bf16_params.h" 710 65 first + 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4129 "11001000" // /* MW 11 */ + 4130 "00111111" // /* MW 10 */ + 4131 "00101000" // /* MW 9 */ + 4132 "00110000" // /* MW 8 */ + 4133 "01110000" // /* MW 7 */ + 4134 "10111010" // /* MW 6 */ + 4135 "10010001" // /* MW 5 */ + 4136 "00011100" // /* MW 4 */ + 4137 "00100010" // /* MW 3 */ + 4138 "00111010" // /* MW 2 */ + 4139 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 708 48 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4141 "10101111" // /* MW 9 */ + 4142 "01000001" // /* MW 8 */ + 4143 "00000001" // /* MW 7 */ + 4144 "10000000" // /* MW 6 */ + 4145 "00110001" // /* MW 5 */ + 4146 "00011100" // /* MW 4 */ + 4147 "00100010" // /* MW 3 */ + 4148 "10111110" // /* MW 2 */ + 4149 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 709 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first + 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4151 "00000000" // /* MW 5 */ + 4152 "01010000" // /* MW 4 */ + 4153 "00110000" // /* MW 3 */ + 4154 "10001110" // /* MW 2 */ + 4155 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 710 50 first +.delay_slot + 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4157 "11110001" // /* MW 3 */ + 4158 "01011100" // /* MW 2 */ + 4159 "00001010" // /* MW 1 */ +.delay_slot + 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4161 "00010001" // /* MW 3 */ + 4162 "00011100" // /* MW 2 */ + 4163 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 first +.delay_slot + 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4165 "01010001" // /* MW 3 */ + 4166 "00011100" // /* MW 2 */ + 4167 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.delay_slot + 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "01010001" // /* MW 3 */ + 4170 "00000100" // /* MW 2 */ + 4171 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 720 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first +.delay_slot + 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4173 "01110001" // /* MW 9 */ + 4174 "00000000" // /* MW 8 */ + 4175 "00000000" // /* MW 7 */ + 4176 "00000000" // /* MW 6 */ + 4177 "11111110" // /* MW 5 */ + 4178 "00111111" // /* MW 4 */ + 4179 "00110000" // /* MW 3 */ + 4180 "10001010" // /* MW 2 */ +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + 4181 "01000010" // /* MW 1 */ +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 689 first +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 704 12 +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.function_start + 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4193 "01111000" // /* MW 11 */ + 4194 "01100000" // /* MW 10 */ + 4195 "00001010" // /* MW 9 */ + 4196 "00001000" // /* MW 8 */ + 4197 "10000000" // /* MW 7 */ + 4198 "00000001" // /* MW 6 */ + 4199 "10001011" // /* MW 5 */ + 4200 "10000100" // /* MW 4 */ + 4201 "10000010" // /* MW 3 */ + 4202 "00000011" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 526 11 +.src_ref 2 "conv2d_bf16.h" 698 28 first +.src_ref 2 "conv2d_bf16.h" 704 12 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 + 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4205 "01100000" // /* MW 13 */ + 4206 "00001001" // /* MW 12 */ + 4207 "00100000" // /* MW 11 */ + 4208 "00100001" // /* MW 10 */ + 4209 "00000000" // /* MW 9 */ + 4210 "00110110" // /* MW 8 */ + 4211 "00000001" // /* MW 7 */ + 4212 "00110100" // /* MW 6 */ + 4213 "00101000" // /* MW 5 */ + 4214 "00101000" // /* MW 4 */ + 4215 "10001000" // /* MW 3 */ + 4216 "00000110" // /* MW 2 */ + 4217 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 702 37 + 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4219 "00010000" // /* MW 9 */ + 4220 "00110100" // /* MW 8 */ + 4221 "00110010" // /* MW 7 */ + 4222 "11110010" // /* MW 6 */ + 4223 "00000001" // /* MW 5 */ + 4224 "00000000" // /* MW 4 */ + 4225 "11010000" // /* MW 3 */ + 4226 "10010100" // /* MW 2 */ + 4227 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 43 +.src_ref 2 "conv2d_bf16.h" 702 4 first + 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4229 "00010000" // /* MW 9 */ + 4230 "01111000" // /* MW 8 */ + 4231 "01111000" // /* MW 7 */ + 4232 "00000100" // /* MW 6 */ + 4233 "00000000" // /* MW 5 */ + 4234 "00000000" // /* MW 4 */ + 4235 "11010000" // /* MW 3 */ + 4236 "10010000" // /* MW 2 */ + 4237 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 699 43 first +.src_ref 2 "conv2d_bf16.h" 702 4 + 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4239 "00010000" // /* MW 9 */ + 4240 "10010000" // /* MW 8 */ + 4241 "10111000" // /* MW 7 */ + 4242 "00000101" // /* MW 6 */ + 4243 "00000000" // /* MW 5 */ + 4244 "00000000" // /* MW 4 */ + 4245 "11010000" // /* MW 3 */ + 4246 "10000000" // /* MW 2 */ + 4247 "01100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 702 37 first + 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4249 "00000001" // /* MW 5 */ + 4250 "00000000" // /* MW 4 */ + 4251 "11010001" // /* MW 3 */ + 4252 "10000010" // /* MW 2 */ + 4253 "01111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 37 + 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4255 "00100010" // /* MW 3 */ + 4256 "00000100" // /* MW 2 */ + 4257 "00000100" // /* MW 1 */ + 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4259 "00000000" // /* MW 1 */ + 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4261 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 705 66 first + 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4263 "00000001" // /* MW 5 */ + 4264 "10000101" // /* MW 4 */ + 4265 "10000000" // /* MW 3 */ + 4266 "00001010" // /* MW 2 */ + 4267 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first + 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4269 "00010100" // /* MW 3 */ + 4270 "00110000" // /* MW 2 */ + 4271 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "00010100" // /* MW 3 */ + 4274 "00010100" // /* MW 2 */ + 4275 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 4 first +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4277 "11111101" // /* MW 5 */ + 4278 "11100000" // /* MW 4 */ + 4279 "10001010" // /* MW 3 */ + 4280 "00001010" // /* MW 2 */ + 4281 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4283 "00000000" // /* MW 5 */ + 4284 "11110101" // /* MW 4 */ + 4285 "10000000" // /* MW 3 */ + 4286 "00000010" // /* MW 2 */ + 4287 "11000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4289 "00010100" // /* MW 3 */ + 4290 "00010100" // /* MW 2 */ + 4291 "00111100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4293 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4295 "01111110" // /* MW 9 */ + 4296 "10100101" // /* MW 8 */ + 4297 "00000001" // /* MW 7 */ + 4298 "00000000" // /* MW 6 */ + 4299 "01010100" // /* MW 5 */ + 4300 "00000000" // /* MW 4 */ + 4301 "11110000" // /* MW 3 */ + 4302 "00101100" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "11000101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00101000" // /* MW 5 */ + 4316 "01100000" // /* MW 4 */ + 4317 "11111100" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "11000101" // /* MW 12 */ + 4325 "01000000" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.src_ref 2 "conv2d_bf16.h" 704 12 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00101000" // /* MW 5 */ + 4348 "00101000" // /* MW 4 */ + 4349 "11111000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "00000011" // /* MW 7 */ + 4362 "10000000" // /* MW 6 */ + 4363 "10101101" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "11000101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "00000011" // /* MW 7 */ + 4378 "00000000" // /* MW 6 */ + 4379 "00101001" // /* MW 5 */ + 4380 "01100000" // /* MW 4 */ + 4381 "11111100" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 3 "utils.h" 531 4 first +.src_ref 2 "conv2d_bf16.h" 706 18 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "11000101" // /* MW 12 */ + 4389 "01000000" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "00000011" // /* MW 7 */ + 4394 "00000000" // /* MW 6 */ + 4395 "00100011" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4401 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4403 "00000011" // /* MW 3 */ + 4404 "10000000" // /* MW 2 */ + 4405 "00001101" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4407 "01110000" // /* MW 7 */ + 4408 "11000101" // /* MW 6 */ + 4409 "00000001" // /* MW 5 */ + 4410 "00000000" // /* MW 4 */ + 4411 "01100000" // /* MW 3 */ + 4412 "00000000" // /* MW 2 */ + 4413 "00100000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4415 "10001010" // /* MW 3 */ + 4416 "10000001" // /* MW 2 */ + 4417 "00011000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4419 "00000011" // /* MW 3 */ + 4420 "00000000" // /* MW 2 */ + 4421 "00001011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first + 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4423 "01110000" // /* MW 7 */ + 4424 "11000101" // /* MW 6 */ + 4425 "00000001" // /* MW 5 */ + 4426 "00000000" // /* MW 4 */ + 4427 "01100000" // /* MW 3 */ + 4428 "00000000" // /* MW 2 */ + 4429 "10110000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.src_ref 2 "conv2d_bf16.h" 708 12 first + 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4431 "01110000" // /* MW 7 */ + 4432 "11000101" // /* MW 6 */ + 4433 "01000000" // /* MW 5 */ + 4434 "00000000" // /* MW 4 */ + 4435 "01100000" // /* MW 3 */ + 4436 "00000000" // /* MW 2 */ + 4437 "00100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4439 "00000011" // /* MW 3 */ + 4440 "00000000" // /* MW 2 */ + 4441 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.src_ref 2 "conv2d_bf16.h" 723 first + 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4443 "00000000" // /* MW 5 */ + 4444 "01010000" // /* MW 4 */ + 4445 "01100000" // /* MW 3 */ + 4446 "00000000" // /* MW 2 */ + 4447 "10110000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 708 12 first +.delay_slot + 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "00000011" // /* MW 3 */ + 4450 "00000000" // /* MW 2 */ + 4451 "00001001" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first +.delay_slot + 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4453 "00000011" // /* MW 3 */ + 4454 "00000000" // /* MW 2 */ + 4455 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + 4461 "00000000" // /* MW 1 */ +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.function_start + 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4465 "01100000" // /* MW 13 */ + 4466 "00010001" // /* MW 12 */ + 4467 "10010001" // /* MW 11 */ + 4468 "00001110" // /* MW 10 */ + 4469 "00000000" // /* MW 9 */ + 4470 "00000000" // /* MW 8 */ + 4471 "10000000" // /* MW 7 */ + 4472 "00000000" // /* MW 6 */ + 4473 "00100000" // /* MW 5 */ + 4474 "00111111" // /* MW 4 */ + 4475 "10000110" // /* MW 3 */ + 4476 "11100000" // /* MW 2 */ + 4477 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 241 95 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4479 "01111000" // /* MW 11 */ + 4480 "01100000" // /* MW 10 */ + 4481 "00101011" // /* MW 9 */ + 4482 "00001010" // /* MW 8 */ + 4483 "11000101" // /* MW 7 */ + 4484 "10111111" // /* MW 6 */ + 4485 "10010101" // /* MW 5 */ + 4486 "11110001" // /* MW 4 */ + 4487 "00000111" // /* MW 3 */ + 4488 "01110011" // /* MW 2 */ + 4489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 94 +.src_ref 2 "conv2d_bf16_params.h" 242 100 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 245 28 +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4491 "00001000" // /* MW 11 */ + 4492 "01000111" // /* MW 10 */ + 4493 "00110100" // /* MW 9 */ + 4494 "00101001" // /* MW 8 */ + 4495 "00010000" // /* MW 7 */ + 4496 "10000001" // /* MW 6 */ + 4497 "00110101" // /* MW 5 */ + 4498 "11011010" // /* MW 4 */ + 4499 "00000111" // /* MW 3 */ + 4500 "00011001" // /* MW 2 */ + 4501 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 240 68 first + 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4503 "00010000" // /* MW 11 */ + 4504 "00000000" // /* MW 10 */ + 4505 "10101000" // /* MW 9 */ + 4506 "00000011" // /* MW 8 */ + 4507 "01000000" // /* MW 7 */ + 4508 "10000000" // /* MW 6 */ + 4509 "00110101" // /* MW 5 */ + 4510 "11110101" // /* MW 4 */ + 4511 "11010111" // /* MW 3 */ + 4512 "11001010" // /* MW 2 */ + 4513 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.src_ref 2 "conv2d_bf16_params.h" 245 20 + 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4515 "10010000" // /* MW 11 */ + 4516 "11111111" // /* MW 10 */ + 4517 "11101111" // /* MW 9 */ + 4518 "11111111" // /* MW 8 */ + 4519 "01111111" // /* MW 7 */ + 4520 "10000000" // /* MW 6 */ + 4521 "11010101" // /* MW 5 */ + 4522 "11111101" // /* MW 4 */ + 4523 "10000111" // /* MW 3 */ + 4524 "00011000" // /* MW 2 */ + 4525 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4527 "01011000" // /* MW 11 */ + 4528 "11101100" // /* MW 10 */ + 4529 "00000111" // /* MW 9 */ + 4530 "00001010" // /* MW 8 */ + 4531 "01100001" // /* MW 7 */ + 4532 "10000001" // /* MW 6 */ + 4533 "10110101" // /* MW 5 */ + 4534 "11100001" // /* MW 4 */ + 4535 "00000111" // /* MW 3 */ + 4536 "10110100" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 39 +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.src_ref 2 "conv2d_bf16_params.h" 250 71 + 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4539 "01011000" // /* MW 11 */ + 4540 "11000100" // /* MW 10 */ + 4541 "10000111" // /* MW 9 */ + 4542 "11001010" // /* MW 8 */ + 4543 "01110111" // /* MW 7 */ + 4544 "10000111" // /* MW 6 */ + 4545 "11110101" // /* MW 5 */ + 4546 "11101101" // /* MW 4 */ + 4547 "00000111" // /* MW 3 */ + 4548 "10010101" // /* MW 2 */ + 4549 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 + 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4551 "01010000" // /* MW 7 */ + 4552 "01000000" // /* MW 6 */ + 4553 "10000000" // /* MW 5 */ + 4554 "00000011" // /* MW 4 */ + 4555 "10110000" // /* MW 3 */ + 4556 "01110011" // /* MW 2 */ + 4557 "11111111" // /* MW 1 */ + 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4559 "00111101" // /* MW 3 */ + 4560 "11100100" // /* MW 2 */ + 4561 "00001111" // /* MW 1 */ + 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4563 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 + 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "00100000" // /* MW 3 */ + 4566 "01011001" // /* MW 2 */ + 4567 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 241 95 first + 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "10011011" // /* MW 5 */ + 4570 "01110111" // /* MW 4 */ + 4571 "00110110" // /* MW 3 */ + 4572 "00110010" // /* MW 2 */ + 4573 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 first +.src_ref 2 "conv2d_bf16_params.h" 242 94 first + 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4575 "00101111" // /* MW 5 */ + 4576 "11110010" // /* MW 4 */ + 4577 "01011110" // /* MW 3 */ + 4578 "11111001" // /* MW 2 */ + 4579 "01011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 20 first + 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4581 "00101010" // /* MW 3 */ + 4582 "11001001" // /* MW 2 */ + 4583 "00000010" // /* MW 1 */ + 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4585 "00000000" // /* MW 1 */ + 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4587 "00000000" // /* MW 1 */ + 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4589 "00000000" // /* MW 1 */ + 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4591 "00000000" // /* MW 1 */ + 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4593 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 174 first + 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4595 "11101100" // /* MW 3 */ + 4596 "01110111" // /* MW 2 */ + 4597 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 + 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4599 "00110010" // /* MW 3 */ + 4600 "01011101" // /* MW 2 */ + 4601 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4603 "11001100" // /* MW 3 */ + 4604 "11110110" // /* MW 2 */ + 4605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 241 95 first +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4607 "11001111" // /* MW 5 */ + 4608 "10110111" // /* MW 4 */ + 4609 "11101110" // /* MW 3 */ + 4610 "01110000" // /* MW 2 */ + 4611 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 100 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4613 "00011101" // /* MW 3 */ + 4614 "01111111" // /* MW 2 */ + 4615 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4617 "11100010" // /* MW 3 */ + 4618 "01011000" // /* MW 2 */ + 4619 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 98 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4621 "11000101" // /* MW 3 */ + 4622 "11111001" // /* MW 2 */ + 4623 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 151 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "01100010" // /* MW 5 */ + 4626 "00111100" // /* MW 4 */ + 4627 "10011110" // /* MW 3 */ + 4628 "11111101" // /* MW 2 */ + 4629 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "11000010" // /* MW 3 */ + 4632 "01111001" // /* MW 2 */ + 4633 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 80 first + 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11001100" // /* MW 3 */ + 4636 "01111111" // /* MW 2 */ + 4637 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 117 first +.src_ref 2 "conv2d_bf16_params.h" 243 39 + 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4639 "11010001" // /* MW 5 */ + 4640 "11110111" // /* MW 4 */ + 4641 "00111110" // /* MW 3 */ + 4642 "01111110" // /* MW 2 */ + 4643 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 first +.src_ref 2 "conv2d_bf16_params.h" 245 28 first + 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4645 "00110001" // /* MW 5 */ + 4646 "10110010" // /* MW 4 */ + 4647 "01010100" // /* MW 3 */ + 4648 "01111001" // /* MW 2 */ + 4649 "01011101" // /* MW 1 */ + 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4651 "00000000" // /* MW 1 */ + 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4653 "00000000" // /* MW 1 */ + 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4655 "00000000" // /* MW 1 */ + 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4657 "00000000" // /* MW 1 */ + 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4659 "00000000" // /* MW 1 */ + 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4661 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 87 + 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4663 "11111100" // /* MW 5 */ + 4664 "10111110" // /* MW 4 */ + 4665 "00011111" // /* MW 3 */ + 4666 "10101101" // /* MW 2 */ + 4667 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4669 "00000001" // /* MW 5 */ + 4670 "01000000" // /* MW 4 */ + 4671 "01000000" // /* MW 3 */ + 4672 "00001001" // /* MW 2 */ + 4673 "01100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.delay_slot + 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4675 "01001000" // /* MW 3 */ + 4676 "10010011" // /* MW 2 */ + 4677 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4679 "10010000" // /* MW 3 */ + 4680 "11111110" // /* MW 2 */ + 4681 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4683 "01100100" // /* MW 3 */ + 4684 "01101101" // /* MW 2 */ + 4685 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4687 "01111100" // /* MW 3 */ + 4688 "11101111" // /* MW 2 */ + 4689 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 132 +.delay_slot + 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4691 "01100100" // /* MW 3 */ + 4692 "11100001" // /* MW 2 */ + 4693 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4695 "00000001" // /* MW 5 */ + 4696 "01000000" // /* MW 4 */ + 4697 "01000000" // /* MW 3 */ + 4698 "00001001" // /* MW 2 */ + 4699 "11101000" // /* MW 1 */ +.delay_slot + 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4701 "00011101" // /* MW 3 */ + 4702 "11101011" // /* MW 2 */ + 4703 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4711 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 + 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */ + 4713 "00100000" // /* MW 9 */ + 4714 "00000000" // /* MW 8 */ + 4715 "00000000" // /* MW 7 */ + 4716 "01010110" // /* MW 6 */ + 4717 "00000010" // /* MW 5 */ + 4718 "00000000" // /* MW 4 */ + 4719 "00000000" // /* MW 3 */ + 4720 "00111011" // /* MW 2 */ + 4721 "00000000" // /* MW 1 */ +.delay_slot + 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4723 "10011100" // /* MW 3 */ + 4724 "00011001" // /* MW 2 */ + 4725 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1849 12 +.delay_slot + 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4727 "00000101" // /* MW 3 */ + 4728 "00100110" // /* MW 2 */ + 4729 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4735 "00000000" // /* MW 1 */ +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 2 "conv2d_bf16_params.h" 250 71 first +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4737 "01111000" // /* MW 11 */ + 4738 "11001110" // /* MW 10 */ + 4739 "00001100" // /* MW 9 */ + 4740 "00111100" // /* MW 8 */ + 4741 "10111111" // /* MW 7 */ + 4742 "10101011" // /* MW 6 */ + 4743 "00011101" // /* MW 5 */ + 4744 "11101011" // /* MW 4 */ + 4745 "00000111" // /* MW 3 */ + 4746 "10010101" // /* MW 2 */ + 4747 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4749 "01011101" // /* MW 3 */ + 4750 "10101011" // /* MW 2 */ + 4751 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 + 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4753 "10010010" // /* MW 3 */ + 4754 "01101011" // /* MW 2 */ + 4755 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4757 "11100111" // /* MW 3 */ + 4758 "11110111" // /* MW 2 */ + 4759 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4761 "01000001" // /* MW 5 */ + 4762 "10110000" // /* MW 4 */ + 4763 "01001101" // /* MW 3 */ + 4764 "11110010" // /* MW 2 */ + 4765 "10101100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4767 "00110010" // /* MW 3 */ + 4768 "01100111" // /* MW 2 */ + 4769 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 first + 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4771 "01000100" // /* MW 3 */ + 4772 "00101001" // /* MW 2 */ + 4773 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4775 "11110000" // /* MW 3 */ + 4776 "00110110" // /* MW 2 */ + 4777 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 152 first + 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4779 "10001011" // /* MW 5 */ + 4780 "11001111" // /* MW 4 */ + 4781 "11111001" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 +.src_ref 2 "conv2d_bf16_params.h" 258 8 first + 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4785 "01100000" // /* MW 11 */ + 4786 "00000000" // /* MW 10 */ + 4787 "00010000" // /* MW 9 */ + 4788 "01011100" // /* MW 8 */ + 4789 "00000010" // /* MW 7 */ + 4790 "10111010" // /* MW 6 */ + 4791 "01110001" // /* MW 5 */ + 4792 "01101111" // /* MW 4 */ + 4793 "10000010" // /* MW 3 */ + 4794 "10010000" // /* MW 2 */ + 4795 "00000001" // /* MW 1 */ +.delay_slot + 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100111" // /* MW 3 */ + 4798 "10001010" // /* MW 2 */ + 4799 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4807 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 + 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4809 "11111110" // /* MW 5 */ + 4810 "00111111" // /* MW 4 */ + 4811 "11111010" // /* MW 3 */ + 4812 "11111111" // /* MW 2 */ + 4813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 first + 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "01000100" // /* MW 3 */ + 4816 "10100101" // /* MW 2 */ + 4817 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4819 "00011100" // /* MW 13 */ + 4820 "00000000" // /* MW 12 */ + 4821 "00000000" // /* MW 11 */ + 4822 "01010111" // /* MW 10 */ + 4823 "00011010" // /* MW 9 */ + 4824 "01000000" // /* MW 8 */ + 4825 "00000000" // /* MW 7 */ + 4826 "00000000" // /* MW 6 */ + 4827 "10100011" // /* MW 5 */ + 4828 "11101100" // /* MW 4 */ + 4829 "11110110" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.src_ref 2 "conv2d_bf16.h" 1841 65 first +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16.h" 1849 12 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4833 "01011000" // /* MW 9 */ + 4834 "11111101" // /* MW 8 */ + 4835 "11001111" // /* MW 7 */ + 4836 "10000010" // /* MW 6 */ + 4837 "01000100" // /* MW 5 */ + 4838 "00100111" // /* MW 4 */ + 4839 "11010000" // /* MW 3 */ + 4840 "11010010" // /* MW 2 */ + 4841 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1841 34 +.src_ref 2 "conv2d_bf16.h" 1842 36 +.src_ref 2 "conv2d_bf16.h" 1842 67 +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4843 "01011000" // /* MW 9 */ + 4844 "00100100" // /* MW 8 */ + 4845 "00000000" // /* MW 7 */ + 4846 "11111010" // /* MW 6 */ + 4847 "01011111" // /* MW 5 */ + 4848 "00101001" // /* MW 4 */ + 4849 "00000000" // /* MW 3 */ + 4850 "01010010" // /* MW 2 */ + 4851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 67 first +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4853 "01011000" // /* MW 11 */ + 4854 "11001100" // /* MW 10 */ + 4855 "00000111" // /* MW 9 */ + 4856 "00100110" // /* MW 8 */ + 4857 "01101011" // /* MW 7 */ + 4858 "10101011" // /* MW 6 */ + 4859 "00101101" // /* MW 5 */ + 4860 "11010000" // /* MW 4 */ + 4861 "11010111" // /* MW 3 */ + 4862 "01011010" // /* MW 2 */ + 4863 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1845 80 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4865 "01011000" // /* MW 11 */ + 4866 "11000100" // /* MW 10 */ + 4867 "00000000" // /* MW 9 */ + 4868 "11101010" // /* MW 8 */ + 4869 "00110111" // /* MW 7 */ + 4870 "10111111" // /* MW 6 */ + 4871 "11010101" // /* MW 5 */ + 4872 "11011110" // /* MW 4 */ + 4873 "11010111" // /* MW 3 */ + 4874 "01011110" // /* MW 2 */ + 4875 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 63 first + 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4877 "10110110" // /* MW 3 */ + 4878 "11111111" // /* MW 2 */ + 4879 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 first + 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4881 "11110110" // /* MW 3 */ + 4882 "10001011" // /* MW 2 */ + 4883 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4885 "10110110" // /* MW 3 */ + 4886 "00000110" // /* MW 2 */ + 4887 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1841 34 first + 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4889 "01011011" // /* MW 5 */ + 4890 "00100110" // /* MW 4 */ + 4891 "11011010" // /* MW 3 */ + 4892 "11010010" // /* MW 2 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4895 "11010110" // /* MW 3 */ + 4896 "00000111" // /* MW 2 */ + 4897 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first + 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4899 "00101101" // /* MW 3 */ + 4900 "10101101" // /* MW 2 */ + 4901 "00010101" // /* MW 1 */ + 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4903 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 80 first + 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00111110" // /* MW 3 */ + 4906 "01100111" // /* MW 2 */ + 4907 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 first + 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4909 "00011000" // /* MW 3 */ + 4910 "11100011" // /* MW 2 */ + 4911 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 12 + 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */ + 4913 "00000001" // /* MW 5 */ + 4914 "01000000" // /* MW 4 */ + 4915 "11010000" // /* MW 3 */ + 4916 "00001001" // /* MW 2 */ + 4917 "10001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first +.src_ref 2 "conv2d_bf16.h" 1842 75 first +.delay_slot + 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4919 "10110010" // /* MW 5 */ + 4920 "10110101" // /* MW 4 */ + 4921 "10111010" // /* MW 3 */ + 4922 "10100101" // /* MW 2 */ + 4923 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4925 "10110010" // /* MW 5 */ + 4926 "10010101" // /* MW 4 */ + 4927 "10110000" // /* MW 3 */ + 4928 "01100101" // /* MW 2 */ + 4929 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1841 34 first +.delay_slot + 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4931 "10100000" // /* MW 7 */ + 4932 "01101000" // /* MW 6 */ + 4933 "11001010" // /* MW 5 */ + 4934 "00000001" // /* MW 4 */ + 4935 "10110000" // /* MW 3 */ + 4936 "10000100" // /* MW 2 */ + 4937 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4939 "10000000" // /* MW 3 */ + 4940 "11010000" // /* MW 2 */ + 4941 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4943 "11111001" // /* MW 3 */ + 4944 "01101010" // /* MW 2 */ + 4945 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4947 "11010000" // /* MW 5 */ + 4948 "11001000" // /* MW 4 */ + 4949 "11001110" // /* MW 3 */ + 4950 "00000111" // /* MW 2 */ + 4951 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 first + 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4953 "10000000" // /* MW 5 */ + 4954 "10110100" // /* MW 4 */ + 4955 "01010000" // /* MW 3 */ + 4956 "11000100" // /* MW 2 */ + 4957 "11100000" // /* MW 1 */ + 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4961 "00000000" // /* MW 5 */ + 4962 "00100000" // /* MW 4 */ + 4963 "00001010" // /* MW 3 */ + 4964 "01111111" // /* MW 2 */ + 4965 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4967 "10010001" // /* MW 3 */ + 4968 "00000010" // /* MW 2 */ + 4969 "00011000" // /* MW 1 */ + 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4971 "11100000" // /* MW 3 */ + 4972 "00010101" // /* MW 2 */ + 4973 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4975 "01011111" // /* MW 3 */ + 4976 "01101010" // /* MW 2 */ + 4977 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4979 "00100101" // /* MW 5 */ + 4980 "00000001" // /* MW 4 */ + 4981 "11100000" // /* MW 3 */ + 4982 "11000110" // /* MW 2 */ + 4983 "11100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4985 "10000000" // /* MW 3 */ + 4986 "01111010" // /* MW 2 */ + 4987 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4989 "00010110" // /* MW 3 */ + 4990 "01000000" // /* MW 2 */ + 4991 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4993 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4995 "00000001" // /* MW 3 */ + 4996 "01000001" // /* MW 2 */ + 4997 "00011100" // /* MW 1 */ + 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4999 "00000000" // /* MW 1 */ + 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5001 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5003 "00110010" // /* MW 3 */ + 5004 "00000110" // /* MW 2 */ + 5005 "00000111" // /* MW 1 */ + 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5007 "00000000" // /* MW 1 */ + 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5009 "00000000" // /* MW 1 */ + 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5011 "00000000" // /* MW 1 */ + 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5013 "00000000" // /* MW 1 */ + 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5015 "00000000" // /* MW 1 */ + 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5017 "00000000" // /* MW 1 */ + 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "01101011" // /* MW 5 */ + 5020 "10100100" // /* MW 4 */ + 5021 "11111111" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.src_ref 2 "conv2d_bf16.h" 881 76 +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5025 "00010000" // /* MW 11 */ + 5026 "00110100" // /* MW 10 */ + 5027 "10110010" // /* MW 9 */ + 5028 "11110001" // /* MW 8 */ + 5029 "00000001" // /* MW 7 */ + 5030 "00000000" // /* MW 6 */ + 5031 "00001011" // /* MW 5 */ + 5032 "10001110" // /* MW 4 */ + 5033 "10000001" // /* MW 3 */ + 5034 "10010000" // /* MW 2 */ + 5035 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.src_ref 2 "conv2d_bf16.h" 876 51 first +.src_ref 2 "conv2d_bf16.h" 881 76 first +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5037 "01011000" // /* MW 11 */ + 5038 "00001011" // /* MW 10 */ + 5039 "01101000" // /* MW 9 */ + 5040 "10010010" // /* MW 8 */ + 5041 "00011001" // /* MW 7 */ + 5042 "00110011" // /* MW 6 */ + 5043 "10001011" // /* MW 5 */ + 5044 "10000100" // /* MW 4 */ + 5045 "01010000" // /* MW 3 */ + 5046 "01000101" // /* MW 2 */ + 5047 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5049 "01111000" // /* MW 9 */ + 5050 "01100000" // /* MW 8 */ + 5051 "10101010" // /* MW 7 */ + 5052 "01100101" // /* MW 6 */ + 5053 "10111001" // /* MW 5 */ + 5054 "00111001" // /* MW 4 */ + 5055 "00000000" // /* MW 3 */ + 5056 "10010110" // /* MW 2 */ + 5057 "01100001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 883 4 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5059 "01100111" // /* MW 3 */ + 5060 "00000110" // /* MW 2 */ + 5061 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5063 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 884 4 first +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5065 "00000001" // /* MW 5 */ + 5066 "00000000" // /* MW 4 */ + 5067 "00110000" // /* MW 3 */ + 5068 "00001000" // /* MW 2 */ + 5069 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5071 "00101101" // /* MW 3 */ + 5072 "01101011" // /* MW 2 */ + 5073 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.delay_slot + 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5075 "11111001" // /* MW 3 */ + 5076 "01101010" // /* MW 2 */ + 5077 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 first +.delay_slot + 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5079 "00010001" // /* MW 3 */ + 5080 "01100011" // /* MW 2 */ + 5081 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.delay_slot + 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5083 "00110101" // /* MW 5 */ + 5084 "00101100" // /* MW 4 */ + 5085 "10111010" // /* MW 3 */ + 5086 "01100101" // /* MW 2 */ + 5087 "10001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.delay_slot + 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5089 "00000000" // /* MW 15 */ + 5090 "00000000" // /* MW 14 */ + 5091 "10101000" // /* MW 13 */ + 5092 "11100010" // /* MW 12 */ + 5093 "10001011" // /* MW 11 */ + 5094 "00010001" // /* MW 10 */ + 5095 "10011010" // /* MW 9 */ + 5096 "00101100" // /* MW 8 */ + 5097 "01011011" // /* MW 7 */ + 5098 "00000001" // /* MW 6 */ + 5099 "00100000" // /* MW 5 */ + 5100 "00000000" // /* MW 4 */ + 5101 "11110000" // /* MW 3 */ + 5102 "00101100" // /* MW 2 */ + 5103 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.return_address + 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5105 "10011001" // /* MW 3 */ + 5106 "11010100" // /* MW 2 */ + 5107 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 first +.no_stack_arguments + 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5109 "00000001" // /* MW 5 */ + 5110 "00000000" // /* MW 4 */ + 5111 "00110000" // /* MW 3 */ + 5112 "00001000" // /* MW 2 */ + 5113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5117 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.delay_slot + 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5119 "10010000" // /* MW 3 */ + 5120 "01010110" // /* MW 2 */ + 5121 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5123 "10100000" // /* MW 3 */ + 5124 "01100110" // /* MW 2 */ + 5125 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5127 "00000000" // /* MW 9 */ + 5128 "00000000" // /* MW 8 */ + 5129 "00000000" // /* MW 7 */ + 5130 "00000000" // /* MW 6 */ + 5131 "00001011" // /* MW 5 */ + 5132 "10001111" // /* MW 4 */ + 5133 "11110000" // /* MW 3 */ + 5134 "00101100" // /* MW 2 */ + 5135 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.return_address + 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5137 "00001000" // /* MW 9 */ + 5138 "01100011" // /* MW 8 */ + 5139 "00110011" // /* MW 7 */ + 5140 "11101010" // /* MW 6 */ + 5141 "00110111" // /* MW 5 */ + 5142 "00000001" // /* MW 4 */ + 5143 "10000000" // /* MW 3 */ + 5144 "10011010" // /* MW 2 */ + 5145 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 886 4 +.src_ref 2 "conv2d_bf16.h" 896 23 first +.src_ref 2 "conv2d_bf16.h" 1123 71 + 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5147 "01100010" // /* MW 5 */ + 5148 "00110100" // /* MW 4 */ + 5149 "11010000" // /* MW 3 */ + 5150 "10000100" // /* MW 2 */ + 5151 "10000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "01000110" // /* MW 3 */ + 5154 "00011100" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5157 "00100110" // /* MW 3 */ + 5158 "00011110" // /* MW 2 */ + 5159 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "01000110" // /* MW 3 */ + 5162 "00011110" // /* MW 2 */ + 5163 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5165 "00000110" // /* MW 3 */ + 5166 "00011100" // /* MW 2 */ + 5167 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5169 "01100110" // /* MW 3 */ + 5170 "00011100" // /* MW 2 */ + 5171 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5173 "01100110" // /* MW 3 */ + 5174 "00011110" // /* MW 2 */ + 5175 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 first + 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5177 "11010110" // /* MW 3 */ + 5178 "00011110" // /* MW 2 */ + 5179 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5181 "00110110" // /* MW 3 */ + 5182 "00011110" // /* MW 2 */ + 5183 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5185 "10010110" // /* MW 3 */ + 5186 "00011111" // /* MW 2 */ + 5187 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "10110110" // /* MW 3 */ + 5190 "00011110" // /* MW 2 */ + 5191 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "11110110" // /* MW 3 */ + 5194 "00011110" // /* MW 2 */ + 5195 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "10011110" // /* MW 3 */ + 5198 "00011101" // /* MW 2 */ + 5199 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5201 "00100110" // /* MW 3 */ + 5202 "00011101" // /* MW 2 */ + 5203 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5205 "10100110" // /* MW 3 */ + 5206 "00011100" // /* MW 2 */ + 5207 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5209 "11000110" // /* MW 3 */ + 5210 "00011100" // /* MW 2 */ + 5211 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5213 "10100110" // /* MW 3 */ + 5214 "00011110" // /* MW 2 */ + 5215 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5217 "11010110" // /* MW 3 */ + 5218 "00011111" // /* MW 2 */ + 5219 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5221 "10110110" // /* MW 3 */ + 5222 "00011111" // /* MW 2 */ + 5223 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5225 "11100110" // /* MW 3 */ + 5226 "00011100" // /* MW 2 */ + 5227 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5229 "01001010" // /* MW 3 */ + 5230 "11000010" // /* MW 2 */ + 5231 "00000100" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 + 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5233 "10010001" // /* MW 3 */ + 5234 "11010010" // /* MW 2 */ + 5235 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5237 "01010110" // /* MW 3 */ + 5238 "00000100" // /* MW 2 */ + 5239 "00000100" // /* MW 1 */ + 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5241 "00000000" // /* MW 1 */ + 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5243 "00000000" // /* MW 1 */ + 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5245 "00000000" // /* MW 1 */ + 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5247 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5249 "00101100" // /* MW 3 */ + 5250 "11100111" // /* MW 2 */ + 5251 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 12 + 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */ + 5253 "00000001" // /* MW 5 */ + 5254 "01000000" // /* MW 4 */ + 5255 "00010000" // /* MW 3 */ + 5256 "00001100" // /* MW 2 */ + 5257 "10011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 +.delay_slot + 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5259 "11010000" // /* MW 5 */ + 5260 "11001000" // /* MW 4 */ + 5261 "11000100" // /* MW 3 */ + 5262 "00000111" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 first +.delay_slot + 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "10100111" // /* MW 3 */ + 5266 "00000101" // /* MW 2 */ + 5267 "00000010" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5269 "01110010" // /* MW 3 */ + 5270 "11010001" // /* MW 2 */ + 5271 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 71 first + 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5277 "01011000" // /* MW 9 */ + 5278 "10000100" // /* MW 8 */ + 5279 "10000000" // /* MW 7 */ + 5280 "00111111" // /* MW 6 */ + 5281 "10111001" // /* MW 5 */ + 5282 "00011011" // /* MW 4 */ + 5283 "00100000" // /* MW 3 */ + 5284 "01000011" // /* MW 2 */ + 5285 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 +.src_ref 2 "conv2d_bf16.h" 1154 80 + 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5287 "01011000" // /* MW 9 */ + 5288 "00111100" // /* MW 8 */ + 5289 "00000000" // /* MW 7 */ + 5290 "00001010" // /* MW 6 */ + 5291 "00100000" // /* MW 5 */ + 5292 "00111101" // /* MW 4 */ + 5293 "00000000" // /* MW 3 */ + 5294 "00010011" // /* MW 2 */ + 5295 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5297 "01111000" // /* MW 9 */ + 5298 "11010000" // /* MW 8 */ + 5299 "11100100" // /* MW 7 */ + 5300 "00001011" // /* MW 6 */ + 5301 "10100000" // /* MW 5 */ + 5302 "00000001" // /* MW 4 */ + 5303 "10000000" // /* MW 3 */ + 5304 "00010100" // /* MW 2 */ + 5305 "11111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 746 83 + 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5307 "01111000" // /* MW 11 */ + 5308 "11000000" // /* MW 10 */ + 5309 "10100111" // /* MW 9 */ + 5310 "00000001" // /* MW 8 */ + 5311 "11010100" // /* MW 7 */ + 5312 "00011011" // /* MW 6 */ + 5313 "01001011" // /* MW 5 */ + 5314 "00011100" // /* MW 4 */ + 5315 "10000010" // /* MW 3 */ + 5316 "10011000" // /* MW 2 */ + 5317 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.src_ref 2 "conv2d_bf16.h" 1199 26 +.src_ref 2 "conv2d_bf16.h" 1200 26 +.src_ref 2 "conv2d_bf16.h" 1201 26 +.src_ref 2 "conv2d_bf16.h" 1202 26 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5319 "01011000" // /* MW 11 */ + 5320 "00000111" // /* MW 10 */ + 5321 "11101000" // /* MW 9 */ + 5322 "10001001" // /* MW 8 */ + 5323 "11110111" // /* MW 7 */ + 5324 "00000001" // /* MW 6 */ + 5325 "01001011" // /* MW 5 */ + 5326 "00011100" // /* MW 4 */ + 5327 "00100110" // /* MW 3 */ + 5328 "10010110" // /* MW 2 */ + 5329 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 12 +.src_ref 2 "conv2d_bf16.h" 1218 20 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5331 "00010000" // /* MW 9 */ + 5332 "10100000" // /* MW 8 */ + 5333 "00110010" // /* MW 7 */ + 5334 "00000101" // /* MW 6 */ + 5335 "00000000" // /* MW 5 */ + 5336 "00000000" // /* MW 4 */ + 5337 "00100000" // /* MW 3 */ + 5338 "11001010" // /* MW 2 */ + 5339 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 749 26 +.src_ref 2 "conv2d_bf16.h" 750 26 +.src_ref 2 "conv2d_bf16.h" 751 26 +.src_ref 2 "conv2d_bf16.h" 752 26 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5341 "01011000" // /* MW 9 */ + 5342 "00001100" // /* MW 8 */ + 5343 "10001011" // /* MW 7 */ + 5344 "00010010" // /* MW 6 */ + 5345 "01101001" // /* MW 5 */ + 5346 "00110100" // /* MW 4 */ + 5347 "00100000" // /* MW 3 */ + 5348 "00110110" // /* MW 2 */ + 5349 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1873 + 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5351 "01011000" // /* MW 11 */ + 5352 "00000000" // /* MW 10 */ + 5353 "00001000" // /* MW 9 */ + 5354 "00001011" // /* MW 8 */ + 5355 "10010000" // /* MW 7 */ + 5356 "00000001" // /* MW 6 */ + 5357 "00100000" // /* MW 5 */ + 5358 "11010111" // /* MW 4 */ + 5359 "00101001" // /* MW 3 */ + 5360 "10000111" // /* MW 2 */ + 5361 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5363 "00010110" // /* MW 3 */ + 5364 "10001000" // /* MW 2 */ + 5365 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5367 "00100110" // /* MW 3 */ + 5368 "10101011" // /* MW 2 */ + 5369 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "01110110" // /* MW 3 */ + 5372 "00101111" // /* MW 2 */ + 5373 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 80 first + 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "10000110" // /* MW 3 */ + 5376 "00011110" // /* MW 2 */ + 5377 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 80 first + 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "11000110" // /* MW 3 */ + 5380 "10001010" // /* MW 2 */ + 5381 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 87 first + 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5383 "00000110" // /* MW 3 */ + 5384 "10011110" // /* MW 2 */ + 5385 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 83 first + 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5387 "00110110" // /* MW 3 */ + 5388 "00011100" // /* MW 2 */ + 5389 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 83 first +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5391 "00000010" // /* MW 5 */ + 5392 "00000110" // /* MW 4 */ + 5393 "11011101" // /* MW 3 */ + 5394 "00000010" // /* MW 2 */ + 5395 "10011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 66 first + 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "01110110" // /* MW 3 */ + 5398 "00010100" // /* MW 2 */ + 5399 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1206 63 first + 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5401 "10010110" // /* MW 3 */ + 5402 "00000100" // /* MW 2 */ + 5403 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 + 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5405 "00000000" // /* MW 3 */ + 5406 "11011010" // /* MW 2 */ + 5407 "00011001" // /* MW 1 */ + 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "10011001" // /* MW 3 */ + 5410 "10000011" // /* MW 2 */ + 5411 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 + 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5413 "00000000" // /* MW 3 */ + 5414 "00011011" // /* MW 2 */ + 5415 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5417 "10011001" // /* MW 3 */ + 5418 "00001101" // /* MW 2 */ + 5419 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 first + 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "11100000" // /* MW 3 */ + 5422 "00000011" // /* MW 2 */ + 5423 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 first + 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5425 "11000000" // /* MW 5 */ + 5426 "00010000" // /* MW 4 */ + 5427 "11101110" // /* MW 3 */ + 5428 "11111111" // /* MW 2 */ + 5429 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5431 "01111110" // /* MW 9 */ + 5432 "10000000" // /* MW 8 */ + 5433 "10000010" // /* MW 7 */ + 5434 "00000000" // /* MW 6 */ + 5435 "00010000" // /* MW 5 */ + 5436 "00000000" // /* MW 4 */ + 5437 "11110000" // /* MW 3 */ + 5438 "00101100" // /* MW 2 */ + 5439 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1147 31 first +.src_ref 2 "conv2d_bf16.h" 1187 40 first +.loop_nesting 1 + 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5441 "01111000" // /* MW 11 */ + 5442 "10010000" // /* MW 10 */ + 5443 "00110011" // /* MW 9 */ + 5444 "11101100" // /* MW 8 */ + 5445 "11100111" // /* MW 7 */ + 5446 "00000100" // /* MW 6 */ + 5447 "00001011" // /* MW 5 */ + 5448 "10000101" // /* MW 4 */ + 5449 "01110001" // /* MW 3 */ + 5450 "10000101" // /* MW 2 */ + 5451 "11000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1188 50 first + 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5453 "10100000" // /* MW 11 */ + 5454 "10011000" // /* MW 10 */ + 5455 "00110011" // /* MW 9 */ + 5456 "00000010" // /* MW 8 */ + 5457 "01001011" // /* MW 7 */ + 5458 "00001110" // /* MW 6 */ + 5459 "00101011" // /* MW 5 */ + 5460 "00101000" // /* MW 4 */ + 5461 "01111000" // /* MW 3 */ + 5462 "10000001" // /* MW 2 */ + 5463 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first + 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5465 "01110000" // /* MW 11 */ + 5466 "10000000" // /* MW 10 */ + 5467 "11000110" // /* MW 9 */ + 5468 "00000011" // /* MW 8 */ + 5469 "01001011" // /* MW 7 */ + 5470 "01011010" // /* MW 6 */ + 5471 "00101111" // /* MW 5 */ + 5472 "00101000" // /* MW 4 */ + 5473 "01111000" // /* MW 3 */ + 5474 "00111001" // /* MW 2 */ + 5475 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1149 31 first + 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5477 "01110000" // /* MW 11 */ + 5478 "00000000" // /* MW 10 */ + 5479 "10000010" // /* MW 9 */ + 5480 "00000001" // /* MW 8 */ + 5481 "00001011" // /* MW 7 */ + 5482 "01010011" // /* MW 6 */ + 5483 "00101011" // /* MW 5 */ + 5484 "00000011" // /* MW 4 */ + 5485 "01110100" // /* MW 3 */ + 5486 "00001101" // /* MW 2 */ + 5487 "11011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 743 30 first + 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5489 "01011110" // /* MW 9 */ + 5490 "00000000" // /* MW 8 */ + 5491 "11000000" // /* MW 7 */ + 5492 "00000001" // /* MW 6 */ + 5493 "11010100" // /* MW 5 */ + 5494 "00010010" // /* MW 4 */ + 5495 "01110100" // /* MW 3 */ + 5496 "01000001" // /* MW 2 */ + 5497 "01110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1152 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first + 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5499 "00010000" // /* MW 11 */ + 5500 "01000000" // /* MW 10 */ + 5501 "10111011" // /* MW 9 */ + 5502 "00000101" // /* MW 8 */ + 5503 "00000000" // /* MW 7 */ + 5504 "00000000" // /* MW 6 */ + 5505 "00101000" // /* MW 5 */ + 5506 "00101000" // /* MW 4 */ + 5507 "01111000" // /* MW 3 */ + 5508 "10010101" // /* MW 2 */ + 5509 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 1154 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 + 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5511 "00010000" // /* MW 11 */ + 5512 "00101000" // /* MW 10 */ + 5513 "01111011" // /* MW 9 */ + 5514 "00000100" // /* MW 8 */ + 5515 "00000000" // /* MW 7 */ + 5516 "00000000" // /* MW 6 */ + 5517 "00101000" // /* MW 5 */ + 5518 "00101000" // /* MW 4 */ + 5519 "01111000" // /* MW 3 */ + 5520 "00011101" // /* MW 2 */ + 5521 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first + 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5523 "00101000" // /* MW 5 */ + 5524 "00000001" // /* MW 4 */ + 5525 "01110100" // /* MW 3 */ + 5526 "10110101" // /* MW 2 */ + 5527 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1157 31 first + 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5529 "00101000" // /* MW 5 */ + 5530 "00100010" // /* MW 4 */ + 5531 "01111000" // /* MW 3 */ + 5532 "10100101" // /* MW 2 */ + 5533 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1159 31 first + 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5535 "00101000" // /* MW 5 */ + 5536 "00101000" // /* MW 4 */ + 5537 "01111000" // /* MW 3 */ + 5538 "00101101" // /* MW 2 */ + 5539 "11011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5541 "00101000" // /* MW 5 */ + 5542 "00101000" // /* MW 4 */ + 5543 "01111000" // /* MW 3 */ + 5544 "10000001" // /* MW 2 */ + 5545 "00100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1192 29 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5547 "00101000" // /* MW 5 */ + 5548 "00000001" // /* MW 4 */ + 5549 "01110100" // /* MW 3 */ + 5550 "10111101" // /* MW 2 */ + 5551 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "11101110" // /* MW 9 */ + 5554 "11000011" // /* MW 8 */ + 5555 "10011010" // /* MW 7 */ + 5556 "00000010" // /* MW 6 */ + 5557 "00010100" // /* MW 5 */ + 5558 "00010001" // /* MW 4 */ + 5559 "01110100" // /* MW 3 */ + 5560 "11001101" // /* MW 2 */ + 5561 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1162 81 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5563 "11100000" // /* MW 11 */ + 5564 "11000001" // /* MW 10 */ + 5565 "10011010" // /* MW 9 */ + 5566 "00000001" // /* MW 8 */ + 5567 "10001011" // /* MW 7 */ + 5568 "10011000" // /* MW 6 */ + 5569 "00101100" // /* MW 5 */ + 5570 "00101000" // /* MW 4 */ + 5571 "01111000" // /* MW 3 */ + 5572 "11000101" // /* MW 2 */ + 5573 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5575 "11101001" // /* MW 9 */ + 5576 "00010100" // /* MW 8 */ + 5577 "01001000" // /* MW 7 */ + 5578 "00011101" // /* MW 6 */ + 5579 "01010100" // /* MW 5 */ + 5580 "00000000" // /* MW 4 */ + 5581 "01110011" // /* MW 3 */ + 5582 "10000001" // /* MW 2 */ + 5583 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5585 "11101001" // /* MW 13 */ + 5586 "00101100" // /* MW 12 */ + 5587 "01001001" // /* MW 11 */ + 5588 "00000111" // /* MW 10 */ + 5589 "01011000" // /* MW 9 */ + 5590 "01011100" // /* MW 8 */ + 5591 "00000000" // /* MW 7 */ + 5592 "00000000" // /* MW 6 */ + 5593 "10010110" // /* MW 5 */ + 5594 "10010100" // /* MW 4 */ + 5595 "01110110" // /* MW 3 */ + 5596 "00110101" // /* MW 2 */ + 5597 "11001111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1162 81 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5599 "00001001" // /* MW 13 */ + 5600 "01010101" // /* MW 12 */ + 5601 "01001010" // /* MW 11 */ + 5602 "00111110" // /* MW 10 */ + 5603 "10010000" // /* MW 9 */ + 5604 "01001100" // /* MW 8 */ + 5605 "00000000" // /* MW 7 */ + 5606 "00000000" // /* MW 6 */ + 5607 "10010110" // /* MW 5 */ + 5608 "00111000" // /* MW 4 */ + 5609 "01111010" // /* MW 3 */ + 5610 "10111101" // /* MW 2 */ + 5611 "10000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1199 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5613 "00111101" // /* MW 13 */ + 5614 "01100000" // /* MW 12 */ + 5615 "11111000" // /* MW 11 */ + 5616 "00011110" // /* MW 10 */ + 5617 "10010000" // /* MW 9 */ + 5618 "01010100" // /* MW 8 */ + 5619 "00000000" // /* MW 7 */ + 5620 "00000000" // /* MW 6 */ + 5621 "10010110" // /* MW 5 */ + 5622 "10011000" // /* MW 4 */ + 5623 "01110100" // /* MW 3 */ + 5624 "00000001" // /* MW 2 */ + 5625 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1200 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5627 "00111101" // /* MW 7 */ + 5628 "01100100" // /* MW 6 */ + 5629 "11111001" // /* MW 5 */ + 5630 "00000100" // /* MW 4 */ + 5631 "01110000" // /* MW 3 */ + 5632 "10000001" // /* MW 2 */ + 5633 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1201 26 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5635 "00111101" // /* MW 7 */ + 5636 "10001000" // /* MW 6 */ + 5637 "11111010" // /* MW 5 */ + 5638 "00000100" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00001001" // /* MW 2 */ + 5641 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5643 "00001001" // /* MW 7 */ + 5644 "01101101" // /* MW 6 */ + 5645 "01001011" // /* MW 5 */ + 5646 "00000100" // /* MW 4 */ + 5647 "01110000" // /* MW 3 */ + 5648 "00000001" // /* MW 2 */ + 5649 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5651 "00101000" // /* MW 5 */ + 5652 "00000001" // /* MW 4 */ + 5653 "01110100" // /* MW 3 */ + 5654 "10000001" // /* MW 2 */ + 5655 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "00010100" // /* MW 3 */ + 5658 "00010001" // /* MW 2 */ + 5659 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1202 26 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5661 "00111101" // /* MW 11 */ + 5662 "10001100" // /* MW 10 */ + 5663 "11111011" // /* MW 9 */ + 5664 "10000010" // /* MW 8 */ + 5665 "01111101" // /* MW 7 */ + 5666 "01110010" // /* MW 6 */ + 5667 "00101101" // /* MW 5 */ + 5668 "00101000" // /* MW 4 */ + 5669 "01111000" // /* MW 3 */ + 5670 "00001001" // /* MW 2 */ + 5671 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00101001" // /* MW 9 */ + 5674 "00000110" // /* MW 8 */ + 5675 "10100000" // /* MW 7 */ + 5676 "00011101" // /* MW 6 */ + 5677 "00010100" // /* MW 5 */ + 5678 "00010100" // /* MW 4 */ + 5679 "01110100" // /* MW 3 */ + 5680 "00000001" // /* MW 2 */ + 5681 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5683 "00001001" // /* MW 13 */ + 5684 "01000110" // /* MW 12 */ + 5685 "10100010" // /* MW 11 */ + 5686 "00001111" // /* MW 10 */ + 5687 "10101010" // /* MW 9 */ + 5688 "01011000" // /* MW 8 */ + 5689 "00000000" // /* MW 7 */ + 5690 "00000000" // /* MW 6 */ + 5691 "00101000" // /* MW 5 */ + 5692 "00000001" // /* MW 4 */ + 5693 "01110100" // /* MW 3 */ + 5694 "10000001" // /* MW 2 */ + 5695 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5697 "01010001" // /* MW 15 */ + 5698 "00001001" // /* MW 14 */ + 5699 "11101101" // /* MW 13 */ + 5700 "00000011" // /* MW 12 */ + 5701 "11001001" // /* MW 11 */ + 5702 "00000000" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "00000000" // /* MW 8 */ + 5705 "01011011" // /* MW 7 */ + 5706 "00000001" // /* MW 6 */ + 5707 "00101000" // /* MW 5 */ + 5708 "00100010" // /* MW 4 */ + 5709 "11111000" // /* MW 3 */ + 5710 "00101100" // /* MW 2 */ + 5711 "00000000" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "01010000" // /* MW 15 */ + 5714 "00011011" // /* MW 14 */ + 5715 "11101101" // /* MW 13 */ + 5716 "00000001" // /* MW 12 */ + 5717 "01001001" // /* MW 11 */ + 5718 "00000001" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "00101000" // /* MW 5 */ + 5724 "00101000" // /* MW 4 */ + 5725 "01111000" // /* MW 3 */ + 5726 "00001001" // /* MW 2 */ + 5727 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00110001" // /* MW 15 */ + 5730 "00000000" // /* MW 14 */ + 5731 "01111101" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "01011011" // /* MW 7 */ + 5738 "00000001" // /* MW 6 */ + 5739 "00101000" // /* MW 5 */ + 5740 "00101000" // /* MW 4 */ + 5741 "01111000" // /* MW 3 */ + 5742 "00000001" // /* MW 2 */ + 5743 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00110000" // /* MW 15 */ + 5746 "00010010" // /* MW 14 */ + 5747 "01111101" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "01011011" // /* MW 7 */ + 5754 "00000001" // /* MW 6 */ + 5755 "00101000" // /* MW 5 */ + 5756 "00000001" // /* MW 4 */ + 5757 "01110100" // /* MW 3 */ + 5758 "10000001" // /* MW 2 */ + 5759 "00100010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "01010001" // /* MW 15 */ + 5762 "00001001" // /* MW 14 */ + 5763 "11101101" // /* MW 13 */ + 5764 "00000011" // /* MW 12 */ + 5765 "11001001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "01011011" // /* MW 7 */ + 5770 "00000001" // /* MW 6 */ + 5771 "00101000" // /* MW 5 */ + 5772 "00100010" // /* MW 4 */ + 5773 "11111000" // /* MW 3 */ + 5774 "00101100" // /* MW 2 */ + 5775 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5777 "00001001" // /* MW 13 */ + 5778 "01101010" // /* MW 12 */ + 5779 "10100011" // /* MW 11 */ + 5780 "00011110" // /* MW 10 */ + 5781 "10010000" // /* MW 9 */ + 5782 "01010100" // /* MW 8 */ + 5783 "00000000" // /* MW 7 */ + 5784 "00000000" // /* MW 6 */ + 5785 "10010110" // /* MW 5 */ + 5786 "10111100" // /* MW 4 */ + 5787 "01111100" // /* MW 3 */ + 5788 "00001001" // /* MW 2 */ + 5789 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5791 "00101001" // /* MW 13 */ + 5792 "00000110" // /* MW 12 */ + 5793 "10100000" // /* MW 11 */ + 5794 "00000111" // /* MW 10 */ + 5795 "00111000" // /* MW 9 */ + 5796 "01111100" // /* MW 8 */ + 5797 "00000000" // /* MW 7 */ + 5798 "00000000" // /* MW 6 */ + 5799 "10010110" // /* MW 5 */ + 5800 "00011100" // /* MW 4 */ + 5801 "01111110" // /* MW 3 */ + 5802 "00000001" // /* MW 2 */ + 5803 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5805 "00001001" // /* MW 9 */ + 5806 "01000110" // /* MW 8 */ + 5807 "10100010" // /* MW 7 */ + 5808 "11100100" // /* MW 6 */ + 5809 "00000000" // /* MW 5 */ + 5810 "01010101" // /* MW 4 */ + 5811 "01100001" // /* MW 3 */ + 5812 "10010001" // /* MW 2 */ + 5813 "01100001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5815 "00101001" // /* MW 9 */ + 5816 "00101010" // /* MW 8 */ + 5817 "10100001" // /* MW 7 */ + 5818 "11000100" // /* MW 6 */ + 5819 "00000111" // /* MW 5 */ + 5820 "10010010" // /* MW 4 */ + 5821 "01100001" // /* MW 3 */ + 5822 "11000001" // /* MW 2 */ + 5823 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5825 "00001001" // /* MW 9 */ + 5826 "01101010" // /* MW 8 */ + 5827 "10100011" // /* MW 7 */ + 5828 "11000100" // /* MW 6 */ + 5829 "00000011" // /* MW 5 */ + 5830 "10010010" // /* MW 4 */ + 5831 "01100010" // /* MW 3 */ + 5832 "10000001" // /* MW 2 */ + 5833 "11101011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1285 32 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5835 "00101001" // /* MW 11 */ + 5836 "00000110" // /* MW 10 */ + 5837 "10100000" // /* MW 9 */ + 5838 "11100110" // /* MW 8 */ + 5839 "00000000" // /* MW 7 */ + 5840 "10001111" // /* MW 6 */ + 5841 "00100010" // /* MW 5 */ + 5842 "01010111" // /* MW 4 */ + 5843 "01101111" // /* MW 3 */ + 5844 "10010001" // /* MW 2 */ + 5845 "10110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5847 "00001001" // /* MW 9 */ + 5848 "01000110" // /* MW 8 */ + 5849 "10100010" // /* MW 7 */ + 5850 "11100100" // /* MW 6 */ + 5851 "00000000" // /* MW 5 */ + 5852 "00000110" // /* MW 4 */ + 5853 "01100010" // /* MW 3 */ + 5854 "10010001" // /* MW 2 */ + 5855 "10010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5857 "00101001" // /* MW 7 */ + 5858 "00101010" // /* MW 6 */ + 5859 "10100001" // /* MW 5 */ + 5860 "11000110" // /* MW 4 */ + 5861 "00000011" // /* MW 3 */ + 5862 "10010010" // /* MW 2 */ + 5863 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5865 "00001001" // /* MW 7 */ + 5866 "01101010" // /* MW 6 */ + 5867 "10100011" // /* MW 5 */ + 5868 "11000110" // /* MW 4 */ + 5869 "00000111" // /* MW 3 */ + 5870 "10010010" // /* MW 2 */ + 5871 "00000001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 + 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5873 "00000000" // /* MW 3 */ + 5874 "10001011" // /* MW 2 */ + 5875 "00011111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 750 26 first + 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5877 "00101001" // /* MW 7 */ + 5878 "00101010" // /* MW 6 */ + 5879 "10100001" // /* MW 5 */ + 5880 "11100110" // /* MW 4 */ + 5881 "10100000" // /* MW 3 */ + 5882 "00001011" // /* MW 2 */ + 5883 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 749 26 first + 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5885 "00101001" // /* MW 7 */ + 5886 "00000110" // /* MW 6 */ + 5887 "10100000" // /* MW 5 */ + 5888 "11100110" // /* MW 4 */ + 5889 "10100000" // /* MW 3 */ + 5890 "10001000" // /* MW 2 */ + 5891 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 + 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5893 "00001001" // /* MW 9 */ + 5894 "01101010" // /* MW 8 */ + 5895 "10100011" // /* MW 7 */ + 5896 "11100110" // /* MW 6 */ + 5897 "00000000" // /* MW 5 */ + 5898 "00000101" // /* MW 4 */ + 5899 "00100011" // /* MW 3 */ + 5900 "11110111" // /* MW 2 */ + 5901 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 first + 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5903 "00001001" // /* MW 11 */ + 5904 "01000110" // /* MW 10 */ + 5905 "10100010" // /* MW 9 */ + 5906 "11100110" // /* MW 8 */ + 5907 "10100000" // /* MW 7 */ + 5908 "10000010" // /* MW 6 */ + 5909 "00100101" // /* MW 5 */ + 5910 "11010111" // /* MW 4 */ + 5911 "01101110" // /* MW 3 */ + 5912 "10001001" // /* MW 2 */ + 5913 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 + 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5915 "01110000" // /* MW 7 */ + 5916 "10000000" // /* MW 6 */ + 5917 "11000101" // /* MW 5 */ + 5918 "00000011" // /* MW 4 */ + 5919 "01100000" // /* MW 3 */ + 5920 "10001001" // /* MW 2 */ + 5921 "01100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5923 "01110000" // /* MW 7 */ + 5924 "00000000" // /* MW 6 */ + 5925 "10000001" // /* MW 5 */ + 5926 "00000001" // /* MW 4 */ + 5927 "01100000" // /* MW 3 */ + 5928 "01000001" // /* MW 2 */ + 5929 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5931 "01110000" // /* MW 7 */ + 5932 "01010000" // /* MW 6 */ + 5933 "10000111" // /* MW 5 */ + 5934 "00000000" // /* MW 4 */ + 5935 "11000000" // /* MW 3 */ + 5936 "00010010" // /* MW 2 */ + 5937 "10110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5939 "01110000" // /* MW 7 */ + 5940 "10010000" // /* MW 6 */ + 5941 "11000111" // /* MW 5 */ + 5942 "00000010" // /* MW 4 */ + 5943 "11000000" // /* MW 3 */ + 5944 "00000010" // /* MW 2 */ + 5945 "10100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "01110110" // /* MW 9 */ + 5948 "01100000" // /* MW 8 */ + 5949 "11001000" // /* MW 7 */ + 5950 "00000001" // /* MW 6 */ + 5951 "10010000" // /* MW 5 */ + 5952 "00111011" // /* MW 4 */ + 5953 "01100001" // /* MW 3 */ + 5954 "10010001" // /* MW 2 */ + 5955 "00010011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5957 "01110000" // /* MW 7 */ + 5958 "00000000" // /* MW 6 */ + 5959 "10000011" // /* MW 5 */ + 5960 "00000000" // /* MW 4 */ + 5961 "11000000" // /* MW 3 */ + 5962 "00001010" // /* MW 2 */ + 5963 "01100010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1218 20 first +.src_ref 2 "conv2d_bf16.h" 1287 37 first + 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */ + 5965 "01100000" // /* MW 11 */ + 5966 "00000000" // /* MW 10 */ + 5967 "00000000" // /* MW 9 */ + 5968 "11111010" // /* MW 8 */ + 5969 "00000010" // /* MW 7 */ + 5970 "00100100" // /* MW 6 */ + 5971 "00100000" // /* MW 5 */ + 5972 "01010111" // /* MW 4 */ + 5973 "11000000" // /* MW 3 */ + 5974 "00100010" // /* MW 2 */ + 5975 "01010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 738 8 +.delay_slot + 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5977 "01110000" // /* MW 7 */ + 5978 "01100000" // /* MW 6 */ + 5979 "10101001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "11000000" // /* MW 3 */ + 5982 "00011010" // /* MW 2 */ + 5983 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5985 "01110000" // /* MW 7 */ + 5986 "11000000" // /* MW 6 */ + 5987 "10100111" // /* MW 5 */ + 5988 "00000011" // /* MW 4 */ + 5989 "11000000" // /* MW 3 */ + 5990 "00110010" // /* MW 2 */ + 5991 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5993 "01110110" // /* MW 9 */ + 5994 "01100000" // /* MW 8 */ + 5995 "10110101" // /* MW 7 */ + 5996 "00000000" // /* MW 6 */ + 5997 "10010000" // /* MW 5 */ + 5998 "00101011" // /* MW 4 */ + 5999 "11000101" // /* MW 3 */ + 6000 "00111010" // /* MW 2 */ + 6001 "00010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.delay_slot + 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6003 "01110000" // /* MW 7 */ + 6004 "10000000" // /* MW 6 */ + 6005 "11000010" // /* MW 5 */ + 6006 "00000010" // /* MW 4 */ + 6007 "11000000" // /* MW 3 */ + 6008 "00101010" // /* MW 2 */ + 6009 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.delay_slot + 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6011 "01110000" // /* MW 7 */ + 6012 "11000000" // /* MW 6 */ + 6013 "01001101" // /* MW 5 */ + 6014 "00000000" // /* MW 4 */ + 6015 "01100000" // /* MW 3 */ + 6016 "10001001" // /* MW 2 */ + 6017 "11100001" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "11101100" // /* MW 3 */ + 6020 "11011100" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "11101100" // /* MW 3 */ + 6024 "10111100" // /* MW 2 */ + 6025 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6027 "01110000" // /* MW 7 */ + 6028 "01110110" // /* MW 6 */ + 6029 "10101010" // /* MW 5 */ + 6030 "00000010" // /* MW 4 */ + 6031 "01100000" // /* MW 3 */ + 6032 "01011010" // /* MW 2 */ + 6033 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6035 "01110000" // /* MW 7 */ + 6036 "01110110" // /* MW 6 */ + 6037 "11011010" // /* MW 5 */ + 6038 "00000001" // /* MW 4 */ + 6039 "01100000" // /* MW 3 */ + 6040 "10111010" // /* MW 2 */ + 6041 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */ + 6043 "00100001" // /* MW 9 */ + 6044 "00000000" // /* MW 8 */ + 6045 "00000000" // /* MW 7 */ + 6046 "11111110" // /* MW 6 */ + 6047 "00000010" // /* MW 5 */ + 6048 "00000000" // /* MW 4 */ + 6049 "01100000" // /* MW 3 */ + 6050 "11010010" // /* MW 2 */ + 6051 "00100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6053 "01110000" // /* MW 7 */ + 6054 "01110110" // /* MW 6 */ + 6055 "10100010" // /* MW 5 */ + 6056 "00000010" // /* MW 4 */ + 6057 "01100000" // /* MW 3 */ + 6058 "10111010" // /* MW 2 */ + 6059 "00100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "11101100" // /* MW 3 */ + 6062 "10001100" // /* MW 2 */ + 6063 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6065 "01110000" // /* MW 7 */ + 6066 "01110110" // /* MW 6 */ + 6067 "10010110" // /* MW 5 */ + 6068 "00000010" // /* MW 4 */ + 6069 "01100000" // /* MW 3 */ + 6070 "11010010" // /* MW 2 */ + 6071 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "01110000" // /* MW 7 */ + 6074 "01110110" // /* MW 6 */ + 6075 "10001010" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "01100000" // /* MW 3 */ + 6078 "10111010" // /* MW 2 */ + 6079 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6081 "00000000" // /* MW 15 */ + 6082 "00000000" // /* MW 14 */ + 6083 "01111000" // /* MW 13 */ + 6084 "10100101" // /* MW 12 */ + 6085 "00000001" // /* MW 11 */ + 6086 "00000000" // /* MW 10 */ + 6087 "00000000" // /* MW 9 */ + 6088 "00000000" // /* MW 8 */ + 6089 "10010011" // /* MW 7 */ + 6090 "10100010" // /* MW 6 */ + 6091 "00100100" // /* MW 5 */ + 6092 "00000000" // /* MW 4 */ + 6093 "11110000" // /* MW 3 */ + 6094 "00101100" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 +.src_ref 4 "vector.hpp" 1152 43 + 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6097 "10100011" // /* MW 3 */ + 6098 "11100000" // /* MW 2 */ + 6099 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6101 "11100011" // /* MW 3 */ + 6102 "00010100" // /* MW 2 */ + 6103 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "00100011" // /* MW 3 */ + 6106 "00000100" // /* MW 2 */ + 6107 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6109 "01100011" // /* MW 3 */ + 6110 "00010100" // /* MW 2 */ + 6111 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6113 "00010011" // /* MW 3 */ + 6114 "00000110" // /* MW 2 */ + 6115 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6117 "11100011" // /* MW 3 */ + 6118 "00010101" // /* MW 2 */ + 6119 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6121 "01110000" // /* MW 7 */ + 6122 "10100101" // /* MW 6 */ + 6123 "00000001" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "01100000" // /* MW 3 */ + 6126 "00100100" // /* MW 2 */ + 6127 "10010100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1143 12 first + 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6129 "01011000" // /* MW 11 */ + 6130 "00000000" // /* MW 10 */ + 6131 "01000000" // /* MW 9 */ + 6132 "00000001" // /* MW 8 */ + 6133 "00110101" // /* MW 7 */ + 6134 "00000110" // /* MW 6 */ + 6135 "00100000" // /* MW 5 */ + 6136 "01010111" // /* MW 4 */ + 6137 "01101111" // /* MW 3 */ + 6138 "10010010" // /* MW 2 */ + 6139 "11100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.delay_slot + 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6141 "10000000" // /* MW 3 */ + 6142 "01000100" // /* MW 2 */ + 6143 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.delay_slot + 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6145 "10100000" // /* MW 3 */ + 6146 "01001001" // /* MW 2 */ + 6147 "00011010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6149 "00000001" // /* MW 5 */ + 6150 "00011110" // /* MW 4 */ + 6151 "00000101" // /* MW 3 */ + 6152 "01110010" // /* MW 2 */ + 6153 "11101011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.delay_slot + 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "10000000" // /* MW 3 */ + 6156 "01001110" // /* MW 2 */ + 6157 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */ + 6161 "00000000" // /* MW 5 */ + 6162 "00000000" // /* MW 4 */ + 6163 "01011000" // /* MW 3 */ + 6164 "00001101" // /* MW 2 */ + 6165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 1364 80 +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6177 "01111000" // /* MW 11 */ + 6178 "10010000" // /* MW 10 */ + 6179 "10110011" // /* MW 9 */ + 6180 "00001000" // /* MW 8 */ + 6181 "11100001" // /* MW 7 */ + 6182 "00000100" // /* MW 6 */ + 6183 "10001011" // /* MW 5 */ + 6184 "00001100" // /* MW 4 */ + 6185 "00100010" // /* MW 3 */ + 6186 "01111110" // /* MW 2 */ + 6187 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1369 80 + 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6189 "01111000" // /* MW 11 */ + 6190 "01000000" // /* MW 10 */ + 6191 "01100010" // /* MW 9 */ + 6192 "00000011" // /* MW 8 */ + 6193 "11010100" // /* MW 7 */ + 6194 "00011011" // /* MW 6 */ + 6195 "00001011" // /* MW 5 */ + 6196 "01010110" // /* MW 4 */ + 6197 "10000010" // /* MW 3 */ + 6198 "10010000" // /* MW 2 */ + 6199 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 807 26 +.src_ref 2 "conv2d_bf16.h" 808 26 +.src_ref 2 "conv2d_bf16.h" 809 26 +.src_ref 2 "conv2d_bf16.h" 810 26 +.src_ref 2 "conv2d_bf16.h" 1436 26 +.src_ref 2 "conv2d_bf16.h" 1437 26 +.src_ref 2 "conv2d_bf16.h" 1438 26 +.src_ref 2 "conv2d_bf16.h" 1439 26 + 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111000" // /* MW 9 */ + 6202 "11010000" // /* MW 8 */ + 6203 "00000101" // /* MW 7 */ + 6204 "10001001" // /* MW 6 */ + 6205 "00110001" // /* MW 5 */ + 6206 "00011001" // /* MW 4 */ + 6207 "00000000" // /* MW 3 */ + 6208 "10010100" // /* MW 2 */ + 6209 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 802 83 +.src_ref 2 "conv2d_bf16.h" 1428 39 + 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6211 "01111000" // /* MW 11 */ + 6212 "10010000" // /* MW 10 */ + 6213 "11000111" // /* MW 9 */ + 6214 "11001010" // /* MW 8 */ + 6215 "00100000" // /* MW 7 */ + 6216 "00000001" // /* MW 6 */ + 6217 "00001011" // /* MW 5 */ + 6218 "01011100" // /* MW 4 */ + 6219 "10000110" // /* MW 3 */ + 6220 "10011000" // /* MW 2 */ + 6221 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 794 8 + 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6223 "01111000" // /* MW 11 */ + 6224 "01010000" // /* MW 10 */ + 6225 "10000111" // /* MW 9 */ + 6226 "00001000" // /* MW 8 */ + 6227 "10010000" // /* MW 7 */ + 6228 "00000001" // /* MW 6 */ + 6229 "00001011" // /* MW 5 */ + 6230 "00000010" // /* MW 4 */ + 6231 "00100101" // /* MW 3 */ + 6232 "10000011" // /* MW 2 */ + 6233 "11111010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 794 8 +.src_ref 2 "conv2d_bf16.h" 1455 20 + 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6235 "01111000" // /* MW 9 */ + 6236 "01010000" // /* MW 8 */ + 6237 "01000101" // /* MW 7 */ + 6238 "00001011" // /* MW 6 */ + 6239 "10000000" // /* MW 5 */ + 6240 "00000001" // /* MW 4 */ + 6241 "00100000" // /* MW 3 */ + 6242 "11010110" // /* MW 2 */ + 6243 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 12 + 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6245 "00010000" // /* MW 9 */ + 6246 "01011000" // /* MW 8 */ + 6247 "00110100" // /* MW 7 */ + 6248 "00000101" // /* MW 6 */ + 6249 "00000000" // /* MW 5 */ + 6250 "00000000" // /* MW 4 */ + 6251 "00100000" // /* MW 3 */ + 6252 "00110110" // /* MW 2 */ + 6253 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 first +.src_ref 2 "conv2d_bf16.h" 1873 + 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6255 "01110010" // /* MW 5 */ + 6256 "11011111" // /* MW 4 */ + 6257 "00100110" // /* MW 3 */ + 6258 "10000111" // /* MW 2 */ + 6259 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "11000110" // /* MW 3 */ + 6262 "00011101" // /* MW 2 */ + 6263 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 80 first + 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00000110" // /* MW 3 */ + 6266 "10001010" // /* MW 2 */ + 6267 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 799 87 first + 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "10000110" // /* MW 3 */ + 6270 "10011110" // /* MW 2 */ + 6271 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 83 first + 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "11010110" // /* MW 3 */ + 6274 "00011110" // /* MW 2 */ + 6275 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 83 first + 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11110110" // /* MW 3 */ + 6278 "11001010" // /* MW 2 */ + 6279 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 66 first + 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6281 "10110110" // /* MW 3 */ + 6282 "00010111" // /* MW 2 */ + 6283 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1443 71 first + 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6285 "10010110" // /* MW 3 */ + 6286 "00000111" // /* MW 2 */ + 6287 "00000011" // /* MW 1 */ + 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6289 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1369 89 + 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6291 "00000000" // /* MW 3 */ + 6292 "10011000" // /* MW 2 */ + 6293 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 +.src_ref 2 "conv2d_bf16.h" 1518 37 + 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6295 "00000000" // /* MW 3 */ + 6296 "00000111" // /* MW 2 */ + 6297 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 + 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6299 "00000000" // /* MW 3 */ + 6300 "11011100" // /* MW 2 */ + 6301 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 first + 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6303 "11100000" // /* MW 3 */ + 6304 "00001111" // /* MW 2 */ + 6305 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 89 first + 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6307 "11000000" // /* MW 5 */ + 6308 "00011110" // /* MW 4 */ + 6309 "11101110" // /* MW 3 */ + 6310 "01111111" // /* MW 2 */ + 6311 "11101111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 + 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6313 "01110000" // /* MW 7 */ + 6314 "10010000" // /* MW 6 */ + 6315 "11000111" // /* MW 5 */ + 6316 "00000011" // /* MW 4 */ + 6317 "01100000" // /* MW 3 */ + 6318 "00101011" // /* MW 2 */ + 6319 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1362 31 first +.src_ref 2 "conv2d_bf16.h" 1429 50 +.src_ref 2 "conv2d_bf16.h" 1443 16 first +.loop_nesting 1 + 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6321 "01100000" // /* MW 13 */ + 6322 "10000001" // /* MW 12 */ + 6323 "01110001" // /* MW 11 */ + 6324 "00000010" // /* MW 10 */ + 6325 "10010110" // /* MW 9 */ + 6326 "10001111" // /* MW 8 */ + 6327 "00000000" // /* MW 7 */ + 6328 "00000000" // /* MW 6 */ + 6329 "00101000" // /* MW 5 */ + 6330 "00101000" // /* MW 4 */ + 6331 "01111010" // /* MW 3 */ + 6332 "10000101" // /* MW 2 */ + 6333 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1364 31 first +.src_ref 2 "conv2d_bf16.h" 1443 16 + 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6335 "00010000" // /* MW 11 */ + 6336 "11001000" // /* MW 10 */ + 6337 "10111100" // /* MW 9 */ + 6338 "00000101" // /* MW 8 */ + 6339 "00000000" // /* MW 7 */ + 6340 "00000000" // /* MW 6 */ + 6341 "00101000" // /* MW 5 */ + 6342 "00101000" // /* MW 4 */ + 6343 "01111010" // /* MW 3 */ + 6344 "00001101" // /* MW 2 */ + 6345 "11001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1428 39 first +.src_ref 2 "conv2d_bf16.h" 1443 16 first + 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6347 "01001000" // /* MW 11 */ + 6348 "00111111" // /* MW 10 */ + 6349 "10111111" // /* MW 9 */ + 6350 "01101110" // /* MW 8 */ + 6351 "11101001" // /* MW 7 */ + 6352 "00000101" // /* MW 6 */ + 6353 "00101000" // /* MW 5 */ + 6354 "00000101" // /* MW 4 */ + 6355 "01110110" // /* MW 3 */ + 6356 "10000001" // /* MW 2 */ + 6357 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6359 "01111110" // /* MW 9 */ + 6360 "10010000" // /* MW 8 */ + 6361 "01000111" // /* MW 7 */ + 6362 "00000001" // /* MW 6 */ + 6363 "00010100" // /* MW 5 */ + 6364 "00000001" // /* MW 4 */ + 6365 "01110011" // /* MW 3 */ + 6366 "01011001" // /* MW 2 */ + 6367 "01010101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1367 31 first + 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "00101000" // /* MW 5 */ + 6370 "00000001" // /* MW 4 */ + 6371 "01110110" // /* MW 3 */ + 6372 "10010101" // /* MW 2 */ + 6373 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1369 31 first + 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6375 "10101000" // /* MW 5 */ + 6376 "00100001" // /* MW 4 */ + 6377 "01111010" // /* MW 3 */ + 6378 "00011101" // /* MW 2 */ + 6379 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1372 31 first + 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6381 "00101000" // /* MW 5 */ + 6382 "00101000" // /* MW 4 */ + 6383 "01111010" // /* MW 3 */ + 6384 "10100101" // /* MW 2 */ + 6385 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1374 31 first + 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6387 "00101000" // /* MW 5 */ + 6388 "00101000" // /* MW 4 */ + 6389 "01111010" // /* MW 3 */ + 6390 "00101101" // /* MW 2 */ + 6391 "11001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1377 31 first + 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6393 "10101000" // /* MW 5 */ + 6394 "00000000" // /* MW 4 */ + 6395 "01110110" // /* MW 3 */ + 6396 "10110101" // /* MW 2 */ + 6397 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1379 31 first + 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6399 "00101000" // /* MW 5 */ + 6400 "00000011" // /* MW 4 */ + 6401 "01110110" // /* MW 3 */ + 6402 "00111101" // /* MW 2 */ + 6403 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 first + 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6405 "10101000" // /* MW 5 */ + 6406 "00000011" // /* MW 4 */ + 6407 "01110110" // /* MW 3 */ + 6408 "01000101" // /* MW 2 */ + 6409 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6411 "11101110" // /* MW 9 */ + 6412 "00101101" // /* MW 8 */ + 6413 "01101001" // /* MW 7 */ + 6414 "00000001" // /* MW 6 */ + 6415 "00010100" // /* MW 5 */ + 6416 "00010010" // /* MW 4 */ + 6417 "01110101" // /* MW 3 */ + 6418 "01001101" // /* MW 2 */ + 6419 "01101000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6421 "11101110" // /* MW 9 */ + 6422 "00101111" // /* MW 8 */ + 6423 "10101001" // /* MW 7 */ + 6424 "00000010" // /* MW 6 */ + 6425 "00010100" // /* MW 5 */ + 6426 "00010100" // /* MW 4 */ + 6427 "01110101" // /* MW 3 */ + 6428 "10000001" // /* MW 2 */ + 6429 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6431 "01101001" // /* MW 11 */ + 6432 "00001011" // /* MW 10 */ + 6433 "01001000" // /* MW 9 */ + 6434 "11000010" // /* MW 8 */ + 6435 "11011011" // /* MW 7 */ + 6436 "00010001" // /* MW 6 */ + 6437 "00101010" // /* MW 5 */ + 6438 "00101000" // /* MW 4 */ + 6439 "01111010" // /* MW 3 */ + 6440 "00000001" // /* MW 2 */ + 6441 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6443 "01101001" // /* MW 9 */ + 6444 "00110101" // /* MW 8 */ + 6445 "01001001" // /* MW 7 */ + 6446 "11000010" // /* MW 6 */ + 6447 "11011111" // /* MW 5 */ + 6448 "00010001" // /* MW 4 */ + 6449 "01110101" // /* MW 3 */ + 6450 "10000001" // /* MW 2 */ + 6451 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6453 "01101001" // /* MW 3 */ + 6454 "01001001" // /* MW 2 */ + 6455 "01001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6457 "01101001" // /* MW 3 */ + 6458 "01110101" // /* MW 2 */ + 6459 "01001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.src_ref 2 "conv2d_bf16.h" 1437 26 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6461 "00111101" // /* MW 9 */ + 6462 "10000100" // /* MW 8 */ + 6463 "10100001" // /* MW 7 */ + 6464 "11000110" // /* MW 6 */ + 6465 "01011111" // /* MW 5 */ + 6466 "10001011" // /* MW 4 */ + 6467 "10101010" // /* MW 3 */ + 6468 "00000000" // /* MW 2 */ + 6469 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1436 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6471 "00111101" // /* MW 7 */ + 6472 "10000000" // /* MW 6 */ + 6473 "10100000" // /* MW 5 */ + 6474 "00000000" // /* MW 4 */ + 6475 "10010100" // /* MW 3 */ + 6476 "00000001" // /* MW 2 */ + 6477 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1438 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6479 "00111101" // /* MW 7 */ + 6480 "10001000" // /* MW 6 */ + 6481 "10100010" // /* MW 5 */ + 6482 "00000000" // /* MW 4 */ + 6483 "11010100" // /* MW 3 */ + 6484 "00000001" // /* MW 2 */ + 6485 "00000011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1439 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6487 "00111101" // /* MW 9 */ + 6488 "10001100" // /* MW 8 */ + 6489 "10100011" // /* MW 7 */ + 6490 "00011101" // /* MW 6 */ + 6491 "00010100" // /* MW 5 */ + 6492 "00010010" // /* MW 4 */ + 6493 "01110101" // /* MW 3 */ + 6494 "00000001" // /* MW 2 */ + 6495 "01010101" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6497 "10110111" // /* MW 5 */ + 6498 "00010110" // /* MW 4 */ + 6499 "10000010" // /* MW 3 */ + 6500 "10000010" // /* MW 2 */ + 6501 "10100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6503 "00001001" // /* MW 9 */ + 6504 "00101010" // /* MW 8 */ + 6505 "10011001" // /* MW 7 */ + 6506 "11000110" // /* MW 6 */ + 6507 "01011111" // /* MW 5 */ + 6508 "00111100" // /* MW 4 */ + 6509 "00101010" // /* MW 3 */ + 6510 "00101000" // /* MW 2 */ + 6511 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6513 "00001001" // /* MW 9 */ + 6514 "00000100" // /* MW 8 */ + 6515 "10011000" // /* MW 7 */ + 6516 "11000110" // /* MW 6 */ + 6517 "01011011" // /* MW 5 */ + 6518 "10111100" // /* MW 4 */ + 6519 "10101001" // /* MW 3 */ + 6520 "00000000" // /* MW 2 */ + 6521 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6523 "00001001" // /* MW 7 */ + 6524 "01101000" // /* MW 6 */ + 6525 "10011011" // /* MW 5 */ + 6526 "00000000" // /* MW 4 */ + 6527 "10010100" // /* MW 3 */ + 6528 "00000001" // /* MW 2 */ + 6529 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6531 "00001001" // /* MW 13 */ + 6532 "01000110" // /* MW 12 */ + 6533 "10011010" // /* MW 11 */ + 6534 "01101100" // /* MW 10 */ + 6535 "00000101" // /* MW 9 */ + 6536 "00000000" // /* MW 8 */ + 6537 "00000000" // /* MW 7 */ + 6538 "00000000" // /* MW 6 */ + 6539 "10101000" // /* MW 5 */ + 6540 "00000011" // /* MW 4 */ + 6541 "01110110" // /* MW 3 */ + 6542 "10000001" // /* MW 2 */ + 6543 "00000010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6545 "00000000" // /* MW 15 */ + 6546 "00000000" // /* MW 14 */ + 6547 "11101000" // /* MW 13 */ + 6548 "10101111" // /* MW 12 */ + 6549 "01000101" // /* MW 11 */ + 6550 "00000001" // /* MW 10 */ + 6551 "00000000" // /* MW 9 */ + 6552 "00000000" // /* MW 8 */ + 6553 "01011011" // /* MW 7 */ + 6554 "00000001" // /* MW 6 */ + 6555 "00101000" // /* MW 5 */ + 6556 "00100100" // /* MW 4 */ + 6557 "01111010" // /* MW 3 */ + 6558 "00000001" // /* MW 2 */ + 6559 "01010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 801 30 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6561 "11100000" // /* MW 11 */ + 6562 "10101101" // /* MW 10 */ + 6563 "10000101" // /* MW 9 */ + 6564 "00000000" // /* MW 8 */ + 6565 "10001011" // /* MW 7 */ + 6566 "10011100" // /* MW 6 */ + 6567 "00100101" // /* MW 5 */ + 6568 "10010111" // /* MW 4 */ + 6569 "11111111" // /* MW 3 */ + 6570 "00001100" // /* MW 2 */ + 6571 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.src_ref 2 "conv2d_bf16.h" 1517 32 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6573 "00001001" // /* MW 11 */ + 6574 "00101010" // /* MW 10 */ + 6575 "10011001" // /* MW 9 */ + 6576 "11000110" // /* MW 8 */ + 6577 "01011111" // /* MW 7 */ + 6578 "00111100" // /* MW 6 */ + 6579 "00100010" // /* MW 5 */ + 6580 "00010111" // /* MW 4 */ + 6581 "01101111" // /* MW 3 */ + 6582 "10010001" // /* MW 2 */ + 6583 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.src_ref 2 "conv2d_bf16.h" 1518 37 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6585 "00001001" // /* MW 11 */ + 6586 "00000100" // /* MW 10 */ + 6587 "10011000" // /* MW 9 */ + 6588 "11000110" // /* MW 8 */ + 6589 "01011011" // /* MW 7 */ + 6590 "10111100" // /* MW 6 */ + 6591 "00100001" // /* MW 5 */ + 6592 "10010111" // /* MW 4 */ + 6593 "01101111" // /* MW 3 */ + 6594 "10010001" // /* MW 2 */ + 6595 "01110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6597 "00001001" // /* MW 7 */ + 6598 "01101000" // /* MW 6 */ + 6599 "10011011" // /* MW 5 */ + 6600 "11100110" // /* MW 4 */ + 6601 "10100000" // /* MW 3 */ + 6602 "10001000" // /* MW 2 */ + 6603 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.src_ref 2 "conv2d_bf16.h" 1428 39 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6605 "00001001" // /* MW 9 */ + 6606 "01000110" // /* MW 8 */ + 6607 "10011010" // /* MW 7 */ + 6608 "11100110" // /* MW 6 */ + 6609 "10000000" // /* MW 5 */ + 6610 "10011011" // /* MW 4 */ + 6611 "00100000" // /* MW 3 */ + 6612 "10110111" // /* MW 2 */ + 6613 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 30 first + 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6615 "01011011" // /* MW 3 */ + 6616 "00001011" // /* MW 2 */ + 6617 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6619 "01011111" // /* MW 3 */ + 6620 "10001011" // /* MW 2 */ + 6621 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6623 "00001001" // /* MW 7 */ + 6624 "00000100" // /* MW 6 */ + 6625 "10011000" // /* MW 5 */ + 6626 "11000110" // /* MW 4 */ + 6627 "01011011" // /* MW 3 */ + 6628 "10111100" // /* MW 2 */ + 6629 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6631 "00001001" // /* MW 7 */ + 6632 "00101010" // /* MW 6 */ + 6633 "10011001" // /* MW 5 */ + 6634 "11000110" // /* MW 4 */ + 6635 "01011111" // /* MW 3 */ + 6636 "00111100" // /* MW 2 */ + 6637 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6639 "00001001" // /* MW 3 */ + 6640 "01000110" // /* MW 2 */ + 6641 "10011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first + 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6643 "00001001" // /* MW 3 */ + 6644 "01101000" // /* MW 2 */ + 6645 "10011011" // /* MW 1 */ + 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6647 "00000000" // /* MW 1 */ + 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6649 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6651 "00010110" // /* MW 3 */ + 6652 "00010000" // /* MW 2 */ + 6653 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6655 "10010110" // /* MW 3 */ + 6656 "10010000" // /* MW 2 */ + 6657 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1455 20 first + 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */ + 6659 "01100001" // /* MW 9 */ + 6660 "00000000" // /* MW 8 */ + 6661 "00000000" // /* MW 7 */ + 6662 "01001110" // /* MW 6 */ + 6663 "00000011" // /* MW 5 */ + 6664 "00101010" // /* MW 4 */ + 6665 "11000000" // /* MW 3 */ + 6666 "00011010" // /* MW 2 */ + 6667 "00010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.delay_slot + 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6669 "01010110" // /* MW 3 */ + 6670 "00010000" // /* MW 2 */ + 6671 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6673 "10010110" // /* MW 3 */ + 6674 "00010001" // /* MW 2 */ + 6675 "00001001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6677 "11010110" // /* MW 3 */ + 6678 "10010001" // /* MW 2 */ + 6679 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6681 "00010110" // /* MW 3 */ + 6682 "10010001" // /* MW 2 */ + 6683 "00001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6685 "01010110" // /* MW 3 */ + 6686 "00010001" // /* MW 2 */ + 6687 "00001100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6689 "11101100" // /* MW 3 */ + 6690 "11011100" // /* MW 2 */ + 6691 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6693 "11101100" // /* MW 3 */ + 6694 "10001100" // /* MW 2 */ + 6695 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6697 "01110000" // /* MW 7 */ + 6698 "01110110" // /* MW 6 */ + 6699 "10101010" // /* MW 5 */ + 6700 "00000010" // /* MW 4 */ + 6701 "01100000" // /* MW 3 */ + 6702 "01011010" // /* MW 2 */ + 6703 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6705 "01110000" // /* MW 7 */ + 6706 "01110110" // /* MW 6 */ + 6707 "01011010" // /* MW 5 */ + 6708 "00000000" // /* MW 4 */ + 6709 "01100000" // /* MW 3 */ + 6710 "10001010" // /* MW 2 */ + 6711 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */ + 6713 "00100001" // /* MW 9 */ + 6714 "00000000" // /* MW 8 */ + 6715 "00000000" // /* MW 7 */ + 6716 "01010010" // /* MW 6 */ + 6717 "00000011" // /* MW 5 */ + 6718 "00000000" // /* MW 4 */ + 6719 "01100000" // /* MW 3 */ + 6720 "11010010" // /* MW 2 */ + 6721 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6723 "01110000" // /* MW 7 */ + 6724 "01110110" // /* MW 6 */ + 6725 "10001010" // /* MW 5 */ + 6726 "00000010" // /* MW 4 */ + 6727 "01100000" // /* MW 3 */ + 6728 "10001010" // /* MW 2 */ + 6729 "10100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6731 "11101100" // /* MW 3 */ + 6732 "10111100" // /* MW 2 */ + 6733 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6735 "01110000" // /* MW 7 */ + 6736 "01110110" // /* MW 6 */ + 6737 "10010110" // /* MW 5 */ + 6738 "00000010" // /* MW 4 */ + 6739 "01100000" // /* MW 3 */ + 6740 "01010010" // /* MW 2 */ + 6741 "01101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6743 "01110010" // /* MW 9 */ + 6744 "01110110" // /* MW 8 */ + 6745 "00100010" // /* MW 7 */ + 6746 "00000010" // /* MW 6 */ + 6747 "01010011" // /* MW 5 */ + 6748 "00010100" // /* MW 4 */ + 6749 "11110111" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "00000000" // /* MW 15 */ + 6754 "00000000" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "10010011" // /* MW 7 */ + 6762 "11100010" // /* MW 6 */ + 6763 "00100100" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 +.src_ref 4 "vector.hpp" 1152 43 + 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "10100011" // /* MW 3 */ + 6770 "01100000" // /* MW 2 */ + 6771 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6773 "11100011" // /* MW 3 */ + 6774 "00010100" // /* MW 2 */ + 6775 "00001100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6777 "00100011" // /* MW 3 */ + 6778 "00000100" // /* MW 2 */ + 6779 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6781 "01100011" // /* MW 3 */ + 6782 "00010100" // /* MW 2 */ + 6783 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6785 "10100011" // /* MW 3 */ + 6786 "01100001" // /* MW 2 */ + 6787 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6789 "11100011" // /* MW 3 */ + 6790 "00010101" // /* MW 2 */ + 6791 "00001111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6793 "01110000" // /* MW 7 */ + 6794 "10100101" // /* MW 6 */ + 6795 "00000001" // /* MW 5 */ + 6796 "00000000" // /* MW 4 */ + 6797 "01100000" // /* MW 3 */ + 6798 "00100100" // /* MW 2 */ + 6799 "10011100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1337 12 first + 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6801 "01000000" // /* MW 5 */ + 6802 "11110101" // /* MW 4 */ + 6803 "01101110" // /* MW 3 */ + 6804 "11000010" // /* MW 2 */ + 6805 "01100010" // /* MW 1 */ +.delay_slot + 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "10010000" // /* MW 3 */ + 6808 "10001011" // /* MW 2 */ + 6809 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6811 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6813 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6817 "00000000" // /* MW 15 */ + 6818 "00000000" // /* MW 14 */ + 6819 "01111000" // /* MW 13 */ + 6820 "10100101" // /* MW 12 */ + 6821 "00000001" // /* MW 11 */ + 6822 "00000000" // /* MW 10 */ + 6823 "00000000" // /* MW 9 */ + 6824 "00000000" // /* MW 8 */ + 6825 "01011011" // /* MW 7 */ + 6826 "00000001" // /* MW 6 */ + 6827 "00100000" // /* MW 5 */ + 6828 "00000000" // /* MW 4 */ + 6829 "11110000" // /* MW 3 */ + 6830 "00101100" // /* MW 2 */ + 6831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "11110001" // /* MW 3 */ + 6834 "11101101" // /* MW 2 */ + 6835 "00000111" // /* MW 1 */ + 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "10010001" // /* MW 3 */ + 6838 "11110001" // /* MW 2 */ + 6839 "00000111" // /* MW 1 */ + 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6841 "00110001" // /* MW 3 */ + 6842 "11110101" // /* MW 2 */ + 6843 "00000111" // /* MW 1 */ + 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6845 "00011001" // /* MW 3 */ + 6846 "11101011" // /* MW 2 */ + 6847 "00000111" // /* MW 1 */ + 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "10011001" // /* MW 3 */ + 6850 "11111011" // /* MW 2 */ + 6851 "00000111" // /* MW 1 */ + 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "11010001" // /* MW 3 */ + 6854 "11111101" // /* MW 2 */ + 6855 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 first + 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6857 "00000000" // /* MW 3 */ + 6858 "00101000" // /* MW 2 */ + 6859 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 +.delay_slot + 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6861 "00000001" // /* MW 5 */ + 6862 "00000000" // /* MW 4 */ + 6863 "00000000" // /* MW 3 */ + 6864 "11110000" // /* MW 2 */ + 6865 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6871 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + 6873 "00000000" // /* MW 1 */ +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 74 first +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 81 4 +.function_start + 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6881 "00010000" // /* MW 9 */ + 6882 "00100000" // /* MW 8 */ + 6883 "00110010" // /* MW 7 */ + 6884 "11110010" // /* MW 6 */ + 6885 "00000001" // /* MW 5 */ + 6886 "00000000" // /* MW 4 */ + 6887 "00000000" // /* MW 3 */ + 6888 "00100000" // /* MW 2 */ + 6889 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 first +.src_ref 7 "superkernels.cpp" 81 4 + 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6891 "01111000" // /* MW 9 */ + 6892 "11010000" // /* MW 8 */ + 6893 "01001011" // /* MW 7 */ + 6894 "00001000" // /* MW 6 */ + 6895 "00010000" // /* MW 5 */ + 6896 "00000000" // /* MW 4 */ + 6897 "11010000" // /* MW 3 */ + 6898 "11000010" // /* MW 2 */ + 6899 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 74 + 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6901 "00000001" // /* MW 5 */ + 6902 "00000000" // /* MW 4 */ + 6903 "00000000" // /* MW 3 */ + 6904 "00001000" // /* MW 2 */ + 6905 "00000000" // /* MW 1 */ + 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6907 "01010101" // /* MW 3 */ + 6908 "11110000" // /* MW 2 */ + 6909 "00001111" // /* MW 1 */ + 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6911 "00000000" // /* MW 1 */ + 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6913 "00000000" // /* MW 1 */ + 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6915 "00000000" // /* MW 1 */ + 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 79 16 + 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 6919 "00000001" // /* MW 5 */ + 6920 "01000000" // /* MW 4 */ + 6921 "11011000" // /* MW 3 */ + 6922 "00001101" // /* MW 2 */ + 6923 "10000000" // /* MW 1 */ +.delay_slot + 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10011101" // /* MW 3 */ + 6926 "11111011" // /* MW 2 */ + 6927 "00001111" // /* MW 1 */ +.delay_slot + 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "00011101" // /* MW 3 */ + 6930 "11111111" // /* MW 2 */ + 6931 "00001111" // /* MW 1 */ +.delay_slot + 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "10011101" // /* MW 3 */ + 6934 "11101101" // /* MW 2 */ + 6935 "00001111" // /* MW 1 */ +.delay_slot + 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6937 "00111101" // /* MW 3 */ + 6938 "11110100" // /* MW 2 */ + 6939 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6940 "01000100" // MOVXM r15, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6941 "00000000" // /* MW 5 */ + 6942 "10101100" // /* MW 4 */ + 6943 "11000111" // /* MW 3 */ + 6944 "00000111" // /* MW 2 */ + 6945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6947 "00010001" // /* MW 9 */ + 6948 "00110100" // /* MW 8 */ + 6949 "10110010" // /* MW 7 */ + 6950 "11110011" // /* MW 6 */ + 6951 "00000001" // /* MW 5 */ + 6952 "00000000" // /* MW 4 */ + 6953 "01100000" // /* MW 3 */ + 6954 "10010001" // /* MW 2 */ + 6955 "11010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6957 "00010000" // /* MW 11 */ + 6958 "00110010" // /* MW 10 */ + 6959 "10110010" // /* MW 9 */ + 6960 "11110011" // /* MW 8 */ + 6961 "00000001" // /* MW 7 */ + 6962 "00000000" // /* MW 6 */ + 6963 "00001011" // /* MW 5 */ + 6964 "10001111" // /* MW 4 */ + 6965 "11100001" // /* MW 3 */ + 6966 "11000000" // /* MW 2 */ + 6967 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6969 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 6973 "00000001" // /* MW 5 */ + 6974 "00000000" // /* MW 4 */ + 6975 "01100000" // /* MW 3 */ + 6976 "00000101" // /* MW 2 */ + 6977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6981 "00110001" // /* MW 3 */ + 6982 "00100000" // /* MW 2 */ + 6983 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6985 "00000101" // /* MW 3 */ + 6986 "00100000" // /* MW 2 */ + 6987 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6989 "01110000" // /* MW 7 */ + 6990 "01100000" // /* MW 6 */ + 6991 "10110000" // /* MW 5 */ + 6992 "00000011" // /* MW 4 */ + 6993 "00110000" // /* MW 3 */ + 6994 "11000010" // /* MW 2 */ + 6995 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6997 "01110000" // /* MW 11 */ + 6998 "01100000" // /* MW 10 */ + 6999 "00110010" // /* MW 9 */ + 7000 "00000000" // /* MW 8 */ + 7001 "01011011" // /* MW 7 */ + 7002 "00000001" // /* MW 6 */ + 7003 "00100000" // /* MW 5 */ + 7004 "00000000" // /* MW 4 */ + 7005 "11110000" // /* MW 3 */ + 7006 "00101100" // /* MW 2 */ + 7007 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.return_address + 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7009 "10000101" // /* MW 3 */ + 7010 "01100111" // /* MW 2 */ + 7011 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 +.src_ref 7 "superkernels.cpp" 87 35 first + 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7013 "00010000" // /* MW 9 */ + 7014 "00100010" // /* MW 8 */ + 7015 "10110010" // /* MW 7 */ + 7016 "11110000" // /* MW 6 */ + 7017 "00000001" // /* MW 5 */ + 7018 "00000000" // /* MW 4 */ + 7019 "01010000" // /* MW 3 */ + 7020 "11000001" // /* MW 2 */ + 7021 "01001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 37 first +.src_ref 7 "superkernels.cpp" 89 13 + 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7023 "00010000" // /* MW 9 */ + 7024 "00110000" // /* MW 8 */ + 7025 "00110010" // /* MW 7 */ + 7026 "11110000" // /* MW 6 */ + 7027 "00000001" // /* MW 5 */ + 7028 "00000000" // /* MW 4 */ + 7029 "01010000" // /* MW 3 */ + 7030 "11001111" // /* MW 2 */ + 7031 "01000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 73 + 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7033 "00111010" // /* MW 3 */ + 7034 "00000110" // /* MW 2 */ + 7035 "00000010" // /* MW 1 */ + 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7037 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 110 + 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "01011010" // /* MW 3 */ + 7040 "00010110" // /* MW 2 */ + 7041 "00000010" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 first +.src_ref 7 "superkernels.cpp" 113 2 + 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7047 "01110000" // /* MW 7 */ + 7048 "01100000" // /* MW 6 */ + 7049 "10110110" // /* MW 5 */ + 7050 "00000000" // /* MW 4 */ + 7051 "00110000" // /* MW 3 */ + 7052 "11000010" // /* MW 2 */ + 7053 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 57 first + 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7055 "00001111" // /* MW 3 */ + 7056 "11100001" // /* MW 2 */ + 7057 "00010100" // /* MW 1 */ + 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7059 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 94 + 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001111" // /* MW 3 */ + 7062 "01100001" // /* MW 2 */ + 7063 "00010100" // /* MW 1 */ + 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 28 first + 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7067 "00001111" // /* MW 3 */ + 7068 "10100001" // /* MW 2 */ + 7069 "00010100" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 13 +.src_ref 7 "superkernels.cpp" 113 2 + 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7073 "00000000" // /* MW 15 */ + 7074 "00000000" // /* MW 14 */ + 7075 "01111000" // /* MW 13 */ + 7076 "01100000" // /* MW 12 */ + 7077 "00110111" // /* MW 11 */ + 7078 "00000000" // /* MW 10 */ + 7079 "00000000" // /* MW 9 */ + 7080 "10000000" // /* MW 8 */ + 7081 "00010001" // /* MW 7 */ + 7082 "00000110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 106 12 +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 7 "superkernels.cpp" 117 6 +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00100100" // /* MW 8 */ + 7091 "00110010" // /* MW 7 */ + 7092 "11110011" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "00100000" // /* MW 3 */ + 7096 "10111110" // /* MW 2 */ + 7097 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.src_ref 7 "superkernels.cpp" 108 13 + 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "00010000" // /* MW 9 */ + 7100 "00100110" // /* MW 8 */ + 7101 "00110010" // /* MW 7 */ + 7102 "11110001" // /* MW 6 */ + 7103 "00000001" // /* MW 5 */ + 7104 "00000000" // /* MW 4 */ + 7105 "11010000" // /* MW 3 */ + 7106 "11000010" // /* MW 2 */ + 7107 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 +.src_ref 7 "superkernels.cpp" 108 13 first +.src_ref 7 "superkernels.cpp" 139 6 +.src_ref 7 "superkernels.cpp" 140 14 + 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "00010000" // /* MW 9 */ + 7110 "00100000" // /* MW 8 */ + 7111 "10110010" // /* MW 7 */ + 7112 "11110011" // /* MW 6 */ + 7113 "00000001" // /* MW 5 */ + 7114 "00000000" // /* MW 4 */ + 7115 "11010000" // /* MW 3 */ + 7116 "11000110" // /* MW 2 */ + 7117 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first + 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "01010110" // /* MW 3 */ + 7120 "00000110" // /* MW 2 */ + 7121 "00000111" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ + 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7125 "00000000" // /* MW 1 */ + 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7127 "00000000" // /* MW 1 */ + 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7129 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 110 6 first +.src_ref 7 "superkernels.cpp" 110 17 first + 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */ + 7131 "00000001" // /* MW 5 */ + 7132 "01000000" // /* MW 4 */ + 7133 "00011000" // /* MW 3 */ + 7134 "00001110" // /* MW 2 */ + 7135 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 108 13 first +.delay_slot + 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "00000111" // /* MW 3 */ + 7138 "01100010" // /* MW 2 */ + 7139 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.src_ref 7 "superkernels.cpp" 108 13 +.delay_slot + 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7141 "00001110" // /* MW 5 */ + 7142 "01000100" // /* MW 4 */ + 7143 "00111001" // /* MW 3 */ + 7144 "11000110" // /* MW 2 */ + 7145 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.delay_slot + 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00000111" // /* MW 3 */ + 7148 "00100110" // /* MW 2 */ + 7149 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 +.delay_slot + 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "01110001" // /* MW 3 */ + 7152 "00000110" // /* MW 2 */ + 7153 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.delay_slot + 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "00110001" // /* MW 3 */ + 7156 "00000110" // /* MW 2 */ + 7157 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7159 "10000110" // /* MW 3 */ + 7160 "01100111" // /* MW 2 */ + 7161 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7163 "01110110" // /* MW 3 */ + 7164 "11111111" // /* MW 2 */ + 7165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7167 "00010110" // /* MW 3 */ + 7168 "11111110" // /* MW 2 */ + 7169 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7171 "00110110" // /* MW 3 */ + 7172 "11111110" // /* MW 2 */ + 7173 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "00010110" // /* MW 3 */ + 7178 "01000110" // /* MW 2 */ + 7179 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7181 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00000010" // /* MW 3 */ + 7190 "01100001" // /* MW 2 */ + 7191 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010001" // /* MW 3 */ + 7194 "00000110" // /* MW 2 */ + 7195 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7197 "11111101" // /* MW 3 */ + 7198 "11100010" // /* MW 2 */ + 7199 "00010111" // /* MW 1 */ + 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7201 "00000000" // /* MW 1 */ + 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7203 "00000000" // /* MW 1 */ + 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7207 "00011000" // /* MW 9 */ + 7208 "00010011" // /* MW 8 */ + 7209 "00000100" // /* MW 7 */ + 7210 "00000000" // /* MW 6 */ + 7211 "01011011" // /* MW 5 */ + 7212 "00000001" // /* MW 4 */ + 7213 "11110000" // /* MW 3 */ + 7214 "00101100" // /* MW 2 */ + 7215 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.src_ref 7 "superkernels.cpp" 113 2 first +.no_stack_arguments + 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 7217 "00000001" // /* MW 5 */ + 7218 "00000000" // /* MW 4 */ + 7219 "10111000" // /* MW 3 */ + 7220 "00001000" // /* MW 2 */ + 7221 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7222 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7223 "00000000" // /* MW 5 */ + 7224 "11001100" // /* MW 4 */ + 7225 "11000110" // /* MW 3 */ + 7226 "00000111" // /* MW 2 */ + 7227 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7233 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7235 "00011100" // /* MW 13 */ + 7236 "00000000" // /* MW 12 */ + 7237 "00000000" // /* MW 11 */ + 7238 "00000111" // /* MW 10 */ + 7239 "00111101" // /* MW 9 */ + 7240 "01010011" // /* MW 8 */ + 7241 "00000000" // /* MW 7 */ + 7242 "00000000" // /* MW 6 */ + 7243 "10110110" // /* MW 5 */ + 7244 "00000010" // /* MW 4 */ + 7245 "11110000" // /* MW 3 */ + 7246 "00101100" // /* MW 2 */ + 7247 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 first +.src_ref 7 "superkernels.cpp" 117 20 +.return_address + 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7249 "00010000" // /* MW 9 */ + 7250 "00100010" // /* MW 8 */ + 7251 "10110010" // /* MW 7 */ + 7252 "11110000" // /* MW 6 */ + 7253 "00000001" // /* MW 5 */ + 7254 "00000000" // /* MW 4 */ + 7255 "11010000" // /* MW 3 */ + 7256 "11000010" // /* MW 2 */ + 7257 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 20 + 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7259 "00110110" // /* MW 3 */ + 7260 "00000110" // /* MW 2 */ + 7261 "00000001" // /* MW 1 */ + 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7263 "00010001" // /* MW 3 */ + 7264 "11110000" // /* MW 2 */ + 7265 "00000111" // /* MW 1 */ + 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7267 "00000000" // /* MW 1 */ + 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7269 "00000000" // /* MW 1 */ + 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7271 "00000000" // /* MW 1 */ + 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7273 "00000000" // /* MW 1 */ + 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7275 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 17 + 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7277 "00001000" // /* MW 3 */ + 7278 "01100001" // /* MW 2 */ + 7279 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 + 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */ + 7281 "00000001" // /* MW 5 */ + 7282 "01000000" // /* MW 4 */ + 7283 "01100000" // /* MW 3 */ + 7284 "00001110" // /* MW 2 */ + 7285 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 7 "superkernels.cpp" 140 14 +.delay_slot + 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7287 "00000001" // /* MW 3 */ + 7288 "00110000" // /* MW 2 */ + 7289 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7299 "00010100" // /* MW 5 */ + 7300 "11001111" // /* MW 4 */ + 7301 "10100010" // /* MW 3 */ + 7302 "00000000" // /* MW 2 */ + 7303 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00110110" // /* MW 3 */ + 7306 "00000110" // /* MW 2 */ + 7307 "00000001" // /* MW 1 */ + 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7309 "00000000" // /* MW 1 */ + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ + 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7313 "00000000" // /* MW 1 */ + 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7315 "00000000" // /* MW 1 */ + 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7317 "00000000" // /* MW 1 */ + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "00001000" // /* MW 3 */ + 7322 "01010001" // /* MW 2 */ + 7323 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 first +.src_ref 1 "io_buffer_main.h" 327 40 first + 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7325 "00100011" // /* MW 5 */ + 7326 "00001110" // /* MW 4 */ + 7327 "11011100" // /* MW 3 */ + 7328 "11000110" // /* MW 2 */ + 7329 "00111100" // /* MW 1 */ + 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7331 "00000000" // /* MW 1 */ + 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7333 "00000000" // /* MW 1 */ + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ + 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7337 "00000000" // /* MW 1 */ + 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7339 "00000000" // /* MW 1 */ + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7343 "00010001" // /* MW 3 */ + 7344 "00100001" // /* MW 2 */ + 7345 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7347 "00011100" // /* MW 13 */ + 7348 "00000000" // /* MW 12 */ + 7349 "00000000" // /* MW 11 */ + 7350 "01010111" // /* MW 10 */ + 7351 "00011010" // /* MW 9 */ + 7352 "01000000" // /* MW 8 */ + 7353 "00000000" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "00100011" // /* MW 5 */ + 7356 "11001100" // /* MW 4 */ + 7357 "11110011" // /* MW 3 */ + 7358 "00101100" // /* MW 2 */ + 7359 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 +.src_ref 7 "superkernels.cpp" 139 6 first +.src_ref 7 "superkernels.cpp" 139 19 + 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7361 "00010000" // /* MW 9 */ + 7362 "00110000" // /* MW 8 */ + 7363 "00110010" // /* MW 7 */ + 7364 "11110011" // /* MW 6 */ + 7365 "00000001" // /* MW 5 */ + 7366 "00000000" // /* MW 4 */ + 7367 "11010000" // /* MW 3 */ + 7368 "11000010" // /* MW 2 */ + 7369 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 19 + 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7371 "00110110" // /* MW 3 */ + 7372 "00000110" // /* MW 2 */ + 7373 "00000110" // /* MW 1 */ + 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7375 "10011001" // /* MW 3 */ + 7376 "11111000" // /* MW 2 */ + 7377 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 + 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7379 "00111001" // /* MW 3 */ + 7380 "11110100" // /* MW 2 */ + 7381 "00000111" // /* MW 1 */ + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 16 + 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7391 "00001000" // /* MW 3 */ + 7392 "01100001" // /* MW 2 */ + 7393 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 6 + 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7395 "00000001" // /* MW 5 */ + 7396 "01000000" // /* MW 4 */ + 7397 "10000000" // /* MW 3 */ + 7398 "00001110" // /* MW 2 */ + 7399 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7407 "00000000" // /* MW 1 */ +.delay_slot + 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00100000" // /* MW 3 */ + 7410 "11010000" // /* MW 2 */ + 7411 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 140 14 first + 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7413 "11000001" // /* MW 11 */ + 7414 "10001000" // /* MW 10 */ + 7415 "10000011" // /* MW 9 */ + 7416 "00000011" // /* MW 8 */ + 7417 "00000000" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00100000" // /* MW 5 */ + 7420 "00000000" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7425 "00011001" // /* MW 3 */ + 7426 "11111111" // /* MW 2 */ + 7427 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 first + 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7429 "00000000" // /* MW 3 */ + 7430 "00101000" // /* MW 2 */ + 7431 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 +.delay_slot + 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7433 "00000001" // /* MW 5 */ + 7434 "00000000" // /* MW 4 */ + 7435 "00000000" // /* MW 3 */ + 7436 "11111000" // /* MW 2 */ + 7437 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot + 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "10001011" // /* MW 3 */ + 7446 "10000100" // /* MW 2 */ +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 7447 "00001111" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7457 "00000001" // /* MW 5 */ + 7458 "00100001" // /* MW 4 */ + 7459 "00000000" // /* MW 3 */ + 7460 "00000000" // /* MW 2 */ + 7461 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000000" // /* MW 3 */ + 7464 "01010000" // /* MW 2 */ + 7465 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "10010000" // /* MW 3 */ + 7468 "01100000" // /* MW 2 */ + 7469 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7471 "00010001" // /* MW 3 */ + 7472 "00000100" // /* MW 2 */ + 7473 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7475 "00010001" // /* MW 3 */ + 7476 "00010100" // /* MW 2 */ + 7477 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7479 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "00101110" // /* MW 3 */ + 7490 "00011100" // /* MW 2 */ + 7491 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7493 "00000001" // /* MW 5 */ + 7494 "00000000" // /* MW 4 */ + 7495 "00000000" // /* MW 3 */ + 7496 "00001000" // /* MW 2 */ + 7497 "00000000" // /* MW 1 */ + 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7499 "00111101" // /* MW 3 */ + 7500 "11111000" // /* MW 2 */ + 7501 "00001111" // /* MW 1 */ + 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7503 "11110101" // /* MW 3 */ + 7504 "11111101" // /* MW 2 */ + 7505 "00001111" // /* MW 1 */ + 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7507 "00000000" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ + 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7511 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7513 "00101001" // /* MW 3 */ + 7514 "00011100" // /* MW 2 */ + 7515 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7517 "00101110" // /* MW 3 */ + 7518 "00011100" // /* MW 2 */ + 7519 "00000001" // /* MW 1 */ + 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7521 "00000000" // /* MW 1 */ + 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7523 "00000000" // /* MW 1 */ + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7527 "00000000" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7533 "00101001" // /* MW 3 */ + 7534 "00011100" // /* MW 2 */ + 7535 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7537 "00101110" // /* MW 3 */ + 7538 "00000100" // /* MW 2 */ + 7539 "00000001" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ + 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7551 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7553 "00101001" // /* MW 3 */ + 7554 "00011100" // /* MW 2 */ + 7555 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7557 "00101110" // /* MW 3 */ + 7558 "00010100" // /* MW 2 */ + 7559 "00000001" // /* MW 1 */ + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7565 "00000001" // /* MW 5 */ + 7566 "00000000" // /* MW 4 */ + 7567 "10010000" // /* MW 3 */ + 7568 "00001110" // /* MW 2 */ + 7569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7575 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00101001" // /* MW 3 */ + 7578 "11011100" // /* MW 2 */ + 7579 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.delay_slot + 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7581 "11000000" // /* MW 3 */ + 7582 "11010000" // /* MW 2 */ + 7583 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7585 "00001000" // /* MW 9 */ + 7586 "11000100" // /* MW 8 */ + 7587 "00110011" // /* MW 7 */ + 7588 "01101000" // /* MW 6 */ + 7589 "00000000" // /* MW 5 */ + 7590 "00000001" // /* MW 4 */ + 7591 "00100000" // /* MW 3 */ + 7592 "00000111" // /* MW 2 */ + 7593 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7595 "01011000" // /* MW 9 */ + 7596 "11111101" // /* MW 8 */ + 7597 "00000111" // /* MW 7 */ + 7598 "00001000" // /* MW 6 */ + 7599 "10000000" // /* MW 5 */ + 7600 "00000001" // /* MW 4 */ + 7601 "10000000" // /* MW 3 */ + 7602 "11100010" // /* MW 2 */ + 7603 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7605 "00000001" // /* MW 9 */ + 7606 "10100000" // /* MW 8 */ + 7607 "00000111" // /* MW 7 */ + 7608 "10000000" // /* MW 6 */ + 7609 "00010001" // /* MW 5 */ + 7610 "00001010" // /* MW 4 */ + 7611 "00100000" // /* MW 3 */ + 7612 "10111110" // /* MW 2 */ + 7613 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7615 "01001010" // /* MW 3 */ + 7616 "00000110" // /* MW 2 */ + 7617 "00000000" // /* MW 1 */ + 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7619 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7621 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "00010111" // /* MW 3 */ + 7624 "00000010" // /* MW 2 */ + 7625 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7627 "00000000" // /* MW 3 */ + 7628 "00101000" // /* MW 2 */ + 7629 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "00000101" // /* MW 3 */ + 7632 "00100010" // /* MW 2 */ + 7633 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7635 "00000001" // /* MW 5 */ + 7636 "00000000" // /* MW 4 */ + 7637 "00000000" // /* MW 3 */ + 7638 "11111000" // /* MW 2 */ + 7639 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7641 "00100111" // /* MW 3 */ + 7642 "01110111" // /* MW 2 */ + 7643 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7645 "10000010" // /* MW 3 */ + 7646 "00100001" // /* MW 2 */ + 7647 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7649 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 81 first +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 +.function_start + 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7665 "01111000" // /* MW 9 */ + 7666 "01100000" // /* MW 8 */ + 7667 "00001000" // /* MW 7 */ + 7668 "11001000" // /* MW 6 */ + 7669 "00010000" // /* MW 5 */ + 7670 "00000000" // /* MW 4 */ + 7671 "10000000" // /* MW 3 */ + 7672 "10000000" // /* MW 2 */ + 7673 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 + 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7675 "00001100" // /* MW 5 */ + 7676 "11000000" // /* MW 4 */ + 7677 "10100000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first + 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7681 "01001010" // /* MW 3 */ + 7682 "00001000" // /* MW 2 */ + 7683 "00000000" // /* MW 1 */ + 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7685 "00000000" // /* MW 1 */ + 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7687 "00000000" // /* MW 1 */ + 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7689 "00000000" // /* MW 1 */ + 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7691 "00000000" // /* MW 1 */ + 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7693 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first + 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7695 "00000000" // /* MW 3 */ + 7696 "00101000" // /* MW 2 */ + 7697 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.delay_slot + 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7699 "00001000" // /* MW 3 */ + 7700 "10000000" // /* MW 2 */ + 7701 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first +.delay_slot + 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7703 "00011101" // /* MW 3 */ + 7704 "00000000" // /* MW 2 */ + 7705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.src_ref 3 "elementwise_binary_broadcasting.h" 83 23 +.delay_slot + 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7707 "11100000" // /* MW 5 */ + 7708 "00001101" // /* MW 4 */ + 7709 "00110001" // /* MW 3 */ + 7710 "10000010" // /* MW 2 */ + 7711 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.delay_slot + 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00011101" // /* MW 3 */ + 7714 "11000100" // /* MW 2 */ + 7715 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 23 +.delay_slot + 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01010001" // /* MW 3 */ + 7718 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7719 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_broadcasting.h" 76 +.src_ref 3 "elementwise_binary_broadcasting.h" 76 first +.function_start + 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7729 "00000001" // /* MW 5 */ + 7730 "00000000" // /* MW 4 */ + 7731 "00000000" // /* MW 3 */ + 7732 "00001000" // /* MW 2 */ + 7733 "00000000" // /* MW 1 */ + 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7735 "00111101" // /* MW 3 */ + 7736 "11111100" // /* MW 2 */ + 7737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first +.no_stack_arguments + 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */ + 7739 "00000001" // /* MW 5 */ + 7740 "00000000" // /* MW 4 */ + 7741 "10100000" // /* MW 3 */ + 7742 "00001110" // /* MW 2 */ + 7743 "00000000" // /* MW 1 */ +.delay_slot + 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7745 "10011101" // /* MW 3 */ + 7746 "11111011" // /* MW 2 */ + 7747 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot + 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7749 "11000000" // /* MW 3 */ + 7750 "01100000" // /* MW 2 */ + 7751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7757 "01100111" // /* MW 3 */ + 7758 "00000001" // /* MW 2 */ + 7759 "00000000" // /* MW 1 */ +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7761 "10011001" // /* MW 3 */ + 7762 "11111011" // /* MW 2 */ + 7763 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7765 "00111001" // /* MW 3 */ + 7766 "11111100" // /* MW 2 */ + 7767 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */ + 7769 "00000000" // /* MW 5 */ + 7770 "00000000" // /* MW 4 */ + 7771 "11111000" // /* MW 3 */ + 7772 "00001110" // /* MW 2 */ + 7773 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7775 "11000000" // /* MW 3 */ + 7776 "01101110" // /* MW 2 */ + 7777 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first +.delay_slot + 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7779 "00000001" // /* MW 5 */ + 7780 "00000000" // /* MW 4 */ + 7781 "00000000" // /* MW 3 */ + 7782 "11111000" // /* MW 2 */ + 7783 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7789 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 89 first +.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 +.function_start + 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7793 "01010001" // /* MW 5 */ + 7794 "00000000" // /* MW 4 */ + 7795 "11010000" // /* MW 3 */ + 7796 "10000010" // /* MW 2 */ + 7797 "01100111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7799 "10000001" // /* MW 5 */ + 7800 "11001101" // /* MW 4 */ + 7801 "01011000" // /* MW 3 */ + 7802 "00000101" // /* MW 2 */ + 7803 "01100001" // /* MW 1 */ + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ + 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7807 "00000000" // /* MW 1 */ + 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7809 "00000000" // /* MW 1 */ + 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7811 "00000000" // /* MW 1 */ + 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7813 "00000000" // /* MW 1 */ + 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7815 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 12 +.src_ref 3 "elementwise_binary_broadcasting.h" 102 35 + 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */ + 7817 "00000001" // /* MW 5 */ + 7818 "01000000" // /* MW 4 */ + 7819 "01100000" // /* MW 3 */ + 7820 "00001111" // /* MW 2 */ + 7821 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 +.delay_slot + 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7823 "11101001" // /* MW 3 */ + 7824 "11000100" // /* MW 2 */ + 7825 "00010111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first +.delay_slot + 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7827 "00101101" // /* MW 3 */ + 7828 "00000000" // /* MW 2 */ + 7829 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first + 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "00110010" // /* MW 3 */ + 7838 "00000100" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ + 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7841 "00000000" // /* MW 1 */ + 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7843 "00000000" // /* MW 1 */ + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ + 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */ + 7847 "00000000" // /* MW 5 */ + 7848 "00000000" // /* MW 4 */ + 7849 "01110000" // /* MW 3 */ + 7850 "00001111" // /* MW 2 */ + 7851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7857 "01110010" // /* MW 3 */ + 7858 "00000101" // /* MW 2 */ + 7859 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7863 "00000000" // /* MW 9 */ + 7864 "00000000" // /* MW 8 */ + 7865 "00000000" // /* MW 7 */ + 7866 "00000000" // /* MW 6 */ + 7867 "00010011" // /* MW 5 */ + 7868 "00000100" // /* MW 4 */ + 7869 "11110000" // /* MW 3 */ + 7870 "00101100" // /* MW 2 */ + 7871 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 +.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first + 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00110010" // /* MW 3 */ + 7874 "00000100" // /* MW 2 */ + 7875 "00000001" // /* MW 1 */ + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ + 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7881 "00000000" // /* MW 1 */ + 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7883 "00000000" // /* MW 1 */ + 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7885 "00000000" // /* MW 1 */ + 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7887 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7889 "01110010" // /* MW 3 */ + 7890 "00000101" // /* MW 2 */ + 7891 "00011000" // /* MW 1 */ + 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7895 "00000000" // /* MW 9 */ + 7896 "00000000" // /* MW 8 */ + 7897 "00000000" // /* MW 7 */ + 7898 "00000000" // /* MW 6 */ + 7899 "00010011" // /* MW 5 */ + 7900 "00000100" // /* MW 4 */ + 7901 "11110001" // /* MW 3 */ + 7902 "00101100" // /* MW 2 */ + 7903 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first + 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7905 "01001000" // /* MW 9 */ + 7906 "00111111" // /* MW 8 */ + 7907 "10111000" // /* MW 7 */ + 7908 "10001010" // /* MW 6 */ + 7909 "00000111" // /* MW 5 */ + 7910 "00000000" // /* MW 4 */ + 7911 "11010000" // /* MW 3 */ + 7912 "10000000" // /* MW 2 */ + 7913 "10001010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7915 "00010000" // /* MW 9 */ + 7916 "10101000" // /* MW 8 */ + 7917 "01111111" // /* MW 7 */ + 7918 "00000100" // /* MW 6 */ + 7919 "00000000" // /* MW 5 */ + 7920 "00000000" // /* MW 4 */ + 7921 "11010000" // /* MW 3 */ + 7922 "10010000" // /* MW 2 */ + 7923 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7925 "11100000" // /* MW 5 */ + 7926 "11111110" // /* MW 4 */ + 7927 "00010110" // /* MW 3 */ + 7928 "00000000" // /* MW 2 */ + 7929 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7931 "11010000" // /* MW 5 */ + 7932 "11001000" // /* MW 4 */ + 7933 "11001000" // /* MW 3 */ + 7934 "00000111" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7937 "00100010" // /* MW 3 */ + 7938 "00000100" // /* MW 2 */ + 7939 "00000100" // /* MW 1 */ + 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7941 "00000000" // /* MW 1 */ + 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first + 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7945 "10101011" // /* MW 3 */ + 7946 "00001000" // /* MW 2 */ + 7947 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 189 20 first + 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7949 "00101011" // /* MW 3 */ + 7950 "00101001" // /* MW 2 */ + 7951 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first + 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7953 "00101011" // /* MW 3 */ + 7954 "00001000" // /* MW 2 */ + 7955 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7957 "00101011" // /* MW 3 */ + 7958 "00101010" // /* MW 2 */ + 7959 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7961 "00000000" // /* MW 5 */ + 7962 "11110101" // /* MW 4 */ + 7963 "01110000" // /* MW 3 */ + 7964 "00010101" // /* MW 2 */ + 7965 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7967 "00111101" // /* MW 7 */ + 7968 "00101000" // /* MW 6 */ + 7969 "00000011" // /* MW 5 */ + 7970 "00000100" // /* MW 4 */ + 7971 "01110000" // /* MW 3 */ + 7972 "00100101" // /* MW 2 */ + 7973 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "00101011" // /* MW 3 */ + 7976 "00001000" // /* MW 2 */ + 7977 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7979 "00111101" // /* MW 7 */ + 7980 "00010000" // /* MW 6 */ + 7981 "00000100" // /* MW 5 */ + 7982 "00000100" // /* MW 4 */ + 7983 "01110000" // /* MW 3 */ + 7984 "01000101" // /* MW 2 */ + 7985 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "10101011" // /* MW 3 */ + 7988 "00001000" // /* MW 2 */ + 7989 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7991 "00111101" // /* MW 7 */ + 7992 "00101000" // /* MW 6 */ + 7993 "00000011" // /* MW 5 */ + 7994 "00000100" // /* MW 4 */ + 7995 "01110000" // /* MW 3 */ + 7996 "00100101" // /* MW 2 */ + 7997 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7999 "00101011" // /* MW 3 */ + 8000 "00001000" // /* MW 2 */ + 8001 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8003 "00111101" // /* MW 13 */ + 8004 "00010000" // /* MW 12 */ + 8005 "00000100" // /* MW 11 */ + 8006 "01010111" // /* MW 10 */ + 8007 "00011010" // /* MW 9 */ + 8008 "01000000" // /* MW 8 */ + 8009 "00000000" // /* MW 7 */ + 8010 "00000000" // /* MW 6 */ + 8011 "01000110" // /* MW 5 */ + 8012 "00111011" // /* MW 4 */ + 8013 "01110100" // /* MW 3 */ + 8014 "01000101" // /* MW 2 */ + 8015 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "10101011" // /* MW 3 */ + 8018 "00001000" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8021 "00111101" // /* MW 11 */ + 8022 "00101000" // /* MW 10 */ + 8023 "00000011" // /* MW 9 */ + 8024 "10001110" // /* MW 8 */ + 8025 "00010001" // /* MW 7 */ + 8026 "00001111" // /* MW 6 */ + 8027 "00100001" // /* MW 5 */ + 8028 "00000000" // /* MW 4 */ + 8029 "01110000" // /* MW 3 */ + 8030 "00100101" // /* MW 2 */ + 8031 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8033 "00000000" // /* MW 15 */ + 8034 "00000000" // /* MW 14 */ + 8035 "01111000" // /* MW 13 */ + 8036 "10100101" // /* MW 12 */ + 8037 "00000001" // /* MW 11 */ + 8038 "00000000" // /* MW 10 */ + 8039 "00000000" // /* MW 9 */ + 8040 "00000000" // /* MW 8 */ + 8041 "01011011" // /* MW 7 */ + 8042 "00000001" // /* MW 6 */ + 8043 "00100000" // /* MW 5 */ + 8044 "00000000" // /* MW 4 */ + 8045 "01110000" // /* MW 3 */ + 8046 "00000101" // /* MW 2 */ + 8047 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8049 "10000001" // /* MW 15 */ + 8050 "00100000" // /* MW 14 */ + 8051 "01111000" // /* MW 13 */ + 8052 "10100101" // /* MW 12 */ + 8053 "00000001" // /* MW 11 */ + 8054 "00000000" // /* MW 10 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "10100011" // /* MW 7 */ + 8058 "00011101" // /* MW 6 */ + 8059 "00100010" // /* MW 5 */ + 8060 "00000000" // /* MW 4 */ + 8061 "01110000" // /* MW 3 */ + 8062 "01000101" // /* MW 2 */ + 8063 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8067 "00111101" // /* MW 7 */ + 8068 "00101000" // /* MW 6 */ + 8069 "00000011" // /* MW 5 */ + 8070 "00000010" // /* MW 4 */ + 8071 "01100000" // /* MW 3 */ + 8072 "11000100" // /* MW 2 */ + 8073 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8075 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8077 "00111101" // /* MW 7 */ + 8078 "00010000" // /* MW 6 */ + 8079 "00000100" // /* MW 5 */ + 8080 "00000010" // /* MW 4 */ + 8081 "01100000" // /* MW 3 */ + 8082 "10110100" // /* MW 2 */ + 8083 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8087 "00000000" // /* MW 5 */ + 8088 "01010000" // /* MW 4 */ + 8089 "01100000" // /* MW 3 */ + 8090 "11000100" // /* MW 2 */ + 8091 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "10100011" // /* MW 3 */ + 8096 "00011101" // /* MW 2 */ + 8097 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8099 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8101 "00100011" // /* MW 3 */ + 8102 "00011110" // /* MW 2 */ + 8103 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8105 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first +.function_start + 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8113 "00000001" // /* MW 5 */ + 8114 "00000000" // /* MW 4 */ + 8115 "00000000" // /* MW 3 */ + 8116 "00010000" // /* MW 2 */ + 8117 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 + 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8119 "01110000" // /* MW 7 */ + 8120 "01100000" // /* MW 6 */ + 8121 "00001010" // /* MW 5 */ + 8122 "00000010" // /* MW 4 */ + 8123 "10110000" // /* MW 3 */ + 8124 "10000111" // /* MW 2 */ + 8125 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 + 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8127 "00000000" // /* MW 7 */ + 8128 "00000011" // /* MW 6 */ + 8129 "10110100" // /* MW 5 */ + 8130 "00000001" // /* MW 4 */ + 8131 "01100000" // /* MW 3 */ + 8132 "10010001" // /* MW 2 */ + 8133 "01010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 + 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8135 "10000001" // /* MW 5 */ + 8136 "00100001" // /* MW 4 */ + 8137 "01011000" // /* MW 3 */ + 8138 "11101101" // /* MW 2 */ + 8139 "01100101" // /* MW 1 */ + 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8141 "11000001" // /* MW 5 */ + 8142 "10101011" // /* MW 4 */ + 8143 "01011000" // /* MW 3 */ + 8144 "11001010" // /* MW 2 */ + 8145 "01110011" // /* MW 1 */ + 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8147 "11000000" // /* MW 3 */ + 8148 "01101000" // /* MW 2 */ + 8149 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8151 "00101011" // /* MW 3 */ + 8152 "00000111" // /* MW 2 */ + 8153 "00001000" // /* MW 1 */ + 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8155 "01010111" // /* MW 3 */ + 8156 "00000110" // /* MW 2 */ + 8157 "00000000" // /* MW 1 */ + 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8159 "00000000" // /* MW 1 */ + 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8161 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first +.no_stack_arguments + 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */ + 8163 "00000001" // /* MW 5 */ + 8164 "00000000" // /* MW 4 */ + 8165 "00111000" // /* MW 3 */ + 8166 "00001111" // /* MW 2 */ + 8167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.delay_slot + 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8169 "11000000" // /* MW 3 */ + 8170 "01010000" // /* MW 2 */ + 8171 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first +.delay_slot + 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8175 "00010010" // /* MW 3 */ + 8176 "00100101" // /* MW 2 */ + 8177 "00010100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8179 "01000001" // /* MW 5 */ + 8180 "11010010" // /* MW 4 */ + 8181 "01000010" // /* MW 3 */ + 8182 "00100000" // /* MW 2 */ + 8183 "10001100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8185 "01110000" // /* MW 7 */ + 8186 "00010000" // /* MW 6 */ + 8187 "00110100" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "01100000" // /* MW 3 */ + 8190 "00101011" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.return_address + 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8193 "00111001" // /* MW 3 */ + 8194 "11111100" // /* MW 2 */ + 8195 "00000111" // /* MW 1 */ + 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8197 "00000000" // /* MW 1 */ + 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8199 "00000000" // /* MW 1 */ + 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8201 "00000000" // /* MW 1 */ + 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8203 "00000000" // /* MW 1 */ + 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8205 "00000000" // /* MW 1 */ + 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8207 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first + 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8209 "00000000" // /* MW 3 */ + 8210 "00101000" // /* MW 2 */ + 8211 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.delay_slot + 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8213 "00000001" // /* MW 5 */ + 8214 "00000000" // /* MW 4 */ + 8215 "00000000" // /* MW 3 */ + 8216 "11110000" // /* MW 2 */ + 8217 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8221 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8223 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8225 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 147 first +.src_ref 7 "superkernels.cpp" 152 6 +.function_start + 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8241 "10000000" // /* MW 5 */ + 8242 "11001000" // /* MW 4 */ + 8243 "11000110" // /* MW 3 */ + 8244 "00000111" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 first + 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8247 "11000001" // /* MW 5 */ + 8248 "10110101" // /* MW 4 */ + 8249 "11011000" // /* MW 3 */ + 8250 "11000010" // /* MW 2 */ + 8251 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 147 + 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8253 "00000001" // /* MW 5 */ + 8254 "00000000" // /* MW 4 */ + 8255 "00000000" // /* MW 3 */ + 8256 "00001000" // /* MW 2 */ + 8257 "00000000" // /* MW 1 */ + 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8259 "01110000" // /* MW 7 */ + 8260 "11010000" // /* MW 6 */ + 8261 "00001011" // /* MW 5 */ + 8262 "00000000" // /* MW 4 */ + 8263 "10110000" // /* MW 3 */ + 8264 "01100011" // /* MW 2 */ + 8265 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 + 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8267 "00010001" // /* MW 9 */ + 8268 "00101000" // /* MW 8 */ + 8269 "00110010" // /* MW 7 */ + 8270 "11110011" // /* MW 6 */ + 8271 "00000001" // /* MW 5 */ + 8272 "00000000" // /* MW 4 */ + 8273 "10110000" // /* MW 3 */ + 8274 "10000010" // /* MW 2 */ + 8275 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "11000000" // /* MW 3 */ + 8278 "11010100" // /* MW 2 */ + 8279 "00011011" // /* MW 1 */ + 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8281 "00000000" // /* MW 1 */ + 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8283 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 +.src_ref 7 "superkernels.cpp" 152 16 + 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */ + 8285 "00000001" // /* MW 5 */ + 8286 "01000000" // /* MW 4 */ + 8287 "10000000" // /* MW 3 */ + 8288 "00010000" // /* MW 2 */ + 8289 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 22 first +.delay_slot + 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8291 "10010000" // /* MW 3 */ + 8292 "01100010" // /* MW 2 */ + 8293 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 30 +.delay_slot + 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8295 "11111011" // /* MW 3 */ + 8296 "01100011" // /* MW 2 */ + 8297 "00010100" // /* MW 1 */ +.delay_slot + 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8299 "00111101" // /* MW 3 */ + 8300 "11110100" // /* MW 2 */ + 8301 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8303 "01110000" // /* MW 7 */ + 8304 "01100000" // /* MW 6 */ + 8305 "00110000" // /* MW 5 */ + 8306 "00000011" // /* MW 4 */ + 8307 "00110000" // /* MW 3 */ + 8308 "11000110" // /* MW 2 */ + 8309 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 +.src_ref 7 "superkernels.cpp" 166 2 +.delay_slot + 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8311 "10000000" // /* MW 5 */ + 8312 "11001001" // /* MW 4 */ + 8313 "11000000" // /* MW 3 */ + 8314 "00000111" // /* MW 2 */ + 8315 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8317 "11010000" // /* MW 5 */ + 8318 "11001000" // /* MW 4 */ + 8319 "11000100" // /* MW 3 */ + 8320 "00000111" // /* MW 2 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8323 "00010000" // /* MW 9 */ + 8324 "00110010" // /* MW 8 */ + 8325 "00110010" // /* MW 7 */ + 8326 "11110001" // /* MW 6 */ + 8327 "00000001" // /* MW 5 */ + 8328 "00000000" // /* MW 4 */ + 8329 "11100000" // /* MW 3 */ + 8330 "11000000" // /* MW 2 */ + 8331 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8333 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */ + 8335 "00000001" // /* MW 5 */ + 8336 "00000000" // /* MW 4 */ + 8337 "00011000" // /* MW 3 */ + 8338 "00001111" // /* MW 2 */ + 8339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "00110001" // /* MW 3 */ + 8346 "00100000" // /* MW 2 */ + 8347 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "00000101" // /* MW 3 */ + 8350 "00100000" // /* MW 2 */ + 8351 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8353 "00000000" // /* MW 15 */ + 8354 "00000000" // /* MW 14 */ + 8355 "01111000" // /* MW 13 */ + 8356 "10100101" // /* MW 12 */ + 8357 "00000001" // /* MW 11 */ + 8358 "00000000" // /* MW 10 */ + 8359 "00000000" // /* MW 9 */ + 8360 "10000000" // /* MW 8 */ + 8361 "00010001" // /* MW 7 */ + 8362 "00000110" // /* MW 6 */ + 8363 "00100010" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11110000" // /* MW 3 */ + 8366 "00101100" // /* MW 2 */ + 8367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 +.return_address + 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8369 "10100000" // /* MW 5 */ + 8370 "11001000" // /* MW 4 */ + 8371 "11000100" // /* MW 3 */ + 8372 "00000111" // /* MW 2 */ + 8373 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 first +.src_ref 7 "superkernels.cpp" 159 65 + 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8375 "00010000" // /* MW 9 */ + 8376 "01100000" // /* MW 8 */ + 8377 "00110010" // /* MW 7 */ + 8378 "11110001" // /* MW 6 */ + 8379 "00000001" // /* MW 5 */ + 8380 "00000000" // /* MW 4 */ + 8381 "11010000" // /* MW 3 */ + 8382 "11000010" // /* MW 2 */ + 8383 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 +.src_ref 7 "superkernels.cpp" 159 65 +.src_ref 7 "superkernels.cpp" 166 2 + 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8385 "00010000" // /* MW 9 */ + 8386 "01100000" // /* MW 8 */ + 8387 "00110010" // /* MW 7 */ + 8388 "11110001" // /* MW 6 */ + 8389 "00000001" // /* MW 5 */ + 8390 "00000000" // /* MW 4 */ + 8391 "11010000" // /* MW 3 */ + 8392 "11000110" // /* MW 2 */ + 8393 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 first +.src_ref 7 "superkernels.cpp" 159 16 +.src_ref 7 "superkernels.cpp" 164 47 + 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8395 "00010000" // /* MW 9 */ + 8396 "00101010" // /* MW 8 */ + 8397 "10110010" // /* MW 7 */ + 8398 "11110000" // /* MW 6 */ + 8399 "00000001" // /* MW 5 */ + 8400 "00000000" // /* MW 4 */ + 8401 "01010000" // /* MW 3 */ + 8402 "11001011" // /* MW 2 */ + 8403 "01001010" // /* MW 1 */ + 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8405 "00000000" // /* MW 1 */ + 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8407 "00000000" // /* MW 1 */ + 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */ + 8409 "00000000" // /* MW 5 */ + 8410 "00000000" // /* MW 4 */ + 8411 "10001000" // /* MW 3 */ + 8412 "00010000" // /* MW 2 */ + 8413 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 +.delay_slot + 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8415 "11000000" // /* MW 5 */ + 8416 "11001000" // /* MW 4 */ + 8417 "11000000" // /* MW 3 */ + 8418 "00000111" // /* MW 2 */ + 8419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8421 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 27 first +.delay_slot + 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001111" // /* MW 3 */ + 8424 "01100001" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 first +.delay_slot + 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8427 "10100011" // /* MW 5 */ + 8428 "00001100" // /* MW 4 */ + 8429 "11110000" // /* MW 3 */ + 8430 "00101100" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 16 first +.delay_slot + 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8433 "00000000" // /* MW 15 */ + 8434 "00000000" // /* MW 14 */ + 8435 "01111000" // /* MW 13 */ + 8436 "10100101" // /* MW 12 */ + 8437 "00000001" // /* MW 11 */ + 8438 "00000000" // /* MW 10 */ + 8439 "00000000" // /* MW 9 */ + 8440 "10000000" // /* MW 8 */ + 8441 "00010001" // /* MW 7 */ + 8442 "00000110" // /* MW 6 */ + 8443 "00100001" // /* MW 5 */ + 8444 "00000000" // /* MW 4 */ + 8445 "11110000" // /* MW 3 */ + 8446 "00101100" // /* MW 2 */ + 8447 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 164 47 +.src_ref 7 "superkernels.cpp" 166 2 + 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "00010000" // /* MW 13 */ + 8452 "00101010" // /* MW 12 */ + 8453 "10110010" // /* MW 11 */ + 8454 "11110000" // /* MW 10 */ + 8455 "00000001" // /* MW 9 */ + 8456 "00000000" // /* MW 8 */ + 8457 "10001011" // /* MW 7 */ + 8458 "10000000" // /* MW 6 */ + 8459 "00100010" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8465 "00000000" // /* MW 7 */ + 8466 "11000011" // /* MW 6 */ + 8467 "10110011" // /* MW 5 */ + 8468 "00000011" // /* MW 4 */ + 8469 "01100000" // /* MW 3 */ + 8470 "10010001" // /* MW 2 */ + 8471 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8473 "00010000" // /* MW 9 */ + 8474 "00100000" // /* MW 8 */ + 8475 "00110010" // /* MW 7 */ + 8476 "11110000" // /* MW 6 */ + 8477 "00000001" // /* MW 5 */ + 8478 "00000000" // /* MW 4 */ + 8479 "11010000" // /* MW 3 */ + 8480 "11101110" // /* MW 2 */ + 8481 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8483 "00010110" // /* MW 3 */ + 8484 "11111110" // /* MW 2 */ + 8485 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8487 "00110110" // /* MW 3 */ + 8488 "11111110" // /* MW 2 */ + 8489 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8491 "01010110" // /* MW 3 */ + 8492 "01000110" // /* MW 2 */ + 8493 "00000111" // /* MW 1 */ + 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8495 "00000000" // /* MW 1 */ + 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8497 "00000000" // /* MW 1 */ + 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8499 "00000000" // /* MW 1 */ + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ + 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8503 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "00000010" // /* MW 3 */ + 8506 "01100001" // /* MW 2 */ + 8507 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8509 "00010001" // /* MW 3 */ + 8510 "00000110" // /* MW 2 */ + 8511 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8513 "11111101" // /* MW 3 */ + 8514 "11100000" // /* MW 2 */ + 8515 "00010111" // /* MW 1 */ + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001000" // /* MW 3 */ + 8524 "10010011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 + 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8527 "10000001" // /* MW 5 */ + 8528 "10101101" // /* MW 4 */ + 8529 "10100111" // /* MW 3 */ + 8530 "00000000" // /* MW 2 */ + 8531 "00000100" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first + 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8537 "00110110" // /* MW 3 */ + 8538 "00000110" // /* MW 2 */ + 8539 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8541 "10000001" // /* MW 5 */ + 8542 "11011101" // /* MW 4 */ + 8543 "11011100" // /* MW 3 */ + 8544 "11001010" // /* MW 2 */ + 8545 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 47 first + 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8547 "01110110" // /* MW 3 */ + 8548 "00000110" // /* MW 2 */ + 8549 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8551 "10011110" // /* MW 3 */ + 8552 "01011100" // /* MW 2 */ + 8553 "00000111" // /* MW 1 */ + 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 166 2 first +.no_stack_arguments + 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */ + 8557 "00000001" // /* MW 5 */ + 8558 "00000000" // /* MW 4 */ + 8559 "11011000" // /* MW 3 */ + 8560 "00001111" // /* MW 2 */ + 8561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first +.delay_slot + 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8565 "00000111" // /* MW 3 */ + 8566 "01100010" // /* MW 2 */ + 8567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.delay_slot + 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8569 "00110001" // /* MW 3 */ + 8570 "00000110" // /* MW 2 */ + 8571 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 first +.delay_slot + 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8573 "00001101" // /* MW 3 */ + 8574 "11100001" // /* MW 2 */ + 8575 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 +.delay_slot + 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8577 "00000000" // /* MW 15 */ + 8578 "00000000" // /* MW 14 */ + 8579 "10101000" // /* MW 13 */ + 8580 "10100000" // /* MW 12 */ + 8581 "00110100" // /* MW 11 */ + 8582 "00000000" // /* MW 10 */ + 8583 "00000000" // /* MW 9 */ + 8584 "00000000" // /* MW 8 */ + 8585 "01011011" // /* MW 7 */ + 8586 "00000001" // /* MW 6 */ + 8587 "00100000" // /* MW 5 */ + 8588 "00000000" // /* MW 4 */ + 8589 "11110000" // /* MW 3 */ + 8590 "00101100" // /* MW 2 */ + 8591 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 +.src_ref 7 "superkernels.cpp" 169 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8593 "00010000" // /* MW 9 */ + 8594 "00100000" // /* MW 8 */ + 8595 "00110010" // /* MW 7 */ + 8596 "11110011" // /* MW 6 */ + 8597 "00000001" // /* MW 5 */ + 8598 "00000000" // /* MW 4 */ + 8599 "11010000" // /* MW 3 */ + 8600 "11000110" // /* MW 2 */ + 8601 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8603 "00000101" // /* MW 3 */ + 8604 "00100000" // /* MW 2 */ + 8605 "00010000" // /* MW 1 */ + 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8607 "00000000" // /* MW 1 */ + 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ + 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8617 "00001000" // /* MW 3 */ + 8618 "01010001" // /* MW 2 */ + 8619 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8621 "00010000" // /* MW 9 */ + 8622 "00110000" // /* MW 8 */ + 8623 "00110010" // /* MW 7 */ + 8624 "11110001" // /* MW 6 */ + 8625 "00000001" // /* MW 5 */ + 8626 "00000000" // /* MW 4 */ + 8627 "11010000" // /* MW 3 */ + 8628 "11001110" // /* MW 2 */ + 8629 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 first + 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "00110110" // /* MW 3 */ + 8632 "00000110" // /* MW 2 */ + 8633 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 + 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8635 "01010110" // /* MW 3 */ + 8636 "00000110" // /* MW 2 */ + 8637 "00000010" // /* MW 1 */ + 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8639 "00000000" // /* MW 1 */ + 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8641 "00000000" // /* MW 1 */ + 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8643 "00000000" // /* MW 1 */ + 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8645 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00110001" // /* MW 3 */ + 8648 "00100001" // /* MW 2 */ + 8649 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8651 "00010001" // /* MW 3 */ + 8652 "11100110" // /* MW 2 */ + 8653 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 16 first + 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8655 "00101000" // /* MW 3 */ + 8656 "01100001" // /* MW 2 */ + 8657 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 + 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */ + 8659 "00000001" // /* MW 5 */ + 8660 "01000000" // /* MW 4 */ + 8661 "11111000" // /* MW 3 */ + 8662 "00010000" // /* MW 2 */ + 8663 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8673 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 + 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8675 "00000001" // /* MW 3 */ + 8676 "00100000" // /* MW 2 */ + 8677 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 first + 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8679 "00000000" // /* MW 9 */ + 8680 "00000000" // /* MW 8 */ + 8681 "00000000" // /* MW 7 */ + 8682 "10000000" // /* MW 6 */ + 8683 "00010001" // /* MW 5 */ + 8684 "00000110" // /* MW 4 */ + 8685 "11110110" // /* MW 3 */ + 8686 "00101100" // /* MW 2 */ + 8687 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 171 + 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8689 "00111001" // /* MW 3 */ + 8690 "11110100" // /* MW 2 */ + 8691 "00000111" // /* MW 1 */ + 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8693 "00011001" // /* MW 3 */ + 8694 "11111011" // /* MW 2 */ + 8695 "00000111" // /* MW 1 */ + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ + 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8699 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "11110001" // /* MW 3 */ + 8704 "11111101" // /* MW 2 */ + 8705 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8709 "00000000" // /* MW 3 */ + 8710 "00101000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8713 "10100000" // /* MW 3 */ + 8714 "01100111" // /* MW 2 */ + 8715 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 +.delay_slot + 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8717 "00000001" // /* MW 5 */ + 8718 "00000000" // /* MW 4 */ + 8719 "00000000" // /* MW 3 */ + 8720 "11111000" // /* MW 2 */ + 8721 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 8727 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 3 "elementwise_unary.h" 124 first +.src_ref 3 "elementwise_unary.h" 126 24 first +.function_start + 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8737 "00101110" // /* MW 3 */ + 8738 "00011100" // /* MW 2 */ + 8739 "00000001" // /* MW 1 */ + 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8741 "00000000" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8745 "00000000" // /* MW 1 */ + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 126 22 first + 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8753 "00101001" // /* MW 3 */ + 8754 "00011100" // /* MW 2 */ + 8755 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 24 first + 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8757 "00101110" // /* MW 3 */ + 8758 "00011100" // /* MW 2 */ + 8759 "00000001" // /* MW 1 */ + 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8761 "00000000" // /* MW 1 */ + 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8763 "00000000" // /* MW 1 */ + 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8765 "00000000" // /* MW 1 */ + 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8767 "00000000" // /* MW 1 */ + 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8769 "00000000" // /* MW 1 */ + 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8771 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 22 + 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8773 "00101001" // /* MW 3 */ + 8774 "00011100" // /* MW 2 */ + 8775 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 24 first + 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8777 "00101110" // /* MW 3 */ + 8778 "01101100" // /* MW 2 */ + 8779 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8781 "00010010" // /* MW 3 */ + 8782 "00000100" // /* MW 2 */ + 8783 "00000001" // /* MW 1 */ + 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8785 "00000000" // /* MW 1 */ + 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8787 "00000000" // /* MW 1 */ + 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8789 "00000000" // /* MW 1 */ + 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8791 "00000000" // /* MW 1 */ + 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8793 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 22 first + 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8795 "00101001" // /* MW 3 */ + 8796 "01101100" // /* MW 2 */ + 8797 "00001000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010111" // /* MW 3 */ + 8800 "00000100" // /* MW 2 */ + 8801 "00000000" // /* MW 1 */ + 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8803 "00000000" // /* MW 1 */ + 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8805 "00000000" // /* MW 1 */ + 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8807 "00000000" // /* MW 1 */ + 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8809 "00000000" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 first + 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "00010010" // /* MW 3 */ + 8816 "00100100" // /* MW 2 */ + 8817 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 + 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8819 "00010111" // /* MW 3 */ + 8820 "00010100" // /* MW 2 */ + 8821 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 130 4 first + 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8823 "00000000" // /* MW 3 */ + 8824 "00101000" // /* MW 2 */ + 8825 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8829 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 8835 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 136 first +.src_ref 3 "elementwise_unary.h" 142 37 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 171 19 +.function_start + 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8849 "00010000" // /* MW 11 */ + 8850 "10001000" // /* MW 10 */ + 8851 "01111001" // /* MW 9 */ + 8852 "00001000" // /* MW 8 */ + 8853 "00000000" // /* MW 7 */ + 8854 "00000000" // /* MW 6 */ + 8855 "01101000" // /* MW 5 */ + 8856 "00111010" // /* MW 4 */ + 8857 "10000000" // /* MW 3 */ + 8858 "11000010" // /* MW 2 */ + 8859 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 142 78 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 190 19 first + 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8861 "00010000" // /* MW 11 */ + 8862 "10100000" // /* MW 10 */ + 8863 "10111001" // /* MW 9 */ + 8864 "00001001" // /* MW 8 */ + 8865 "00000000" // /* MW 7 */ + 8866 "00000000" // /* MW 6 */ + 8867 "01101000" // /* MW 5 */ + 8868 "00111001" // /* MW 4 */ + 8869 "00000000" // /* MW 3 */ + 8870 "01010001" // /* MW 2 */ + 8871 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 + 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8873 "11000000" // /* MW 3 */ + 8874 "00010100" // /* MW 2 */ + 8875 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 first + 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8877 "00010000" // /* MW 3 */ + 8878 "01100000" // /* MW 2 */ + 8879 "00011010" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 103 16 first + 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8881 "01010010" // /* MW 3 */ + 8882 "00011100" // /* MW 2 */ + 8883 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 142 37 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8885 "00010110" // /* MW 3 */ + 8886 "00000000" // /* MW 2 */ + 8887 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 8 "clip_impl.h" 104 16 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8889 "01101000" // /* MW 5 */ + 8890 "00111010" // /* MW 4 */ + 8891 "01010000" // /* MW 3 */ + 8892 "10000110" // /* MW 2 */ + 8893 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8895 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8901 "10110100" // /* MW 3 */ + 8902 "00011100" // /* MW 2 */ + 8903 "00111000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8905 "01110010" // /* MW 3 */ + 8906 "00001001" // /* MW 2 */ + 8907 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 142 78 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8909 "01111000" // /* MW 9 */ + 8910 "00110110" // /* MW 8 */ + 8911 "01010000" // /* MW 7 */ + 8912 "11101101" // /* MW 6 */ + 8913 "00011000" // /* MW 5 */ + 8914 "00000001" // /* MW 4 */ + 8915 "01101000" // /* MW 3 */ + 8916 "00111010" // /* MW 2 */ + 8917 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 154 8 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8919 "11111110" // /* MW 3 */ + 8920 "01111000" // /* MW 2 */ + 8921 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8923 "01110010" // /* MW 3 */ + 8924 "10000101" // /* MW 2 */ + 8925 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8927 "10101100" // /* MW 3 */ + 8928 "10101000" // /* MW 2 */ + 8929 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8931 "01100000" // /* MW 13 */ + 8932 "00101011" // /* MW 12 */ + 8933 "00000000" // /* MW 11 */ + 8934 "11001111" // /* MW 10 */ + 8935 "00000110" // /* MW 9 */ + 8936 "00110001" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "01101000" // /* MW 5 */ + 8940 "00111001" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8945 "00000000" // /* MW 15 */ + 8946 "00000000" // /* MW 14 */ + 8947 "01111000" // /* MW 13 */ + 8948 "01010110" // /* MW 12 */ + 8949 "11011000" // /* MW 11 */ + 8950 "00000001" // /* MW 10 */ + 8951 "00000000" // /* MW 9 */ + 8952 "00000000" // /* MW 8 */ + 8953 "11010011" // /* MW 7 */ + 8954 "00011100" // /* MW 6 */ + 8955 "00100001" // /* MW 5 */ + 8956 "00000000" // /* MW 4 */ + 8957 "11110000" // /* MW 3 */ + 8958 "00101100" // /* MW 2 */ + 8959 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8961 "00000000" // /* MW 15 */ + 8962 "00000000" // /* MW 14 */ + 8963 "01111000" // /* MW 13 */ + 8964 "00110110" // /* MW 12 */ + 8965 "01010000" // /* MW 11 */ + 8966 "00000001" // /* MW 10 */ + 8967 "00000000" // /* MW 9 */ + 8968 "00000000" // /* MW 8 */ + 8969 "01011011" // /* MW 7 */ + 8970 "00000001" // /* MW 6 */ + 8971 "00100000" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "11110000" // /* MW 3 */ + 8974 "00101100" // /* MW 2 */ + 8975 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8977 "00000000" // /* MW 15 */ + 8978 "00000000" // /* MW 14 */ + 8979 "01111000" // /* MW 13 */ + 8980 "01010110" // /* MW 12 */ + 8981 "11010100" // /* MW 11 */ + 8982 "00000000" // /* MW 10 */ + 8983 "00000000" // /* MW 9 */ + 8984 "00000000" // /* MW 8 */ + 8985 "11010011" // /* MW 7 */ + 8986 "00011101" // /* MW 6 */ + 8987 "01101001" // /* MW 5 */ + 8988 "00111010" // /* MW 4 */ + 8989 "11110000" // /* MW 3 */ + 8990 "00101100" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8993 "00000000" // /* MW 15 */ + 8994 "00000000" // /* MW 14 */ + 8995 "01111000" // /* MW 13 */ + 8996 "00110110" // /* MW 12 */ + 8997 "10001000" // /* MW 11 */ + 8998 "00000001" // /* MW 10 */ + 8999 "00000000" // /* MW 9 */ + 9000 "00000000" // /* MW 8 */ + 9001 "01011011" // /* MW 7 */ + 9002 "00000001" // /* MW 6 */ + 9003 "01101000" // /* MW 5 */ + 9004 "00111001" // /* MW 4 */ + 9005 "11110000" // /* MW 3 */ + 9006 "00101100" // /* MW 2 */ + 9007 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9009 "00000000" // /* MW 15 */ + 9010 "00000000" // /* MW 14 */ + 9011 "01111000" // /* MW 13 */ + 9012 "01010110" // /* MW 12 */ + 9013 "11011000" // /* MW 11 */ + 9014 "00000001" // /* MW 10 */ + 9015 "00000000" // /* MW 9 */ + 9016 "00000000" // /* MW 8 */ + 9017 "11010011" // /* MW 7 */ + 9018 "00011100" // /* MW 6 */ + 9019 "00100001" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "11110000" // /* MW 3 */ + 9022 "00101100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.src_ref 4 "max_min.hpp" 20 104 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9025 "00000000" // /* MW 15 */ + 9026 "00000000" // /* MW 14 */ + 9027 "01111000" // /* MW 13 */ + 9028 "00110110" // /* MW 12 */ + 9029 "01010000" // /* MW 11 */ + 9030 "00000001" // /* MW 10 */ + 9031 "00000000" // /* MW 9 */ + 9032 "00000000" // /* MW 8 */ + 9033 "01011011" // /* MW 7 */ + 9034 "00000001" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9041 "01110000" // /* MW 7 */ + 9042 "01010110" // /* MW 6 */ + 9043 "11010100" // /* MW 5 */ + 9044 "00000000" // /* MW 4 */ + 9045 "01100000" // /* MW 3 */ + 9046 "10111010" // /* MW 2 */ + 9047 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9049 "01101100" // /* MW 3 */ + 9050 "00010000" // /* MW 2 */ + 9051 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first + 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9053 "01110000" // /* MW 7 */ + 9054 "01010110" // /* MW 6 */ + 9055 "11011000" // /* MW 5 */ + 9056 "00000001" // /* MW 4 */ + 9057 "01100000" // /* MW 3 */ + 9058 "10011010" // /* MW 2 */ + 9059 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 158 4 first + 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9061 "11011001" // /* MW 5 */ + 9062 "01000000" // /* MW 4 */ + 9063 "00000101" // /* MW 3 */ + 9064 "00000000" // /* MW 2 */ + 9065 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9067 "01110000" // /* MW 7 */ + 9068 "01010110" // /* MW 6 */ + 9069 "11010100" // /* MW 5 */ + 9070 "00000000" // /* MW 4 */ + 9071 "01100000" // /* MW 3 */ + 9072 "10111010" // /* MW 2 */ + 9073 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9075 "01101100" // /* MW 3 */ + 9076 "00010000" // /* MW 2 */ + 9077 "00011011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.delay_slot + 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9079 "10101100" // /* MW 3 */ + 9080 "10110000" // /* MW 2 */ + 9081 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.delay_slot + 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9083 "11010011" // /* MW 3 */ + 9084 "00011100" // /* MW 2 */ + 9085 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9087 "11010011" // /* MW 3 */ + 9088 "00011101" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 9089 "00001001" // /* MW 1 */ +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 176 first +.src_ref 7 "superkernels.cpp" 181 6 +.function_start + 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9105 "10000000" // /* MW 5 */ + 9106 "11001000" // /* MW 4 */ + 9107 "11000110" // /* MW 3 */ + 9108 "00000111" // /* MW 2 */ + 9109 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 first + 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9111 "11000001" // /* MW 5 */ + 9112 "10110101" // /* MW 4 */ + 9113 "11011000" // /* MW 3 */ + 9114 "11000010" // /* MW 2 */ + 9115 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 176 + 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9117 "00000001" // /* MW 5 */ + 9118 "00000000" // /* MW 4 */ + 9119 "00000000" // /* MW 3 */ + 9120 "00001000" // /* MW 2 */ + 9121 "00000000" // /* MW 1 */ + 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9123 "01110000" // /* MW 7 */ + 9124 "11010000" // /* MW 6 */ + 9125 "00001011" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "10110000" // /* MW 3 */ + 9128 "01100011" // /* MW 2 */ + 9129 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 + 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9131 "00010001" // /* MW 9 */ + 9132 "00101000" // /* MW 8 */ + 9133 "00110010" // /* MW 7 */ + 9134 "11110011" // /* MW 6 */ + 9135 "00000001" // /* MW 5 */ + 9136 "00000000" // /* MW 4 */ + 9137 "10110000" // /* MW 3 */ + 9138 "10000010" // /* MW 2 */ + 9139 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9141 "11000000" // /* MW 3 */ + 9142 "11010100" // /* MW 2 */ + 9143 "00011011" // /* MW 1 */ + 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9145 "00000000" // /* MW 1 */ + 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9147 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 +.src_ref 7 "superkernels.cpp" 181 16 + 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */ + 9149 "00000001" // /* MW 5 */ + 9150 "01000000" // /* MW 4 */ + 9151 "00110000" // /* MW 3 */ + 9152 "00010010" // /* MW 2 */ + 9153 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 22 first +.delay_slot + 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "10010000" // /* MW 3 */ + 9156 "01100010" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 30 +.delay_slot + 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "11111011" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010100" // /* MW 1 */ +.delay_slot + 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00111101" // /* MW 3 */ + 9164 "11110100" // /* MW 2 */ + 9165 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9167 "01110000" // /* MW 7 */ + 9168 "01100000" // /* MW 6 */ + 9169 "00110000" // /* MW 5 */ + 9170 "00000011" // /* MW 4 */ + 9171 "00110000" // /* MW 3 */ + 9172 "11000110" // /* MW 2 */ + 9173 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 +.src_ref 7 "superkernels.cpp" 195 2 +.delay_slot + 9174 "01000100" // MOVXM p0, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9175 "10000000" // /* MW 5 */ + 9176 "11001011" // /* MW 4 */ + 9177 "11000000" // /* MW 3 */ + 9178 "00000111" // /* MW 2 */ + 9179 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9181 "11010000" // /* MW 5 */ + 9182 "11001000" // /* MW 4 */ + 9183 "11000100" // /* MW 3 */ + 9184 "00000111" // /* MW 2 */ + 9185 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9187 "00010000" // /* MW 9 */ + 9188 "00110010" // /* MW 8 */ + 9189 "00110010" // /* MW 7 */ + 9190 "11110001" // /* MW 6 */ + 9191 "00000001" // /* MW 5 */ + 9192 "00000000" // /* MW 4 */ + 9193 "11100000" // /* MW 3 */ + 9194 "11000000" // /* MW 2 */ + 9195 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */ + 9199 "00000001" // /* MW 5 */ + 9200 "00000000" // /* MW 4 */ + 9201 "00010000" // /* MW 3 */ + 9202 "00010001" // /* MW 2 */ + 9203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9205 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9207 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00110001" // /* MW 3 */ + 9210 "00100000" // /* MW 2 */ + 9211 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00000101" // /* MW 3 */ + 9214 "00100000" // /* MW 2 */ + 9215 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9217 "00000000" // /* MW 15 */ + 9218 "00000000" // /* MW 14 */ + 9219 "01111000" // /* MW 13 */ + 9220 "10100101" // /* MW 12 */ + 9221 "00000001" // /* MW 11 */ + 9222 "00000000" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "10000000" // /* MW 8 */ + 9225 "00010001" // /* MW 7 */ + 9226 "00000110" // /* MW 6 */ + 9227 "00100010" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 +.return_address + 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10100000" // /* MW 5 */ + 9234 "11001000" // /* MW 4 */ + 9235 "11000100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 first +.src_ref 7 "superkernels.cpp" 188 43 + 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "11100000" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110001" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11000010" // /* MW 2 */ + 9247 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 +.src_ref 7 "superkernels.cpp" 188 43 +.src_ref 7 "superkernels.cpp" 195 2 + 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "11100000" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110001" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000110" // /* MW 2 */ + 9257 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 first +.src_ref 7 "superkernels.cpp" 188 16 +.src_ref 7 "superkernels.cpp" 193 47 + 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9259 "00010000" // /* MW 9 */ + 9260 "00101010" // /* MW 8 */ + 9261 "10110010" // /* MW 7 */ + 9262 "11110000" // /* MW 6 */ + 9263 "00000001" // /* MW 5 */ + 9264 "00000000" // /* MW 4 */ + 9265 "01010000" // /* MW 3 */ + 9266 "11001011" // /* MW 2 */ + 9267 "01001000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ + 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */ + 9273 "00000000" // /* MW 5 */ + 9274 "00000000" // /* MW 4 */ + 9275 "00111000" // /* MW 3 */ + 9276 "00010010" // /* MW 2 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 +.delay_slot + 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "11000000" // /* MW 5 */ + 9280 "11001000" // /* MW 4 */ + 9281 "11000000" // /* MW 3 */ + 9282 "00000111" // /* MW 2 */ + 9283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 27 first +.delay_slot + 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9287 "00001111" // /* MW 3 */ + 9288 "01100001" // /* MW 2 */ + 9289 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 first +.delay_slot + 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9291 "10100011" // /* MW 5 */ + 9292 "00001100" // /* MW 4 */ + 9293 "11110000" // /* MW 3 */ + 9294 "00101100" // /* MW 2 */ + 9295 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 16 first +.delay_slot + 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9297 "00000000" // /* MW 15 */ + 9298 "00000000" // /* MW 14 */ + 9299 "01111000" // /* MW 13 */ + 9300 "10100101" // /* MW 12 */ + 9301 "00000001" // /* MW 11 */ + 9302 "00000000" // /* MW 10 */ + 9303 "00000000" // /* MW 9 */ + 9304 "10000000" // /* MW 8 */ + 9305 "00010001" // /* MW 7 */ + 9306 "00000110" // /* MW 6 */ + 9307 "00100001" // /* MW 5 */ + 9308 "00000000" // /* MW 4 */ + 9309 "11110000" // /* MW 3 */ + 9310 "00101100" // /* MW 2 */ + 9311 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 193 47 +.src_ref 7 "superkernels.cpp" 195 2 + 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9313 "00000000" // /* MW 15 */ + 9314 "00000000" // /* MW 14 */ + 9315 "00010000" // /* MW 13 */ + 9316 "00101010" // /* MW 12 */ + 9317 "10110010" // /* MW 11 */ + 9318 "11110000" // /* MW 10 */ + 9319 "00000001" // /* MW 9 */ + 9320 "00000000" // /* MW 8 */ + 9321 "10001011" // /* MW 7 */ + 9322 "10000000" // /* MW 6 */ + 9323 "00100010" // /* MW 5 */ + 9324 "00000000" // /* MW 4 */ + 9325 "11110000" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "00000000" // /* MW 7 */ + 9330 "11000011" // /* MW 6 */ + 9331 "10110011" // /* MW 5 */ + 9332 "00000011" // /* MW 4 */ + 9333 "01100000" // /* MW 3 */ + 9334 "10010001" // /* MW 2 */ + 9335 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9337 "00010000" // /* MW 9 */ + 9338 "00100000" // /* MW 8 */ + 9339 "00110010" // /* MW 7 */ + 9340 "11110000" // /* MW 6 */ + 9341 "00000001" // /* MW 5 */ + 9342 "00000000" // /* MW 4 */ + 9343 "11010000" // /* MW 3 */ + 9344 "11101110" // /* MW 2 */ + 9345 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9347 "00010110" // /* MW 3 */ + 9348 "11111110" // /* MW 2 */ + 9349 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9351 "00110110" // /* MW 3 */ + 9352 "11111110" // /* MW 2 */ + 9353 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9355 "01010110" // /* MW 3 */ + 9356 "01000110" // /* MW 2 */ + 9357 "00000111" // /* MW 1 */ + 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9359 "00000000" // /* MW 1 */ + 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9361 "00000000" // /* MW 1 */ + 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9363 "00000000" // /* MW 1 */ + 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9365 "00000000" // /* MW 1 */ + 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9367 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9369 "00000010" // /* MW 3 */ + 9370 "01100001" // /* MW 2 */ + 9371 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9373 "00010001" // /* MW 3 */ + 9374 "00000110" // /* MW 2 */ + 9375 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9377 "11111101" // /* MW 3 */ + 9378 "11100000" // /* MW 2 */ + 9379 "00010111" // /* MW 1 */ + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ + 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9383 "00000000" // /* MW 1 */ + 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9385 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9387 "00001000" // /* MW 3 */ + 9388 "10010011" // /* MW 2 */ + 9389 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 + 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9391 "10000001" // /* MW 5 */ + 9392 "10101101" // /* MW 4 */ + 9393 "10100111" // /* MW 3 */ + 9394 "00000000" // /* MW 2 */ + 9395 "00000100" // /* MW 1 */ + 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9397 "00000000" // /* MW 1 */ + 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first + 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "00110110" // /* MW 3 */ + 9402 "00000110" // /* MW 2 */ + 9403 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9405 "10000001" // /* MW 5 */ + 9406 "11011101" // /* MW 4 */ + 9407 "11011100" // /* MW 3 */ + 9408 "11001010" // /* MW 2 */ + 9409 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 47 first + 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9411 "01110110" // /* MW 3 */ + 9412 "00000110" // /* MW 2 */ + 9413 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9415 "10011110" // /* MW 3 */ + 9416 "01011100" // /* MW 2 */ + 9417 "00000111" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 195 2 first +.no_stack_arguments + 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */ + 9421 "00000001" // /* MW 5 */ + 9422 "00000000" // /* MW 4 */ + 9423 "01001000" // /* MW 3 */ + 9424 "00010001" // /* MW 2 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9427 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first +.delay_slot + 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9429 "00000111" // /* MW 3 */ + 9430 "01100010" // /* MW 2 */ + 9431 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.delay_slot + 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9433 "00110001" // /* MW 3 */ + 9434 "00000110" // /* MW 2 */ + 9435 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 first +.delay_slot + 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9437 "00001101" // /* MW 3 */ + 9438 "11100001" // /* MW 2 */ + 9439 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 +.delay_slot + 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9441 "00000000" // /* MW 15 */ + 9442 "00000000" // /* MW 14 */ + 9443 "10101000" // /* MW 13 */ + 9444 "10100000" // /* MW 12 */ + 9445 "00110100" // /* MW 11 */ + 9446 "00000000" // /* MW 10 */ + 9447 "00000000" // /* MW 9 */ + 9448 "00000000" // /* MW 8 */ + 9449 "01011011" // /* MW 7 */ + 9450 "00000001" // /* MW 6 */ + 9451 "00100000" // /* MW 5 */ + 9452 "00000000" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 +.src_ref 7 "superkernels.cpp" 198 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9457 "00010000" // /* MW 9 */ + 9458 "00100000" // /* MW 8 */ + 9459 "00110010" // /* MW 7 */ + 9460 "11110011" // /* MW 6 */ + 9461 "00000001" // /* MW 5 */ + 9462 "00000000" // /* MW 4 */ + 9463 "11010000" // /* MW 3 */ + 9464 "11000110" // /* MW 2 */ + 9465 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9467 "00000101" // /* MW 3 */ + 9468 "00100000" // /* MW 2 */ + 9469 "00010000" // /* MW 1 */ + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9481 "00001000" // /* MW 3 */ + 9482 "01010001" // /* MW 2 */ + 9483 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9485 "00010000" // /* MW 9 */ + 9486 "00110000" // /* MW 8 */ + 9487 "00110010" // /* MW 7 */ + 9488 "11110001" // /* MW 6 */ + 9489 "00000001" // /* MW 5 */ + 9490 "00000000" // /* MW 4 */ + 9491 "11010000" // /* MW 3 */ + 9492 "11001110" // /* MW 2 */ + 9493 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 first + 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9495 "00110110" // /* MW 3 */ + 9496 "00000110" // /* MW 2 */ + 9497 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 + 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9499 "01010110" // /* MW 3 */ + 9500 "00000110" // /* MW 2 */ + 9501 "00000010" // /* MW 1 */ + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ + 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9507 "00000000" // /* MW 1 */ + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00110001" // /* MW 3 */ + 9512 "00100001" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9515 "00010001" // /* MW 3 */ + 9516 "11100110" // /* MW 2 */ + 9517 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 16 first + 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9519 "00101000" // /* MW 3 */ + 9520 "01100001" // /* MW 2 */ + 9521 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 + 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */ + 9523 "00000001" // /* MW 5 */ + 9524 "01000000" // /* MW 4 */ + 9525 "10101000" // /* MW 3 */ + 9526 "00010010" // /* MW 2 */ + 9527 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9537 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 + 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9539 "00000001" // /* MW 3 */ + 9540 "00100000" // /* MW 2 */ + 9541 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 first + 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "00000000" // /* MW 7 */ + 9546 "10000000" // /* MW 6 */ + 9547 "00010001" // /* MW 5 */ + 9548 "00000110" // /* MW 4 */ + 9549 "11110110" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 200 + 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9553 "00111001" // /* MW 3 */ + 9554 "11110100" // /* MW 2 */ + 9555 "00000111" // /* MW 1 */ + 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00011001" // /* MW 3 */ + 9558 "11111011" // /* MW 2 */ + 9559 "00000111" // /* MW 1 */ + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9567 "11110001" // /* MW 3 */ + 9568 "11111101" // /* MW 2 */ + 9569 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9571 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9573 "00000000" // /* MW 3 */ + 9574 "00101000" // /* MW 2 */ + 9575 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9577 "10100000" // /* MW 3 */ + 9578 "01100111" // /* MW 2 */ + 9579 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 +.delay_slot + 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9581 "00000001" // /* MW 5 */ + 9582 "00000000" // /* MW 4 */ + 9583 "00000000" // /* MW 3 */ + 9584 "11111000" // /* MW 2 */ + 9585 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9587 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9591 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9601 "01011000" // /* MW 9 */ + 9602 "00000000" // /* MW 8 */ + 9603 "00001000" // /* MW 7 */ + 9604 "00001011" // /* MW 6 */ + 9605 "00100000" // /* MW 5 */ + 9606 "00001000" // /* MW 4 */ + 9607 "11010000" // /* MW 3 */ + 9608 "10000101" // /* MW 2 */ + 9609 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9611 "00000001" // /* MW 3 */ + 9612 "10000000" // /* MW 2 */ + 9613 "00010111" // /* MW 1 */ + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ + 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9617 "00000000" // /* MW 1 */ + 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9619 "00000000" // /* MW 1 */ + 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9621 "00000000" // /* MW 1 */ + 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9623 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9625 "00101001" // /* MW 3 */ + 9626 "00011100" // /* MW 2 */ + 9627 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9629 "00101110" // /* MW 3 */ + 9630 "00011100" // /* MW 2 */ + 9631 "00000001" // /* MW 1 */ + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ + 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9637 "00000000" // /* MW 1 */ + 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9639 "00000000" // /* MW 1 */ + 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9641 "00000000" // /* MW 1 */ + 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9643 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9645 "00101001" // /* MW 3 */ + 9646 "00011100" // /* MW 2 */ + 9647 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9649 "00101110" // /* MW 3 */ + 9650 "00000100" // /* MW 2 */ + 9651 "00000001" // /* MW 1 */ + 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9653 "00000000" // /* MW 1 */ + 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9655 "00000000" // /* MW 1 */ + 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9657 "00000000" // /* MW 1 */ + 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9659 "00000000" // /* MW 1 */ + 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9661 "00000000" // /* MW 1 */ + 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9663 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00101001" // /* MW 3 */ + 9666 "00011100" // /* MW 2 */ + 9667 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9669 "01110110" // /* MW 3 */ + 9670 "00010100" // /* MW 2 */ + 9671 "00000001" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ + 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9679 "00000000" // /* MW 1 */ + 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9681 "00000000" // /* MW 1 */ + 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9683 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9685 "01110001" // /* MW 3 */ + 9686 "01001100" // /* MW 2 */ + 9687 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9689 "00010111" // /* MW 3 */ + 9690 "00000100" // /* MW 2 */ + 9691 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9693 "00000000" // /* MW 3 */ + 9694 "00101000" // /* MW 2 */ + 9695 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9697 "00000000" // /* MW 5 */ + 9698 "10111110" // /* MW 4 */ + 9699 "11110000" // /* MW 3 */ + 9700 "00000000" // /* MW 2 */ + 9701 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9703 "00010100" // /* MW 3 */ + 9704 "11000010" // /* MW 2 */ + 9705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00100111" // /* MW 3 */ + 9708 "01110110" // /* MW 2 */ + 9709 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "10000010" // /* MW 3 */ + 9712 "00000001" // /* MW 2 */ + 9713 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9715 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9729 "00000001" // /* MW 5 */ + 9730 "00000000" // /* MW 4 */ + 9731 "00000000" // /* MW 3 */ + 9732 "00001000" // /* MW 2 */ + 9733 "00000000" // /* MW 1 */ + 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9735 "00111101" // /* MW 3 */ + 9736 "11111000" // /* MW 2 */ + 9737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */ + 9739 "00000001" // /* MW 5 */ + 9740 "00000000" // /* MW 4 */ + 9741 "11000000" // /* MW 3 */ + 9742 "00010010" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.delay_slot + 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "10011101" // /* MW 3 */ + 9746 "11111111" // /* MW 2 */ + 9747 "00001111" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot + 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9749 "11000000" // /* MW 3 */ + 9750 "01100000" // /* MW 2 */ + 9751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9757 "01100111" // /* MW 3 */ + 9758 "00000001" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.return_address + 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9761 "00111001" // /* MW 3 */ + 9762 "11111000" // /* MW 2 */ + 9763 "00000111" // /* MW 1 */ + 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9765 "00000000" // /* MW 1 */ + 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9767 "00000000" // /* MW 1 */ + 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9769 "00000000" // /* MW 1 */ + 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9771 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9773 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9775 "10011001" // /* MW 3 */ + 9776 "11111111" // /* MW 2 */ + 9777 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9779 "00000000" // /* MW 3 */ + 9780 "00101000" // /* MW 2 */ + 9781 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9789 "00001001" // /* MW 3 */ + 9790 "00100000" // /* MW 2 */ + 9791 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "mul_impl.h" 193 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9793 "01110001" // /* MW 9 */ + 9794 "00000000" // /* MW 8 */ + 9795 "00000000" // /* MW 7 */ + 9796 "00000000" // /* MW 6 */ + 9797 "11111110" // /* MW 5 */ + 9798 "00111111" // /* MW 4 */ + 9799 "00110000" // /* MW 3 */ + 9800 "11000010" // /* MW 2 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9801 "11101000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 3 "elementwise_binary_shared.h" 107 first +.src_ref 3 "elementwise_binary_shared.h" 119 37 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.function_start + 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9809 "11000000" // /* MW 3 */ + 9810 "00010110" // /* MW 2 */ + 9811 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first + 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "00000111" // /* MW 3 */ + 9814 "01100000" // /* MW 2 */ + 9815 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 122 22 first + 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "01010010" // /* MW 3 */ + 9818 "00011100" // /* MW 2 */ + 9819 "00000011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 15 first + 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9821 "10010110" // /* MW 3 */ + 9822 "00000100" // /* MW 2 */ + 9823 "00000011" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00001001" // /* MW 3 */ + 9834 "00000110" // /* MW 2 */ + 9835 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 107 + 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9837 "00000001" // /* MW 5 */ + 9838 "00000000" // /* MW 4 */ + 9839 "00000000" // /* MW 3 */ + 9840 "00010000" // /* MW 2 */ + 9841 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9843 "01001100" // /* MW 3 */ + 9844 "11000110" // /* MW 2 */ + 9845 "00010000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 +.src_ref 3 "elementwise_binary_shared.h" 124 8 + 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */ + 9847 "01100000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "00010000" // /* MW 7 */ + 9850 "11100010" // /* MW 6 */ + 9851 "00000100" // /* MW 5 */ + 9852 "00000110" // /* MW 4 */ + 9853 "00000000" // /* MW 3 */ + 9854 "00000001" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 first +.delay_slot + 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9857 "01110010" // /* MW 3 */ + 9858 "00000101" // /* MW 2 */ + 9859 "00011000" // /* MW 1 */ +.delay_slot + 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9861 "11000000" // /* MW 3 */ + 9862 "01011110" // /* MW 2 */ + 9863 "00011000" // /* MW 1 */ +.delay_slot + 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9865 "11100000" // /* MW 3 */ + 9866 "01100101" // /* MW 2 */ + 9867 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9869 "10000001" // /* MW 5 */ + 9870 "11011101" // /* MW 4 */ + 9871 "00001010" // /* MW 3 */ + 9872 "11110010" // /* MW 2 */ + 9873 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first +.delay_slot + 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9875 "00010011" // /* MW 3 */ + 9876 "00000100" // /* MW 2 */ + 9877 "00001111" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.src_ref 3 "elementwise_binary_shared.h" 131 19 + 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9879 "01110010" // /* MW 9 */ + 9880 "10111001" // /* MW 8 */ + 9881 "00000100" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "00001011" // /* MW 5 */ + 9884 "10000000" // /* MW 4 */ + 9885 "10000100" // /* MW 3 */ + 9886 "10000010" // /* MW 2 */ + 9887 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 126 34 first +.src_ref 3 "elementwise_binary_shared.h" 131 19 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000001" // /* MW 5 */ + 9890 "00000001" // /* MW 4 */ + 9891 "01010100" // /* MW 3 */ + 9892 "00000001" // /* MW 2 */ + 9893 "10000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 131 12 +.src_ref 3 "elementwise_binary_shared.h" 131 35 + 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */ + 9907 "00000001" // /* MW 5 */ + 9908 "01000000" // /* MW 4 */ + 9909 "01110000" // /* MW 3 */ + 9910 "00010011" // /* MW 2 */ + 9911 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9913 "00000000" // /* MW 3 */ + 9914 "00000000" // /* MW 2 */ + 9915 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11010000" // /* MW 5 */ + 9918 "11001000" // /* MW 4 */ + 9919 "11001000" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 9929 "00100000" // /* MW 9 */ + 9930 "00000000" // /* MW 8 */ + 9931 "00000000" // /* MW 7 */ + 9932 "11011110" // /* MW 6 */ + 9933 "00000100" // /* MW 5 */ + 9934 "00000000" // /* MW 4 */ + 9935 "10000000" // /* MW 3 */ + 9936 "00000100" // /* MW 2 */ + 9937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9947 "00100110" // /* MW 5 */ + 9948 "00001000" // /* MW 4 */ + 9949 "11110000" // /* MW 3 */ + 9950 "00101100" // /* MW 2 */ + 9951 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "10000000" // /* MW 3 */ + 9954 "00000000" // /* MW 2 */ + 9955 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01010000" // /* MW 11 */ + 9958 "00000000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "00000001" // /* MW 8 */ + 9961 "00010011" // /* MW 7 */ + 9962 "00000100" // /* MW 6 */ + 9963 "00100001" // /* MW 5 */ + 9964 "00000000" // /* MW 4 */ + 9965 "11110000" // /* MW 3 */ + 9966 "00101100" // /* MW 2 */ + 9967 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */ + 9969 "00000000" // /* MW 5 */ + 9970 "00000000" // /* MW 4 */ + 9971 "11001000" // /* MW 3 */ + 9972 "00010011" // /* MW 2 */ + 9973 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9975 "01110000" // /* MW 7 */ + 9976 "01100000" // /* MW 6 */ + 9977 "10110000" // /* MW 5 */ + 9978 "00000011" // /* MW 4 */ + 9979 "01100000" // /* MW 3 */ + 9980 "10010001" // /* MW 2 */ + 9981 "00010011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9989 "10000001" // /* MW 11 */ + 9990 "10101101" // /* MW 10 */ + 9991 "00000000" // /* MW 9 */ + 9992 "00000000" // /* MW 8 */ + 9993 "00000000" // /* MW 7 */ + 9994 "00000000" // /* MW 6 */ + 9995 "00100000" // /* MW 5 */ + 9996 "00000000" // /* MW 4 */ + 9997 "11110000" // /* MW 3 */ + 9998 "00101100" // /* MW 2 */ + 9999 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 +.src_ref 3 "elementwise_binary_shared.h" 150 97 + 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10001 "00001101" // /* MW 3 */ + 10002 "00000100" // /* MW 2 */ + 10003 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 97 first + 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10005 "01000111" // /* MW 3 */ + 10006 "10000100" // /* MW 2 */ + 10007 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */ + 10009 "00000001" // /* MW 5 */ + 10010 "01000000" // /* MW 4 */ + 10011 "10100000" // /* MW 3 */ + 10012 "00010011" // /* MW 2 */ + 10013 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "00000000" // /* MW 5 */ + 10016 "00100000" // /* MW 4 */ + 10017 "00000000" // /* MW 3 */ + 10018 "10000000" // /* MW 2 */ + 10019 "00111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10021 "11010000" // /* MW 5 */ + 10022 "11001000" // /* MW 4 */ + 10023 "11001000" // /* MW 3 */ + 10024 "00000111" // /* MW 2 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10033 "00000000" // /* MW 15 */ + 10034 "00000000" // /* MW 14 */ + 10035 "00010000" // /* MW 13 */ + 10036 "00000000" // /* MW 12 */ + 10037 "00001000" // /* MW 11 */ + 10038 "00000000" // /* MW 10 */ + 10039 "11100000" // /* MW 9 */ + 10040 "00101111" // /* MW 8 */ + 10041 "01011011" // /* MW 7 */ + 10042 "00000001" // /* MW 6 */ + 10043 "00100000" // /* MW 5 */ + 10044 "00000000" // /* MW 4 */ + 10045 "11110000" // /* MW 3 */ + 10046 "00101100" // /* MW 2 */ + 10047 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10049 "01011000" // /* MW 9 */ + 10050 "10111110" // /* MW 8 */ + 10051 "01000111" // /* MW 7 */ + 10052 "00000000" // /* MW 6 */ + 10053 "11010010" // /* MW 5 */ + 10054 "00000010" // /* MW 4 */ + 10055 "01010000" // /* MW 3 */ + 10056 "10000000" // /* MW 2 */ + 10057 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10059 "10000000" // /* MW 3 */ + 10060 "00000000" // /* MW 2 */ + 10061 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10063 "00000000" // /* MW 3 */ + 10064 "00000000" // /* MW 2 */ + 10065 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 171 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10067 "10000000" // /* MW 3 */ + 10068 "00000000" // /* MW 2 */ + 10069 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10073 "00010001" // /* MW 3 */ + 10074 "00000000" // /* MW 2 */ + 10075 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10077 "00100101" // /* MW 5 */ + 10078 "00000001" // /* MW 4 */ + 10079 "11100010" // /* MW 3 */ + 10080 "00000010" // /* MW 2 */ + 10081 "10100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10083 "10000000" // /* MW 3 */ + 10084 "00111010" // /* MW 2 */ + 10085 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10087 "10010110" // /* MW 3 */ + 10088 "01000000" // /* MW 2 */ + 10089 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10093 "00000001" // /* MW 3 */ + 10094 "00000001" // /* MW 2 */ + 10095 "00011000" // /* MW 1 */ + 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10097 "00000000" // /* MW 1 */ + 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10101 "00010010" // /* MW 3 */ + 10102 "00000000" // /* MW 2 */ + 10103 "00000101" // /* MW 1 */ + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10113 "00000000" // /* MW 1 */ + 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10115 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10117 "01110010" // /* MW 3 */ + 10118 "00000001" // /* MW 2 */ + 10119 "00011000" // /* MW 1 */ + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100110" // /* MW 5 */ + 10124 "11111000" // /* MW 4 */ + 10125 "11111111" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 166 4 first +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first + 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10129 "00010000" // /* MW 11 */ + 10130 "00000000" // /* MW 10 */ + 10131 "01111100" // /* MW 9 */ + 10132 "00001000" // /* MW 8 */ + 10133 "00000000" // /* MW 7 */ + 10134 "00000000" // /* MW 6 */ + 10135 "11101000" // /* MW 5 */ + 10136 "01010000" // /* MW 4 */ + 10137 "11011110" // /* MW 3 */ + 10138 "10001010" // /* MW 2 */ + 10139 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10141 "00010000" // /* MW 11 */ + 10142 "00011000" // /* MW 10 */ + 10143 "10111100" // /* MW 9 */ + 10144 "00001001" // /* MW 8 */ + 10145 "00000000" // /* MW 7 */ + 10146 "00000000" // /* MW 6 */ + 10147 "01101000" // /* MW 5 */ + 10148 "10010000" // /* MW 4 */ + 10149 "00000010" // /* MW 3 */ + 10150 "01100011" // /* MW 2 */ + 10151 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 177 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10153 "11110001" // /* MW 7 */ + 10154 "00000000" // /* MW 6 */ + 10155 "11101000" // /* MW 5 */ + 10156 "01010000" // /* MW 4 */ + 10157 "01111110" // /* MW 3 */ + 10158 "00000101" // /* MW 2 */ + 10159 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10161 "01101000" // /* MW 5 */ + 10162 "10010000" // /* MW 4 */ + 10163 "01010010" // /* MW 3 */ + 10164 "10010000" // /* MW 2 */ + 10165 "10000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10167 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10169 "00101011" // /* MW 3 */ + 10170 "00001000" // /* MW 2 */ + 10171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10175 "00111101" // /* MW 3 */ + 10176 "10000100" // /* MW 2 */ + 10177 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10179 "00000001" // /* MW 7 */ + 10180 "00000010" // /* MW 6 */ + 10181 "00000001" // /* MW 5 */ + 10182 "10000110" // /* MW 4 */ + 10183 "01111110" // /* MW 3 */ + 10184 "01110001" // /* MW 2 */ + 10185 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10187 "11101000" // /* MW 5 */ + 10188 "01010000" // /* MW 4 */ + 10189 "01111110" // /* MW 3 */ + 10190 "00000011" // /* MW 2 */ + 10191 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "11010100" // /* MW 9 */ + 10200 "00001001" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "00100000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "01110000" // /* MW 3 */ + 10206 "00000101" // /* MW 2 */ + 10207 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "00100000" // /* MW 5 */ + 10220 "00000000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00010000" // /* MW 15 */ + 10226 "00001000" // /* MW 14 */ + 10227 "01111000" // /* MW 13 */ + 10228 "10100101" // /* MW 12 */ + 10229 "00000001" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "01111000" // /* MW 13 */ + 10244 "10100101" // /* MW 12 */ + 10245 "00000001" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "01011011" // /* MW 7 */ + 10250 "00000001" // /* MW 6 */ + 10251 "11101000" // /* MW 5 */ + 10252 "01010000" // /* MW 4 */ + 10253 "01111110" // /* MW 3 */ + 10254 "00000011" // /* MW 2 */ + 10255 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "00000000" // /* MW 8 */ + 10265 "10100011" // /* MW 7 */ + 10266 "00011100" // /* MW 6 */ + 10267 "00100010" // /* MW 5 */ + 10268 "00000000" // /* MW 4 */ + 10269 "01110000" // /* MW 3 */ + 10270 "00000101" // /* MW 2 */ + 10271 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "01111000" // /* MW 13 */ + 10276 "10100101" // /* MW 12 */ + 10277 "00000001" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "00000000" // /* MW 8 */ + 10281 "01011011" // /* MW 7 */ + 10282 "00000001" // /* MW 6 */ + 10283 "00100000" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10289 "00010000" // /* MW 15 */ + 10290 "00001000" // /* MW 14 */ + 10291 "01111000" // /* MW 13 */ + 10292 "10100101" // /* MW 12 */ + 10293 "00000001" // /* MW 11 */ + 10294 "00000000" // /* MW 10 */ + 10295 "00000000" // /* MW 9 */ + 10296 "00000000" // /* MW 8 */ + 10297 "01011011" // /* MW 7 */ + 10298 "00000001" // /* MW 6 */ + 10299 "00100000" // /* MW 5 */ + 10300 "00000000" // /* MW 4 */ + 10301 "11110000" // /* MW 3 */ + 10302 "00101100" // /* MW 2 */ + 10303 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10305 "00000001" // /* MW 5 */ + 10306 "00000000" // /* MW 4 */ + 10307 "00000000" // /* MW 3 */ + 10308 "11110000" // /* MW 2 */ + 10309 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "10100011" // /* MW 3 */ + 10312 "00011100" // /* MW 2 */ + 10313 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10315 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10317 "00000001" // /* MW 3 */ + 10318 "00000010" // /* MW 2 */ + 10319 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10321 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10323 "00000000" // /* MW 3 */ + 10324 "00101000" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "10100011" // /* MW 3 */ + 10328 "00011100" // /* MW 2 */ + 10329 "00001010" // /* MW 1 */ +.delay_slot + 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "10100000" // /* MW 3 */ + 10332 "01100000" // /* MW 2 */ + 10333 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10335 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.delay_slot + 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10337 "10100011" // /* MW 3 */ + 10338 "00011100" // /* MW 2 */ + 10339 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 10341 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 4 "vector.hpp" 538 13 +.src_ref 3 "elementwise_binary_shared.h" 237 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.function_start + 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10353 "01110010" // /* MW 9 */ + 10354 "11110000" // /* MW 8 */ + 10355 "01100000" // /* MW 7 */ + 10356 "00000000" // /* MW 6 */ + 10357 "10001011" // /* MW 5 */ + 10358 "10001000" // /* MW 4 */ + 10359 "10000011" // /* MW 3 */ + 10360 "10000010" // /* MW 2 */ + 10361 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 first +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 + 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10363 "10000001" // /* MW 5 */ + 10364 "11000101" // /* MW 4 */ + 10365 "01010100" // /* MW 3 */ + 10366 "00000001" // /* MW 2 */ + 10367 "01000000" // /* MW 1 */ + 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10369 "00000000" // /* MW 1 */ + 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10371 "00000000" // /* MW 1 */ + 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10373 "00000000" // /* MW 1 */ + 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10375 "00000000" // /* MW 1 */ + 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10377 "00000000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 244 12 +.src_ref 3 "elementwise_binary_shared.h" 244 35 + 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */ + 10381 "00000001" // /* MW 5 */ + 10382 "00000000" // /* MW 4 */ + 10383 "01101000" // /* MW 3 */ + 10384 "00010100" // /* MW 2 */ + 10385 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 237 +.delay_slot + 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10387 "00000001" // /* MW 5 */ + 10388 "00000000" // /* MW 4 */ + 10389 "00000000" // /* MW 3 */ + 10390 "00001000" // /* MW 2 */ + 10391 "00000000" // /* MW 1 */ +.delay_slot + 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10393 "11100000" // /* MW 3 */ + 10394 "01010101" // /* MW 2 */ + 10395 "00011000" // /* MW 1 */ +.delay_slot + 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10397 "11100000" // /* MW 3 */ + 10398 "01100000" // /* MW 2 */ + 10399 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10401 "00101011" // /* MW 3 */ + 10402 "00000111" // /* MW 2 */ + 10403 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10405 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 247 12 first +.no_stack_arguments + 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10407 "00000001" // /* MW 5 */ + 10408 "00000000" // /* MW 4 */ + 10409 "00101000" // /* MW 3 */ + 10410 "00010011" // /* MW 2 */ + 10411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10421 "10000001" // /* MW 11 */ + 10422 "10101101" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "00000000" // /* MW 7 */ + 10426 "00000000" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.return_address + 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10433 "00000000" // /* MW 5 */ + 10434 "00000000" // /* MW 4 */ + 10435 "01111000" // /* MW 3 */ + 10436 "00010100" // /* MW 2 */ + 10437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10447 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 3 "elementwise_binary_shared.h" 245 12 first +.no_stack_arguments + 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10449 "00000001" // /* MW 5 */ + 10450 "00000000" // /* MW 4 */ + 10451 "00101000" // /* MW 3 */ + 10452 "00010011" // /* MW 2 */ + 10453 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.delay_slot + 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10455 "01110000" // /* MW 7 */ + 10456 "01100000" // /* MW 6 */ + 10457 "10110000" // /* MW 5 */ + 10458 "00000000" // /* MW 4 */ + 10459 "01100000" // /* MW 3 */ + 10460 "10010001" // /* MW 2 */ + 10461 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10469 "10000001" // /* MW 11 */ + 10470 "10101101" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00000000" // /* MW 7 */ + 10474 "00000000" // /* MW 6 */ + 10475 "00100000" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.return_address + 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10481 "10000000" // /* MW 3 */ + 10482 "01110001" // /* MW 2 */ + 10483 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 first + 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10485 "00000000" // /* MW 3 */ + 10486 "00101000" // /* MW 2 */ + 10487 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.delay_slot + 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10489 "00000001" // /* MW 5 */ + 10490 "00000000" // /* MW 4 */ + 10491 "00000000" // /* MW 3 */ + 10492 "11111000" // /* MW 2 */ + 10493 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 10501 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 205 first +.src_ref 7 "superkernels.cpp" 210 6 +.function_start + 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10513 "10000000" // /* MW 5 */ + 10514 "11001000" // /* MW 4 */ + 10515 "11000110" // /* MW 3 */ + 10516 "00000111" // /* MW 2 */ + 10517 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 first + 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10519 "11000001" // /* MW 5 */ + 10520 "10110101" // /* MW 4 */ + 10521 "11011000" // /* MW 3 */ + 10522 "11000010" // /* MW 2 */ + 10523 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 205 + 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10525 "00000001" // /* MW 5 */ + 10526 "00000000" // /* MW 4 */ + 10527 "00000000" // /* MW 3 */ + 10528 "00001000" // /* MW 2 */ + 10529 "00000000" // /* MW 1 */ + 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10531 "01110000" // /* MW 7 */ + 10532 "11010000" // /* MW 6 */ + 10533 "00001011" // /* MW 5 */ + 10534 "00000000" // /* MW 4 */ + 10535 "10110000" // /* MW 3 */ + 10536 "01100011" // /* MW 2 */ + 10537 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 + 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10539 "00010001" // /* MW 9 */ + 10540 "00101000" // /* MW 8 */ + 10541 "00110010" // /* MW 7 */ + 10542 "11110011" // /* MW 6 */ + 10543 "00000001" // /* MW 5 */ + 10544 "00000000" // /* MW 4 */ + 10545 "10110000" // /* MW 3 */ + 10546 "10000010" // /* MW 2 */ + 10547 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10549 "11000000" // /* MW 3 */ + 10550 "11010100" // /* MW 2 */ + 10551 "00011011" // /* MW 1 */ + 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10553 "00000000" // /* MW 1 */ + 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 +.src_ref 7 "superkernels.cpp" 210 16 + 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */ + 10557 "00000001" // /* MW 5 */ + 10558 "01000000" // /* MW 4 */ + 10559 "11110000" // /* MW 3 */ + 10560 "00010100" // /* MW 2 */ + 10561 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 22 first +.delay_slot + 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10563 "10010000" // /* MW 3 */ + 10564 "01100010" // /* MW 2 */ + 10565 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 30 +.delay_slot + 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10567 "11111011" // /* MW 3 */ + 10568 "01100011" // /* MW 2 */ + 10569 "00010100" // /* MW 1 */ +.delay_slot + 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10571 "00111101" // /* MW 3 */ + 10572 "11110100" // /* MW 2 */ + 10573 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10575 "01110000" // /* MW 7 */ + 10576 "01100000" // /* MW 6 */ + 10577 "00110000" // /* MW 5 */ + 10578 "00000011" // /* MW 4 */ + 10579 "00110000" // /* MW 3 */ + 10580 "11000110" // /* MW 2 */ + 10581 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 +.src_ref 7 "superkernels.cpp" 224 2 +.delay_slot + 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10583 "00000000" // /* MW 5 */ + 10584 "11001010" // /* MW 4 */ + 10585 "11000000" // /* MW 3 */ + 10586 "00000111" // /* MW 2 */ + 10587 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10589 "11010000" // /* MW 5 */ + 10590 "11001000" // /* MW 4 */ + 10591 "11000100" // /* MW 3 */ + 10592 "00000111" // /* MW 2 */ + 10593 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10595 "00010000" // /* MW 9 */ + 10596 "00110010" // /* MW 8 */ + 10597 "00110010" // /* MW 7 */ + 10598 "11110001" // /* MW 6 */ + 10599 "00000001" // /* MW 5 */ + 10600 "00000000" // /* MW 4 */ + 10601 "11100000" // /* MW 3 */ + 10602 "11000000" // /* MW 2 */ + 10603 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "00000000" // /* MW 3 */ + 10610 "00010011" // /* MW 2 */ + 10611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10615 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10617 "00110001" // /* MW 3 */ + 10618 "00100000" // /* MW 2 */ + 10619 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10621 "00000101" // /* MW 3 */ + 10622 "00100000" // /* MW 2 */ + 10623 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10625 "00000000" // /* MW 15 */ + 10626 "00000000" // /* MW 14 */ + 10627 "01111000" // /* MW 13 */ + 10628 "10100101" // /* MW 12 */ + 10629 "00000001" // /* MW 11 */ + 10630 "00000000" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "10000000" // /* MW 8 */ + 10633 "00010001" // /* MW 7 */ + 10634 "00000110" // /* MW 6 */ + 10635 "00100010" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 +.return_address + 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10641 "10100000" // /* MW 5 */ + 10642 "11001000" // /* MW 4 */ + 10643 "11000100" // /* MW 3 */ + 10644 "00000111" // /* MW 2 */ + 10645 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 first +.src_ref 7 "superkernels.cpp" 217 65 + 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10647 "00010000" // /* MW 9 */ + 10648 "10000000" // /* MW 8 */ + 10649 "00110010" // /* MW 7 */ + 10650 "11110001" // /* MW 6 */ + 10651 "00000001" // /* MW 5 */ + 10652 "00000000" // /* MW 4 */ + 10653 "11010000" // /* MW 3 */ + 10654 "11000010" // /* MW 2 */ + 10655 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 +.src_ref 7 "superkernels.cpp" 217 65 +.src_ref 7 "superkernels.cpp" 224 2 + 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10657 "00010000" // /* MW 9 */ + 10658 "10000000" // /* MW 8 */ + 10659 "00110010" // /* MW 7 */ + 10660 "11110001" // /* MW 6 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "11010000" // /* MW 3 */ + 10664 "11000110" // /* MW 2 */ + 10665 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 first +.src_ref 7 "superkernels.cpp" 217 16 +.src_ref 7 "superkernels.cpp" 222 47 + 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10667 "00010000" // /* MW 9 */ + 10668 "00101010" // /* MW 8 */ + 10669 "10110010" // /* MW 7 */ + 10670 "11110000" // /* MW 6 */ + 10671 "00000001" // /* MW 5 */ + 10672 "00000000" // /* MW 4 */ + 10673 "01010000" // /* MW 3 */ + 10674 "11001011" // /* MW 2 */ + 10675 "01001010" // /* MW 1 */ + 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10677 "00000000" // /* MW 1 */ + 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10679 "00000000" // /* MW 1 */ + 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */ + 10681 "00000000" // /* MW 5 */ + 10682 "00000000" // /* MW 4 */ + 10683 "11111000" // /* MW 3 */ + 10684 "00010100" // /* MW 2 */ + 10685 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 +.delay_slot + 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10687 "11000000" // /* MW 5 */ + 10688 "11001000" // /* MW 4 */ + 10689 "11000000" // /* MW 3 */ + 10690 "00000111" // /* MW 2 */ + 10691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 27 first +.delay_slot + 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10695 "00001111" // /* MW 3 */ + 10696 "01100001" // /* MW 2 */ + 10697 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 first +.delay_slot + 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "10100011" // /* MW 5 */ + 10700 "00001100" // /* MW 4 */ + 10701 "11110000" // /* MW 3 */ + 10702 "00101100" // /* MW 2 */ + 10703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 16 first +.delay_slot + 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10705 "00000000" // /* MW 15 */ + 10706 "00000000" // /* MW 14 */ + 10707 "01111000" // /* MW 13 */ + 10708 "10100101" // /* MW 12 */ + 10709 "00000001" // /* MW 11 */ + 10710 "00000000" // /* MW 10 */ + 10711 "00000000" // /* MW 9 */ + 10712 "10000000" // /* MW 8 */ + 10713 "00010001" // /* MW 7 */ + 10714 "00000110" // /* MW 6 */ + 10715 "00100001" // /* MW 5 */ + 10716 "00000000" // /* MW 4 */ + 10717 "11110000" // /* MW 3 */ + 10718 "00101100" // /* MW 2 */ + 10719 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 222 47 +.src_ref 7 "superkernels.cpp" 224 2 + 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10721 "00000000" // /* MW 15 */ + 10722 "00000000" // /* MW 14 */ + 10723 "00010000" // /* MW 13 */ + 10724 "00101010" // /* MW 12 */ + 10725 "10110010" // /* MW 11 */ + 10726 "11110000" // /* MW 10 */ + 10727 "00000001" // /* MW 9 */ + 10728 "00000000" // /* MW 8 */ + 10729 "10001011" // /* MW 7 */ + 10730 "10000000" // /* MW 6 */ + 10731 "00100010" // /* MW 5 */ + 10732 "00000000" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10737 "00000000" // /* MW 7 */ + 10738 "11000011" // /* MW 6 */ + 10739 "10110011" // /* MW 5 */ + 10740 "00000011" // /* MW 4 */ + 10741 "01100000" // /* MW 3 */ + 10742 "10010001" // /* MW 2 */ + 10743 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10745 "00010000" // /* MW 9 */ + 10746 "00100000" // /* MW 8 */ + 10747 "00110010" // /* MW 7 */ + 10748 "11110000" // /* MW 6 */ + 10749 "00000001" // /* MW 5 */ + 10750 "00000000" // /* MW 4 */ + 10751 "11010000" // /* MW 3 */ + 10752 "11101110" // /* MW 2 */ + 10753 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10755 "00010110" // /* MW 3 */ + 10756 "11111110" // /* MW 2 */ + 10757 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10759 "00110110" // /* MW 3 */ + 10760 "11111110" // /* MW 2 */ + 10761 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10763 "01010110" // /* MW 3 */ + 10764 "01000110" // /* MW 2 */ + 10765 "00000111" // /* MW 1 */ + 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10767 "00000000" // /* MW 1 */ + 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10769 "00000000" // /* MW 1 */ + 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10771 "00000000" // /* MW 1 */ + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00000010" // /* MW 3 */ + 10778 "01100001" // /* MW 2 */ + 10779 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00010001" // /* MW 3 */ + 10782 "00000110" // /* MW 2 */ + 10783 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10785 "11111101" // /* MW 3 */ + 10786 "11100000" // /* MW 2 */ + 10787 "00010111" // /* MW 1 */ + 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10789 "00000000" // /* MW 1 */ + 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10791 "00000000" // /* MW 1 */ + 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10793 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "00001000" // /* MW 3 */ + 10796 "10010011" // /* MW 2 */ + 10797 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 + 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10799 "10000001" // /* MW 5 */ + 10800 "10101101" // /* MW 4 */ + 10801 "10100111" // /* MW 3 */ + 10802 "00000000" // /* MW 2 */ + 10803 "00000100" // /* MW 1 */ + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first + 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00110110" // /* MW 3 */ + 10810 "00000110" // /* MW 2 */ + 10811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10813 "10000001" // /* MW 5 */ + 10814 "11011101" // /* MW 4 */ + 10815 "11011100" // /* MW 3 */ + 10816 "11001010" // /* MW 2 */ + 10817 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 47 first + 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10819 "01110110" // /* MW 3 */ + 10820 "00000110" // /* MW 2 */ + 10821 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10823 "10011110" // /* MW 3 */ + 10824 "01011100" // /* MW 2 */ + 10825 "00000111" // /* MW 1 */ + 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10827 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 224 2 first +.no_stack_arguments + 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */ + 10829 "00000001" // /* MW 5 */ + 10830 "00000000" // /* MW 4 */ + 10831 "00111000" // /* MW 3 */ + 10832 "00010100" // /* MW 2 */ + 10833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10835 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first +.delay_slot + 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00000111" // /* MW 3 */ + 10838 "01100010" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.delay_slot + 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00110001" // /* MW 3 */ + 10842 "00000110" // /* MW 2 */ + 10843 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 first +.delay_slot + 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10845 "00001101" // /* MW 3 */ + 10846 "11100001" // /* MW 2 */ + 10847 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 +.delay_slot + 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10849 "00000000" // /* MW 15 */ + 10850 "00000000" // /* MW 14 */ + 10851 "10101000" // /* MW 13 */ + 10852 "10100000" // /* MW 12 */ + 10853 "00110100" // /* MW 11 */ + 10854 "00000000" // /* MW 10 */ + 10855 "00000000" // /* MW 9 */ + 10856 "00000000" // /* MW 8 */ + 10857 "01011011" // /* MW 7 */ + 10858 "00000001" // /* MW 6 */ + 10859 "00100000" // /* MW 5 */ + 10860 "00000000" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 +.src_ref 7 "superkernels.cpp" 227 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10865 "00010000" // /* MW 9 */ + 10866 "00100000" // /* MW 8 */ + 10867 "00110010" // /* MW 7 */ + 10868 "11110011" // /* MW 6 */ + 10869 "00000001" // /* MW 5 */ + 10870 "00000000" // /* MW 4 */ + 10871 "11010000" // /* MW 3 */ + 10872 "11000110" // /* MW 2 */ + 10873 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10875 "00000101" // /* MW 3 */ + 10876 "00100000" // /* MW 2 */ + 10877 "00010000" // /* MW 1 */ + 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10879 "00000000" // /* MW 1 */ + 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10881 "00000000" // /* MW 1 */ + 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10883 "00000000" // /* MW 1 */ + 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10885 "00000000" // /* MW 1 */ + 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10887 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10889 "00001000" // /* MW 3 */ + 10890 "01010001" // /* MW 2 */ + 10891 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10893 "00010000" // /* MW 9 */ + 10894 "00110000" // /* MW 8 */ + 10895 "00110010" // /* MW 7 */ + 10896 "11110001" // /* MW 6 */ + 10897 "00000001" // /* MW 5 */ + 10898 "00000000" // /* MW 4 */ + 10899 "11010000" // /* MW 3 */ + 10900 "11001110" // /* MW 2 */ + 10901 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 first + 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10903 "00110110" // /* MW 3 */ + 10904 "00000110" // /* MW 2 */ + 10905 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 + 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10907 "01010110" // /* MW 3 */ + 10908 "00000110" // /* MW 2 */ + 10909 "00000010" // /* MW 1 */ + 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10911 "00000000" // /* MW 1 */ + 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10913 "00000000" // /* MW 1 */ + 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10915 "00000000" // /* MW 1 */ + 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10917 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00110001" // /* MW 3 */ + 10920 "00100001" // /* MW 2 */ + 10921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "00010001" // /* MW 3 */ + 10924 "11100110" // /* MW 2 */ + 10925 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 16 first + 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10927 "00101000" // /* MW 3 */ + 10928 "01100001" // /* MW 2 */ + 10929 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 + 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */ + 10931 "00000001" // /* MW 5 */ + 10932 "01000000" // /* MW 4 */ + 10933 "01101000" // /* MW 3 */ + 10934 "00010101" // /* MW 2 */ + 10935 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 + 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10947 "00000001" // /* MW 3 */ + 10948 "00100000" // /* MW 2 */ + 10949 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 first + 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10951 "00000000" // /* MW 9 */ + 10952 "00000000" // /* MW 8 */ + 10953 "00000000" // /* MW 7 */ + 10954 "10000000" // /* MW 6 */ + 10955 "00010001" // /* MW 5 */ + 10956 "00000110" // /* MW 4 */ + 10957 "11110110" // /* MW 3 */ + 10958 "00101100" // /* MW 2 */ + 10959 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 229 + 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10961 "00111001" // /* MW 3 */ + 10962 "11110100" // /* MW 2 */ + 10963 "00000111" // /* MW 1 */ + 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00011001" // /* MW 3 */ + 10966 "11111011" // /* MW 2 */ + 10967 "00000111" // /* MW 1 */ + 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10969 "00000000" // /* MW 1 */ + 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10971 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10973 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10975 "11110001" // /* MW 3 */ + 10976 "11111101" // /* MW 2 */ + 10977 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10981 "00000000" // /* MW 3 */ + 10982 "00101000" // /* MW 2 */ + 10983 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10985 "10100000" // /* MW 3 */ + 10986 "01100111" // /* MW 2 */ + 10987 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 +.delay_slot + 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10989 "00000001" // /* MW 5 */ + 10990 "00000000" // /* MW 4 */ + 10991 "00000000" // /* MW 3 */ + 10992 "11111000" // /* MW 2 */ + 10993 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10999 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 11008 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11009 "01011000" // /* MW 9 */ + 11010 "00000000" // /* MW 8 */ + 11011 "00001000" // /* MW 7 */ + 11012 "00001011" // /* MW 6 */ + 11013 "00100000" // /* MW 5 */ + 11014 "00001000" // /* MW 4 */ + 11015 "11010000" // /* MW 3 */ + 11016 "10000101" // /* MW 2 */ + 11017 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 11018 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11019 "00000001" // /* MW 3 */ + 11020 "10000000" // /* MW 2 */ + 11021 "00010111" // /* MW 1 */ + 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11023 "00000000" // /* MW 1 */ + 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11025 "00000000" // /* MW 1 */ + 11026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11027 "00000000" // /* MW 1 */ + 11028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11029 "00000000" // /* MW 1 */ + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 11032 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11033 "00101001" // /* MW 3 */ + 11034 "00011100" // /* MW 2 */ + 11035 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 11036 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11037 "00101110" // /* MW 3 */ + 11038 "00011100" // /* MW 2 */ + 11039 "00000001" // /* MW 1 */ + 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11041 "00000000" // /* MW 1 */ + 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11043 "00000000" // /* MW 1 */ + 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11045 "00000000" // /* MW 1 */ + 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11047 "00000000" // /* MW 1 */ + 11048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11049 "00000000" // /* MW 1 */ + 11050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11051 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 11052 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11053 "00101001" // /* MW 3 */ + 11054 "00011100" // /* MW 2 */ + 11055 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 11056 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11057 "00101110" // /* MW 3 */ + 11058 "00000100" // /* MW 2 */ + 11059 "00000001" // /* MW 1 */ + 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11061 "00000000" // /* MW 1 */ + 11062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11063 "00000000" // /* MW 1 */ + 11064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11065 "00000000" // /* MW 1 */ + 11066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11067 "00000000" // /* MW 1 */ + 11068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11069 "00000000" // /* MW 1 */ + 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 11072 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11073 "00101001" // /* MW 3 */ + 11074 "00011100" // /* MW 2 */ + 11075 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 11076 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11077 "01110110" // /* MW 3 */ + 11078 "00010100" // /* MW 2 */ + 11079 "00000001" // /* MW 1 */ + 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11081 "00000000" // /* MW 1 */ + 11082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11083 "00000000" // /* MW 1 */ + 11084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11085 "00000000" // /* MW 1 */ + 11086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11087 "00000000" // /* MW 1 */ + 11088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11089 "00000000" // /* MW 1 */ + 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11092 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11093 "01110001" // /* MW 3 */ + 11094 "01001100" // /* MW 2 */ + 11095 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11096 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11097 "00010111" // /* MW 3 */ + 11098 "00000100" // /* MW 2 */ + 11099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11100 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11101 "00000000" // /* MW 3 */ + 11102 "00101000" // /* MW 2 */ + 11103 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11104 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11105 "00000000" // /* MW 5 */ + 11106 "10111110" // /* MW 4 */ + 11107 "11110000" // /* MW 3 */ + 11108 "00000000" // /* MW 2 */ + 11109 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11110 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11111 "00010100" // /* MW 3 */ + 11112 "11000010" // /* MW 2 */ + 11113 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11114 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11115 "00100111" // /* MW 3 */ + 11116 "01110110" // /* MW 2 */ + 11117 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11118 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11119 "10000010" // /* MW 3 */ + 11120 "00000001" // /* MW 2 */ + 11121 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11123 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 11136 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11137 "00000001" // /* MW 5 */ + 11138 "00000000" // /* MW 4 */ + 11139 "00000000" // /* MW 3 */ + 11140 "00001000" // /* MW 2 */ + 11141 "00000000" // /* MW 1 */ + 11142 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11143 "00111101" // /* MW 3 */ + 11144 "11111000" // /* MW 2 */ + 11145 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 11146 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */ + 11147 "00000001" // /* MW 5 */ + 11148 "00000000" // /* MW 4 */ + 11149 "10000000" // /* MW 3 */ + 11150 "00010101" // /* MW 2 */ + 11151 "00000000" // /* MW 1 */ +.delay_slot + 11152 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11153 "10100000" // /* MW 3 */ + 11154 "00010111" // /* MW 2 */ + 11155 "00011000" // /* MW 1 */ +.delay_slot + 11156 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11157 "00010101" // /* MW 3 */ + 11158 "11111100" // /* MW 2 */ + 11159 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.delay_slot + 11160 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11161 "11000000" // /* MW 3 */ + 11162 "11010000" // /* MW 2 */ + 11163 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 11168 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11169 "00001000" // /* MW 9 */ + 11170 "11000100" // /* MW 8 */ + 11171 "00110011" // /* MW 7 */ + 11172 "01101000" // /* MW 6 */ + 11173 "00000000" // /* MW 5 */ + 11174 "00000001" // /* MW 4 */ + 11175 "00100000" // /* MW 3 */ + 11176 "00000111" // /* MW 2 */ + 11177 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 11178 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11179 "01011000" // /* MW 9 */ + 11180 "11111101" // /* MW 8 */ + 11181 "00000111" // /* MW 7 */ + 11182 "00001000" // /* MW 6 */ + 11183 "10000000" // /* MW 5 */ + 11184 "00000001" // /* MW 4 */ + 11185 "10000000" // /* MW 3 */ + 11186 "11100010" // /* MW 2 */ + 11187 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 11188 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11189 "00000001" // /* MW 9 */ + 11190 "10100000" // /* MW 8 */ + 11191 "00000111" // /* MW 7 */ + 11192 "10000000" // /* MW 6 */ + 11193 "00010001" // /* MW 5 */ + 11194 "00001010" // /* MW 4 */ + 11195 "00100000" // /* MW 3 */ + 11196 "10111110" // /* MW 2 */ + 11197 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 11198 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11199 "01001010" // /* MW 3 */ + 11200 "00000110" // /* MW 2 */ + 11201 "00000000" // /* MW 1 */ + 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11203 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11205 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11206 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11207 "00010111" // /* MW 3 */ + 11208 "00000010" // /* MW 2 */ + 11209 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11210 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11211 "00000000" // /* MW 3 */ + 11212 "00101000" // /* MW 2 */ + 11213 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11214 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11215 "00000101" // /* MW 3 */ + 11216 "00100010" // /* MW 2 */ + 11217 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11218 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11219 "00000001" // /* MW 5 */ + 11220 "00000000" // /* MW 4 */ + 11221 "00000000" // /* MW 3 */ + 11222 "11111000" // /* MW 2 */ + 11223 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11224 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11225 "00100111" // /* MW 3 */ + 11226 "01110111" // /* MW 2 */ + 11227 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11228 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11229 "10000010" // /* MW 3 */ + 11230 "00100001" // /* MW 2 */ + 11231 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11233 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_shared.h" 227 first +.src_ref 3 "elementwise_binary_shared.h" 232 8 first +.tail_call +.function_start + 11248 "10000100" // J #9808 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 11249 "00000000" // /* MW 5 */ + 11250 "00000000" // /* MW 4 */ + 11251 "00101000" // /* MW 3 */ + 11252 "00010011" // /* MW 2 */ + 11253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11259 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 11263 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 11264 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11265 "00000001" // /* MW 5 */ + 11266 "00100001" // /* MW 4 */ + 11267 "00000000" // /* MW 3 */ + 11268 "00000000" // /* MW 2 */ + 11269 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11270 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11271 "11000000" // /* MW 3 */ + 11272 "01010000" // /* MW 2 */ + 11273 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11274 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11275 "10010000" // /* MW 3 */ + 11276 "01100000" // /* MW 2 */ + 11277 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 11278 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "00010001" // /* MW 3 */ + 11280 "00000100" // /* MW 2 */ + 11281 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 11282 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11283 "00010001" // /* MW 3 */ + 11284 "00010100" // /* MW 2 */ + 11285 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 11287 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 11296 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00101110" // /* MW 3 */ + 11298 "00011100" // /* MW 2 */ + 11299 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 11300 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11301 "00000001" // /* MW 5 */ + 11302 "00000000" // /* MW 4 */ + 11303 "00000000" // /* MW 3 */ + 11304 "00001000" // /* MW 2 */ + 11305 "00000000" // /* MW 1 */ + 11306 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11307 "00111101" // /* MW 3 */ + 11308 "11111100" // /* MW 2 */ + 11309 "00001111" // /* MW 1 */ + 11310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11311 "00000000" // /* MW 1 */ + 11312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11313 "00000000" // /* MW 1 */ + 11314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11315 "00000000" // /* MW 1 */ + 11316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11317 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 11318 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11319 "00101001" // /* MW 3 */ + 11320 "00011100" // /* MW 2 */ + 11321 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 11322 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11323 "00101110" // /* MW 3 */ + 11324 "00011100" // /* MW 2 */ + 11325 "00000001" // /* MW 1 */ + 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11327 "00000000" // /* MW 1 */ + 11328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11329 "00000000" // /* MW 1 */ + 11330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11331 "00000000" // /* MW 1 */ + 11332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11333 "00000000" // /* MW 1 */ + 11334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11335 "00000000" // /* MW 1 */ + 11336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11337 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 11338 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11339 "00101001" // /* MW 3 */ + 11340 "00011100" // /* MW 2 */ + 11341 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 11342 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11343 "00101110" // /* MW 3 */ + 11344 "00000100" // /* MW 2 */ + 11345 "00000001" // /* MW 1 */ + 11346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11347 "00000000" // /* MW 1 */ + 11348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11349 "00000000" // /* MW 1 */ + 11350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11351 "00000000" // /* MW 1 */ + 11352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11353 "00000000" // /* MW 1 */ + 11354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11355 "00000000" // /* MW 1 */ + 11356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11357 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 11358 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11359 "00101001" // /* MW 3 */ + 11360 "00011100" // /* MW 2 */ + 11361 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 11362 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11363 "00101110" // /* MW 3 */ + 11364 "00010100" // /* MW 2 */ + 11365 "00000001" // /* MW 1 */ + 11366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11367 "00000000" // /* MW 1 */ + 11368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11369 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 11370 "00000100" // JL #11264 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11264 delay_slots=5 */ + 11371 "00000001" // /* MW 5 */ + 11372 "00000000" // /* MW 4 */ + 11373 "00000000" // /* MW 3 */ + 11374 "00010110" // /* MW 2 */ + 11375 "00000000" // /* MW 1 */ +.delay_slot + 11376 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11377 "10011101" // /* MW 3 */ + 11378 "11111011" // /* MW 2 */ + 11379 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 11384 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "00101001" // /* MW 3 */ + 11386 "11011100" // /* MW 2 */ + 11387 "00001000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot + 11388 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "11000000" // /* MW 3 */ + 11390 "01100000" // /* MW 2 */ + 11391 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.return_address + 11392 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11393 "00111001" // /* MW 3 */ + 11394 "11111100" // /* MW 2 */ + 11395 "00000111" // /* MW 1 */ + 11396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11397 "00000000" // /* MW 1 */ + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11406 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11407 "10011001" // /* MW 3 */ + 11408 "11111011" // /* MW 2 */ + 11409 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11410 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11411 "00000000" // /* MW 3 */ + 11412 "00101000" // /* MW 2 */ + 11413 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11419 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11420 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11421 "00000001" // /* MW 3 */ + 11422 "00100000" // /* MW 2 */ + 11423 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "mul_impl.h" 134 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11424 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11425 "01110001" // /* MW 9 */ + 11426 "00000000" // /* MW 8 */ + 11427 "00000000" // /* MW 7 */ + 11428 "00000000" // /* MW 6 */ + 11429 "11111110" // /* MW 5 */ + 11430 "00111111" // /* MW 4 */ + 11431 "00110000" // /* MW 3 */ + 11432 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 11433 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 149 first +.src_ref 3 "elementwise_binary.h" 156 37 +.src_ref 3 "elementwise_binary.h" 168 8 first +.function_start + 11440 "10111010" // MOVA m0, #32; MOVXM ls, #11616 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11441 "00010000" // /* MW 9 */ + 11442 "10110000" // /* MW 8 */ + 11443 "01111110" // /* MW 7 */ + 11444 "00001000" // /* MW 6 */ + 11445 "00000000" // /* MW 5 */ + 11446 "00000000" // /* MW 4 */ + 11447 "10000000" // /* MW 3 */ + 11448 "00000000" // /* MW 2 */ + 11449 "00000100" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 37 first +.src_ref 3 "elementwise_binary.h" 168 8 first + 11450 "10111010" // LDA r3, [p3], m0; MOVXM le, #11632 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11451 "00010000" // /* MW 9 */ + 11452 "10111000" // /* MW 8 */ + 11453 "10111110" // /* MW 7 */ + 11454 "00001001" // /* MW 6 */ + 11455 "00000000" // /* MW 5 */ + 11456 "00000000" // /* MW 4 */ + 11457 "11010000" // /* MW 3 */ + 11458 "00001110" // /* MW 2 */ + 11459 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11460 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11461 "01011000" // /* MW 9 */ + 11462 "00111100" // /* MW 8 */ + 11463 "00001011" // /* MW 7 */ + 11464 "01001000" // /* MW 6 */ + 11465 "00010111" // /* MW 5 */ + 11466 "00111110" // /* MW 4 */ + 11467 "11010000" // /* MW 3 */ + 11468 "10010000" // /* MW 2 */ + 11469 "01100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11470 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11471 "00010000" // /* MW 9 */ + 11472 "00110100" // /* MW 8 */ + 11473 "00110010" // /* MW 7 */ + 11474 "11110010" // /* MW 6 */ + 11475 "00000001" // /* MW 5 */ + 11476 "00000000" // /* MW 4 */ + 11477 "11010000" // /* MW 3 */ + 11478 "10000000" // /* MW 2 */ + 11479 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11480 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11481 "01000010" // /* MW 3 */ + 11482 "00000100" // /* MW 2 */ + 11483 "00000100" // /* MW 1 */ + 11484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11485 "00000000" // /* MW 1 */ + 11486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11487 "00000000" // /* MW 1 */ + 11488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11489 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11490 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11491 "00011101" // /* MW 3 */ + 11492 "11000010" // /* MW 2 */ + 11493 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 168 8 +.src_ref 3 "elementwise_binary.h" 187 20 first + 11494 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11495 "11111001" // /* MW 5 */ + 11496 "11100001" // /* MW 4 */ + 11497 "10001010" // /* MW 3 */ + 11498 "00001110" // /* MW 2 */ + 11499 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11500 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11501 "01101000" // /* MW 5 */ + 11502 "01010000" // /* MW 4 */ + 11503 "01110000" // /* MW 3 */ + 11504 "00010011" // /* MW 2 */ + 11505 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11506 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11507 "10000000" // /* MW 7 */ + 11508 "10111010" // /* MW 6 */ + 11509 "11101000" // /* MW 5 */ + 11510 "01010000" // /* MW 4 */ + 11511 "01110000" // /* MW 3 */ + 11512 "00011011" // /* MW 2 */ + 11513 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11514 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11515 "01101000" // /* MW 5 */ + 11516 "01010000" // /* MW 4 */ + 11517 "01110000" // /* MW 3 */ + 11518 "00010011" // /* MW 2 */ + 11519 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11520 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11521 "11101000" // /* MW 5 */ + 11522 "01010000" // /* MW 4 */ + 11523 "01110000" // /* MW 3 */ + 11524 "00011011" // /* MW 2 */ + 11525 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11526 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11527 "10011011" // /* MW 3 */ + 11528 "00001000" // /* MW 2 */ + 11529 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11530 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11531 "01101000" // /* MW 5 */ + 11532 "01010000" // /* MW 4 */ + 11533 "01110000" // /* MW 3 */ + 11534 "00011011" // /* MW 2 */ + 11535 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11536 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11537 "11101000" // /* MW 5 */ + 11538 "01010000" // /* MW 4 */ + 11539 "01110000" // /* MW 3 */ + 11540 "00010011" // /* MW 2 */ + 11541 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11542 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11543 "01000001" // /* MW 9 */ + 11544 "11100010" // /* MW 8 */ + 11545 "00000000" // /* MW 7 */ + 11546 "00011101" // /* MW 6 */ + 11547 "00110100" // /* MW 5 */ + 11548 "00101000" // /* MW 4 */ + 11549 "01110000" // /* MW 3 */ + 11550 "00011011" // /* MW 2 */ + 11551 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11552 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11553 "01100001" // /* MW 9 */ + 11554 "11100000" // /* MW 8 */ + 11555 "00000001" // /* MW 7 */ + 11556 "00011101" // /* MW 6 */ + 11557 "01110100" // /* MW 5 */ + 11558 "00101000" // /* MW 4 */ + 11559 "01110000" // /* MW 3 */ + 11560 "00010011" // /* MW 2 */ + 11561 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11562 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11563 "01000001" // /* MW 9 */ + 11564 "11100010" // /* MW 8 */ + 11565 "00000000" // /* MW 7 */ + 11566 "00011101" // /* MW 6 */ + 11567 "00110100" // /* MW 5 */ + 11568 "00101000" // /* MW 4 */ + 11569 "01110000" // /* MW 3 */ + 11570 "00011011" // /* MW 2 */ + 11571 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11572 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "01100001" // /* MW 9 */ + 11574 "11100000" // /* MW 8 */ + 11575 "00000001" // /* MW 7 */ + 11576 "00011101" // /* MW 6 */ + 11577 "01110100" // /* MW 5 */ + 11578 "00101000" // /* MW 4 */ + 11579 "01110000" // /* MW 3 */ + 11580 "00010011" // /* MW 2 */ + 11581 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11582 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11583 "01000001" // /* MW 9 */ + 11584 "11100010" // /* MW 8 */ + 11585 "00000000" // /* MW 7 */ + 11586 "00011101" // /* MW 6 */ + 11587 "00110100" // /* MW 5 */ + 11588 "00101000" // /* MW 4 */ + 11589 "01110000" // /* MW 3 */ + 11590 "00011011" // /* MW 2 */ + 11591 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11592 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11593 "01100001" // /* MW 9 */ + 11594 "11100000" // /* MW 8 */ + 11595 "00000001" // /* MW 7 */ + 11596 "00011101" // /* MW 6 */ + 11597 "01110100" // /* MW 5 */ + 11598 "00101000" // /* MW 4 */ + 11599 "01110000" // /* MW 3 */ + 11600 "00010011" // /* MW 2 */ + 11601 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11602 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11603 "01000001" // /* MW 13 */ + 11604 "11100010" // /* MW 12 */ + 11605 "00000000" // /* MW 11 */ + 11606 "10001100" // /* MW 10 */ + 11607 "01110000" // /* MW 9 */ + 11608 "00001000" // /* MW 8 */ + 11609 "00000000" // /* MW 7 */ + 11610 "00000000" // /* MW 6 */ + 11611 "01101000" // /* MW 5 */ + 11612 "01010000" // /* MW 4 */ + 11613 "01110000" // /* MW 3 */ + 11614 "00011011" // /* MW 2 */ + 11615 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11616 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11617 "00000011" // /* MW 15 */ + 11618 "00001111" // /* MW 14 */ + 11619 "01111000" // /* MW 13 */ + 11620 "10100101" // /* MW 12 */ + 11621 "00000001" // /* MW 11 */ + 11622 "00000000" // /* MW 10 */ + 11623 "00000000" // /* MW 9 */ + 11624 "00000000" // /* MW 8 */ + 11625 "10100011" // /* MW 7 */ + 11626 "00011100" // /* MW 6 */ + 11627 "11101010" // /* MW 5 */ + 11628 "01010000" // /* MW 4 */ + 11629 "01110000" // /* MW 3 */ + 11630 "00010011" // /* MW 2 */ + 11631 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11632 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11633 "00010010" // /* MW 15 */ + 11634 "00000111" // /* MW 14 */ + 11635 "01111000" // /* MW 13 */ + 11636 "10100101" // /* MW 12 */ + 11637 "00000001" // /* MW 11 */ + 11638 "00000000" // /* MW 10 */ + 11639 "00000000" // /* MW 9 */ + 11640 "00000000" // /* MW 8 */ + 11641 "00100011" // /* MW 7 */ + 11642 "00011100" // /* MW 6 */ + 11643 "01101010" // /* MW 5 */ + 11644 "01010000" // /* MW 4 */ + 11645 "01110000" // /* MW 3 */ + 11646 "00011011" // /* MW 2 */ + 11647 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 11648 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11649 "01100001" // /* MW 7 */ + 11650 "11100000" // /* MW 6 */ + 11651 "00000001" // /* MW 5 */ + 11652 "00000010" // /* MW 4 */ + 11653 "01100000" // /* MW 3 */ + 11654 "10010100" // /* MW 2 */ + 11655 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11656 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11657 "01000001" // /* MW 7 */ + 11658 "11100010" // /* MW 6 */ + 11659 "00000000" // /* MW 5 */ + 11660 "00000010" // /* MW 4 */ + 11661 "01100000" // /* MW 3 */ + 11662 "10000100" // /* MW 2 */ + 11663 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11664 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11665 "01100001" // /* MW 7 */ + 11666 "11100000" // /* MW 6 */ + 11667 "00000001" // /* MW 5 */ + 11668 "00000010" // /* MW 4 */ + 11669 "01100000" // /* MW 3 */ + 11670 "10010100" // /* MW 2 */ + 11671 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11672 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11673 "01000001" // /* MW 7 */ + 11674 "11100010" // /* MW 6 */ + 11675 "00000000" // /* MW 5 */ + 11676 "00000010" // /* MW 4 */ + 11677 "01100000" // /* MW 3 */ + 11678 "10000100" // /* MW 2 */ + 11679 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11680 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11681 "01100001" // /* MW 7 */ + 11682 "11100000" // /* MW 6 */ + 11683 "00000001" // /* MW 5 */ + 11684 "00000010" // /* MW 4 */ + 11685 "01100000" // /* MW 3 */ + 11686 "10010100" // /* MW 2 */ + 11687 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11688 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11689 "01000001" // /* MW 7 */ + 11690 "11100010" // /* MW 6 */ + 11691 "00000000" // /* MW 5 */ + 11692 "00000010" // /* MW 4 */ + 11693 "01100000" // /* MW 3 */ + 11694 "10000100" // /* MW 2 */ + 11695 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11696 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11697 "01100001" // /* MW 7 */ + 11698 "11100000" // /* MW 6 */ + 11699 "00000001" // /* MW 5 */ + 11700 "00000010" // /* MW 4 */ + 11701 "01100000" // /* MW 3 */ + 11702 "10010100" // /* MW 2 */ + 11703 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11704 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11705 "00100011" // /* MW 3 */ + 11706 "00011100" // /* MW 2 */ + 11707 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 172 4 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11708 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11709 "00000000" // /* MW 5 */ + 11710 "01010000" // /* MW 4 */ + 11711 "01100000" // /* MW 3 */ + 11712 "10010100" // /* MW 2 */ + 11713 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11714 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11715 "00100011" // /* MW 3 */ + 11716 "00011100" // /* MW 2 */ + 11717 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11718 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11719 "10100011" // /* MW 3 */ + 11720 "00011100" // /* MW 2 */ + 11721 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 11722 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11723 "00100011" // /* MW 3 */ + 11724 "00011100" // /* MW 2 */ + 11725 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 11726 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11727 "10100011" // /* MW 3 */ + 11728 "00011100" // /* MW 2 */ + 11729 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 11731 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 369 first +.src_ref 7 "superkernels.cpp" 374 6 +.function_start + 11744 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11745 "10000000" // /* MW 5 */ + 11746 "11001000" // /* MW 4 */ + 11747 "11001000" // /* MW 3 */ + 11748 "00000111" // /* MW 2 */ + 11749 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first + 11750 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11751 "11000001" // /* MW 5 */ + 11752 "10110101" // /* MW 4 */ + 11753 "11011000" // /* MW 3 */ + 11754 "11000010" // /* MW 2 */ + 11755 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 369 + 11756 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11757 "00000001" // /* MW 5 */ + 11758 "00000000" // /* MW 4 */ + 11759 "00000000" // /* MW 3 */ + 11760 "00001000" // /* MW 2 */ + 11761 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 11762 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11763 "01111001" // /* MW 9 */ + 11764 "01100000" // /* MW 8 */ + 11765 "11001010" // /* MW 7 */ + 11766 "10000001" // /* MW 6 */ + 11767 "00010100" // /* MW 5 */ + 11768 "00100011" // /* MW 4 */ + 11769 "10110000" // /* MW 3 */ + 11770 "00111010" // /* MW 2 */ + 11771 "11111111" // /* MW 1 */ + 11772 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11773 "01110000" // /* MW 7 */ + 11774 "11010000" // /* MW 6 */ + 11775 "00001011" // /* MW 5 */ + 11776 "00000000" // /* MW 4 */ + 11777 "10110000" // /* MW 3 */ + 11778 "10000011" // /* MW 2 */ + 11779 "11111101" // /* MW 1 */ + 11780 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11781 "00010101" // /* MW 3 */ + 11782 "11111100" // /* MW 2 */ + 11783 "00001111" // /* MW 1 */ + 11784 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11785 "00111101" // /* MW 3 */ + 11786 "11110000" // /* MW 2 */ + 11787 "00001111" // /* MW 1 */ + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first +.src_ref 7 "superkernels.cpp" 374 16 first + 11790 "10000100" // JNZ r16, #11936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11936 delay_slots=5 */ + 11791 "00000001" // /* MW 5 */ + 11792 "01000000" // /* MW 4 */ + 11793 "01010000" // /* MW 3 */ + 11794 "00010111" // /* MW 2 */ + 11795 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 30 first +.delay_slot + 11796 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11797 "11111011" // /* MW 3 */ + 11798 "01100011" // /* MW 2 */ + 11799 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11801 "10100000" // /* MW 5 */ + 11802 "11001000" // /* MW 4 */ + 11803 "11000100" // /* MW 3 */ + 11804 "00000111" // /* MW 2 */ + 11805 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11806 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11807 "01110000" // /* MW 7 */ + 11808 "01100000" // /* MW 6 */ + 11809 "00110111" // /* MW 5 */ + 11810 "00000001" // /* MW 4 */ + 11811 "00110000" // /* MW 3 */ + 11812 "11000110" // /* MW 2 */ + 11813 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 11814 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11815 "11000000" // /* MW 3 */ + 11816 "11010110" // /* MW 2 */ + 11817 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 7 "superkernels.cpp" 379 28 +.src_ref 7 "superkernels.cpp" 381 42 +.src_ref 7 "superkernels.cpp" 393 2 +.delay_slot + 11818 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11819 "00010001" // /* MW 9 */ + 11820 "11000000" // /* MW 8 */ + 11821 "10110010" // /* MW 7 */ + 11822 "11110011" // /* MW 6 */ + 11823 "00000001" // /* MW 5 */ + 11824 "00000000" // /* MW 4 */ + 11825 "10110000" // /* MW 3 */ + 11826 "10100011" // /* MW 2 */ + 11827 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11828 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11829 "00010001" // /* MW 9 */ + 11830 "00110100" // /* MW 8 */ + 11831 "00110010" // /* MW 7 */ + 11832 "11110001" // /* MW 6 */ + 11833 "00000001" // /* MW 5 */ + 11834 "00000000" // /* MW 4 */ + 11835 "01100000" // /* MW 3 */ + 11836 "10010001" // /* MW 2 */ + 11837 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11838 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11839 "00010000" // /* MW 9 */ + 11840 "00110010" // /* MW 8 */ + 11841 "00110010" // /* MW 7 */ + 11842 "11110001" // /* MW 6 */ + 11843 "00000001" // /* MW 5 */ + 11844 "00000000" // /* MW 4 */ + 11845 "11100000" // /* MW 3 */ + 11846 "11000000" // /* MW 2 */ + 11847 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11849 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11850 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 11851 "00000001" // /* MW 5 */ + 11852 "00000000" // /* MW 4 */ + 11853 "00010000" // /* MW 3 */ + 11854 "00010110" // /* MW 2 */ + 11855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11859 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11860 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11861 "00110001" // /* MW 3 */ + 11862 "00100000" // /* MW 2 */ + 11863 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11864 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11865 "00000101" // /* MW 3 */ + 11866 "00100000" // /* MW 2 */ + 11867 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11868 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11869 "00010001" // /* MW 3 */ + 11870 "00000110" // /* MW 2 */ + 11871 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 381 42 first +.return_address + 11872 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11873 "00010000" // /* MW 9 */ + 11874 "00101000" // /* MW 8 */ + 11875 "10110010" // /* MW 7 */ + 11876 "11110000" // /* MW 6 */ + 11877 "00000001" // /* MW 5 */ + 11878 "00000000" // /* MW 4 */ + 11879 "11010000" // /* MW 3 */ + 11880 "11000010" // /* MW 2 */ + 11881 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 390 48 + 11882 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11883 "00010000" // /* MW 9 */ + 11884 "00101010" // /* MW 8 */ + 11885 "10110010" // /* MW 7 */ + 11886 "11110001" // /* MW 6 */ + 11887 "00000001" // /* MW 5 */ + 11888 "00000000" // /* MW 4 */ + 11889 "11010000" // /* MW 3 */ + 11890 "11000110" // /* MW 2 */ + 11891 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 28 first +.src_ref 7 "superkernels.cpp" 382 16 +.src_ref 7 "superkernels.cpp" 391 48 + 11892 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11893 "00010000" // /* MW 9 */ + 11894 "00101110" // /* MW 8 */ + 11895 "10110010" // /* MW 7 */ + 11896 "11110000" // /* MW 6 */ + 11897 "00000001" // /* MW 5 */ + 11898 "00000000" // /* MW 4 */ + 11899 "01010000" // /* MW 3 */ + 11900 "11001011" // /* MW 2 */ + 11901 "11101010" // /* MW 1 */ + 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11903 "00000000" // /* MW 1 */ + 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11905 "00000000" // /* MW 1 */ + 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11907 "00000000" // /* MW 1 */ + 11908 "10000100" // J #11952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11952 delay_slots=5 */ + 11909 "00000000" // /* MW 5 */ + 11910 "00000000" // /* MW 4 */ + 11911 "01011000" // /* MW 3 */ + 11912 "00010111" // /* MW 2 */ + 11913 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 +.delay_slot + 11914 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11915 "11000000" // /* MW 5 */ + 11916 "11001000" // /* MW 4 */ + 11917 "11000100" // /* MW 3 */ + 11918 "00000111" // /* MW 2 */ + 11919 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 27 first +.delay_slot + 11920 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11921 "00001111" // /* MW 3 */ + 11922 "01100001" // /* MW 2 */ + 11923 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 first +.delay_slot + 11924 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "01010001" // /* MW 3 */ + 11926 "00000110" // /* MW 2 */ + 11927 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 first +.delay_slot + 11928 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "00010001" // /* MW 3 */ + 11930 "00000110" // /* MW 2 */ + 11931 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 382 16 first +.delay_slot + 11932 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11933 "00010001" // /* MW 3 */ + 11934 "00000110" // /* MW 2 */ + 11935 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 390 48 + 11936 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11937 "10101000" // /* MW 5 */ + 11938 "11001000" // /* MW 4 */ + 11939 "11000110" // /* MW 3 */ + 11940 "00000111" // /* MW 2 */ + 11941 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 + 11942 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11943 "00010000" // /* MW 9 */ + 11944 "00101110" // /* MW 8 */ + 11945 "10110010" // /* MW 7 */ + 11946 "11110000" // /* MW 6 */ + 11947 "00000001" // /* MW 5 */ + 11948 "00000000" // /* MW 4 */ + 11949 "11110000" // /* MW 3 */ + 11950 "00101100" // /* MW 2 */ + 11951 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 11952 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11953 "10000110" // /* MW 3 */ + 11954 "01100111" // /* MW 2 */ + 11955 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 11956 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11957 "00010000" // /* MW 9 */ + 11958 "00100000" // /* MW 8 */ + 11959 "00110010" // /* MW 7 */ + 11960 "11110001" // /* MW 6 */ + 11961 "00000001" // /* MW 5 */ + 11962 "00000000" // /* MW 4 */ + 11963 "11010000" // /* MW 3 */ + 11964 "11101110" // /* MW 2 */ + 11965 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11966 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11967 "00010110" // /* MW 3 */ + 11968 "11111110" // /* MW 2 */ + 11969 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11970 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11971 "00110110" // /* MW 3 */ + 11972 "11111110" // /* MW 2 */ + 11973 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first + 11974 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11975 "01010110" // /* MW 3 */ + 11976 "00000110" // /* MW 2 */ + 11977 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 11978 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11979 "01110110" // /* MW 3 */ + 11980 "01000110" // /* MW 2 */ + 11981 "00000000" // /* MW 1 */ + 11982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11983 "00000000" // /* MW 1 */ + 11984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11985 "00000000" // /* MW 1 */ + 11986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11987 "00000000" // /* MW 1 */ + 11988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11989 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11990 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11991 "00000010" // /* MW 3 */ + 11992 "01100001" // /* MW 2 */ + 11993 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 11994 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11995 "00001110" // /* MW 5 */ + 11996 "01000000" // /* MW 4 */ + 11997 "00111001" // /* MW 3 */ + 11998 "11000010" // /* MW 2 */ + 11999 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 + 12000 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12001 "00010001" // /* MW 3 */ + 12002 "00000110" // /* MW 2 */ + 12003 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 12004 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12005 "11111101" // /* MW 3 */ + 12006 "11100000" // /* MW 2 */ + 12007 "00010111" // /* MW 1 */ + 12008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12009 "00000000" // /* MW 1 */ + 12010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12011 "00000000" // /* MW 1 */ + 12012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12013 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12014 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12015 "00001000" // /* MW 3 */ + 12016 "11010011" // /* MW 2 */ + 12017 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 12018 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12019 "00000110" // /* MW 3 */ + 12020 "01100111" // /* MW 2 */ + 12021 "00011010" // /* MW 1 */ + 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12023 "00000000" // /* MW 1 */ + 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12025 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 12026 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12027 "01110110" // /* MW 3 */ + 12028 "11111111" // /* MW 2 */ + 12029 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 12030 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12031 "00110110" // /* MW 3 */ + 12032 "11111110" // /* MW 2 */ + 12033 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 12034 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12035 "01010110" // /* MW 3 */ + 12036 "11111110" // /* MW 2 */ + 12037 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 12038 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "01110110" // /* MW 3 */ + 12040 "01010110" // /* MW 2 */ + 12041 "00000010" // /* MW 1 */ + 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12043 "00000000" // /* MW 1 */ + 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12045 "00000000" // /* MW 1 */ + 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12047 "00000000" // /* MW 1 */ + 12048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12049 "00000000" // /* MW 1 */ + 12050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12051 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 12052 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12053 "00010010" // /* MW 3 */ + 12054 "10100011" // /* MW 2 */ + 12055 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 12056 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12057 "00110001" // /* MW 3 */ + 12058 "00000110" // /* MW 2 */ + 12059 "00001010" // /* MW 1 */ + 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12061 "00000000" // /* MW 1 */ + 12062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12063 "00000000" // /* MW 1 */ + 12064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12065 "00000000" // /* MW 1 */ + 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12067 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12068 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12069 "00001000" // /* MW 3 */ + 12070 "11010011" // /* MW 2 */ + 12071 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 +.src_ref 7 "superkernels.cpp" 391 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 12072 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12073 "01111001" // /* MW 9 */ + 12074 "01100000" // /* MW 8 */ + 12075 "11001110" // /* MW 7 */ + 12076 "00101001" // /* MW 6 */ + 12077 "00000000" // /* MW 5 */ + 12078 "00000001" // /* MW 4 */ + 12079 "01100000" // /* MW 3 */ + 12080 "00010001" // /* MW 2 */ + 12081 "11010001" // /* MW 1 */ + 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12083 "00000000" // /* MW 1 */ + 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12085 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 12086 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12087 "00011001" // /* MW 3 */ + 12088 "11101110" // /* MW 2 */ + 12089 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 48 first + 12090 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12091 "00111011" // /* MW 5 */ + 12092 "11011000" // /* MW 4 */ + 12093 "11011111" // /* MW 3 */ + 12094 "11000110" // /* MW 2 */ + 12095 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 first +.src_ref 7 "superkernels.cpp" 393 2 + 12096 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12097 "10000001" // /* MW 5 */ + 12098 "11011101" // /* MW 4 */ + 12099 "11010110" // /* MW 3 */ + 12100 "11010010" // /* MW 2 */ + 12101 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12102 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12103 "01010110" // /* MW 3 */ + 12104 "01001110" // /* MW 2 */ + 12105 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12106 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00011110" // /* MW 3 */ + 12108 "01011101" // /* MW 2 */ + 12109 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12110 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "11000000" // /* MW 3 */ + 12112 "01100000" // /* MW 2 */ + 12113 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12115 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12116 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12117 "01110110" // /* MW 3 */ + 12118 "00000110" // /* MW 2 */ + 12119 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 393 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12122 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */ + 12123 "00000001" // /* MW 5 */ + 12124 "00000000" // /* MW 4 */ + 12125 "01011000" // /* MW 3 */ + 12126 "00010110" // /* MW 2 */ + 12127 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12128 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12129 "11000000" // /* MW 3 */ + 12130 "11010100" // /* MW 2 */ + 12131 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12132 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12133 "00001101" // /* MW 3 */ + 12134 "01100011" // /* MW 2 */ + 12135 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 first +.delay_slot + 12136 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "00001101" // /* MW 3 */ + 12138 "00100001" // /* MW 2 */ + 12139 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 +.delay_slot + 12140 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12141 "01000001" // /* MW 3 */ + 12142 "01101001" // /* MW 2 */ + 12143 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12144 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12145 "00000000" // /* MW 15 */ + 12146 "00000000" // /* MW 14 */ + 12147 "10101000" // /* MW 13 */ + 12148 "11100010" // /* MW 12 */ + 12149 "00110100" // /* MW 11 */ + 12150 "00000000" // /* MW 10 */ + 12151 "00000000" // /* MW 9 */ + 12152 "00000000" // /* MW 8 */ + 12153 "01011011" // /* MW 7 */ + 12154 "00000001" // /* MW 6 */ + 12155 "00100000" // /* MW 5 */ + 12156 "00000000" // /* MW 4 */ + 12157 "11110000" // /* MW 3 */ + 12158 "00101100" // /* MW 2 */ + 12159 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 12160 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12161 "01111000" // /* MW 9 */ + 12162 "11010000" // /* MW 8 */ + 12163 "10110011" // /* MW 7 */ + 12164 "00101000" // /* MW 6 */ + 12165 "00000000" // /* MW 5 */ + 12166 "00000001" // /* MW 4 */ + 12167 "11010000" // /* MW 3 */ + 12168 "11000110" // /* MW 2 */ + 12169 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 + 12170 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12171 "11000000" // /* MW 5 */ + 12172 "11001000" // /* MW 4 */ + 12173 "11001100" // /* MW 3 */ + 12174 "00000111" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ + 12176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12177 "00000000" // /* MW 1 */ + 12178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12179 "00000000" // /* MW 1 */ + 12180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12181 "00000000" // /* MW 1 */ + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12186 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12187 "00001000" // /* MW 3 */ + 12188 "01010001" // /* MW 2 */ + 12189 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 12190 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12191 "00110110" // /* MW 3 */ + 12192 "11110110" // /* MW 2 */ + 12193 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 12194 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12195 "00011001" // /* MW 3 */ + 12196 "11101101" // /* MW 2 */ + 12197 "00000111" // /* MW 1 */ + 12198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12199 "00000000" // /* MW 1 */ + 12200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12201 "00000000" // /* MW 1 */ + 12202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12203 "00000000" // /* MW 1 */ + 12204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12205 "00000000" // /* MW 1 */ + 12206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12207 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 12208 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12209 "00010001" // /* MW 3 */ + 12210 "00100011" // /* MW 2 */ + 12211 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 12212 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12213 "01100011" // /* MW 5 */ + 12214 "11101100" // /* MW 4 */ + 12215 "11010011" // /* MW 3 */ + 12216 "11000110" // /* MW 2 */ + 12217 "01001010" // /* MW 1 */ + 12218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12219 "00000000" // /* MW 1 */ + 12220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12221 "00000000" // /* MW 1 */ + 12222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12223 "00000000" // /* MW 1 */ + 12224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12225 "00000000" // /* MW 1 */ + 12226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12227 "00000000" // /* MW 1 */ + 12228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12229 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12230 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12231 "00001000" // /* MW 3 */ + 12232 "01010001" // /* MW 2 */ + 12233 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 +.src_ref 7 "superkernels.cpp" 398 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 12234 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12235 "00010000" // /* MW 9 */ + 12236 "00100000" // /* MW 8 */ + 12237 "10110010" // /* MW 7 */ + 12238 "11110000" // /* MW 6 */ + 12239 "00000001" // /* MW 5 */ + 12240 "00000000" // /* MW 4 */ + 12241 "11010000" // /* MW 3 */ + 12242 "11001110" // /* MW 2 */ + 12243 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 first + 12244 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12245 "01010110" // /* MW 3 */ + 12246 "00000110" // /* MW 2 */ + 12247 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12248 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12249 "00110110" // /* MW 3 */ + 12250 "00000110" // /* MW 2 */ + 12251 "00000001" // /* MW 1 */ + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ + 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12257 "00000000" // /* MW 1 */ + 12258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12259 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 12260 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12261 "00110001" // /* MW 3 */ + 12262 "00100001" // /* MW 2 */ + 12263 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 12264 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12265 "00010001" // /* MW 3 */ + 12266 "11100110" // /* MW 2 */ + 12267 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 16 first + 12268 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12269 "00101000" // /* MW 3 */ + 12270 "01100001" // /* MW 2 */ + 12271 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12272 "10000100" // JNZ r16, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */ + 12273 "00000001" // /* MW 5 */ + 12274 "01000000" // /* MW 4 */ + 12275 "00001000" // /* MW 3 */ + 12276 "00011000" // /* MW 2 */ + 12277 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12285 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 + 12288 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12289 "00000001" // /* MW 3 */ + 12290 "00100000" // /* MW 2 */ + 12291 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 first + 12292 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12293 "11000001" // /* MW 11 */ + 12294 "00001000" // /* MW 10 */ + 12295 "10000011" // /* MW 9 */ + 12296 "00000000" // /* MW 8 */ + 12297 "00000000" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00100000" // /* MW 5 */ + 12300 "00000000" // /* MW 4 */ + 12301 "11110000" // /* MW 3 */ + 12302 "00101100" // /* MW 2 */ + 12303 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 400 + 12304 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12305 "00111001" // /* MW 3 */ + 12306 "11110000" // /* MW 2 */ + 12307 "00000111" // /* MW 1 */ + 12308 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12309 "11110001" // /* MW 3 */ + 12310 "11111101" // /* MW 2 */ + 12311 "00000111" // /* MW 1 */ + 12312 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12313 "10011001" // /* MW 3 */ + 12314 "11110111" // /* MW 2 */ + 12315 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12317 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12318 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12319 "11010001" // /* MW 3 */ + 12320 "11111001" // /* MW 2 */ + 12321 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12325 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12326 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12327 "00000000" // /* MW 3 */ + 12328 "00101000" // /* MW 2 */ + 12329 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12330 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12331 "00001011" // /* MW 3 */ + 12332 "10001110" // /* MW 2 */ + 12333 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 +.delay_slot + 12334 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12335 "00000001" // /* MW 5 */ + 12336 "00000000" // /* MW 4 */ + 12337 "00000000" // /* MW 3 */ + 12338 "11111000" // /* MW 2 */ + 12339 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12343 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 12345 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 2 "conv2d_dw_bf16_params.h" 211 first +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.function_start + 12352 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12353 "00010000" // /* MW 9 */ + 12354 "11100000" // /* MW 8 */ + 12355 "10110011" // /* MW 7 */ + 12356 "11110000" // /* MW 6 */ + 12357 "00000001" // /* MW 5 */ + 12358 "00000000" // /* MW 4 */ + 12359 "11010000" // /* MW 3 */ + 12360 "10000101" // /* MW 2 */ + 12361 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12362 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12363 "01011000" // /* MW 9 */ + 12364 "00000000" // /* MW 8 */ + 12365 "00001000" // /* MW 7 */ + 12366 "01001011" // /* MW 6 */ + 12367 "00000000" // /* MW 5 */ + 12368 "00000001" // /* MW 4 */ + 12369 "11010000" // /* MW 3 */ + 12370 "10000001" // /* MW 2 */ + 12371 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 211 + 12372 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12373 "00000001" // /* MW 5 */ + 12374 "00000000" // /* MW 4 */ + 12375 "00000000" // /* MW 3 */ + 12376 "00001000" // /* MW 2 */ + 12377 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 + 12378 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12379 "00010001" // /* MW 9 */ + 12380 "11100000" // /* MW 8 */ + 12381 "10110011" // /* MW 7 */ + 12382 "11110011" // /* MW 6 */ + 12383 "00000001" // /* MW 5 */ + 12384 "00000000" // /* MW 4 */ + 12385 "10110000" // /* MW 3 */ + 12386 "11110011" // /* MW 2 */ + 12387 "11111110" // /* MW 1 */ + 12388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12389 "00111101" // /* MW 3 */ + 12390 "11111100" // /* MW 2 */ + 12391 "00001111" // /* MW 1 */ + 12392 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12393 "11110101" // /* MW 3 */ + 12394 "11111001" // /* MW 2 */ + 12395 "00001111" // /* MW 1 */ + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12398 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12399 "00101001" // /* MW 3 */ + 12400 "00011100" // /* MW 2 */ + 12401 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12402 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12403 "00001001" // /* MW 3 */ + 12404 "00011100" // /* MW 2 */ + 12405 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12406 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12407 "00101110" // /* MW 3 */ + 12408 "00000100" // /* MW 2 */ + 12409 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12410 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12411 "00001110" // /* MW 3 */ + 12412 "00010100" // /* MW 2 */ + 12413 "00000000" // /* MW 1 */ + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12415 "00000000" // /* MW 1 */ + 12416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12417 "00000000" // /* MW 1 */ + 12418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12419 "00000000" // /* MW 1 */ + 12420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12421 "00000000" // /* MW 1 */ + 12422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12423 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12424 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12425 "00101001" // /* MW 3 */ + 12426 "00000100" // /* MW 2 */ + 12427 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12428 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12429 "00001001" // /* MW 3 */ + 12430 "00010100" // /* MW 2 */ + 12431 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first + 12432 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12433 "00101010" // /* MW 3 */ + 12434 "01011110" // /* MW 2 */ + 12435 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 52 + 12436 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12437 "01001010" // /* MW 3 */ + 12438 "11101110" // /* MW 2 */ + 12439 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12440 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12441 "00101010" // /* MW 3 */ + 12442 "11101100" // /* MW 2 */ + 12443 "00000111" // /* MW 1 */ + 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12445 "00000000" // /* MW 1 */ + 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12447 "00000000" // /* MW 1 */ + 12448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12449 "00000000" // /* MW 1 */ + 12450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12451 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.no_stack_arguments + 12452 "00000100" // JL #15664 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */ + 12453 "00000001" // /* MW 5 */ + 12454 "00000000" // /* MW 4 */ + 12455 "10011000" // /* MW 3 */ + 12456 "00011110" // /* MW 2 */ + 12457 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 38 +.delay_slot + 12458 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12459 "01000011" // /* MW 5 */ + 12460 "10111110" // /* MW 4 */ + 12461 "10111000" // /* MW 3 */ + 12462 "11001010" // /* MW 2 */ + 12463 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 +.delay_slot + 12464 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12465 "00010001" // /* MW 5 */ + 12466 "11000010" // /* MW 4 */ + 12467 "10110000" // /* MW 3 */ + 12468 "10000110" // /* MW 2 */ + 12469 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12470 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12471 "00010101" // /* MW 5 */ + 12472 "11101111" // /* MW 4 */ + 12473 "10110111" // /* MW 3 */ + 12474 "01000010" // /* MW 2 */ + 12475 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12476 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12477 "11110001" // /* MW 3 */ + 12478 "00100010" // /* MW 2 */ + 12479 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12480 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12481 "00000000" // /* MW 15 */ + 12482 "00000000" // /* MW 14 */ + 12483 "01111000" // /* MW 13 */ + 12484 "10100101" // /* MW 12 */ + 12485 "00000001" // /* MW 11 */ + 12486 "10010000" // /* MW 10 */ + 12487 "00001000" // /* MW 9 */ + 12488 "00011110" // /* MW 8 */ + 12489 "01011011" // /* MW 7 */ + 12490 "00000001" // /* MW 6 */ + 12491 "00100000" // /* MW 5 */ + 12492 "00000000" // /* MW 4 */ + 12493 "11110000" // /* MW 3 */ + 12494 "00101100" // /* MW 2 */ + 12495 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.return_address + 12496 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12497 "00000010" // /* MW 5 */ + 12498 "01000000" // /* MW 4 */ + 12499 "00100000" // /* MW 3 */ + 12500 "11010010" // /* MW 2 */ + 12501 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first + 12502 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12503 "01000011" // /* MW 5 */ + 12504 "01001000" // /* MW 4 */ + 12505 "01011000" // /* MW 3 */ + 12506 "11000101" // /* MW 2 */ + 12507 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 52 + 12508 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12509 "01101010" // /* MW 3 */ + 12510 "11101110" // /* MW 2 */ + 12511 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12512 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12513 "00110001" // /* MW 3 */ + 12514 "11101100" // /* MW 2 */ + 12515 "00000111" // /* MW 1 */ + 12516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12517 "00000000" // /* MW 1 */ + 12518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12519 "00000000" // /* MW 1 */ + 12520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12521 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first + 12522 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12523 "01000110" // /* MW 3 */ + 12524 "11101001" // /* MW 2 */ + 12525 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 + 12526 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12527 "00001010" // /* MW 3 */ + 12528 "00110111" // /* MW 2 */ + 12529 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first + 12530 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12531 "01100011" // /* MW 5 */ + 12532 "11000110" // /* MW 4 */ + 12533 "10111000" // /* MW 3 */ + 12534 "01001110" // /* MW 2 */ + 12535 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.no_stack_arguments + 12536 "00111010" // ST r17, [sp, #-32]; JL #15664 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=15664 delay_slots=5 */ + 12537 "01000001" // /* MW 9 */ + 12538 "00000000" // /* MW 8 */ + 12539 "00000000" // /* MW 7 */ + 12540 "10100110" // /* MW 6 */ + 12541 "00000111" // /* MW 5 */ + 12542 "00000000" // /* MW 4 */ + 12543 "10110000" // /* MW 3 */ + 12544 "01000110" // /* MW 2 */ + 12545 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12546 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12547 "00100010" // /* MW 3 */ + 12548 "10101001" // /* MW 2 */ + 12549 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12550 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12551 "00001010" // /* MW 3 */ + 12552 "01110111" // /* MW 2 */ + 12553 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.delay_slot + 12554 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12555 "00010001" // /* MW 3 */ + 12556 "00100101" // /* MW 2 */ + 12557 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12558 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12559 "01110000" // /* MW 3 */ + 12560 "00100110" // /* MW 2 */ + 12561 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 87 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12562 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12563 "01100000" // /* MW 13 */ + 12564 "00101011" // /* MW 12 */ + 12565 "00000000" // /* MW 11 */ + 12566 "00001001" // /* MW 10 */ + 12567 "10011000" // /* MW 9 */ + 12568 "00111101" // /* MW 8 */ + 12569 "00100010" // /* MW 7 */ + 12570 "01000001" // /* MW 6 */ + 12571 "00100100" // /* MW 5 */ + 12572 "00000000" // /* MW 4 */ + 12573 "11110000" // /* MW 3 */ + 12574 "00101100" // /* MW 2 */ + 12575 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 +.return_address + 12576 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12577 "01011000" // /* MW 9 */ + 12578 "01000010" // /* MW 8 */ + 12579 "00000000" // /* MW 7 */ + 12580 "11001000" // /* MW 6 */ + 12581 "00110111" // /* MW 5 */ + 12582 "00111111" // /* MW 4 */ + 12583 "00100000" // /* MW 3 */ + 12584 "00001110" // /* MW 2 */ + 12585 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12586 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12587 "01011000" // /* MW 9 */ + 12588 "11111100" // /* MW 8 */ + 12589 "00101001" // /* MW 7 */ + 12590 "00001000" // /* MW 6 */ + 12591 "10000000" // /* MW 5 */ + 12592 "00000001" // /* MW 4 */ + 12593 "00100000" // /* MW 3 */ + 12594 "11000010" // /* MW 2 */ + 12595 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 + 12596 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12597 "01011000" // /* MW 9 */ + 12598 "00000010" // /* MW 8 */ + 12599 "10001000" // /* MW 7 */ + 12600 "10001000" // /* MW 6 */ + 12601 "01100000" // /* MW 5 */ + 12602 "00000000" // /* MW 4 */ + 12603 "00100000" // /* MW 3 */ + 12604 "11011010" // /* MW 2 */ + 12605 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 + 12606 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12607 "01011000" // /* MW 9 */ + 12608 "00010111" // /* MW 8 */ + 12609 "10001000" // /* MW 7 */ + 12610 "00001011" // /* MW 6 */ + 12611 "01010001" // /* MW 5 */ + 12612 "00000000" // /* MW 4 */ + 12613 "01010000" // /* MW 3 */ + 12614 "01000101" // /* MW 2 */ + 12615 "11100001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 + 12616 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12617 "01011000" // /* MW 9 */ + 12618 "00100000" // /* MW 8 */ + 12619 "10000000" // /* MW 7 */ + 12620 "01001000" // /* MW 6 */ + 12621 "00100111" // /* MW 5 */ + 12622 "00111111" // /* MW 4 */ + 12623 "00100000" // /* MW 3 */ + 12624 "01010110" // /* MW 2 */ + 12625 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12626 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12627 "01011000" // /* MW 9 */ + 12628 "00000001" // /* MW 8 */ + 12629 "01001000" // /* MW 7 */ + 12630 "11001011" // /* MW 6 */ + 12631 "01110000" // /* MW 5 */ + 12632 "00000001" // /* MW 4 */ + 12633 "00100000" // /* MW 3 */ + 12634 "01111010" // /* MW 2 */ + 12635 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 + 12636 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12637 "01011000" // /* MW 9 */ + 12638 "11000000" // /* MW 8 */ + 12639 "11101111" // /* MW 7 */ + 12640 "00001011" // /* MW 6 */ + 12641 "11010000" // /* MW 5 */ + 12642 "00000101" // /* MW 4 */ + 12643 "10000000" // /* MW 3 */ + 12644 "11000000" // /* MW 2 */ + 12645 "11101001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12646 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12647 "00100001" // /* MW 3 */ + 12648 "00101000" // /* MW 2 */ + 12649 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12650 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12651 "00000110" // /* MW 3 */ + 12652 "11000111" // /* MW 2 */ + 12653 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 + 12654 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12655 "00000010" // /* MW 5 */ + 12656 "00110110" // /* MW 4 */ + 12657 "01010000" // /* MW 3 */ + 12658 "11110001" // /* MW 2 */ + 12659 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 + 12660 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12661 "11110101" // /* MW 5 */ + 12662 "00111111" // /* MW 4 */ + 12663 "01001011" // /* MW 3 */ + 12664 "00101000" // /* MW 2 */ + 12665 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12666 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12667 "00011101" // /* MW 5 */ + 12668 "00100000" // /* MW 4 */ + 12669 "11110001" // /* MW 3 */ + 12670 "11100001" // /* MW 2 */ + 12671 "01111000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12672 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12673 "01110000" // /* MW 3 */ + 12674 "00101000" // /* MW 2 */ + 12675 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 + 12676 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12677 "00000001" // /* MW 5 */ + 12678 "10100000" // /* MW 4 */ + 12679 "10010000" // /* MW 3 */ + 12680 "00000000" // /* MW 2 */ + 12681 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first + 12682 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12683 "00000001" // /* MW 5 */ + 12684 "10110100" // /* MW 4 */ + 12685 "10111101" // /* MW 3 */ + 12686 "11100111" // /* MW 2 */ + 12687 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first + 12688 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12689 "00000010" // /* MW 5 */ + 12690 "10100011" // /* MW 4 */ + 12691 "10110000" // /* MW 3 */ + 12692 "00001101" // /* MW 2 */ + 12693 "01111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 240 70 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first + 12694 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12695 "11111111" // /* MW 5 */ + 12696 "00110101" // /* MW 4 */ + 12697 "10110000" // /* MW 3 */ + 12698 "11001101" // /* MW 2 */ + 12699 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first + 12700 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12701 "00001111" // /* MW 3 */ + 12702 "11001101" // /* MW 2 */ + 12703 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first + 12704 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12705 "00011111" // /* MW 3 */ + 12706 "11011111" // /* MW 2 */ + 12707 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first +.src_ref 2 "conv2d_dw_bf16_params.h" 238 79 + 12708 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12709 "11111111" // /* MW 5 */ + 12710 "10110011" // /* MW 4 */ + 12711 "11111001" // /* MW 3 */ + 12712 "01101011" // /* MW 2 */ + 12713 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first + 12714 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12715 "00000111" // /* MW 3 */ + 12716 "00110111" // /* MW 2 */ + 12717 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first + 12718 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12719 "11011111" // /* MW 5 */ + 12720 "10010000" // /* MW 4 */ + 12721 "00110111" // /* MW 3 */ + 12722 "11010110" // /* MW 2 */ + 12723 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first + 12724 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12725 "01010010" // /* MW 3 */ + 12726 "00111000" // /* MW 2 */ + 12727 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first + 12728 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12729 "00101101" // /* MW 3 */ + 12730 "00100101" // /* MW 2 */ + 12731 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 + 12732 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12733 "00111111" // /* MW 5 */ + 12734 "11001000" // /* MW 4 */ + 12735 "00111000" // /* MW 3 */ + 12736 "01001010" // /* MW 2 */ + 12737 "11100101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first + 12738 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12739 "11111011" // /* MW 5 */ + 12740 "01110010" // /* MW 4 */ + 12741 "00111111" // /* MW 3 */ + 12742 "11110010" // /* MW 2 */ + 12743 "11111001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 47 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first + 12744 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12745 "00011111" // /* MW 5 */ + 12746 "01110000" // /* MW 4 */ + 12747 "00111001" // /* MW 3 */ + 12748 "11110010" // /* MW 2 */ + 12749 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first + 12750 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12751 "11111011" // /* MW 5 */ + 12752 "11001110" // /* MW 4 */ + 12753 "00111001" // /* MW 3 */ + 12754 "11001110" // /* MW 2 */ + 12755 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first + 12756 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12757 "11101010" // /* MW 5 */ + 12758 "10110011" // /* MW 4 */ + 12759 "10111001" // /* MW 3 */ + 12760 "00110101" // /* MW 2 */ + 12761 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first + 12762 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12763 "01011011" // /* MW 5 */ + 12764 "01111011" // /* MW 4 */ + 12765 "00111001" // /* MW 3 */ + 12766 "11111110" // /* MW 2 */ + 12767 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12768 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12769 "11100010" // /* MW 5 */ + 12770 "00110011" // /* MW 4 */ + 12771 "11111001" // /* MW 3 */ + 12772 "00100001" // /* MW 2 */ + 12773 "10010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first +.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first + 12774 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12775 "00000100" // /* MW 5 */ + 12776 "11110011" // /* MW 4 */ + 12777 "00111111" // /* MW 3 */ + 12778 "10000010" // /* MW 2 */ + 12779 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first + 12780 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12781 "01101101" // /* MW 3 */ + 12782 "11111111" // /* MW 2 */ + 12783 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 247 73 + 12784 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12785 "11111111" // /* MW 5 */ + 12786 "10111111" // /* MW 4 */ + 12787 "00111001" // /* MW 3 */ + 12788 "01100110" // /* MW 2 */ + 12789 "11110000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first + 12790 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12791 "11011011" // /* MW 5 */ + 12792 "11000110" // /* MW 4 */ + 12793 "00111000" // /* MW 3 */ + 12794 "10000110" // /* MW 2 */ + 12795 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first + 12796 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12797 "11111111" // /* MW 5 */ + 12798 "00110001" // /* MW 4 */ + 12799 "00111001" // /* MW 3 */ + 12800 "10100100" // /* MW 2 */ + 12801 "11000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12802 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12803 "11000011" // /* MW 5 */ + 12804 "11011011" // /* MW 4 */ + 12805 "00110011" // /* MW 3 */ + 12806 "11011010" // /* MW 2 */ + 12807 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12808 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12809 "01011011" // /* MW 5 */ + 12810 "01000011" // /* MW 4 */ + 12811 "00111000" // /* MW 3 */ + 12812 "11001010" // /* MW 2 */ + 12813 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12814 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12815 "01011011" // /* MW 5 */ + 12816 "11111100" // /* MW 4 */ + 12817 "00111001" // /* MW 3 */ + 12818 "10011110" // /* MW 2 */ + 12819 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12820 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12821 "11000001" // /* MW 5 */ + 12822 "11011010" // /* MW 4 */ + 12823 "00111110" // /* MW 3 */ + 12824 "11001110" // /* MW 2 */ + 12825 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first + 12826 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12827 "11110010" // /* MW 5 */ + 12828 "10111111" // /* MW 4 */ + 12829 "00011110" // /* MW 3 */ + 12830 "00100000" // /* MW 2 */ + 12831 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12832 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12833 "10100011" // /* MW 5 */ + 12834 "01000011" // /* MW 4 */ + 12835 "00111000" // /* MW 3 */ + 12836 "11011010" // /* MW 2 */ + 12837 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 + 12838 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12839 "01011001" // /* MW 9 */ + 12840 "11111111" // /* MW 8 */ + 12841 "00001111" // /* MW 7 */ + 12842 "01101110" // /* MW 6 */ + 12843 "01101101" // /* MW 5 */ + 12844 "00011111" // /* MW 4 */ + 12845 "00110000" // /* MW 3 */ + 12846 "11000010" // /* MW 2 */ + 12847 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first + 12848 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12849 "10000001" // /* MW 5 */ + 12850 "01101010" // /* MW 4 */ + 12851 "00111110" // /* MW 3 */ + 12852 "11001010" // /* MW 2 */ + 12853 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 253 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first + 12854 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12855 "11000011" // /* MW 5 */ + 12856 "01010010" // /* MW 4 */ + 12857 "00111010" // /* MW 3 */ + 12858 "11101010" // /* MW 2 */ + 12859 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 + 12860 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12861 "00001000" // /* MW 11 */ + 12862 "00010000" // /* MW 10 */ + 12863 "01101101" // /* MW 9 */ + 12864 "10110010" // /* MW 8 */ + 12865 "00001000" // /* MW 7 */ + 12866 "10101011" // /* MW 6 */ + 12867 "01110001" // /* MW 5 */ + 12868 "00011110" // /* MW 4 */ + 12869 "00000111" // /* MW 3 */ + 12870 "00010001" // /* MW 2 */ + 12871 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first + 12872 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12873 "01110001" // /* MW 3 */ + 12874 "00011110" // /* MW 2 */ + 12875 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first + 12876 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12877 "11111011" // /* MW 5 */ + 12878 "01010010" // /* MW 4 */ + 12879 "00111000" // /* MW 3 */ + 12880 "11000110" // /* MW 2 */ + 12881 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 + 12882 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12883 "10000011" // /* MW 5 */ + 12884 "01000010" // /* MW 4 */ + 12885 "00111100" // /* MW 3 */ + 12886 "11000010" // /* MW 2 */ + 12887 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first + 12888 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12889 "11111011" // /* MW 5 */ + 12890 "01010010" // /* MW 4 */ + 12891 "00111001" // /* MW 3 */ + 12892 "11000110" // /* MW 2 */ + 12893 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12894 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12895 "10000011" // /* MW 5 */ + 12896 "01000010" // /* MW 4 */ + 12897 "00111100" // /* MW 3 */ + 12898 "11000010" // /* MW 2 */ + 12899 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first + 12900 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12901 "01010001" // /* MW 3 */ + 12902 "00011110" // /* MW 2 */ + 12903 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first + 12904 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12905 "00110001" // /* MW 3 */ + 12906 "00011110" // /* MW 2 */ + 12907 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first + 12908 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12909 "00010001" // /* MW 3 */ + 12910 "00001010" // /* MW 2 */ + 12911 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first + 12912 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12913 "00001010" // /* MW 3 */ + 12914 "00000110" // /* MW 2 */ + 12915 "00000111" // /* MW 1 */ + 12916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12917 "00000000" // /* MW 1 */ + 12918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12919 "00000000" // /* MW 1 */ + 12920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12921 "00000000" // /* MW 1 */ + 12922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12923 "00000000" // /* MW 1 */ + 12924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12925 "00000000" // /* MW 1 */ + 12926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12927 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 58 + 12928 "10000100" // JZ r16, #12960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12960 delay_slots=5 */ + 12929 "00000001" // /* MW 5 */ + 12930 "00000000" // /* MW 4 */ + 12931 "01010000" // /* MW 3 */ + 12932 "00011001" // /* MW 2 */ + 12933 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12934 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12935 "01100000" // /* MW 3 */ + 12936 "00111011" // /* MW 2 */ + 12937 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12938 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12939 "00000000" // /* MW 5 */ + 12940 "10100000" // /* MW 4 */ + 12941 "00001001" // /* MW 3 */ + 12942 "01111111" // /* MW 2 */ + 12943 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12949 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12950 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12951 "00000001" // /* MW 9 */ + 12952 "00100110" // /* MW 8 */ + 12953 "00000000" // /* MW 7 */ + 12954 "00000000" // /* MW 6 */ + 12955 "01011011" // /* MW 5 */ + 12956 "00000001" // /* MW 4 */ + 12957 "11110000" // /* MW 3 */ + 12958 "00101100" // /* MW 2 */ + 12959 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 + 12960 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12961 "00010000" // /* MW 9 */ + 12962 "00110100" // /* MW 8 */ + 12963 "00110010" // /* MW 7 */ + 12964 "11110000" // /* MW 6 */ + 12965 "00000001" // /* MW 5 */ + 12966 "00000000" // /* MW 4 */ + 12967 "00100000" // /* MW 3 */ + 12968 "10000111" // /* MW 2 */ + 12969 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12970 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12971 "11100010" // /* MW 5 */ + 12972 "00000100" // /* MW 4 */ + 12973 "01010000" // /* MW 3 */ + 12974 "11000000" // /* MW 2 */ + 12975 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 + 12976 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12977 "11101001" // /* MW 5 */ + 12978 "00000010" // /* MW 4 */ + 12979 "00100001" // /* MW 3 */ + 12980 "10000011" // /* MW 2 */ + 12981 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12982 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12983 "00100101" // /* MW 5 */ + 12984 "00000001" // /* MW 4 */ + 12985 "00100000" // /* MW 3 */ + 12986 "00111110" // /* MW 2 */ + 12987 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first + 12988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12989 "00000001" // /* MW 5 */ + 12990 "00000000" // /* MW 4 */ + 12991 "00000000" // /* MW 3 */ + 12992 "11111000" // /* MW 2 */ + 12993 "11111111" // /* MW 1 */ + 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12995 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12997 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12998 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12999 "00010111" // /* MW 3 */ + 13000 "00000010" // /* MW 2 */ + 13001 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13002 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13003 "01000001" // /* MW 5 */ + 13004 "01110000" // /* MW 4 */ + 13005 "00001111" // /* MW 3 */ + 13006 "00000000" // /* MW 2 */ + 13007 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13008 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13009 "00010110" // /* MW 3 */ + 13010 "01000000" // /* MW 2 */ + 13011 "00001000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13012 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13013 "11000000" // /* MW 3 */ + 13014 "01100000" // /* MW 2 */ + 13015 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13016 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13017 "00000001" // /* MW 3 */ + 13018 "00000001" // /* MW 2 */ + 13019 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 13023 "00000000" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 2 "conv2d_dw_bf16.h" 199 first +.function_start + 13024 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13025 "11000000" // /* MW 3 */ + 13026 "01010110" // /* MW 2 */ + 13027 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 2 "conv2d_dw_bf16.h" 204 82 + 13028 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13029 "10101001" // /* MW 5 */ + 13030 "00000001" // /* MW 4 */ + 13031 "11011110" // /* MW 3 */ + 13032 "10010011" // /* MW 2 */ + 13033 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 1 "io_buffer_main.h" 125 25 + 13034 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13035 "00000010" // /* MW 5 */ + 13036 "11010001" // /* MW 4 */ + 13037 "11010110" // /* MW 3 */ + 13038 "10000011" // /* MW 2 */ + 13039 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 204 82 first + 13040 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13041 "10001010" // /* MW 3 */ + 13042 "11101000" // /* MW 2 */ + 13043 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 first + 13044 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13045 "01000110" // /* MW 3 */ + 13046 "11111101" // /* MW 2 */ + 13047 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13048 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13049 "00100110" // /* MW 3 */ + 13050 "00111101" // /* MW 2 */ + 13051 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13052 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13053 "01000110" // /* MW 3 */ + 13054 "11111111" // /* MW 2 */ + 13055 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13056 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13057 "00100110" // /* MW 3 */ + 13058 "00101111" // /* MW 2 */ + 13059 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 13060 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13061 "00000110" // /* MW 3 */ + 13062 "00101101" // /* MW 2 */ + 13063 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 first + 13064 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13065 "01000110" // /* MW 3 */ + 13066 "11111100" // /* MW 2 */ + 13067 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13068 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13069 "00100110" // /* MW 3 */ + 13070 "00111100" // /* MW 2 */ + 13071 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13072 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13073 "01000110" // /* MW 3 */ + 13074 "11111110" // /* MW 2 */ + 13075 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13076 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13077 "00100110" // /* MW 3 */ + 13078 "00101110" // /* MW 2 */ + 13079 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 13080 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13081 "00000110" // /* MW 3 */ + 13082 "00101100" // /* MW 2 */ + 13083 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 first + 13084 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13085 "11000110" // /* MW 3 */ + 13086 "11111100" // /* MW 2 */ + 13087 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13088 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13089 "10100110" // /* MW 3 */ + 13090 "00111100" // /* MW 2 */ + 13091 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13092 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13093 "11000110" // /* MW 3 */ + 13094 "11111110" // /* MW 2 */ + 13095 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13096 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13097 "10100110" // /* MW 3 */ + 13098 "00101110" // /* MW 2 */ + 13099 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 13100 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13101 "10000110" // /* MW 3 */ + 13102 "00101100" // /* MW 2 */ + 13103 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 first + 13104 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13105 "11000110" // /* MW 3 */ + 13106 "11111111" // /* MW 2 */ + 13107 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 + 13108 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13109 "10100110" // /* MW 3 */ + 13110 "00101111" // /* MW 2 */ + 13111 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13112 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13113 "00010000" // /* MW 9 */ + 13114 "00110100" // /* MW 8 */ + 13115 "00110010" // /* MW 7 */ + 13116 "11110010" // /* MW 6 */ + 13117 "00000001" // /* MW 5 */ + 13118 "00000000" // /* MW 4 */ + 13119 "11010000" // /* MW 3 */ + 13120 "11110000" // /* MW 2 */ + 13121 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 13122 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13123 "10000001" // /* MW 5 */ + 13124 "11000101" // /* MW 4 */ + 13125 "01011000" // /* MW 3 */ + 13126 "10011000" // /* MW 2 */ + 13127 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13128 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13129 "00010000" // /* MW 3 */ + 13130 "00001111" // /* MW 2 */ + 13131 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 204 43 + 13132 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13133 "01011000" // /* MW 11 */ + 13134 "00000000" // /* MW 10 */ + 13135 "01100000" // /* MW 9 */ + 13136 "01101010" // /* MW 8 */ + 13137 "00100000" // /* MW 7 */ + 13138 "00000000" // /* MW 6 */ + 13139 "01101000" // /* MW 5 */ + 13140 "00111011" // /* MW 4 */ + 13141 "01110000" // /* MW 3 */ + 13142 "10000101" // /* MW 2 */ + 13143 "10000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 first +.src_ref 2 "conv2d_dw_bf16.h" 225 4 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13144 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 13145 "01100000" // /* MW 13 */ + 13146 "00001001" // /* MW 12 */ + 13147 "01100010" // /* MW 11 */ + 13148 "00001011" // /* MW 10 */ + 13149 "00010000" // /* MW 9 */ + 13150 "11100000" // /* MW 8 */ + 13151 "00101101" // /* MW 7 */ + 13152 "00000100" // /* MW 6 */ + 13153 "11101001" // /* MW 5 */ + 13154 "00111000" // /* MW 4 */ + 13155 "11010000" // /* MW 3 */ + 13156 "10111000" // /* MW 2 */ + 13157 "01111111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13158 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13159 "01110010" // /* MW 9 */ + 13160 "10010000" // /* MW 8 */ + 13161 "10000000" // /* MW 7 */ + 13162 "00000010" // /* MW 6 */ + 13163 "01001011" // /* MW 5 */ + 13164 "00001100" // /* MW 4 */ + 13165 "11010001" // /* MW 3 */ + 13166 "10110100" // /* MW 2 */ + 13167 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 13168 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13169 "01111110" // /* MW 9 */ + 13170 "11000000" // /* MW 8 */ + 13171 "11100001" // /* MW 7 */ + 13172 "00000011" // /* MW 6 */ + 13173 "10010000" // /* MW 5 */ + 13174 "10101011" // /* MW 4 */ + 13175 "11010001" // /* MW 3 */ + 13176 "00110000" // /* MW 2 */ + 13177 "01101101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 13178 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13179 "01011110" // /* MW 9 */ + 13180 "10010000" // /* MW 8 */ + 13181 "00000111" // /* MW 7 */ + 13182 "00000010" // /* MW 6 */ + 13183 "11110100" // /* MW 5 */ + 13184 "11110000" // /* MW 4 */ + 13185 "11010001" // /* MW 3 */ + 13186 "00001010" // /* MW 2 */ + 13187 "01111001" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 13188 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13189 "10000010" // /* MW 5 */ + 13190 "00000000" // /* MW 4 */ + 13191 "01010000" // /* MW 3 */ + 13192 "00011110" // /* MW 2 */ + 13193 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first + 13194 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13296 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13195 "00010000" // /* MW 11 */ + 13196 "11111000" // /* MW 10 */ + 13197 "01111001" // /* MW 9 */ + 13198 "00001100" // /* MW 8 */ + 13199 "00000000" // /* MW 7 */ + 13200 "00000000" // /* MW 6 */ + 13201 "01001011" // /* MW 5 */ + 13202 "00010000" // /* MW 4 */ + 13203 "11010110" // /* MW 3 */ + 13204 "11000000" // /* MW 2 */ + 13205 "01101001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 + 13206 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13392 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13207 "00010000" // /* MW 11 */ + 13208 "00101000" // /* MW 10 */ + 13209 "10111010" // /* MW 9 */ + 13210 "00001101" // /* MW 8 */ + 13211 "00000000" // /* MW 7 */ + 13212 "00000000" // /* MW 6 */ + 13213 "01001011" // /* MW 5 */ + 13214 "00010000" // /* MW 4 */ + 13215 "11010010" // /* MW 3 */ + 13216 "10010010" // /* MW 2 */ + 13217 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first + 13218 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13219 "00000101" // /* MW 5 */ + 13220 "01100001" // /* MW 4 */ + 13221 "10000100" // /* MW 3 */ + 13222 "00010110" // /* MW 2 */ + 13223 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 + 13224 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13225 "10001010" // /* MW 3 */ + 13226 "00000000" // /* MW 2 */ + 13227 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 244 4 + 13228 "10111010" // LDA r5, [p3]; MOVXM p3, #13456 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13229 "00010000" // /* MW 9 */ + 13230 "01001000" // /* MW 8 */ + 13231 "10110010" // /* MW 7 */ + 13232 "00001101" // /* MW 6 */ + 13233 "00000000" // /* MW 5 */ + 13234 "00000000" // /* MW 4 */ + 13235 "11010000" // /* MW 3 */ + 13236 "10010110" // /* MW 2 */ + 13237 "01100000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first + 13238 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13239 "10101000" // /* MW 9 */ + 13240 "00000001" // /* MW 8 */ + 13241 "10001110" // /* MW 7 */ + 13242 "00001010" // /* MW 6 */ + 13243 "00010100" // /* MW 5 */ + 13244 "00000000" // /* MW 4 */ + 13245 "11110000" // /* MW 3 */ + 13246 "00101100" // /* MW 2 */ + 13247 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.src_ref 2 "conv2d_dw_bf16.h" 271 12 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 +.src_ref 2 "conv2d_dw_bf16.h" 273 12 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 +.src_ref 2 "conv2d_dw_bf16.h" 275 12 +.src_ref 2 "conv2d_dw_bf16.h" 276 12 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 + 13248 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13249 "00000000" // /* MW 15 */ + 13250 "00000000" // /* MW 14 */ + 13251 "01111000" // /* MW 13 */ + 13252 "10111001" // /* MW 12 */ + 13253 "00001110" // /* MW 11 */ + 13254 "00001000" // /* MW 10 */ + 13255 "00110110" // /* MW 9 */ + 13256 "00000000" // /* MW 8 */ + 13257 "01011011" // /* MW 7 */ + 13258 "00000001" // /* MW 6 */ + 13259 "00100000" // /* MW 5 */ + 13260 "00000000" // /* MW 4 */ + 13261 "00000000" // /* MW 3 */ + 13262 "10010001" // /* MW 2 */ + 13263 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13264 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13265 "01101010" // /* MW 15 */ + 13266 "01100011" // /* MW 14 */ + 13267 "10101100" // /* MW 13 */ + 13268 "00000011" // /* MW 12 */ + 13269 "00001110" // /* MW 11 */ + 13270 "00000010" // /* MW 10 */ + 13271 "11010100" // /* MW 9 */ + 13272 "00001101" // /* MW 8 */ + 13273 "01001011" // /* MW 7 */ + 13274 "00010000" // /* MW 6 */ + 13275 "00100000" // /* MW 5 */ + 13276 "00000000" // /* MW 4 */ + 13277 "11110000" // /* MW 3 */ + 13278 "00101100" // /* MW 2 */ + 13279 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13280 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13281 "00011010" // /* MW 15 */ + 13282 "01001000" // /* MW 14 */ + 13283 "11001100" // /* MW 13 */ + 13284 "00111111" // /* MW 12 */ + 13285 "10111001" // /* MW 11 */ + 13286 "11011010" // /* MW 10 */ + 13287 "00101111" // /* MW 9 */ + 13288 "00000100" // /* MW 8 */ + 13289 "01001011" // /* MW 7 */ + 13290 "00010000" // /* MW 6 */ + 13291 "00100101" // /* MW 5 */ + 13292 "00000000" // /* MW 4 */ + 13293 "11010000" // /* MW 3 */ + 13294 "10100011" // /* MW 2 */ + 13295 "01000000" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13297 "01101110" // /* MW 9 */ + 13298 "10000001" // /* MW 8 */ + 13299 "10000100" // /* MW 7 */ + 13300 "00000010" // /* MW 6 */ + 13301 "11110100" // /* MW 5 */ + 13302 "11110000" // /* MW 4 */ + 13303 "01110001" // /* MW 3 */ + 13304 "10110011" // /* MW 2 */ + 13305 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13307 "00000001" // /* MW 9 */ + 13308 "10001001" // /* MW 8 */ + 13309 "10001010" // /* MW 7 */ + 13310 "01000110" // /* MW 6 */ + 13311 "00001011" // /* MW 5 */ + 13312 "10011100" // /* MW 4 */ + 13313 "11101010" // /* MW 3 */ + 13314 "00111000" // /* MW 2 */ + 13315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13317 "00000001" // /* MW 9 */ + 13318 "00110101" // /* MW 8 */ + 13319 "10001001" // /* MW 7 */ + 13320 "11000110" // /* MW 6 */ + 13321 "10000110" // /* MW 5 */ + 13322 "00110000" // /* MW 4 */ + 13323 "01101010" // /* MW 3 */ + 13324 "10110001" // /* MW 2 */ + 13325 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13327 "00000110" // /* MW 3 */ + 13328 "10001001" // /* MW 2 */ + 13329 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13331 "10100001" // /* MW 7 */ + 13332 "01001000" // /* MW 6 */ + 13333 "10001100" // /* MW 5 */ + 13334 "11000110" // /* MW 4 */ + 13335 "10001110" // /* MW 3 */ + 13336 "10110000" // /* MW 2 */ + 13337 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13339 "10100001" // /* MW 7 */ + 13340 "00110110" // /* MW 6 */ + 13341 "10001010" // /* MW 5 */ + 13342 "01000110" // /* MW 4 */ + 13343 "00001111" // /* MW 3 */ + 13344 "10011100" // /* MW 2 */ + 13345 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13347 "00001110" // /* MW 3 */ + 13348 "10001001" // /* MW 2 */ + 13349 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13351 "11100001" // /* MW 7 */ + 13352 "10010010" // /* MW 6 */ + 13353 "10001011" // /* MW 5 */ + 13354 "01000110" // /* MW 4 */ + 13355 "00000011" // /* MW 3 */ + 13356 "00011100" // /* MW 2 */ + 13357 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13359 "11100001" // /* MW 7 */ + 13360 "01010110" // /* MW 6 */ + 13361 "10001000" // /* MW 5 */ + 13362 "01000110" // /* MW 4 */ + 13363 "00000111" // /* MW 3 */ + 13364 "00011100" // /* MW 2 */ + 13365 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13366 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13367 "01101110" // /* MW 9 */ + 13368 "01000001" // /* MW 8 */ + 13369 "00011000" // /* MW 7 */ + 13370 "00000001" // /* MW 6 */ + 13371 "00010000" // /* MW 5 */ + 13372 "00000000" // /* MW 4 */ + 13373 "11110000" // /* MW 3 */ + 13374 "00101100" // /* MW 2 */ + 13375 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13377 "01101010" // /* MW 15 */ + 13378 "01100011" // /* MW 14 */ + 13379 "01111100" // /* MW 13 */ + 13380 "10100101" // /* MW 12 */ + 13381 "00000001" // /* MW 11 */ + 13382 "00000000" // /* MW 10 */ + 13383 "00000000" // /* MW 9 */ + 13384 "00000000" // /* MW 8 */ + 13385 "01011011" // /* MW 7 */ + 13386 "00000001" // /* MW 6 */ + 13387 "00100000" // /* MW 5 */ + 13388 "00000000" // /* MW 4 */ + 13389 "11110000" // /* MW 3 */ + 13390 "00101100" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 13392 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13393 "00011010" // /* MW 15 */ + 13394 "01001000" // /* MW 14 */ + 13395 "01111100" // /* MW 13 */ + 13396 "10100101" // /* MW 12 */ + 13397 "00000001" // /* MW 11 */ + 13398 "00000000" // /* MW 10 */ + 13399 "00000000" // /* MW 9 */ + 13400 "00000000" // /* MW 8 */ + 13401 "01011011" // /* MW 7 */ + 13402 "00000001" // /* MW 6 */ + 13403 "00100000" // /* MW 5 */ + 13404 "00000000" // /* MW 4 */ + 13405 "11110000" // /* MW 3 */ + 13406 "00101100" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13408 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13409 "01101110" // /* MW 9 */ + 13410 "10000001" // /* MW 8 */ + 13411 "10000100" // /* MW 7 */ + 13412 "00000010" // /* MW 6 */ + 13413 "10010000" // /* MW 5 */ + 13414 "01110011" // /* MW 4 */ + 13415 "11110100" // /* MW 3 */ + 13416 "00001100" // /* MW 2 */ + 13417 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13418 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13419 "00000001" // /* MW 7 */ + 13420 "10001001" // /* MW 6 */ + 13421 "10001010" // /* MW 5 */ + 13422 "01000110" // /* MW 4 */ + 13423 "00001011" // /* MW 3 */ + 13424 "10011100" // /* MW 2 */ + 13425 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13426 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13427 "00000001" // /* MW 7 */ + 13428 "00110101" // /* MW 6 */ + 13429 "10001001" // /* MW 5 */ + 13430 "11000110" // /* MW 4 */ + 13431 "10000110" // /* MW 3 */ + 13432 "00110000" // /* MW 2 */ + 13433 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13434 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13435 "00000110" // /* MW 3 */ + 13436 "10001001" // /* MW 2 */ + 13437 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13438 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13439 "10100001" // /* MW 7 */ + 13440 "01001000" // /* MW 6 */ + 13441 "10001100" // /* MW 5 */ + 13442 "01000110" // /* MW 4 */ + 13443 "00001111" // /* MW 3 */ + 13444 "10011100" // /* MW 2 */ + 13445 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13446 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13447 "10100001" // /* MW 9 */ + 13448 "00110110" // /* MW 8 */ + 13449 "10001010" // /* MW 7 */ + 13450 "11000010" // /* MW 6 */ + 13451 "10001110" // /* MW 5 */ + 13452 "10110000" // /* MW 4 */ + 13453 "11110100" // /* MW 3 */ + 13454 "00101100" // /* MW 2 */ + 13455 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13456 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13457 "00011101" // /* MW 5 */ + 13458 "00010010" // /* MW 4 */ + 13459 "10001011" // /* MW 3 */ + 13460 "00011110" // /* MW 2 */ + 13461 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13462 "01011010" // MOVXM le, #13632; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13463 "11100001" // /* MW 9 */ + 13464 "10010010" // /* MW 8 */ + 13465 "10001011" // /* MW 7 */ + 13466 "00000010" // /* MW 6 */ + 13467 "01010100" // /* MW 5 */ + 13468 "10110111" // /* MW 4 */ + 13469 "00000001" // /* MW 3 */ + 13470 "00000000" // /* MW 2 */ + 13471 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13472 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13552; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13473 "11100001" // /* MW 11 */ + 13474 "01010110" // /* MW 10 */ + 13475 "10001000" // /* MW 9 */ + 13476 "00000010" // /* MW 8 */ + 13477 "01001111" // /* MW 7 */ + 13478 "10001111" // /* MW 6 */ + 13479 "00000001" // /* MW 5 */ + 13480 "00000000" // /* MW 4 */ + 13481 "01110000" // /* MW 3 */ + 13482 "10000101" // /* MW 2 */ + 13483 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13484 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13485 "01111111" // /* MW 3 */ + 13486 "01110010" // /* MW 2 */ + 13487 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13488 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13489 "10011011" // /* MW 3 */ + 13490 "00011101" // /* MW 2 */ + 13491 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13492 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13493 "01110100" // /* MW 3 */ + 13494 "00011100" // /* MW 2 */ + 13495 "00111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13496 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13497 "10110100" // /* MW 3 */ + 13498 "01011000" // /* MW 2 */ + 13499 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13500 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13501 "10010110" // /* MW 3 */ + 13502 "00010001" // /* MW 2 */ + 13503 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13504 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13505 "00010110" // /* MW 3 */ + 13506 "00010000" // /* MW 2 */ + 13507 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13508 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13509 "01101100" // /* MW 3 */ + 13510 "01010000" // /* MW 2 */ + 13511 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13512 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13513 "00010100" // /* MW 3 */ + 13514 "01010011" // /* MW 2 */ + 13515 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13516 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13517 "01110000" // /* MW 7 */ + 13518 "00110110" // /* MW 6 */ + 13519 "10101000" // /* MW 5 */ + 13520 "00000010" // /* MW 4 */ + 13521 "01100000" // /* MW 3 */ + 13522 "01000010" // /* MW 2 */ + 13523 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13524 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13525 "00000011" // /* MW 3 */ + 13526 "00011100" // /* MW 2 */ + 13527 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13528 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13529 "01110000" // /* MW 7 */ + 13530 "01000101" // /* MW 6 */ + 13531 "10000000" // /* MW 5 */ + 13532 "00000001" // /* MW 4 */ + 13533 "01100000" // /* MW 3 */ + 13534 "01010010" // /* MW 2 */ + 13535 "01000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13536 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13537 "01000001" // /* MW 7 */ + 13538 "01101101" // /* MW 6 */ + 13539 "10001100" // /* MW 5 */ + 13540 "01000110" // /* MW 4 */ + 13541 "00000111" // /* MW 3 */ + 13542 "00011100" // /* MW 2 */ + 13543 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13544 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13545 "01000001" // /* MW 7 */ + 13546 "00000011" // /* MW 6 */ + 13547 "10001001" // /* MW 5 */ + 13548 "11000110" // /* MW 4 */ + 13549 "10000010" // /* MW 3 */ + 13550 "00110000" // /* MW 2 */ + 13551 "00000010" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 13552 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13553 "01101110" // /* MW 9 */ + 13554 "10000001" // /* MW 8 */ + 13555 "10000100" // /* MW 7 */ + 13556 "00000010" // /* MW 6 */ + 13557 "11110100" // /* MW 5 */ + 13558 "11110000" // /* MW 4 */ + 13559 "01110001" // /* MW 3 */ + 13560 "10110011" // /* MW 2 */ + 13561 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13562 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13563 "00000001" // /* MW 9 */ + 13564 "10001001" // /* MW 8 */ + 13565 "10001010" // /* MW 7 */ + 13566 "01000110" // /* MW 6 */ + 13567 "00001011" // /* MW 5 */ + 13568 "10011100" // /* MW 4 */ + 13569 "11101010" // /* MW 3 */ + 13570 "00111000" // /* MW 2 */ + 13571 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13572 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13573 "00000001" // /* MW 9 */ + 13574 "00110101" // /* MW 8 */ + 13575 "10001001" // /* MW 7 */ + 13576 "11000110" // /* MW 6 */ + 13577 "10000110" // /* MW 5 */ + 13578 "00110000" // /* MW 4 */ + 13579 "01101010" // /* MW 3 */ + 13580 "10110001" // /* MW 2 */ + 13581 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13582 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13583 "00000110" // /* MW 3 */ + 13584 "10001001" // /* MW 2 */ + 13585 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13586 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13587 "10100001" // /* MW 7 */ + 13588 "01001000" // /* MW 6 */ + 13589 "10001100" // /* MW 5 */ + 13590 "11000110" // /* MW 4 */ + 13591 "10001110" // /* MW 3 */ + 13592 "10110000" // /* MW 2 */ + 13593 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13594 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13595 "10100001" // /* MW 7 */ + 13596 "00110110" // /* MW 6 */ + 13597 "10001010" // /* MW 5 */ + 13598 "01000110" // /* MW 4 */ + 13599 "00001111" // /* MW 3 */ + 13600 "10011100" // /* MW 2 */ + 13601 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13602 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13603 "00001110" // /* MW 3 */ + 13604 "10001001" // /* MW 2 */ + 13605 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13606 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13607 "11100001" // /* MW 7 */ + 13608 "10010010" // /* MW 6 */ + 13609 "10001011" // /* MW 5 */ + 13610 "01000110" // /* MW 4 */ + 13611 "00000011" // /* MW 3 */ + 13612 "00011100" // /* MW 2 */ + 13613 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13614 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13615 "11100001" // /* MW 7 */ + 13616 "01010110" // /* MW 6 */ + 13617 "10001000" // /* MW 5 */ + 13618 "01000110" // /* MW 4 */ + 13619 "00000111" // /* MW 3 */ + 13620 "00011100" // /* MW 2 */ + 13621 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13622 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13623 "00000101" // /* MW 5 */ + 13624 "01100001" // /* MW 4 */ + 13625 "11110100" // /* MW 3 */ + 13626 "00101100" // /* MW 2 */ + 13627 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13628 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13629 "01000001" // /* MW 3 */ + 13630 "01101101" // /* MW 2 */ + 13631 "10001100" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13632 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13633 "00011010" // /* MW 15 */ + 13634 "01001000" // /* MW 14 */ + 13635 "01111100" // /* MW 13 */ + 13636 "10100101" // /* MW 12 */ + 13637 "00000001" // /* MW 11 */ + 13638 "00000000" // /* MW 10 */ + 13639 "00000000" // /* MW 9 */ + 13640 "00000000" // /* MW 8 */ + 13641 "01011011" // /* MW 7 */ + 13642 "00000001" // /* MW 6 */ + 13643 "00100000" // /* MW 5 */ + 13644 "00000000" // /* MW 4 */ + 13645 "11110000" // /* MW 3 */ + 13646 "00101100" // /* MW 2 */ + 13647 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 244 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13648 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 13649 "01101000" // /* MW 11 */ + 13650 "10000001" // /* MW 10 */ + 13651 "10000100" // /* MW 9 */ + 13652 "00000010" // /* MW 8 */ + 13653 "00100111" // /* MW 7 */ + 13654 "00000100" // /* MW 6 */ + 13655 "00100000" // /* MW 5 */ + 13656 "11100111" // /* MW 4 */ + 13657 "11111000" // /* MW 3 */ + 13658 "00001100" // /* MW 2 */ + 13659 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13660 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13661 "00000001" // /* MW 7 */ + 13662 "10001001" // /* MW 6 */ + 13663 "10001010" // /* MW 5 */ + 13664 "01000110" // /* MW 4 */ + 13665 "00001011" // /* MW 3 */ + 13666 "10011100" // /* MW 2 */ + 13667 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13668 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13669 "00000001" // /* MW 7 */ + 13670 "00110101" // /* MW 6 */ + 13671 "10001001" // /* MW 5 */ + 13672 "11000110" // /* MW 4 */ + 13673 "10000110" // /* MW 3 */ + 13674 "00110000" // /* MW 2 */ + 13675 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13676 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13677 "00000110" // /* MW 3 */ + 13678 "10001001" // /* MW 2 */ + 13679 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13680 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13681 "10100001" // /* MW 7 */ + 13682 "01001000" // /* MW 6 */ + 13683 "10001100" // /* MW 5 */ + 13684 "01000110" // /* MW 4 */ + 13685 "00001111" // /* MW 3 */ + 13686 "10011100" // /* MW 2 */ + 13687 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13688 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13689 "10100001" // /* MW 7 */ + 13690 "00110110" // /* MW 6 */ + 13691 "10001010" // /* MW 5 */ + 13692 "11000110" // /* MW 4 */ + 13693 "10001110" // /* MW 3 */ + 13694 "10110000" // /* MW 2 */ + 13695 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13696 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13697 "00001110" // /* MW 3 */ + 13698 "10001001" // /* MW 2 */ + 13699 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13700 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13701 "11100001" // /* MW 3 */ + 13702 "10010010" // /* MW 2 */ + 13703 "10001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13704 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13705 "11100001" // /* MW 3 */ + 13706 "01010110" // /* MW 2 */ + 13707 "10001000" // /* MW 1 */ + 13708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13709 "00000000" // /* MW 1 */ + 13710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13711 "00000000" // /* MW 1 */ + 13712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13713 "00000000" // /* MW 1 */ + 13714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13715 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first + 13716 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13717 "10010110" // /* MW 3 */ + 13718 "00010001" // /* MW 2 */ + 13719 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 290 first + 13720 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13721 "00000000" // /* MW 5 */ + 13722 "01010000" // /* MW 4 */ + 13723 "11000000" // /* MW 3 */ + 13724 "00000010" // /* MW 2 */ + 13725 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13726 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13727 "01101100" // /* MW 3 */ + 13728 "01010000" // /* MW 2 */ + 13729 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.delay_slot + 13730 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13731 "00010100" // /* MW 3 */ + 13732 "01010011" // /* MW 2 */ + 13733 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13734 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13735 "01101100" // /* MW 3 */ + 13736 "01010000" // /* MW 2 */ + 13737 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.delay_slot + 13738 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13739 "00010011" // /* MW 3 */ + 13740 "10001010" // /* MW 2 */ + 13741 "00001010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 13742 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13743 "10010011" // /* MW 3 */ + 13744 "00111010" // /* MW 2 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 13745 "00001010" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 444 first +.src_ref 7 "superkernels.cpp" 449 6 +.function_start + 13760 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13761 "10000000" // /* MW 5 */ + 13762 "11001000" // /* MW 4 */ + 13763 "11001000" // /* MW 3 */ + 13764 "00000111" // /* MW 2 */ + 13765 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 first + 13766 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13767 "01000001" // /* MW 5 */ + 13768 "00101111" // /* MW 4 */ + 13769 "11010000" // /* MW 3 */ + 13770 "11000010" // /* MW 2 */ + 13771 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 444 + 13772 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13773 "00000001" // /* MW 5 */ + 13774 "00000000" // /* MW 4 */ + 13775 "00000000" // /* MW 3 */ + 13776 "00010000" // /* MW 2 */ + 13777 "00000000" // /* MW 1 */ + 13778 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13779 "01110000" // /* MW 7 */ + 13780 "01110000" // /* MW 6 */ + 13781 "00101101" // /* MW 5 */ + 13782 "00000010" // /* MW 4 */ + 13783 "10110000" // /* MW 3 */ + 13784 "00111010" // /* MW 2 */ + 13785 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 + 13786 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13787 "01110000" // /* MW 7 */ + 13788 "11110000" // /* MW 6 */ + 13789 "10101000" // /* MW 5 */ + 13790 "00000001" // /* MW 4 */ + 13791 "10110000" // /* MW 3 */ + 13792 "10110110" // /* MW 2 */ + 13793 "11111111" // /* MW 1 */ + 13794 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13795 "00011101" // /* MW 3 */ + 13796 "11101100" // /* MW 2 */ + 13797 "00001111" // /* MW 1 */ + 13798 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13799 "10011101" // /* MW 3 */ + 13800 "11110111" // /* MW 2 */ + 13801 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 + 13802 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13803 "01110000" // /* MW 7 */ + 13804 "01100000" // /* MW 6 */ + 13805 "11001010" // /* MW 5 */ + 13806 "00000001" // /* MW 4 */ + 13807 "10110000" // /* MW 3 */ + 13808 "00000010" // /* MW 2 */ + 13809 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 +.src_ref 7 "superkernels.cpp" 449 16 + 13810 "10000100" // JNZ r16, #13936 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13936 delay_slots=5 */ + 13811 "00000001" // /* MW 5 */ + 13812 "01000000" // /* MW 4 */ + 13813 "00111000" // /* MW 3 */ + 13814 "00011011" // /* MW 2 */ + 13815 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 13816 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13817 "11000000" // /* MW 3 */ + 13818 "11010110" // /* MW 2 */ + 13819 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 22 first +.delay_slot + 13820 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13821 "10010000" // /* MW 3 */ + 13822 "01100010" // /* MW 2 */ + 13823 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 30 +.delay_slot + 13824 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13825 "11111011" // /* MW 3 */ + 13826 "01100011" // /* MW 2 */ + 13827 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13828 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13829 "10100000" // /* MW 5 */ + 13830 "11001000" // /* MW 4 */ + 13831 "11000110" // /* MW 3 */ + 13832 "00000111" // /* MW 2 */ + 13833 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13834 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13835 "00110001" // /* MW 3 */ + 13836 "00000110" // /* MW 2 */ + 13837 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13838 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13839 "00010001" // /* MW 9 */ + 13840 "00110100" // /* MW 8 */ + 13841 "10110010" // /* MW 7 */ + 13842 "11110000" // /* MW 6 */ + 13843 "00000001" // /* MW 5 */ + 13844 "00000000" // /* MW 4 */ + 13845 "01100000" // /* MW 3 */ + 13846 "10010001" // /* MW 2 */ + 13847 "11110000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13848 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13849 "00010000" // /* MW 11 */ + 13850 "00110010" // /* MW 10 */ + 13851 "10110010" // /* MW 9 */ + 13852 "11110000" // /* MW 8 */ + 13853 "00000001" // /* MW 7 */ + 13854 "00000000" // /* MW 6 */ + 13855 "10001011" // /* MW 5 */ + 13856 "10001000" // /* MW 4 */ + 13857 "11100000" // /* MW 3 */ + 13858 "11000000" // /* MW 2 */ + 13859 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13861 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 13862 "00000100" // JL #12352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12352 delay_slots=5 */ + 13863 "00000001" // /* MW 5 */ + 13864 "00000000" // /* MW 4 */ + 13865 "00100000" // /* MW 3 */ + 13866 "00011000" // /* MW 2 */ + 13867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13871 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13872 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13873 "00110001" // /* MW 3 */ + 13874 "00100000" // /* MW 2 */ + 13875 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 13876 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13877 "00000101" // /* MW 3 */ + 13878 "00100000" // /* MW 2 */ + 13879 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 13880 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13881 "01110000" // /* MW 7 */ + 13882 "10100101" // /* MW 6 */ + 13883 "00000001" // /* MW 5 */ + 13884 "00000000" // /* MW 4 */ + 13885 "00110000" // /* MW 3 */ + 13886 "11000010" // /* MW 2 */ + 13887 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 +.src_ref 7 "superkernels.cpp" 461 2 +.return_address + 13888 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13889 "00000000" // /* MW 7 */ + 13890 "10000010" // /* MW 6 */ + 13891 "00110011" // /* MW 5 */ + 13892 "00000001" // /* MW 4 */ + 13893 "01100000" // /* MW 3 */ + 13894 "10010001" // /* MW 2 */ + 13895 "00110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 17 first + 13896 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13897 "00111010" // /* MW 3 */ + 13898 "00000110" // /* MW 2 */ + 13899 "00000010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 +.src_ref 7 "superkernels.cpp" 453 15 first + 13900 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13901 "00010000" // /* MW 9 */ + 13902 "00110000" // /* MW 8 */ + 13903 "00110010" // /* MW 7 */ + 13904 "11110001" // /* MW 6 */ + 13905 "00000001" // /* MW 5 */ + 13906 "00000000" // /* MW 4 */ + 13907 "01010000" // /* MW 3 */ + 13908 "11000011" // /* MW 2 */ + 13909 "01000100" // /* MW 1 */ + 13910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13911 "00000000" // /* MW 1 */ + 13912 "10000100" // J #13952 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13952 delay_slots=5 */ + 13913 "00000000" // /* MW 5 */ + 13914 "00000000" // /* MW 4 */ + 13915 "01000000" // /* MW 3 */ + 13916 "00011011" // /* MW 2 */ + 13917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 +.src_ref 7 "superkernels.cpp" 457 26 +.delay_slot + 13918 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13919 "10110000" // /* MW 5 */ + 13920 "11001000" // /* MW 4 */ + 13921 "11000110" // /* MW 3 */ + 13922 "00000111" // /* MW 2 */ + 13923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13927 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 first +.delay_slot + 13928 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13929 "00110001" // /* MW 3 */ + 13930 "00000110" // /* MW 2 */ + 13931 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 first +.delay_slot + 13932 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13933 "00010001" // /* MW 3 */ + 13934 "00000110" // /* MW 2 */ + 13935 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 7 "superkernels.cpp" 457 26 + 13936 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13937 "00000000" // /* MW 15 */ + 13938 "00000000" // /* MW 14 */ + 13939 "00010000" // /* MW 13 */ + 13940 "00101100" // /* MW 12 */ + 13941 "10110010" // /* MW 11 */ + 13942 "11110001" // /* MW 10 */ + 13943 "00000001" // /* MW 9 */ + 13944 "00000000" // /* MW 8 */ + 13945 "01011011" // /* MW 7 */ + 13946 "00000001" // /* MW 6 */ + 13947 "00100000" // /* MW 5 */ + 13948 "00000000" // /* MW 4 */ + 13949 "11110000" // /* MW 3 */ + 13950 "00101100" // /* MW 2 */ + 13951 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 218 49 first + 13952 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13953 "10000110" // /* MW 3 */ + 13954 "01100111" // /* MW 2 */ + 13955 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 +.src_ref 1 "io_buffer_main.h" 218 49 + 13956 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13957 "00010000" // /* MW 9 */ + 13958 "00101000" // /* MW 8 */ + 13959 "00110010" // /* MW 7 */ + 13960 "11110010" // /* MW 6 */ + 13961 "00000001" // /* MW 5 */ + 13962 "00000000" // /* MW 4 */ + 13963 "11010000" // /* MW 3 */ + 13964 "11101110" // /* MW 2 */ + 13965 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 13966 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13967 "00010110" // /* MW 3 */ + 13968 "11111110" // /* MW 2 */ + 13969 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 13970 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13971 "00110110" // /* MW 3 */ + 13972 "11111110" // /* MW 2 */ + 13973 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 13974 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13975 "01010110" // /* MW 3 */ + 13976 "01000110" // /* MW 2 */ + 13977 "00000010" // /* MW 1 */ + 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13979 "00000000" // /* MW 1 */ + 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13981 "00000000" // /* MW 1 */ + 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13983 "00000000" // /* MW 1 */ + 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13985 "00000000" // /* MW 1 */ + 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13987 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 13988 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13989 "00000010" // /* MW 3 */ + 13990 "01100001" // /* MW 2 */ + 13991 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 13992 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13993 "00010001" // /* MW 3 */ + 13994 "00000110" // /* MW 2 */ + 13995 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 13996 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13997 "11111101" // /* MW 3 */ + 13998 "11100000" // /* MW 2 */ + 13999 "00010111" // /* MW 1 */ + 14000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14001 "00000000" // /* MW 1 */ + 14002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14003 "00000000" // /* MW 1 */ + 14004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14005 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14006 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14007 "00001000" // /* MW 3 */ + 14008 "10010011" // /* MW 2 */ + 14009 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 +.src_ref 7 "superkernels.cpp" 459 47 +.src_ref 7 "superkernels.cpp" 464 6 +.src_ref 7 "superkernels.cpp" 465 16 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 14010 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14011 "00010000" // /* MW 9 */ + 14012 "00100000" // /* MW 8 */ + 14013 "10110010" // /* MW 7 */ + 14014 "11110011" // /* MW 6 */ + 14015 "00000001" // /* MW 5 */ + 14016 "00000000" // /* MW 4 */ + 14017 "00000000" // /* MW 3 */ + 14018 "00101111" // /* MW 2 */ + 14019 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 + 14020 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14021 "11000001" // /* MW 5 */ + 14022 "00101011" // /* MW 4 */ + 14023 "00101000" // /* MW 3 */ + 14024 "00000000" // /* MW 2 */ + 14025 "00000110" // /* MW 1 */ + 14026 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14027 "01011010" // /* MW 3 */ + 14028 "01101000" // /* MW 2 */ + 14029 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 14030 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14031 "10000001" // /* MW 5 */ + 14032 "00101001" // /* MW 4 */ + 14033 "00100111" // /* MW 3 */ + 14034 "11010011" // /* MW 2 */ + 14035 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 first + 14036 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14037 "00110110" // /* MW 3 */ + 14038 "00000110" // /* MW 2 */ + 14039 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 26 +.src_ref 7 "superkernels.cpp" 461 2 + 14040 "10111010" // LDA r16, [p3]; MOVXM p3, #509888 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14041 "00010000" // /* MW 9 */ + 14042 "11100000" // /* MW 8 */ + 14043 "10110011" // /* MW 7 */ + 14044 "11110001" // /* MW 6 */ + 14045 "00000001" // /* MW 5 */ + 14046 "00000000" // /* MW 4 */ + 14047 "11010000" // /* MW 3 */ + 14048 "11000010" // /* MW 2 */ + 14049 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 14050 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14051 "01010110" // /* MW 3 */ + 14052 "00000110" // /* MW 2 */ + 14053 "00000111" // /* MW 1 */ + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ + 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14057 "00000000" // /* MW 1 */ + 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14059 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 14060 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14061 "01110110" // /* MW 3 */ + 14062 "00000110" // /* MW 2 */ + 14063 "00000101" // /* MW 1 */ + 14064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 24 first + 14066 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14067 "00001111" // /* MW 3 */ + 14068 "01100001" // /* MW 2 */ + 14069 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 14070 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14071 "00000111" // /* MW 3 */ + 14072 "10100010" // /* MW 2 */ + 14073 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first + 14074 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14075 "11111101" // /* MW 3 */ + 14076 "00100000" // /* MW 2 */ + 14077 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 first +.no_stack_arguments + 14078 "00000100" // JL #13024 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13024 delay_slots=5 */ + 14079 "00000001" // /* MW 5 */ + 14080 "00000000" // /* MW 4 */ + 14081 "01110000" // /* MW 3 */ + 14082 "00011001" // /* MW 2 */ + 14083 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first +.delay_slot + 14084 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14085 "00110001" // /* MW 3 */ + 14086 "00000110" // /* MW 2 */ + 14087 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first +.delay_slot + 14088 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14089 "11000001" // /* MW 3 */ + 14090 "01001001" // /* MW 2 */ + 14091 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 201 10 first +.delay_slot + 14092 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14093 "00100101" // /* MW 3 */ + 14094 "10110100" // /* MW 2 */ + 14095 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 first +.delay_slot + 14096 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14097 "00010101" // /* MW 3 */ + 14098 "10111011" // /* MW 2 */ + 14099 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 +.delay_slot + 14100 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14101 "11000001" // /* MW 11 */ + 14102 "10001010" // /* MW 10 */ + 14103 "11011111" // /* MW 9 */ + 14104 "00000011" // /* MW 8 */ + 14105 "00000000" // /* MW 7 */ + 14106 "00000000" // /* MW 6 */ + 14107 "00100000" // /* MW 5 */ + 14108 "00000000" // /* MW 4 */ + 14109 "11110000" // /* MW 3 */ + 14110 "00101100" // /* MW 2 */ + 14111 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 14112 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14113 "00001010" // /* MW 3 */ + 14114 "01100111" // /* MW 2 */ + 14115 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 14116 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14117 "00010110" // /* MW 3 */ + 14118 "00000110" // /* MW 2 */ + 14119 "00000010" // /* MW 1 */ + 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14121 "00000000" // /* MW 1 */ + 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14123 "00000000" // /* MW 1 */ + 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14125 "00000000" // /* MW 1 */ + 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14127 "00000000" // /* MW 1 */ + 14128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14129 "00000000" // /* MW 1 */ + 14130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14131 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 14132 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14133 "11111000" // /* MW 3 */ + 14134 "00010000" // /* MW 2 */ + 14135 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 14136 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14137 "00010000" // /* MW 9 */ + 14138 "00110000" // /* MW 8 */ + 14139 "10110010" // /* MW 7 */ + 14140 "11110000" // /* MW 6 */ + 14141 "00000001" // /* MW 5 */ + 14142 "00000000" // /* MW 4 */ + 14143 "11010000" // /* MW 3 */ + 14144 "11000010" // /* MW 2 */ + 14145 "01011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 first + 14146 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14147 "01010110" // /* MW 3 */ + 14148 "00000110" // /* MW 2 */ + 14149 "00000001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 14150 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14151 "00110110" // /* MW 3 */ + 14152 "00000110" // /* MW 2 */ + 14153 "00000111" // /* MW 1 */ + 14154 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14155 "10011001" // /* MW 3 */ + 14156 "11110100" // /* MW 2 */ + 14157 "00000111" // /* MW 1 */ + 14158 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14159 "11010001" // /* MW 3 */ + 14160 "11111001" // /* MW 2 */ + 14161 "00000111" // /* MW 1 */ + 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14163 "00000000" // /* MW 1 */ + 14164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14165 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 14166 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14167 "00000001" // /* MW 3 */ + 14168 "11100001" // /* MW 2 */ + 14169 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 14170 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14171 "00010001" // /* MW 3 */ + 14172 "11100110" // /* MW 2 */ + 14173 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 16 first + 14174 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14175 "00101000" // /* MW 3 */ + 14176 "01100001" // /* MW 2 */ + 14177 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 14178 "10000100" // JNZ r16, #14208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14208 delay_slots=5 */ + 14179 "00000001" // /* MW 5 */ + 14180 "01000000" // /* MW 4 */ + 14181 "11000000" // /* MW 3 */ + 14182 "00011011" // /* MW 2 */ + 14183 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 +.delay_slot + 14184 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14185 "00000001" // /* MW 3 */ + 14186 "00110000" // /* MW 2 */ + 14187 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14193 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14195 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 first + 14196 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14197 "11000001" // /* MW 11 */ + 14198 "10001000" // /* MW 10 */ + 14199 "10000011" // /* MW 9 */ + 14200 "00000011" // /* MW 8 */ + 14201 "00000000" // /* MW 7 */ + 14202 "00000000" // /* MW 6 */ + 14203 "00100000" // /* MW 5 */ + 14204 "00000000" // /* MW 4 */ + 14205 "11110000" // /* MW 3 */ + 14206 "00101100" // /* MW 2 */ + 14207 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 467 + 14208 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14209 "01000001" // /* MW 5 */ + 14210 "11101101" // /* MW 4 */ + 14211 "00101110" // /* MW 3 */ + 14212 "10110110" // /* MW 2 */ + 14213 "11111111" // /* MW 1 */ + 14214 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14215 "11110001" // /* MW 3 */ + 14216 "11110001" // /* MW 2 */ + 14217 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 first + 14218 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 14219 "00000000" // /* MW 3 */ + 14220 "00101000" // /* MW 2 */ + 14221 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 +.delay_slot + 14222 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14223 "00000001" // /* MW 5 */ + 14224 "00000000" // /* MW 4 */ + 14225 "00000000" // /* MW 3 */ + 14226 "11110000" // /* MW 2 */ + 14227 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14233 "00000000" // /* MW 1 */ +.delay_slot + 14234 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14235 "11000000" // /* MW 3 */ + 14236 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 14237 "00011111" // /* MW 1 */ +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_begin0 +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.function superkernel_conv_eltbinary _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE +.src_ref 7 "superkernels.cpp" 578 +.src_ref 7 "superkernels.cpp" 578 first +.function_start + 14240 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14241 "00000001" // /* MW 5 */ + 14242 "00000000" // /* MW 4 */ + 14243 "00000000" // /* MW 3 */ + 14244 "00001000" // /* MW 2 */ + 14245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 + 14246 "00111010" // ST p7, [sp, #-8]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14247 "00010001" // /* MW 9 */ + 14248 "00100000" // /* MW 8 */ + 14249 "10110010" // /* MW 7 */ + 14250 "11110011" // /* MW 6 */ + 14251 "00000001" // /* MW 5 */ + 14252 "00000000" // /* MW 4 */ + 14253 "10110000" // /* MW 3 */ + 14254 "01110011" // /* MW 2 */ + 14255 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 first + 14256 "10111010" // LDA r16, [p7]; ST p6, [sp, #-4]; MOV r17, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14257 "01110010" // /* MW 9 */ + 14258 "01110000" // /* MW 8 */ + 14259 "00101101" // /* MW 7 */ + 14260 "10000010" // /* MW 6 */ + 14261 "00011101" // /* MW 5 */ + 14262 "11111111" // /* MW 4 */ + 14263 "11010111" // /* MW 3 */ + 14264 "11000010" // /* MW 2 */ + 14265 "11100000" // /* MW 1 */ + 14266 "10011000" // ST p4, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14267 "00011101" // /* MW 3 */ + 14268 "11110110" // /* MW 2 */ + 14269 "00001111" // /* MW 1 */ + 14270 "10011000" // ST p2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14271 "00011101" // /* MW 3 */ + 14272 "11110001" // /* MW 2 */ + 14273 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 7 "superkernels.cpp" 590 35 +.src_ref 7 "superkernels.cpp" 599 105 +.src_ref 7 "superkernels.cpp" 629 34 + 14274 "00000010" // ST lr, [sp, #-20]; MOV p7, p3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 14275 "01110000" // /* MW 7 */ + 14276 "01100000" // /* MW 6 */ + 14277 "10110011" // /* MW 5 */ + 14278 "00000011" // /* MW 4 */ + 14279 "10110000" // /* MW 3 */ + 14280 "10000111" // /* MW 2 */ + 14281 "11111101" // /* MW 1 */ + 14282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14283 "00000000" // /* MW 1 */ + 14284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14285 "00000000" // /* MW 1 */ + 14286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14287 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 583 6 +.src_ref 7 "superkernels.cpp" 583 16 + 14288 "10000100" // JNZ r16, #14688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14688 delay_slots=5 */ + 14289 "00000001" // /* MW 5 */ + 14290 "01000000" // /* MW 4 */ + 14291 "10110000" // /* MW 3 */ + 14292 "00011100" // /* MW 2 */ + 14293 "10000000" // /* MW 1 */ +.delay_slot + 14294 "10011000" // ST p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14295 "00011101" // /* MW 3 */ + 14296 "11101000" // /* MW 2 */ + 14297 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 22 first +.delay_slot + 14298 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14299 "10010000" // /* MW 3 */ + 14300 "01100010" // /* MW 2 */ + 14301 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 30 +.delay_slot + 14302 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14303 "11111011" // /* MW 3 */ + 14304 "01100011" // /* MW 2 */ + 14305 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 11 +.delay_slot + 14306 "01000100" // MOVXM p6, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14307 "10100000" // /* MW 5 */ + 14308 "11001000" // /* MW 4 */ + 14309 "11001100" // /* MW 3 */ + 14310 "00000111" // /* MW 2 */ + 14311 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 580 11 +.delay_slot + 14312 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14313 "00110001" // /* MW 3 */ + 14314 "00000110" // /* MW 2 */ + 14315 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 14316 "10111010" // MOVA r0, #1; MOVXM p6, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14317 "00010000" // /* MW 9 */ + 14318 "00110100" // /* MW 8 */ + 14319 "00110010" // /* MW 7 */ + 14320 "11110011" // /* MW 6 */ + 14321 "00000001" // /* MW 5 */ + 14322 "00000000" // /* MW 4 */ + 14323 "00000000" // /* MW 3 */ + 14324 "00100000" // /* MW 2 */ + 14325 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 14326 "01110110" // ST.s8 r16, [p6]; MOVS p6, p1; MOVXM p0, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14327 "00010000" // /* MW 11 */ + 14328 "00110010" // /* MW 10 */ + 14329 "00110010" // /* MW 9 */ + 14330 "11110000" // /* MW 8 */ + 14331 "00000001" // /* MW 7 */ + 14332 "00000000" // /* MW 6 */ + 14333 "10001011" // /* MW 5 */ + 14334 "10000100" // /* MW 4 */ + 14335 "11100110" // /* MW 3 */ + 14336 "11000000" // /* MW 2 */ + 14337 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 7 "superkernels.cpp" 587 4 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14338 "10111010" // MOVA r1, #0; MOVXM p1, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14339 "00010000" // /* MW 9 */ + 14340 "00000000" // /* MW 8 */ + 14341 "10110011" // /* MW 7 */ + 14342 "11110000" // /* MW 6 */ + 14343 "00000001" // /* MW 5 */ + 14344 "00000000" // /* MW 4 */ + 14345 "00000000" // /* MW 3 */ + 14346 "00000001" // /* MW 2 */ + 14347 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 14348 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 14349 "00000001" // /* MW 5 */ + 14350 "00000000" // /* MW 4 */ + 14351 "01100000" // /* MW 3 */ + 14352 "00000101" // /* MW 2 */ + 14353 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 14356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14357 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14358 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14359 "00110001" // /* MW 3 */ + 14360 "00100000" // /* MW 2 */ + 14361 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 14362 "00101100" // NOPA; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14363 "00001010" // /* MW 5 */ + 14364 "01000000" // /* MW 4 */ + 14365 "11110000" // /* MW 3 */ + 14366 "00101100" // /* MW 2 */ + 14367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 587 4 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 14368 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14369 "00000000" // /* MW 15 */ + 14370 "00000000" // /* MW 14 */ + 14371 "01111000" // /* MW 13 */ + 14372 "01100000" // /* MW 12 */ + 14373 "00110111" // /* MW 11 */ + 14374 "00000000" // /* MW 10 */ + 14375 "00000000" // /* MW 9 */ + 14376 "10000000" // /* MW 8 */ + 14377 "00010001" // /* MW 7 */ + 14378 "00000110" // /* MW 6 */ + 14379 "00100000" // /* MW 5 */ + 14380 "00000000" // /* MW 4 */ + 14381 "11110000" // /* MW 3 */ + 14382 "00101100" // /* MW 2 */ + 14383 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 590 35 +.src_ref 7 "superkernels.cpp" 591 4 +.return_address + 14384 "01100100" // MOVX r16, #1; MOV dj0, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14385 "00000001" // /* MW 5 */ + 14386 "00000001" // /* MW 4 */ + 14387 "10100001" // /* MW 3 */ + 14388 "00000000" // /* MW 2 */ + 14389 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 590 35 first + 14390 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14391 "01010110" // /* MW 3 */ + 14392 "00000010" // /* MW 2 */ + 14393 "00000111" // /* MW 1 */ + 14394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14395 "00000000" // /* MW 1 */ + 14396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14397 "00000000" // /* MW 1 */ + 14398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14399 "00000000" // /* MW 1 */ + 14400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14401 "00000000" // /* MW 1 */ + 14402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14403 "00000000" // /* MW 1 */ + 14404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14405 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 first + 14406 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14407 "00000111" // /* MW 3 */ + 14408 "10100001" // /* MW 2 */ + 14409 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 + 14410 "10000100" // JNZ r16, #14544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14544 delay_slots=5 */ + 14411 "00000001" // /* MW 5 */ + 14412 "01000000" // /* MW 4 */ + 14413 "01101000" // /* MW 3 */ + 14414 "00011100" // /* MW 2 */ + 14415 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 105 +.delay_slot + 14416 "11111000" // MOV r17, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14417 "11000000" // /* MW 3 */ + 14418 "01011110" // /* MW 2 */ + 14419 "00011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 105 first +.delay_slot + 14420 "00011000" // ADD.NC dc0, r17, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14421 "10010000" // /* MW 3 */ + 14422 "11001000" // /* MW 2 */ + 14423 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14427 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14429 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 591 4 first + 14430 "10000100" // JNZ r18, #14512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14512 delay_slots=5 */ + 14431 "00000001" // /* MW 5 */ + 14432 "01000000" // /* MW 4 */ + 14433 "01011000" // /* MW 3 */ + 14434 "00011100" // /* MW 2 */ + 14435 "10010000" // /* MW 1 */ +.delay_slot + 14436 "01000100" // MOVXM r16, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14437 "00000000" // /* MW 5 */ + 14438 "00101100" // /* MW 4 */ + 14439 "11001000" // /* MW 3 */ + 14440 "00000111" // /* MW 2 */ + 14441 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 27 +.delay_slot + 14442 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14443 "00000001" // /* MW 3 */ + 14444 "00100010" // /* MW 2 */ + 14445 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14451 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 first +.no_stack_arguments + 14452 "00111010" // ST p6, [sp, #-28]; JL #11136 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11136 delay_slots=5 */ + 14453 "01000001" // /* MW 9 */ + 14454 "00000000" // /* MW 8 */ + 14455 "00000000" // /* MW 7 */ + 14456 "01110000" // /* MW 6 */ + 14457 "00000101" // /* MW 5 */ + 14458 "00000000" // /* MW 4 */ + 14459 "10110000" // /* MW 3 */ + 14460 "11100011" // /* MW 2 */ + 14461 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 595 38 +.delay_slot + 14462 "01000100" // MOVXM p6, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14463 "10000000" // /* MW 5 */ + 14464 "11001010" // /* MW 4 */ + 14465 "11001100" // /* MW 3 */ + 14466 "00000111" // /* MW 2 */ + 14467 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 +.delay_slot + 14468 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14469 "10000000" // /* MW 5 */ + 14470 "11001010" // /* MW 4 */ + 14471 "11000000" // /* MW 3 */ + 14472 "00000111" // /* MW 2 */ + 14473 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 594 8 +.delay_slot + 14474 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14475 "10000000" // /* MW 3 */ + 14476 "01100001" // /* MW 2 */ + 14477 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14480 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14481 "00000000" // /* MW 15 */ + 14482 "00000000" // /* MW 14 */ + 14483 "01111000" // /* MW 13 */ + 14484 "10100101" // /* MW 12 */ + 14485 "00000001" // /* MW 11 */ + 14486 "00000000" // /* MW 10 */ + 14487 "00000000" // /* MW 9 */ + 14488 "00000000" // /* MW 8 */ + 14489 "01011011" // /* MW 7 */ + 14490 "00000001" // /* MW 6 */ + 14491 "00100000" // /* MW 5 */ + 14492 "00000000" // /* MW 4 */ + 14493 "11110000" // /* MW 3 */ + 14494 "00101100" // /* MW 2 */ + 14495 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 595 38 first +.return_address + 14496 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14497 "00010000" // /* MW 9 */ + 14498 "00000000" // /* MW 8 */ + 14499 "00001011" // /* MW 7 */ + 14500 "11110010" // /* MW 6 */ + 14501 "00000001" // /* MW 5 */ + 14502 "00000000" // /* MW 4 */ + 14503 "11010000" // /* MW 3 */ + 14504 "11000110" // /* MW 2 */ + 14505 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14506 "00111100" // LDA p6, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14507 "00100000" // /* MW 5 */ + 14508 "00000000" // /* MW 4 */ + 14509 "00100000" // /* MW 3 */ + 14510 "11100011" // /* MW 2 */ + 14511 "11111100" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_272 + 14512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14513 "00000000" // /* MW 1 */ + 14514 "10000100" // J #14592 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=14592 delay_slots=5 */ + 14515 "00000000" // /* MW 5 */ + 14516 "00000000" // /* MW 4 */ + 14517 "10000000" // /* MW 3 */ + 14518 "00011100" // /* MW 2 */ + 14519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14527 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.delay_slot + 14528 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p1, p6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14529 "00000000" // /* MW 15 */ + 14530 "00000000" // /* MW 14 */ + 14531 "01111000" // /* MW 13 */ + 14532 "01100000" // /* MW 12 */ + 14533 "10110110" // /* MW 11 */ + 14534 "00000000" // /* MW 10 */ + 14535 "00000000" // /* MW 9 */ + 14536 "00000000" // /* MW 8 */ + 14537 "01011011" // /* MW 7 */ + 14538 "00000001" // /* MW 6 */ + 14539 "00100000" // /* MW 5 */ + 14540 "00000000" // /* MW 4 */ + 14541 "11110000" // /* MW 3 */ + 14542 "00101100" // /* MW 2 */ + 14543 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_304 +.src_ref 7 "superkernels.cpp" 599 8 first +.no_stack_arguments + 14544 "00111010" // ST p6, [sp, #-28]; JL #11296 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 14545 "01000001" // /* MW 9 */ + 14546 "00000000" // /* MW 8 */ + 14547 "00000000" // /* MW 7 */ + 14548 "10000100" // /* MW 6 */ + 14549 "00000101" // /* MW 5 */ + 14550 "00000000" // /* MW 4 */ + 14551 "10110000" // /* MW 3 */ + 14552 "11100011" // /* MW 2 */ + 14553 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 600 38 +.delay_slot + 14554 "01000100" // MOVXM p6, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14555 "00000000" // /* MW 5 */ + 14556 "11001011" // /* MW 4 */ + 14557 "11001100" // /* MW 3 */ + 14558 "00000111" // /* MW 2 */ + 14559 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 8 +.delay_slot + 14560 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14561 "00000000" // /* MW 5 */ + 14562 "11001011" // /* MW 4 */ + 14563 "11000000" // /* MW 3 */ + 14564 "00000111" // /* MW 2 */ + 14565 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 599 8 +.delay_slot + 14566 "11111000" // MOV p1, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14567 "10000000" // /* MW 3 */ + 14568 "01100001" // /* MW 2 */ + 14569 "00011001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14572 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14573 "01100111" // /* MW 3 */ + 14574 "00000001" // /* MW 2 */ + 14575 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 600 38 first +.return_address + 14576 "10111010" // LDA r17, [p6]; MOVXM r16, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14577 "00010000" // /* MW 9 */ + 14578 "00000000" // /* MW 8 */ + 14579 "00001011" // /* MW 7 */ + 14580 "11110010" // /* MW 6 */ + 14581 "00000001" // /* MW 5 */ + 14582 "00000000" // /* MW 4 */ + 14583 "11010000" // /* MW 3 */ + 14584 "11000110" // /* MW 2 */ + 14585 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14586 "00111100" // LDA p1, [sp, #-28]; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14587 "00100000" // /* MW 5 */ + 14588 "00000000" // /* MW 4 */ + 14589 "00100000" // /* MW 3 */ + 14590 "10010011" // /* MW 2 */ + 14591 "11111100" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_352 + 14592 "10011000" // ADD.NC p3, r16, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14593 "00000101" // /* MW 3 */ + 14594 "01101000" // /* MW 2 */ + 14595 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 35 first +.src_ref 7 "superkernels.cpp" 611 18 + 14596 "10111010" // LDA.u8 r19, [p3], #7; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14597 "00010000" // /* MW 9 */ + 14598 "00101000" // /* MW 8 */ + 14599 "00110010" // /* MW 7 */ + 14600 "11110011" // /* MW 6 */ + 14601 "00000001" // /* MW 5 */ + 14602 "00000000" // /* MW 4 */ + 14603 "01010000" // /* MW 3 */ + 14604 "11001101" // /* MW 2 */ + 14605 "01101111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 18 first + 14606 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14607 "01010110" // /* MW 3 */ + 14608 "00000110" // /* MW 2 */ + 14609 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 37 first + 14610 "10011000" // LDA.u16 r21, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14611 "10111010" // /* MW 3 */ + 14612 "00011110" // /* MW 2 */ + 14613 "00000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 73 + 14614 "10011000" // LDA.u16 r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14615 "00011010" // /* MW 3 */ + 14616 "00000110" // /* MW 2 */ + 14617 "00000011" // /* MW 1 */ + 14618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14619 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 110 + 14620 "10011000" // LDA.u16 r20, [p3, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14621 "10011010" // /* MW 3 */ + 14622 "00010110" // /* MW 2 */ + 14623 "00000011" // /* MW 1 */ + 14624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14625 "00000000" // /* MW 1 */ + 14626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14627 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 19 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 14628 "01000100" // MOVXM p0, #508996 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14629 "10001000" // /* MW 5 */ + 14630 "11001000" // /* MW 4 */ + 14631 "11000000" // /* MW 3 */ + 14632 "00000111" // /* MW 2 */ + 14633 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 57 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 14634 "10011000" // MUL r19, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14635 "01011111" // /* MW 3 */ + 14636 "11100111" // /* MW 2 */ + 14637 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 606 19 first +.src_ref 7 "superkernels.cpp" 611 16 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14638 "00111010" // ST r19, [p0]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14639 "00010001" // /* MW 9 */ + 14640 "00101110" // /* MW 8 */ + 14641 "00110010" // /* MW 7 */ + 14642 "11110001" // /* MW 6 */ + 14643 "00000001" // /* MW 5 */ + 14644 "00000000" // /* MW 4 */ + 14645 "00110000" // /* MW 3 */ + 14646 "11001110" // /* MW 2 */ + 14647 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 607 94 first + 14648 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14649 "00001111" // /* MW 3 */ + 14650 "11100001" // /* MW 2 */ + 14651 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 611 27 first + 14652 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14653 "00101111" // /* MW 3 */ + 14654 "01100011" // /* MW 2 */ + 14655 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 28 first + 14656 "10011000" // MUL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14657 "00001111" // /* MW 3 */ + 14658 "00100001" // /* MW 2 */ + 14659 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 13 +.src_ref 7 "superkernels.cpp" 611 16 first + 14660 "01110110" // NOPA; ST r17, [p2]; MOVXM p6, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 14661 "00010000" // /* MW 11 */ + 14662 "00110000" // /* MW 10 */ + 14663 "00110010" // /* MW 9 */ + 14664 "11110011" // /* MW 8 */ + 14665 "00000001" // /* MW 7 */ + 14666 "10000000" // /* MW 6 */ + 14667 "00110001" // /* MW 5 */ + 14668 "00000110" // /* MW 4 */ + 14669 "11110010" // /* MW 3 */ + 14670 "00101100" // /* MW 2 */ + 14671 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 608 13 first + 14672 "11100001" // NOPA; NOPB; ST r16, [p6]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14673 "00000000" // /* MW 15 */ + 14674 "00000000" // /* MW 14 */ + 14675 "01111000" // /* MW 13 */ + 14676 "10100101" // /* MW 12 */ + 14677 "00000001" // /* MW 11 */ + 14678 "00000000" // /* MW 10 */ + 14679 "00000000" // /* MW 9 */ + 14680 "10000000" // /* MW 8 */ + 14681 "00010001" // /* MW 7 */ + 14682 "00000110" // /* MW 6 */ + 14683 "00100110" // /* MW 5 */ + 14684 "00000000" // /* MW 4 */ + 14685 "11110000" // /* MW 3 */ + 14686 "00101100" // /* MW 2 */ + 14687 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_448 +.src_ref 7 "superkernels.cpp" 614 12 + 14688 "01000100" // MOVXM p0, #509000 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14689 "10010000" // /* MW 5 */ + 14690 "11001000" // /* MW 4 */ + 14691 "11000000" // /* MW 3 */ + 14692 "00000111" // /* MW 2 */ + 14693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.src_ref 7 "superkernels.cpp" 616 11 + 14694 "10111010" // LDA r16, [p0]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14695 "00010000" // /* MW 9 */ + 14696 "00100000" // /* MW 8 */ + 14697 "00110010" // /* MW 7 */ + 14698 "11110001" // /* MW 6 */ + 14699 "00000001" // /* MW 5 */ + 14700 "00000000" // /* MW 4 */ + 14701 "11010000" // /* MW 3 */ + 14702 "11000010" // /* MW 2 */ + 14703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 +.src_ref 7 "superkernels.cpp" 616 11 first + 14704 "10111010" // LDA r17, [p2]; MOVXM p6, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14705 "00010000" // /* MW 9 */ + 14706 "00100110" // /* MW 8 */ + 14707 "00110010" // /* MW 7 */ + 14708 "11110011" // /* MW 6 */ + 14709 "00000001" // /* MW 5 */ + 14710 "00000000" // /* MW 4 */ + 14711 "11010000" // /* MW 3 */ + 14712 "11000110" // /* MW 2 */ + 14713 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first + 14714 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14715 "01010110" // /* MW 3 */ + 14716 "00000110" // /* MW 2 */ + 14717 "00000110" // /* MW 1 */ + 14718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14719 "00000000" // /* MW 1 */ + 14720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14721 "00000000" // /* MW 1 */ + 14722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14723 "00000000" // /* MW 1 */ + 14724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14725 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 618 6 first +.src_ref 7 "superkernels.cpp" 618 17 first + 14726 "10000100" // JNZ r16, #14832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=14832 delay_slots=5 */ + 14727 "00000001" // /* MW 5 */ + 14728 "01000000" // /* MW 4 */ + 14729 "11111000" // /* MW 3 */ + 14730 "00011100" // /* MW 2 */ + 14731 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.src_ref 7 "superkernels.cpp" 616 11 first +.delay_slot + 14732 "00100100" // ADD r17, r17, #1; ADD.NC r19, r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14733 "00000001" // /* MW 5 */ + 14734 "10110000" // /* MW 4 */ + 14735 "11101001" // /* MW 3 */ + 14736 "01000000" // /* MW 2 */ + 14737 "10001100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first +.delay_slot + 14738 "00011000" // ADD r18, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14739 "00000111" // /* MW 3 */ + 14740 "10100100" // /* MW 2 */ + 14741 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 616 11 first +.delay_slot + 14742 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14743 "00110001" // /* MW 3 */ + 14744 "00000110" // /* MW 2 */ + 14745 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 615 13 first +.delay_slot + 14746 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14747 "01010001" // /* MW 3 */ + 14748 "00000110" // /* MW 2 */ + 14749 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 614 12 first +.delay_slot + 14750 "10011000" // ST r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14751 "01110001" // /* MW 3 */ + 14752 "00000110" // /* MW 2 */ + 14753 "00001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 14754 "00011000" // LDA r17, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14755 "00110001" // /* MW 3 */ + 14756 "11110110" // /* MW 2 */ + 14757 "00000111" // /* MW 1 */ + 14758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14759 "00000000" // /* MW 1 */ + 14760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14761 "00000000" // /* MW 1 */ + 14762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14763 "00000000" // /* MW 1 */ + 14764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14765 "00000000" // /* MW 1 */ + 14766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14767 "00000000" // /* MW 1 */ + 14768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14769 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 14770 "00011000" // ADD.NC p6, r17, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14771 "10000110" // /* MW 3 */ + 14772 "01101000" // /* MW 2 */ + 14773 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 14774 "10011000" // LDA r27, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14775 "01110110" // /* MW 3 */ + 14776 "11111111" // /* MW 2 */ + 14777 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 14778 "10011000" // LDA r17, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14779 "00110110" // /* MW 3 */ + 14780 "11111110" // /* MW 2 */ + 14781 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 14782 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14783 "01010110" // /* MW 3 */ + 14784 "11111110" // /* MW 2 */ + 14785 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 14786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14787 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 14788 "10011000" // LDA r17, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14789 "00110110" // /* MW 3 */ + 14790 "01000110" // /* MW 2 */ + 14791 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14793 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14795 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14797 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14799 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 14800 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14801 "00010010" // /* MW 3 */ + 14802 "10100011" // /* MW 2 */ + 14803 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.src_ref 1 "io_buffer_main.h" 395 8 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 14804 "01011100" // ST r17, [p6]; MOVX r16, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14805 "11111010" // /* MW 5 */ + 14806 "11000001" // /* MW 4 */ + 14807 "00111111" // /* MW 3 */ + 14808 "11000110" // /* MW 2 */ + 14809 "11000000" // /* MW 1 */ + 14810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14811 "00000000" // /* MW 1 */ + 14812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14813 "00000000" // /* MW 1 */ + 14814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14815 "00000000" // /* MW 1 */ + 14816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14817 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14818 "01111110" // NOPA; NOPB; NOPS; ACQ r17, r16; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 14819 "01100000" // /* MW 13 */ + 14820 "00101011" // /* MW 12 */ + 14821 "00000000" // /* MW 11 */ + 14822 "10101111" // /* MW 10 */ + 14823 "00110100" // /* MW 9 */ + 14824 "00000000" // /* MW 8 */ + 14825 "00001000" // /* MW 7 */ + 14826 "01010011" // /* MW 6 */ + 14827 "00100100" // /* MW 5 */ + 14828 "00000000" // /* MW 4 */ + 14829 "11110000" // /* MW 3 */ + 14830 "00101100" // /* MW 2 */ + 14831 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_592 + 14832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14833 "00000000" // /* MW 1 */ + 14834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14835 "00000000" // /* MW 1 */ + 14836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14837 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.src_ref 1 "io_buffer_main.h" 125 25 + 14838 "00011000" // LDA p2, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14839 "00011001" // /* MW 3 */ + 14840 "11110101" // /* MW 2 */ + 14841 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 + 14842 "00011000" // LDA p0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14843 "00011001" // /* MW 3 */ + 14844 "11101000" // /* MW 2 */ + 14845 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 first +.no_stack_arguments + 14846 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 14847 "00000001" // /* MW 5 */ + 14848 "00000000" // /* MW 4 */ + 14849 "10111000" // /* MW 3 */ + 14850 "00001000" // /* MW 2 */ + 14851 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 621 2 +.delay_slot + 14852 "01000100" // MOVXM p3, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14853 "00000000" // /* MW 5 */ + 14854 "11001100" // /* MW 4 */ + 14855 "11000110" // /* MW 3 */ + 14856 "00000111" // /* MW 2 */ + 14857 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14863 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 14864 "11100001" // NOPA; NOPB; MOVS p6, p2; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 14865 "00000000" // /* MW 15 */ + 14866 "00000000" // /* MW 14 */ + 14867 "01111000" // /* MW 13 */ + 14868 "10100101" // /* MW 12 */ + 14869 "00000001" // /* MW 11 */ + 14870 "00000000" // /* MW 10 */ + 14871 "00000000" // /* MW 9 */ + 14872 "00000000" // /* MW 8 */ + 14873 "10001011" // /* MW 7 */ + 14874 "10001000" // /* MW 6 */ + 14875 "00100110" // /* MW 5 */ + 14876 "00000000" // /* MW 4 */ + 14877 "11110000" // /* MW 3 */ + 14878 "00101100" // /* MW 2 */ + 14879 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 +.src_ref 1 "io_buffer_main.h" 218 49 +.return_address + 14880 "10111010" // LDA r16, [sp, #-16]; MOVXM p1, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14881 "00010000" // /* MW 9 */ + 14882 "00100100" // /* MW 8 */ + 14883 "10110010" // /* MW 7 */ + 14884 "11110000" // /* MW 6 */ + 14885 "00000001" // /* MW 5 */ + 14886 "00000000" // /* MW 4 */ + 14887 "00100000" // /* MW 3 */ + 14888 "01000010" // /* MW 2 */ + 14889 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 first +.src_ref 7 "superkernels.cpp" 623 20 + 14890 "10111010" // LDA r17, [p1]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14891 "00010000" // /* MW 9 */ + 14892 "00100010" // /* MW 8 */ + 14893 "10110010" // /* MW 7 */ + 14894 "11110000" // /* MW 6 */ + 14895 "00000001" // /* MW 5 */ + 14896 "00000000" // /* MW 4 */ + 14897 "11010000" // /* MW 3 */ + 14898 "11000110" // /* MW 2 */ + 14899 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 20 + 14900 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14901 "01010110" // /* MW 3 */ + 14902 "00000110" // /* MW 2 */ + 14903 "00000001" // /* MW 1 */ + 14904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14905 "00000000" // /* MW 1 */ + 14906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14907 "00000000" // /* MW 1 */ + 14908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14909 "00000000" // /* MW 1 */ + 14910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14911 "00000000" // /* MW 1 */ + 14912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14913 "00000000" // /* MW 1 */ + 14914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14915 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 17 + 14916 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14917 "00101000" // /* MW 3 */ + 14918 "01100011" // /* MW 2 */ + 14919 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 623 6 + 14920 "10000100" // JNZ r17, #15264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15264 delay_slots=5 */ + 14921 "00000001" // /* MW 5 */ + 14922 "01000000" // /* MW 4 */ + 14923 "11010000" // /* MW 3 */ + 14924 "00011101" // /* MW 2 */ + 14925 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14935 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 629 34 +.src_ref 1 "io_buffer_main.h" 218 49 first +.src_ref 1 "io_buffer_main.h" 395 8 + 14936 "10111010" // MOVA dj0, #64; MOVX r17, #-1; ADD.NC p1, r16, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14937 "00001000" // /* MW 9 */ + 14938 "00000011" // /* MW 8 */ + 14939 "10110100" // /* MW 7 */ + 14940 "11101000" // /* MW 6 */ + 14941 "00010111" // /* MW 5 */ + 14942 "00111111" // /* MW 4 */ + 14943 "10000000" // /* MW 3 */ + 14944 "00000010" // /* MW 2 */ + 14945 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 52 +.src_ref 1 "io_buffer_main.h" 218 49 + 14946 "10111010" // LDA r27, [p1], #-4; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 14947 "00010000" // /* MW 9 */ + 14948 "00101110" // /* MW 8 */ + 14949 "00110010" // /* MW 7 */ + 14950 "11110000" // /* MW 6 */ + 14951 "00000001" // /* MW 5 */ + 14952 "00000000" // /* MW 4 */ + 14953 "11010000" // /* MW 3 */ + 14954 "11101110" // /* MW 2 */ + 14955 "00111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 14956 "10011000" // LDA r18, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14957 "01010110" // /* MW 3 */ + 14958 "11111110" // /* MW 2 */ + 14959 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 14960 "10011000" // LDA r19, [p1], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14961 "01110110" // /* MW 3 */ + 14962 "11111110" // /* MW 2 */ + 14963 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 14964 "10011000" // LDA r20, [p1, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14965 "10010110" // /* MW 3 */ + 14966 "01010110" // /* MW 2 */ + 14967 "00000001" // /* MW 1 */ + 14968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14969 "00000000" // /* MW 1 */ + 14970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14971 "00000000" // /* MW 1 */ + 14972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14973 "00000000" // /* MW 1 */ + 14974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14975 "00000000" // /* MW 1 */ + 14976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14977 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 14978 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14979 "00100010" // /* MW 3 */ + 14980 "11100101" // /* MW 2 */ + 14981 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 +.src_ref 7 "superkernels.cpp" 630 3 +.src_ref 1 "io_buffer_main.h" 218 20 + 14982 "01011100" // ST r18, [p1]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14983 "00001010" // /* MW 5 */ + 14984 "01000000" // /* MW 4 */ + 14985 "00110000" // /* MW 3 */ + 14986 "11001010" // /* MW 2 */ + 14987 "00100000" // /* MW 1 */ + 14988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14989 "00000000" // /* MW 1 */ + 14990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14991 "00000000" // /* MW 1 */ + 14992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14993 "00000000" // /* MW 1 */ + 14994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14995 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 14996 "00011000" // ACQ r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14997 "00011000" // /* MW 3 */ + 14998 "00010011" // /* MW 2 */ + 14999 "00010101" // /* MW 1 */ + 15000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15001 "00000000" // /* MW 1 */ + 15002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15003 "00000000" // /* MW 1 */ + 15004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15005 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 52 first + 15006 "10011000" // LDA r19, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15007 "01110110" // /* MW 3 */ + 15008 "00000110" // /* MW 2 */ + 15009 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 629 34 first + 15010 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15011 "01010110" // /* MW 3 */ + 15012 "00000010" // /* MW 2 */ + 15013 "00000111" // /* MW 1 */ + 15014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15015 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 15016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15017 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 15018 "10011000" // LDA p0, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15019 "00011110" // /* MW 3 */ + 15020 "01011100" // /* MW 2 */ + 15021 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15023 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15025 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 first +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 32 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15026 "10111010" // LDA r17, [p1], #16; LSHL r19, r19, r16; MOV p0, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15027 "01111000" // /* MW 9 */ + 15028 "01100000" // /* MW 8 */ + 15029 "00110001" // /* MW 7 */ + 15030 "01101100" // /* MW 6 */ + 15031 "00111000" // /* MW 5 */ + 15032 "00100111" // /* MW 4 */ + 15033 "11010000" // /* MW 3 */ + 15034 "11000110" // /* MW 2 */ + 15035 "00101001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15036 "10011000" // EQ r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15037 "00000111" // /* MW 3 */ + 15038 "10100001" // /* MW 2 */ + 15039 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 15040 "10000100" // JNZ r16, #15120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15120 delay_slots=5 */ + 15041 "00000001" // /* MW 5 */ + 15042 "01000000" // /* MW 4 */ + 15043 "10001000" // /* MW 3 */ + 15044 "00011101" // /* MW 2 */ + 15045 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 15046 "00011000" // MOVS p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15047 "10001011" // /* MW 3 */ + 15048 "10000000" // /* MW 2 */ + 15049 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15055 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 626 50 first +.delay_slot + 15056 "00000010" // ST p1, [sp, #-16]; ADD.NC p1, r19, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 15057 "10100000" // /* MW 7 */ + 15058 "11100010" // /* MW 6 */ + 15059 "10110100" // /* MW 5 */ + 15060 "00000000" // /* MW 4 */ + 15061 "10110000" // /* MW 3 */ + 15062 "00010011" // /* MW 2 */ + 15063 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 630 3 first + 15064 "10000100" // JNZ r18, #15152 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15152 delay_slots=5 */ + 15065 "00000001" // /* MW 5 */ + 15066 "01000000" // /* MW 4 */ + 15067 "10011000" // /* MW 3 */ + 15068 "00011101" // /* MW 2 */ + 15069 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15079 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 first +.no_stack_arguments + 15080 "00000100" // JL #11248 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11248 delay_slots=5 */ + 15081 "00000001" // /* MW 5 */ + 15082 "00000000" // /* MW 4 */ + 15083 "11111000" // /* MW 3 */ + 15084 "00010101" // /* MW 2 */ + 15085 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 +.delay_slot + 15086 "01000100" // MOVXM p3, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15087 "10000000" // /* MW 5 */ + 15088 "11001010" // /* MW 4 */ + 15089 "11000110" // /* MW 3 */ + 15090 "00000111" // /* MW 2 */ + 15091 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15093 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15095 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15097 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 633 8 +.delay_slot + 15098 "11010100" // NOPA; MOV p2, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15099 "10000001" // /* MW 5 */ + 15100 "11000001" // /* MW 4 */ + 15101 "11110100" // /* MW 3 */ + 15102 "00101100" // /* MW 2 */ + 15103 "00000000" // /* MW 1 */ +.return_address + 15104 "10000100" // J #15152 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15152 delay_slots=5 */ + 15105 "00000000" // /* MW 5 */ + 15106 "00000000" // /* MW 4 */ + 15107 "10011000" // /* MW 3 */ + 15108 "00011101" // /* MW 2 */ + 15109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15111 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15119 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_880 +.src_ref 7 "superkernels.cpp" 637 8 first +.no_stack_arguments + 15120 "00000100" // JL #11440 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11440 delay_slots=5 */ + 15121 "00000001" // /* MW 5 */ + 15122 "00000000" // /* MW 4 */ + 15123 "01011000" // /* MW 3 */ + 15124 "00010110" // /* MW 2 */ + 15125 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 637 8 +.delay_slot + 15126 "01000100" // MOVXM p3, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15127 "00000000" // /* MW 5 */ + 15128 "11001011" // /* MW 4 */ + 15129 "11000110" // /* MW 3 */ + 15130 "00000111" // /* MW 2 */ + 15131 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 637 8 +.delay_slot + 15132 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15133 "11000000" // /* MW 3 */ + 15134 "01100000" // /* MW 2 */ + 15135 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15140 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 15141 "10000001" // /* MW 11 */ + 15142 "10101101" // /* MW 10 */ + 15143 "00000000" // /* MW 9 */ + 15144 "00000000" // /* MW 8 */ + 15145 "00000000" // /* MW 7 */ + 15146 "00000000" // /* MW 6 */ + 15147 "00100000" // /* MW 5 */ + 15148 "00000000" // /* MW 4 */ + 15149 "11110000" // /* MW 3 */ + 15150 "00101100" // /* MW 2 */ + 15151 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_912 +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.return_address + 15152 "00011000" // LDA p1, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15153 "10011001" // /* MW 3 */ + 15154 "11110000" // /* MW 2 */ + 15155 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 + 15156 "00101100" // LDA p0, [sp, #-12]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15157 "00001010" // /* MW 5 */ + 15158 "01000100" // /* MW 4 */ + 15159 "00100000" // /* MW 3 */ + 15160 "10000011" // /* MW 2 */ + 15161 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 +.src_ref 1 "io_buffer_main.h" 324 32 first + 15162 "10111010" // LDA r16, [p7, #16]; MOVXM p7, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15163 "00010000" // /* MW 9 */ + 15164 "00100100" // /* MW 8 */ + 15165 "10110010" // /* MW 7 */ + 15166 "11110011" // /* MW 6 */ + 15167 "00000001" // /* MW 5 */ + 15168 "00000000" // /* MW 4 */ + 15169 "11010000" // /* MW 3 */ + 15170 "11000010" // /* MW 2 */ + 15171 "11101000" // /* MW 1 */ + 15172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15173 "00000000" // /* MW 1 */ + 15174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15175 "00000000" // /* MW 1 */ + 15176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15177 "00000000" // /* MW 1 */ + 15178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15179 "00000000" // /* MW 1 */ + 15180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15181 "00000000" // /* MW 1 */ + 15182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15183 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 15184 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15185 "00011000" // /* MW 3 */ + 15186 "00010001" // /* MW 2 */ + 15187 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 15188 "10011000" // LDA r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15189 "01010110" // /* MW 3 */ + 15190 "11110110" // /* MW 2 */ + 15191 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 15192 "10011000" // LDA r16, [p0, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15193 "00010110" // /* MW 3 */ + 15194 "01010110" // /* MW 2 */ + 15195 "00000000" // /* MW 1 */ + 15196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15197 "00000000" // /* MW 1 */ + 15198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15199 "00000000" // /* MW 1 */ + 15200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15201 "00000000" // /* MW 1 */ + 15202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15203 "00000000" // /* MW 1 */ + 15204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 15206 "10011000" // SUB r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15207 "00100001" // /* MW 3 */ + 15208 "01100101" // /* MW 2 */ + 15209 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 15210 "10011000" // ST r18, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15211 "01010001" // /* MW 3 */ + 15212 "11110110" // /* MW 2 */ + 15213 "00001001" // /* MW 1 */ + 15214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15215 "00000000" // /* MW 1 */ + 15216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15217 "00000000" // /* MW 1 */ + 15218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15219 "00000000" // /* MW 1 */ + 15220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15221 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 15222 "00011000" // REL r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15223 "00011000" // /* MW 3 */ + 15224 "00010001" // /* MW 2 */ + 15225 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 15226 "10011000" // LDA r18, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15227 "01010110" // /* MW 3 */ + 15228 "11100110" // /* MW 2 */ + 15229 "00000110" // /* MW 1 */ + 15230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15231 "00000000" // /* MW 1 */ + 15232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15233 "00000000" // /* MW 1 */ + 15234 "10000100" // J #15280 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=15280 delay_slots=5 */ + 15235 "00000000" // /* MW 5 */ + 15236 "00000000" // /* MW 4 */ + 15237 "11011000" // /* MW 3 */ + 15238 "00011101" // /* MW 2 */ + 15239 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15241 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15243 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 +.src_ref 7 "superkernels.cpp" 649 14 +.delay_slot + 15244 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15245 "00000001" // /* MW 3 */ + 15246 "00100000" // /* MW 2 */ + 15247 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 645 15 first +.src_ref 1 "io_buffer_main.h" 327 32 +.delay_slot + 15248 "01011100" // ST r16, [p7]; SUB r17, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15249 "01000011" // /* MW 5 */ + 15250 "11000110" // /* MW 4 */ + 15251 "00111000" // /* MW 3 */ + 15252 "11000010" // /* MW 2 */ + 15253 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 first +.delay_slot + 15254 "01111010" // NOPA; ST r17, [p6, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15255 "00000000" // /* MW 9 */ + 15256 "00000000" // /* MW 8 */ + 15257 "00000000" // /* MW 7 */ + 15258 "10000000" // /* MW 6 */ + 15259 "00110001" // /* MW 5 */ + 15260 "11100110" // /* MW 4 */ + 15261 "11110110" // /* MW 3 */ + 15262 "00101100" // /* MW 2 */ + 15263 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1024 +.src_ref 7 "superkernels.cpp" 649 14 + 15264 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 15265 "00000000" // /* MW 15 */ + 15266 "00000000" // /* MW 14 */ + 15267 "01111000" // /* MW 13 */ + 15268 "10100101" // /* MW 12 */ + 15269 "00000001" // /* MW 11 */ + 15270 "00001000" // /* MW 10 */ + 15271 "00000000" // /* MW 9 */ + 15272 "00000001" // /* MW 8 */ + 15273 "01011011" // /* MW 7 */ + 15274 "00000001" // /* MW 6 */ + 15275 "00100000" // /* MW 5 */ + 15276 "00000000" // /* MW 4 */ + 15277 "11110000" // /* MW 3 */ + 15278 "00101100" // /* MW 2 */ + 15279 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1040 +.src_ref 7 "superkernels.cpp" 648 19 +.src_ref 7 "superkernels.cpp" 651 + 15280 "10111010" // LDA lr, [sp, #-20]; MOVXM p7, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15281 "00010000" // /* MW 9 */ + 15282 "00110000" // /* MW 8 */ + 15283 "10110010" // /* MW 7 */ + 15284 "11110011" // /* MW 6 */ + 15285 "00000001" // /* MW 5 */ + 15286 "00000000" // /* MW 4 */ + 15287 "00100000" // /* MW 3 */ + 15288 "10000111" // /* MW 2 */ + 15289 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 +.src_ref 7 "superkernels.cpp" 648 19 first +.src_ref 7 "superkernels.cpp" 649 14 + 15290 "10111010" // LDA r18, [p7]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 15291 "00010000" // /* MW 9 */ + 15292 "00100000" // /* MW 8 */ + 15293 "00110010" // /* MW 7 */ + 15294 "11110011" // /* MW 6 */ + 15295 "00000001" // /* MW 5 */ + 15296 "00000000" // /* MW 4 */ + 15297 "11010000" // /* MW 3 */ + 15298 "11001010" // /* MW 2 */ + 15299 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 + 15300 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15301 "00110110" // /* MW 3 */ + 15302 "00000110" // /* MW 2 */ + 15303 "00000110" // /* MW 1 */ + 15304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15305 "00000000" // /* MW 1 */ + 15306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15307 "00000000" // /* MW 1 */ + 15308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15309 "00000000" // /* MW 1 */ + 15310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15311 "00000000" // /* MW 1 */ + 15312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15313 "00000000" // /* MW 1 */ + 15314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15315 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 16 + 15316 "10011000" // NE r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15317 "00101000" // /* MW 3 */ + 15318 "01100011" // /* MW 2 */ + 15319 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 648 6 + 15320 "10000100" // JNZ r17, #15344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=15344 delay_slots=5 */ + 15321 "00000001" // /* MW 5 */ + 15322 "01000000" // /* MW 4 */ + 15323 "11111000" // /* MW 3 */ + 15324 "00011101" // /* MW 2 */ + 15325 "10001000" // /* MW 1 */ +.delay_slot + 15326 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15327 "10011001" // /* MW 3 */ + 15328 "11111011" // /* MW 2 */ + 15329 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15335 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15337 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 649 14 first + 15338 "00001100" // NOPA; ST r16, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15339 "00100011" // /* MW 5 */ + 15340 "00001100" // /* MW 4 */ + 15341 "11111100" // /* MW 3 */ + 15342 "00101100" // /* MW 2 */ + 15343 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE_1104 + 15344 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15345 "00011001" // /* MW 3 */ + 15346 "11111111" // /* MW 2 */ + 15347 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 651 first + 15348 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15349 "00000000" // /* MW 3 */ + 15350 "00101000" // /* MW 2 */ + 15351 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 651 +.delay_slot + 15352 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15353 "00000001" // /* MW 5 */ + 15354 "00000000" // /* MW 4 */ + 15355 "00000000" // /* MW 3 */ + 15356 "11111000" // /* MW 2 */ + 15357 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE__end +.label __Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE___func_end0 + 15365 "00000000" // /* MW 1 */ +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function _b896_wrapper _Z13_b896_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 21 first +.src_ref 0 "0_0_reloadable5.cc" 23 79 +.function_start + 15376 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15377 "11000000" // /* MW 3 */ + 15378 "01100000" // /* MW 2 */ + 15379 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 23 79 first + 15380 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15381 "00011110" // /* MW 3 */ + 15382 "00011100" // /* MW 2 */ + 15383 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 24 79 first + 15384 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15385 "10011110" // /* MW 3 */ + 15386 "00101100" // /* MW 2 */ + 15387 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 26 81 first + 15388 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15389 "10011110" // /* MW 3 */ + 15390 "11110101" // /* MW 2 */ + 15391 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 25 47 first + 15392 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15393 "00011110" // /* MW 3 */ + 15394 "00000101" // /* MW 2 */ + 15395 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 22 4 first +.tail_call + 15396 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */ + 15397 "00000000" // /* MW 5 */ + 15398 "00000000" // /* MW 4 */ + 15399 "01110000" // /* MW 3 */ + 15400 "00001101" // /* MW 2 */ + 15401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + 15411 "00000000" // /* MW 1 */ +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function _b901_wrapper _Z13_b901_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 30 first +.src_ref 0 "0_0_reloadable5.cc" 32 79 +.function_start + 15424 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15425 "11000000" // /* MW 3 */ + 15426 "01100000" // /* MW 2 */ + 15427 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 32 79 first + 15428 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15429 "00011110" // /* MW 3 */ + 15430 "00101100" // /* MW 2 */ + 15431 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 34 81 first + 15432 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15433 "00011110" // /* MW 3 */ + 15434 "11110101" // /* MW 2 */ + 15435 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 33 47 first + 15436 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15437 "10011110" // /* MW 3 */ + 15438 "00000100" // /* MW 2 */ + 15439 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 31 4 first +.tail_call + 15440 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */ + 15441 "00000000" // /* MW 5 */ + 15442 "00000000" // /* MW 4 */ + 15443 "00011000" // /* MW 3 */ + 15444 "00010000" // /* MW 2 */ + 15445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 + 15455 "00000000" // /* MW 1 */ +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function _b906_wrapper _Z13_b906_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 38 first +.src_ref 0 "0_0_reloadable5.cc" 40 79 +.function_start + 15456 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15457 "11000000" // /* MW 3 */ + 15458 "01100000" // /* MW 2 */ + 15459 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 40 79 first + 15460 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15461 "00011110" // /* MW 3 */ + 15462 "00101100" // /* MW 2 */ + 15463 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 42 81 first + 15464 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15465 "00011110" // /* MW 3 */ + 15466 "11110101" // /* MW 2 */ + 15467 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 41 47 first + 15468 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15469 "10011110" // /* MW 3 */ + 15470 "00000100" // /* MW 2 */ + 15471 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 39 4 first +.tail_call + 15472 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */ + 15473 "00000000" // /* MW 5 */ + 15474 "00000000" // /* MW 4 */ + 15475 "11001000" // /* MW 3 */ + 15476 "00010001" // /* MW 2 */ + 15477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 + 15487 "00000000" // /* MW 1 */ +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function _b881_wrapper _Z13_b881_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 46 first +.src_ref 0 "0_0_reloadable5.cc" 48 79 +.function_start + 15488 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15489 "11000000" // /* MW 3 */ + 15490 "01100000" // /* MW 2 */ + 15491 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 48 79 first + 15492 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15493 "00011110" // /* MW 3 */ + 15494 "00101100" // /* MW 2 */ + 15495 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 50 81 first + 15496 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15497 "00011110" // /* MW 3 */ + 15498 "11110101" // /* MW 2 */ + 15499 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 49 47 first + 15500 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15501 "10011110" // /* MW 3 */ + 15502 "00000100" // /* MW 2 */ + 15503 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 47 4 first +.tail_call + 15504 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */ + 15505 "00000000" // /* MW 5 */ + 15506 "00000000" // /* MW 4 */ + 15507 "10001000" // /* MW 3 */ + 15508 "00010100" // /* MW 2 */ + 15509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 + 15519 "00000000" // /* MW 1 */ +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function _b891_wrapper _Z13_b891_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 54 first +.src_ref 0 "0_0_reloadable5.cc" 56 79 +.function_start + 15520 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15521 "11000000" // /* MW 3 */ + 15522 "01100000" // /* MW 2 */ + 15523 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 56 79 first + 15524 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15525 "00011110" // /* MW 3 */ + 15526 "00111100" // /* MW 2 */ + 15527 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 57 47 first + 15528 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15529 "10011110" // /* MW 3 */ + 15530 "11101100" // /* MW 2 */ + 15531 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 59 81 first + 15532 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15533 "10011110" // /* MW 3 */ + 15534 "00010101" // /* MW 2 */ + 15535 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 58 80 first + 15536 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15537 "00011110" // /* MW 3 */ + 15538 "00000101" // /* MW 2 */ + 15539 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 55 4 first +.tail_call + 15540 "10000100" // J #11744 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11744 delay_slots=5 */ + 15541 "00000000" // /* MW 5 */ + 15542 "00000000" // /* MW 4 */ + 15543 "11110000" // /* MW 3 */ + 15544 "00010110" // /* MW 2 */ + 15545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15551 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15553 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + 15555 "00000000" // /* MW 1 */ +.label __Z13_b924_wrapperPPv___func_begin0 +.label _Z13_b924_wrapperPPv +.function _b924_wrapper _Z13_b924_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 63 first +.src_ref 0 "0_0_reloadable5.cc" 65 79 +.function_start + 15568 "11111000" // MOV p3, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15569 "11000000" // /* MW 3 */ + 15570 "01100000" // /* MW 2 */ + 15571 "00011011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 65 79 first + 15572 "10011000" // LDA p0, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15573 "00011110" // /* MW 3 */ + 15574 "00011100" // /* MW 2 */ + 15575 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 66 79 first + 15576 "10011000" // LDA p1, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15577 "10011110" // /* MW 3 */ + 15578 "00011100" // /* MW 2 */ + 15579 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 67 80 first + 15580 "10011000" // LDA p2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15581 "00011110" // /* MW 3 */ + 15582 "00101101" // /* MW 2 */ + 15583 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 69 81 first + 15584 "10011000" // LDA p4, [p3, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15585 "00011110" // /* MW 3 */ + 15586 "11110110" // /* MW 2 */ + 15587 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 68 47 first + 15588 "10011000" // LDA p3, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15589 "10011110" // /* MW 3 */ + 15590 "00000101" // /* MW 2 */ + 15591 "00000011" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 64 4 first +.tail_call + 15592 "10000100" // J #14240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=14240 delay_slots=5 */ + 15593 "00000000" // /* MW 5 */ + 15594 "00000000" // /* MW 4 */ + 15595 "11010000" // /* MW 3 */ + 15596 "00011011" // /* MW 2 */ + 15597 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b924_wrapperPPv__end +.label __Z13_b924_wrapperPPv___func_end0 + 15607 "00000000" // /* MW 1 */ +.label __Z13_b919_wrapperPPv___func_begin0 +.label _Z13_b919_wrapperPPv +.function _b919_wrapper _Z13_b919_wrapperPPv +.src_ref 0 "0_0_reloadable5.cc" 73 first +.src_ref 0 "0_0_reloadable5.cc" 75 79 +.function_start + 15616 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15617 "11000000" // /* MW 3 */ + 15618 "01100000" // /* MW 2 */ + 15619 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 75 79 first + 15620 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15621 "00011110" // /* MW 3 */ + 15622 "00011100" // /* MW 2 */ + 15623 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 76 79 first + 15624 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15625 "10011110" // /* MW 3 */ + 15626 "00101100" // /* MW 2 */ + 15627 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 78 81 first + 15628 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15629 "10011110" // /* MW 3 */ + 15630 "11110101" // /* MW 2 */ + 15631 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 77 47 first + 15632 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15633 "00011110" // /* MW 3 */ + 15634 "00000101" // /* MW 2 */ + 15635 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable5.cc" 74 4 first +.tail_call + 15636 "10000100" // J #13760 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13760 delay_slots=5 */ + 15637 "00000000" // /* MW 5 */ + 15638 "00000000" // /* MW 4 */ + 15639 "11100000" // /* MW 3 */ + 15640 "00011010" // /* MW 2 */ + 15641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 15649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 15650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b919_wrapperPPv__end +.label __Z13_b919_wrapperPPv___func_end0 + 15651 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 115 4 first +.function_start + 15664 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 15665 "01000001" // /* MW 5 */ + 15666 "10100000" // /* MW 4 */ + 15667 "00101111" // /* MW 3 */ + 15668 "11000000" // /* MW 2 */ + 15669 "00000000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15670 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15671 "00011100" // /* MW 3 */ + 15672 "11000110" // /* MW 2 */ + 15673 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15674 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15675 "00011100" // /* MW 3 */ + 15676 "11000110" // /* MW 2 */ + 15677 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15678 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15679 "00011100" // /* MW 3 */ + 15680 "11000110" // /* MW 2 */ + 15681 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15682 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15683 "00011100" // /* MW 3 */ + 15684 "11000110" // /* MW 2 */ + 15685 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15686 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15687 "00011100" // /* MW 3 */ + 15688 "11000110" // /* MW 2 */ + 15689 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15690 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15691 "00011100" // /* MW 3 */ + 15692 "11000110" // /* MW 2 */ + 15693 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15694 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15695 "00011100" // /* MW 3 */ + 15696 "11000110" // /* MW 2 */ + 15697 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15698 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15699 "00011100" // /* MW 3 */ + 15700 "11000110" // /* MW 2 */ + 15701 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15702 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15703 "00011100" // /* MW 3 */ + 15704 "11000110" // /* MW 2 */ + 15705 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15706 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15707 "00011100" // /* MW 3 */ + 15708 "11000110" // /* MW 2 */ + 15709 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15710 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15711 "00011100" // /* MW 3 */ + 15712 "11000110" // /* MW 2 */ + 15713 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15714 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15715 "00011100" // /* MW 3 */ + 15716 "11000110" // /* MW 2 */ + 15717 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15718 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15719 "00011100" // /* MW 3 */ + 15720 "11000110" // /* MW 2 */ + 15721 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15722 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15723 "00011100" // /* MW 3 */ + 15724 "11000110" // /* MW 2 */ + 15725 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15726 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15727 "00011100" // /* MW 3 */ + 15728 "11000110" // /* MW 2 */ + 15729 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15730 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15731 "00011100" // /* MW 3 */ + 15732 "11000110" // /* MW 2 */ + 15733 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15734 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15735 "00011100" // /* MW 3 */ + 15736 "11000110" // /* MW 2 */ + 15737 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15738 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15739 "00011100" // /* MW 3 */ + 15740 "11000110" // /* MW 2 */ + 15741 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15742 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15743 "00011100" // /* MW 3 */ + 15744 "11000110" // /* MW 2 */ + 15745 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15746 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15747 "00011100" // /* MW 3 */ + 15748 "11000110" // /* MW 2 */ + 15749 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15750 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15751 "00011100" // /* MW 3 */ + 15752 "11000110" // /* MW 2 */ + 15753 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15754 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15755 "00011100" // /* MW 3 */ + 15756 "11000110" // /* MW 2 */ + 15757 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15758 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15759 "00011100" // /* MW 3 */ + 15760 "11000110" // /* MW 2 */ + 15761 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15762 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15763 "00011100" // /* MW 3 */ + 15764 "11000110" // /* MW 2 */ + 15765 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15766 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15767 "00011100" // /* MW 3 */ + 15768 "11000110" // /* MW 2 */ + 15769 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15770 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15771 "00011100" // /* MW 3 */ + 15772 "11000110" // /* MW 2 */ + 15773 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15774 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15775 "00011100" // /* MW 3 */ + 15776 "11000110" // /* MW 2 */ + 15777 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 15778 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15779 "00011100" // /* MW 3 */ + 15780 "11000110" // /* MW 2 */ + 15781 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 119 first + 15782 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 15783 "00000000" // /* MW 3 */ + 15784 "00101000" // /* MW 2 */ + 15785 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 first +.delay_slot + 15786 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15787 "00011100" // /* MW 3 */ + 15788 "11000110" // /* MW 2 */ + 15789 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15790 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15791 "00011100" // /* MW 3 */ + 15792 "11000110" // /* MW 2 */ + 15793 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15794 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15795 "00011100" // /* MW 3 */ + 15796 "11000110" // /* MW 2 */ + 15797 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 15798 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15799 "00011100" // /* MW 3 */ + 15800 "11000110" // /* MW 2 */ + 15801 "00010000" // /* MW 1 */ +.delay_slot + 15802 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 15803 "10100000" // /* MW 3 */ + 15804 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 15805 "00011000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.txt new file mode 100644 index 0000000000000000000000000000000000000000..eaa1644fb33f11a55e17a2e7f02cedec89cc05c6 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/Release/0_2_reloadable7.txt @@ -0,0 +1,5263 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 205 0x2580 x +elementwise_binary_shared.h 211 0x2580 1 x +elementwise_binary_shared.h 216 0x2580 2 +elementwise_binary_shared.h 216 0x2580 3 +elementwise_binary_shared.h 216 0x258a +elementwise_binary_shared.h 211 0x2598 x +elementwise_binary_shared.h 212 0x259c x +elementwise_binary_shared.h 212 0x25ac +elementwise_binary_shared.h 213 0x25b0 x +elementwise_binary_shared.h 213 0x25c0 +elementwise_binary_shared.h 214 0x25c4 x +elementwise_binary_shared.h 214 0x25d4 +elementwise_binary_shared.h 216 0x25d8 x +elementwise_binary_shared.h 217 0x25dc x +elementwise_binary_shared.h 216 0x25e0 +elementwise_binary_shared.h 216 0x25e6 x +elementwise_binary_shared.h 216 0x25ea +elementwise_binary_shared.h 216 0x25ee +elementwise_binary_shared.h 107 0x2650 x +elementwise_binary_shared.h 119 0x2650 1 +elementwise_binary_shared.h 126 0x2650 2 +elementwise_binary_shared.h 131 0x2650 3 +elementwise_binary_shared.h 119 0x2654 x +elementwise_binary_shared.h 122 0x2658 x +elementwise_binary_shared.h 124 0x265c x +elementwise_binary_shared.h 124 0x2668 +elementwise_binary_shared.h 107 0x266c +elementwise_binary_shared.h 124 0x2672 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 65 0x2676 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 124 0x2676 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 65 0x2680 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 150 0x268c +elementwise_binary_shared.h 119 0x2692 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x2696 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 126 0x2696 1 +elementwise_binary_shared.h 126 0x2696 2 +elementwise_binary_shared.h 131 0x2696 3 +elementwise_binary_shared.h 131 0x2696 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26a0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 126 0x26a0 1 x +elementwise_binary_shared.h 131 0x26a0 2 x +elementwise_binary_shared.h 171 0x26a0 3 +elementwise_binary_shared.h 131 0x26b2 +elementwise_binary_shared.h 131 0x26b2 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26b8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x26b8 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x26b8 2 +elementwise_binary_shared.h 166 0x26bc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26c8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x26c8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x26da x +vector.hpp 1139 0x26e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x26e0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26e4 +vector.hpp 1159 0x26e4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 171 0x26e4 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26f6 +vector.hpp 1139 0x26f6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x26f6 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x26f6 3 +elementwise_binary_shared.h 173 0x26f6 4 +elementwise_binary_shared.h 150 0x2710 +elementwise_binary_shared.h 150 0x2714 x +elementwise_binary_shared.h 150 0x2718 +elementwise_binary_shared.h 150 0x271e +elementwise_binary_shared.h 150 0x2724 +elementwise_binary_shared.h 166 0x2724 1 +elementwise_binary_shared.h 150 0x2730 +elementwise_binary_shared.h 150 0x2740 +elementwise_binary_shared.h 150 0x2740 1 +elementwise_binary_shared.h 150 0x2740 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x274a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x274a 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x274a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x274e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x274e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2752 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 171 0x2752 1 +elementwise_binary_shared.h 150 0x2758 +elementwise_binary_shared.h 150 0x275c +elementwise_binary_shared.h 150 0x275c 1 +elementwise_binary_shared.h 150 0x2762 +elementwise_binary_shared.h 150 0x2766 +elementwise_binary_shared.h 150 0x276c +elementwise_binary_shared.h 150 0x2774 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x2784 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x278a x +vector.hpp 1139 0x2790 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x2790 1 x +elementwise_binary_shared.h 166 0x2790 2 x +elementwise_binary_shared.h 169 0x2790 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x279c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x279c 1 +elementwise_binary_shared.h 166 0x279c 2 +elementwise_binary_shared.h 171 0x279c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27a8 x +vector.hpp 1139 0x27a8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27a8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x27a8 3 x +elementwise_binary_shared.h 173 0x27a8 4 x +elementwise_binary_shared.h 177 0x27a8 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27b0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x27b0 1 x +elementwise_binary_shared.h 171 0x27b0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27b8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27b8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x27b8 2 x +elementwise_binary_shared.h 166 0x27be x +elementwise_binary_shared.h 166 0x27c2 +elementwise_binary_shared.h 177 0x27c2 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27ca x +vector.hpp 1139 0x27ca 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x27ca 2 x +elementwise_binary_shared.h 171 0x27ca 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27d0 +vector.hpp 1159 0x27d0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27d0 2 x +accum.hpp 1110 0x27d0 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x27d0 4 x +elementwise_binary_shared.h 185 0x27d0 5 +elementwise_binary_shared.h 177 0x27f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2800 x +vector.hpp 1139 0x2800 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x2800 2 x +elementwise_binary_shared.h 171 0x2800 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2810 +vector.hpp 1159 0x2810 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2810 2 x +accum.hpp 1110 0x2810 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x2810 4 x +elementwise_binary_shared.h 185 0x2810 5 x +elementwise_binary_shared.h 177 0x2830 x +elementwise_binary_shared.h 187 0x2840 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2846 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2846 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2846 2 x +elementwise_binary_shared.h 177 0x284c x +elementwise_binary_shared.h 187 0x2852 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2856 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2856 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2856 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2860 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2860 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2860 2 +elementwise_binary_shared.h 205 0x2b00 x +elementwise_binary_shared.h 211 0x2b00 1 x +elementwise_binary_shared.h 216 0x2b00 2 +elementwise_binary_shared.h 216 0x2b00 3 +elementwise_binary_shared.h 216 0x2b0a +elementwise_binary_shared.h 211 0x2b18 x +elementwise_binary_shared.h 212 0x2b1c x +elementwise_binary_shared.h 212 0x2b2c +elementwise_binary_shared.h 213 0x2b30 x +elementwise_binary_shared.h 213 0x2b40 +elementwise_binary_shared.h 214 0x2b44 x +elementwise_binary_shared.h 214 0x2b54 +elementwise_binary_shared.h 216 0x2b58 x +elementwise_binary_shared.h 217 0x2b5c x +elementwise_binary_shared.h 216 0x2b60 +elementwise_binary_shared.h 216 0x2b66 x +elementwise_binary_shared.h 216 0x2b6a +elementwise_binary_shared.h 216 0x2b6e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x32e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 199 0x32e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x32e4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x32e4 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x32ea +io_buffer_main.h 125 0x32ea 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x32f0 x +conv2d_dw_bf16.h 221 0x32f4 x +conv2d_dw_bf16.h 221 0x32f8 +conv2d_dw_bf16.h 221 0x32fc +conv2d_dw_bf16.h 221 0x3300 +conv2d_dw_bf16.h 221 0x3304 +conv2d_dw_bf16.h 222 0x3308 x +conv2d_dw_bf16.h 222 0x330c +conv2d_dw_bf16.h 222 0x3310 +conv2d_dw_bf16.h 222 0x3314 +conv2d_dw_bf16.h 222 0x3318 +conv2d_dw_bf16.h 223 0x331c x +conv2d_dw_bf16.h 223 0x3320 +conv2d_dw_bf16.h 223 0x3324 +conv2d_dw_bf16.h 223 0x3328 +conv2d_dw_bf16.h 223 0x332c +conv2d_dw_bf16.h 224 0x3330 x +conv2d_dw_bf16.h 224 0x3334 +conv2d_dw_bf16.h 224 0x3338 +conv2d_dw_bf16.h 244 0x3338 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3342 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3342 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x3342 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3342 3 x +conv2d_dw_bf16.h 225 0x3348 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x334c +aie_core.h 81 0x334c 1 +aie_core.h 100 0x334c 2 +aie_core.h 100 0x334c 3 +aie_core.h 100 0x334c 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x334c 5 +vector.hpp 1139 0x334c 6 +vector.hpp 1139 0x334c 7 x +vector.hpp 1139 0x334c 8 x +vector.hpp 1159 0x334c 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x334c 10 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x334c 11 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3358 +aie_core.h 81 0x3358 1 +aie_core.h 100 0x3358 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3358 3 +vector.hpp 1139 0x3358 4 +vector.hpp 1159 0x3358 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x3358 6 x +conv2d_dw_bf16.h 225 0x3358 7 x +conv2d_dw_bf16.h 244 0x3358 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3366 +aie_core.h 100 0x3366 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3366 2 +vector.hpp 1159 0x3366 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x3366 4 +conv2d_dw_bf16.h 225 0x3366 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3370 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3370 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x3370 2 +conv2d_dw_bf16.h 225 0x3370 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x337a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x337a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x337a 2 +conv2d_dw_bf16.h 244 0x337a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3384 +shuffle.hpp 142 0x3384 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3384 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x338a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x338a 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x338a 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x3396 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3396 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3396 2 x +conv2d_dw_bf16.h 250 0x3396 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x33a2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33a2 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33a2 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x33a8 +conv2d_dw_bf16.h 244 0x33ac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33b6 +shuffle.hpp 142 0x33b6 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 268 0x33b6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33c0 +shuffle.hpp 142 0x33c0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x33c0 2 +conv2d_dw_bf16.h 271 0x33c0 3 +conv2d_dw_bf16.h 272 0x33c0 4 +conv2d_dw_bf16.h 273 0x33c0 5 +conv2d_dw_bf16.h 274 0x33c0 6 +conv2d_dw_bf16.h 275 0x33c0 7 +conv2d_dw_bf16.h 276 0x33c0 8 +conv2d_dw_bf16.h 277 0x33c0 9 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x33d0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x33d0 1 +accum.hpp 1110 0x33d0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 265 0x33d0 3 x +conv2d_dw_bf16.h 270 0x33d0 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x33e0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x33e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x33e0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x33e0 3 x +conv2d_dw_bf16.h 274 0x33e0 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x33f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33f0 1 x +vector.hpp 1139 0x33f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33f0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33fa + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 266 0x33fa 1 x +conv2d_dw_bf16.h 271 0x33fa 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x3404 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3404 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3404 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3404 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x340e +shuffle.hpp 142 0x3412 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 272 0x3412 1 x +conv2d_dw_bf16.h 267 0x341a x +conv2d_dw_bf16.h 276 0x341a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3422 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 268 0x3426 x +conv2d_dw_bf16.h 273 0x3426 1 x +conv2d_dw_bf16.h 265 0x342e x +conv2d_dw_bf16.h 277 0x342e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3436 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x3440 x +conv2d_dw_bf16.h 274 0x3450 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3460 x +aie_core.h 100 0x3460 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3460 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 266 0x346a x +conv2d_dw_bf16.h 271 0x346a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3472 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3472 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x347a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 267 0x347e x +conv2d_dw_bf16.h 272 0x347e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3486 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 276 0x3486 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3490 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3490 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3490 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x3496 x +conv2d_dw_bf16.h 273 0x3496 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34a0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x34a0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x34a0 2 +conv2d_dw_bf16.h 277 0x34a0 3 x +conv2d_dw_bf16.h 250 0x34ac x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34b0 x +vector.hpp 1139 0x34b4 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x34b8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34b8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x34bc x +accum.hpp 1110 0x34c0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x34c4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 286 0x34c8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34cc x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x34cc 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 285 0x34cc 2 x +conv2d_dw_bf16.h 268 0x34d4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x34d8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34d8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x34d8 2 +conv2d_dw_bf16.h 265 0x34e0 x +conv2d_dw_bf16.h 270 0x34e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x34e8 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 274 0x34e8 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x34f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34f0 1 x +vector.hpp 1139 0x34f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x34f0 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34fa + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 266 0x34fa 1 x +conv2d_dw_bf16.h 271 0x34fa 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x3504 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3504 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3504 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3504 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x350e +shuffle.hpp 142 0x3512 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 272 0x3512 1 x +conv2d_dw_bf16.h 267 0x351a x +conv2d_dw_bf16.h 276 0x351a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3522 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 268 0x3526 x +conv2d_dw_bf16.h 273 0x3526 1 x +conv2d_dw_bf16.h 265 0x352e x +conv2d_dw_bf16.h 277 0x352e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3536 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x353c x +conv2d_dw_bf16.h 274 0x3540 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3550 x +aie_core.h 100 0x3550 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3550 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3550 3 x +conv2d_dw_bf16.h 266 0x355c x +conv2d_dw_bf16.h 271 0x355c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3564 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3564 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x356c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 267 0x3570 x +conv2d_dw_bf16.h 272 0x3570 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3578 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 276 0x3578 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3580 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 273 0x3584 x +conv2d_dw_bf16.h 277 0x3588 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3594 x +accum.hpp 1110 0x3598 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 290 0x3598 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x359e x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 286 0x35a2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x35a6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x35aa x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 285 0x35aa 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x35ae x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x35ae 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 432 0xac0 x +conv2d_bf16_params.h 438 0xac0 1 x +conv2d_bf16_params.h 452 0xac0 2 +conv2d_bf16_params.h 453 0xac0 3 +conv2d_bf16_params.h 458 0xac0 4 +conv2d_bf16_params.h 470 0xac0 5 +conv2d_bf16_params.h 438 0xaca +conv2d_bf16_params.h 438 0xaca 1 x +conv2d_bf16_params.h 452 0xaca 2 +conv2d_bf16_params.h 462 0xaca 3 +conv2d_bf16_params.h 432 0xad4 +conv2d_bf16_params.h 444 0xad4 1 +conv2d_bf16_params.h 453 0xade +conv2d_bf16_params.h 458 0xade 1 +conv2d_bf16_params.h 458 0xade 2 +conv2d_bf16_params.h 444 0xaea +conv2d_bf16_params.h 470 0xaea 1 +conv2d_bf16_params.h 477 0xaea 2 +conv2d_bf16_params.h 557 0xaea 3 +conv2d_bf16_params.h 452 0xaf6 +conv2d_bf16_params.h 458 0xaf6 1 +conv2d_bf16_params.h 462 0xaf6 2 +conv2d_bf16_params.h 438 0xafe +conv2d_bf16_params.h 438 0xb02 +conv2d_bf16_params.h 438 0xb06 +conv2d_bf16_params.h 438 0xb0a +conv2d_bf16_params.h 438 0xb18 +conv2d_bf16_params.h 438 0xb1c +conv2d_bf16_params.h 438 0xb20 +conv2d_bf16_params.h 438 0xb24 +conv2d_bf16_params.h 438 0xb32 +conv2d_bf16_params.h 438 0xb36 +conv2d_bf16_params.h 438 0xb3a +conv2d_bf16_params.h 438 0xb3e +conv2d_bf16_params.h 438 0xb4c +conv2d_bf16_params.h 438 0xb50 +conv2d_bf16_params.h 444 0xb54 x +conv2d_bf16_params.h 447 0xb58 x +conv2d_bf16_params.h 448 0xb5c x +conv2d_bf16_params.h 452 0xb60 x +conv2d_bf16_params.h 453 0xb64 x +conv2d_bf16_params.h 458 0xb68 x +conv2d_bf16_params.h 444 0xb6e x +conv2d_bf16_params.h 458 0xb72 x +conv2d_bf16_params.h 462 0xb72 1 x +conv2d_bf16_params.h 462 0xb78 +conv2d_bf16_params.h 452 0xb7c x +conv2d_bf16_params.h 452 0xb80 +conv2d_bf16_params.h 462 0xb80 1 x +conv2d_bf16_params.h 557 0xb80 2 +conv2d_bf16_params.h 462 0xb86 +conv2d_bf16_params.h 458 0xb8a x +conv2d_bf16_params.h 458 0xb8e +conv2d_bf16_params.h 458 0xb92 +conv2d_bf16_params.h 477 0xb92 1 +conv2d_bf16_params.h 557 0xb92 2 x +conv2d_bf16_params.h 458 0xb98 x +conv2d_bf16_params.h 458 0xb9e +conv2d_bf16_params.h 477 0xb9e 1 x +conv2d_bf16_params.h 458 0xba4 x +conv2d_bf16_params.h 444 0xba8 x +conv2d_bf16_params.h 462 0xbac x +conv2d_bf16_params.h 470 0xbb0 x +conv2d_bf16_params.h 470 0xbb4 +conv2d_bf16_params.h 477 0xbb4 1 x +conv2d_bf16_params.h 477 0xbb8 +conv2d_bf16_params.h 491 0xbc8 +conv2d_bf16_params.h 492 0xbc8 1 +conv2d_bf16_params.h 495 0xbc8 2 +conv2d_bf16_params.h 502 0xbc8 3 +conv2d_bf16_params.h 533 0xbc8 4 +conv2d_bf16_params.h 539 0xbc8 5 +conv2d_bf16_params.h 557 0xbc8 6 +conv2d_bf16_params.h 621 0xbc8 7 +conv2d_bf16_params.h 645 0xbc8 8 +conv2d_bf16_params.h 709 0xbc8 9 +conv2d_bf16_params.h 477 0xbd2 +conv2d_bf16_params.h 481 0xbd2 1 +conv2d_bf16_params.h 500 0xbd2 2 +conv2d_bf16_params.h 506 0xbd2 3 +conv2d_bf16_params.h 507 0xbd2 4 +conv2d_bf16_params.h 524 0xbd2 5 +conv2d_bf16_params.h 539 0xbd2 6 +conv2d_bf16_params.h 655 0xbd2 7 +conv2d_bf16_params.h 477 0xbdc +conv2d_bf16_params.h 504 0xbdc 1 +conv2d_bf16_params.h 510 0xbdc 2 +conv2d_bf16_params.h 520 0xbdc 3 +conv2d_bf16_params.h 700 0xbdc 4 +conv2d_bf16_params.h 477 0xbe2 +conv2d_bf16_params.h 539 0xbe2 1 +conv2d_bf16_params.h 578 0xbe2 2 +conv2d_bf16_params.h 642 0xbe2 3 +conv2d_bf16_params.h 529 0xbe6 +conv2d_bf16_params.h 642 0xbe6 1 +conv2d_bf16_params.h 642 0xbe6 2 +conv2d_bf16_params.h 655 0xbea +conv2d_bf16_params.h 453 0xbf0 +conv2d_bf16_params.h 453 0xbf0 1 +conv2d_bf16_params.h 477 0xbf0 2 +conv2d_bf16_params.h 504 0xbf0 3 +conv2d_bf16_params.h 655 0xbf0 4 +conv2d_bf16_params.h 453 0xbfc x +conv2d_bf16_params.h 477 0xbfc 1 +conv2d_bf16_params.h 481 0xbfc 2 +conv2d_bf16_params.h 500 0xbfc 3 +conv2d_bf16_params.h 506 0xbfc 4 +conv2d_bf16_params.h 507 0xbfc 5 +conv2d_bf16_params.h 524 0xbfc 6 +conv2d_bf16_params.h 539 0xbfc 7 +conv2d_bf16_params.h 491 0xc06 +conv2d_bf16_params.h 492 0xc06 1 +conv2d_bf16_params.h 495 0xc06 2 +conv2d_bf16_params.h 502 0xc06 3 +conv2d_bf16_params.h 510 0xc06 4 +conv2d_bf16_params.h 520 0xc06 5 +conv2d_bf16_params.h 533 0xc06 6 +conv2d_bf16_params.h 539 0xc06 7 +conv2d_bf16_params.h 557 0xc06 8 +conv2d_bf16_params.h 621 0xc06 9 +conv2d_bf16_params.h 645 0xc06 10 +conv2d_bf16_params.h 655 0xc06 11 +conv2d_bf16_params.h 700 0xc06 12 +conv2d_bf16_params.h 709 0xc06 13 +conv2d_bf16_params.h 477 0xc10 +conv2d_bf16_params.h 529 0xc10 1 +conv2d_bf16_params.h 539 0xc10 2 +conv2d_bf16_params.h 578 0xc10 3 +conv2d_bf16_params.h 642 0xc10 4 +conv2d_bf16_params.h 642 0xc10 5 +conv2d_bf16_params.h 642 0xc10 6 +conv2d_bf16_params.h 477 0xc20 x +conv2d_bf16_params.h 495 0xc20 1 x +conv2d_bf16_params.h 495 0xc20 2 +conv2d_bf16_params.h 682 0xc20 3 +conv2d_bf16_params.h 477 0xc2a +conv2d_bf16_params.h 481 0xc2a 1 x +conv2d_bf16_params.h 495 0xc2a 2 +conv2d_bf16_params.h 495 0xc2a 3 +conv2d_bf16_params.h 477 0xc34 x +conv2d_bf16_params.h 496 0xc34 1 +conv2d_bf16_params.h 504 0xc34 2 +conv2d_bf16_params.h 539 0xc34 3 +conv2d_bf16_params.h 578 0xc34 4 +conv2d_bf16_params.h 496 0xc3e +conv2d_bf16_params.h 499 0xc3e 1 +conv2d_bf16_params.h 504 0xc3e 2 x +conv2d_bf16_params.h 509 0xc3e 3 +conv2d_bf16_params.h 519 0xc3e 4 +conv2d_bf16_params.h 700 0xc3e 5 +conv2d_bf16_params.h 492 0xc48 x +conv2d_bf16_params.h 497 0xc48 1 +conv2d_bf16_params.h 509 0xc48 2 +conv2d_bf16_params.h 500 0xc52 +conv2d_bf16_params.h 520 0xc52 1 x +conv2d_bf16_params.h 502 0xc58 +conv2d_bf16_params.h 520 0xc58 1 +conv2d_bf16_params.h 502 0xc62 +conv2d_bf16_params.h 507 0xc62 1 x +conv2d_bf16_params.h 495 0xc68 x +conv2d_bf16_params.h 495 0xc6c +conv2d_bf16_params.h 495 0xc6c 1 +conv2d_bf16_params.h 610 0xc6c 2 +conv2d_bf16_params.h 709 0xc6c 3 +conv2d_bf16_params.h 507 0xc72 x +conv2d_bf16_params.h 495 0xc76 x +conv2d_bf16_params.h 495 0xc7a +conv2d_bf16_params.h 506 0xc7a 1 +conv2d_bf16_params.h 519 0xc7a 2 x +conv2d_bf16_params.h 496 0xc84 x +conv2d_bf16_params.h 504 0xc84 1 x +conv2d_bf16_params.h 522 0xc84 2 +conv2d_bf16_params.h 509 0xc8e x +conv2d_bf16_params.h 496 0xc94 x +conv2d_bf16_params.h 520 0xc94 1 x +conv2d_bf16_params.h 529 0xc94 2 +conv2d_bf16_params.h 497 0xc9e x +conv2d_bf16_params.h 509 0xc9e 1 x +conv2d_bf16_params.h 533 0xc9e 2 +conv2d_bf16_params.h 539 0xca8 x +conv2d_bf16_params.h 499 0xcac x +conv2d_bf16_params.h 499 0xcb0 +conv2d_bf16_params.h 529 0xcb4 x +conv2d_bf16_params.h 507 0xcb8 x +conv2d_bf16_params.h 511 0xcb8 1 +conv2d_bf16_params.h 491 0xcbe x +conv2d_bf16_params.h 507 0xcbe 1 +conv2d_bf16_params.h 500 0xcc8 x +conv2d_bf16_params.h 511 0xcc8 1 x +conv2d_bf16_params.h 500 0xcce +conv2d_bf16_params.h 534 0xcce 1 +conv2d_bf16_params.h 502 0xcd6 x +conv2d_bf16_params.h 509 0xcd6 1 x +conv2d_bf16_params.h 642 0xcd6 2 +conv2d_bf16_params.h 510 0xce2 x +conv2d_bf16_params.h 506 0xce6 x +conv2d_bf16_params.h 527 0xcea x +conv2d_bf16_params.h 502 0xcf4 x +conv2d_bf16_params.h 502 0xcf8 +conv2d_bf16_params.h 506 0xcfc x +conv2d_bf16_params.h 506 0xd0c +conv2d_bf16_params.h 506 0xd10 +conv2d_bf16_params.h 510 0xd14 x +conv2d_bf16_params.h 510 0xd18 +conv2d_bf16_params.h 510 0xd1e +conv2d_bf16_params.h 510 0xd22 +conv2d_bf16_params.h 510 0xd28 +conv2d_bf16_params.h 539 0xd28 1 +conv2d_bf16_params.h 642 0xd28 2 +conv2d_bf16_params.h 511 0xd2e x +conv2d_bf16_params.h 524 0xd2e 1 +conv2d_bf16_params.h 539 0xd2e 2 +conv2d_bf16_params.h 512 0xd34 x +conv2d_bf16_params.h 524 0xd34 1 x +conv2d_bf16_params.h 524 0xd3a +conv2d_bf16_params.h 524 0xd3e +conv2d_bf16_params.h 520 0xd42 x +conv2d_bf16_params.h 511 0xd46 x +conv2d_bf16_params.h 522 0xd46 1 x +conv2d_bf16_params.h 524 0xd4c x +conv2d_bf16_params.h 529 0xd4c 1 x +conv2d_bf16_params.h 539 0xd4c 2 x +conv2d_bf16_params.h 534 0xd56 +conv2d_bf16_params.h 539 0xd56 1 +conv2d_bf16_params.h 527 0xd5c x +conv2d_bf16_params.h 533 0xd5c 1 x +conv2d_bf16_params.h 529 0xd6a x +conv2d_bf16_params.h 533 0xd6a 1 +conv2d_bf16_params.h 539 0xd70 x +conv2d_bf16_params.h 529 0xd76 x +conv2d_bf16_params.h 529 0xd76 1 +conv2d_bf16_params.h 529 0xd7c +conv2d_bf16_params.h 534 0xd80 x +conv2d_bf16_params.h 534 0xd84 +conv2d_bf16_params.h 539 0xd84 1 x +conv2d_bf16_params.h 555 0xd84 2 +conv2d_bf16_params.h 559 0xd84 3 +conv2d_bf16_params.h 700 0xd84 4 +conv2d_bf16_params.h 669 0xd8e +conv2d_bf16_params.h 700 0xd8e 1 +conv2d_bf16_params.h 539 0xd92 +conv2d_bf16_params.h 539 0xda2 +conv2d_bf16_params.h 539 0xdb2 +conv2d_bf16_params.h 539 0xdb2 1 +conv2d_bf16_params.h 539 0xdb2 2 +conv2d_bf16_params.h 539 0xdb2 3 +conv2d_bf16_params.h 539 0xdbc +conv2d_bf16_params.h 539 0xdc0 +conv2d_bf16_params.h 539 0xdc4 +conv2d_bf16_params.h 539 0xdc4 1 +conv2d_bf16_params.h 539 0xdca +conv2d_bf16_params.h 539 0xdce +conv2d_bf16_params.h 539 0xdd2 +conv2d_bf16_params.h 669 0xdd2 1 +conv2d_bf16_params.h 539 0xdd8 +conv2d_bf16_params.h 539 0xddc +conv2d_bf16_params.h 539 0xde0 +conv2d_bf16_params.h 539 0xde4 +conv2d_bf16_params.h 555 0xde8 x +conv2d_bf16_params.h 642 0xdf0 +conv2d_bf16_params.h 669 0xdf0 1 +conv2d_bf16_params.h 669 0xdf0 2 +conv2d_bf16_params.h 669 0xdfa x +conv2d_bf16_params.h 497 0xdfe x +conv2d_bf16_params.h 641 0xdfe 1 x +conv2d_bf16_params.h 645 0xdfe 2 +conv2d_bf16_params.h 559 0xe08 x +conv2d_bf16_params.h 640 0xe08 1 +conv2d_bf16_params.h 642 0xe08 2 +conv2d_bf16_params.h 642 0xe08 3 +conv2d_bf16_params.h 642 0xe12 x +conv2d_bf16_params.h 578 0xe16 x +conv2d_bf16_params.h 640 0xe1a x +conv2d_bf16_params.h 557 0xe1e +conv2d_bf16_params.h 645 0xe1e 1 +conv2d_bf16_params.h 641 0xe28 x +conv2d_bf16_params.h 642 0xe28 1 x +conv2d_bf16_params.h 642 0xe2e +conv2d_bf16_params.h 642 0xe2e 1 +conv2d_bf16_params.h 558 0xe32 x +conv2d_bf16_params.h 645 0xe32 1 +conv2d_bf16_params.h 540 0xe38 +conv2d_bf16_params.h 645 0xe38 1 x +conv2d_bf16_params.h 540 0xe3e x +conv2d_bf16_params.h 557 0xe3e 1 +conv2d_bf16_params.h 642 0xe44 x +conv2d_bf16_params.h 557 0xe48 x +conv2d_bf16_params.h 655 0xe48 1 +conv2d_bf16_params.h 558 0xe4e +conv2d_bf16_params.h 655 0xe4e 1 x +conv2d_bf16_params.h 558 0xe54 x +conv2d_bf16_params.h 540 0xe58 x +conv2d_bf16_params.h 655 0xe58 1 +conv2d_bf16_params.h 655 0xe58 2 +conv2d_bf16_params.h 679 0xe58 3 +conv2d_bf16_params.h 655 0xe62 x +conv2d_bf16_params.h 558 0xe66 x +conv2d_bf16_params.h 655 0xe66 1 +conv2d_bf16_params.h 655 0xe66 2 +conv2d_bf16_params.h 679 0xe66 3 +conv2d_bf16_params.h 655 0xe70 x +conv2d_bf16_params.h 126 0xe74 x +conv2d_bf16_params.h 559 0xe74 1 x +conv2d_bf16_params.h 669 0xe7a x +conv2d_bf16_params.h 700 0xe7a 1 +conv2d_bf16_params.h 558 0xe80 x +conv2d_bf16_params.h 700 0xe86 x +conv2d_bf16_params.h 578 0xe8a x +conv2d_bf16_params.h 559 0xe8e x +conv2d_bf16_params.h 578 0xe92 x +conv2d_bf16_params.h 610 0xe96 x +conv2d_bf16_params.h 611 0xe96 1 +conv2d_bf16_params.h 621 0xe96 2 +conv2d_bf16_params.h 621 0xe96 3 +conv2d_bf16_params.h 629 0xe96 4 +conv2d_bf16_params.h 621 0xea2 +conv2d_bf16_params.h 621 0xea2 1 x +conv2d_bf16_params.h 645 0xea2 2 +conv2d_bf16_params.h 649 0xea2 3 +conv2d_bf16_params.h 645 0xea8 +conv2d_bf16_params.h 554 0xeae x +conv2d_bf16_params.h 645 0xeae 1 x +conv2d_bf16_params.h 554 0xeb8 +conv2d_bf16_params.h 555 0xeb8 1 +conv2d_bf16_params.h 555 0xeb8 2 x +conv2d_bf16_params.h 645 0xeb8 3 +conv2d_bf16_params.h 555 0xec4 +conv2d_bf16_params.h 621 0xec4 1 +conv2d_bf16_params.h 621 0xec4 2 x +conv2d_bf16_params.h 645 0xec4 3 +conv2d_bf16_params.h 558 0xece x +conv2d_bf16_params.h 559 0xece 1 +conv2d_bf16_params.h 621 0xece 2 +conv2d_bf16_params.h 621 0xece 3 +conv2d_bf16_params.h 645 0xece 4 +conv2d_bf16_params.h 559 0xeda x +conv2d_bf16_params.h 621 0xeda 1 x +conv2d_bf16_params.h 645 0xeda 2 x +conv2d_bf16_params.h 610 0xee0 x +conv2d_bf16_params.h 621 0xee0 1 +conv2d_bf16_params.h 655 0xee0 2 +conv2d_bf16_params.h 679 0xee0 3 +conv2d_bf16_params.h 621 0xeec +conv2d_bf16_params.h 649 0xeec 1 +conv2d_bf16_params.h 655 0xeec 2 x +conv2d_bf16_params.h 661 0xeec 3 +conv2d_bf16_params.h 127 0xef6 x +conv2d_bf16_params.h 127 0xef6 1 x +conv2d_bf16_params.h 621 0xef6 2 +conv2d_bf16_params.h 649 0xef6 3 +conv2d_bf16_params.h 655 0xef6 4 +conv2d_bf16_params.h 679 0xef6 5 +conv2d_bf16_params.h 710 0xef6 6 +conv2d_bf16_params.h 710 0xef6 7 +conv2d_bf16_params.h 655 0xf00 x +conv2d_bf16_params.h 679 0xf00 1 x +conv2d_bf16_params.h 621 0xf06 x +conv2d_bf16_params.h 649 0xf06 1 x +conv2d_bf16_params.h 655 0xf06 2 +conv2d_bf16_params.h 655 0xf06 3 +conv2d_bf16_params.h 700 0xf06 4 +conv2d_bf16_params.h 700 0xf06 5 +conv2d_bf16_params.h 655 0xf10 x +conv2d_bf16_params.h 700 0xf10 1 x +conv2d_bf16_params.h 629 0xf14 x +conv2d_bf16_params.h 611 0xf18 x +conv2d_bf16_params.h 643 0xf26 x +conv2d_bf16_params.h 664 0xf2a +conv2d_bf16_params.h 621 0xf30 x +conv2d_bf16_params.h 629 0xf30 1 +conv2d_bf16_params.h 684 0xf30 2 x +conv2d_bf16_params.h 629 0xf3a x +conv2d_bf16_params.h 127 0xf40 x +conv2d_bf16_params.h 644 0xf40 1 +conv2d_bf16_params.h 700 0xf40 2 x +conv2d_bf16_params.h 705 0xf40 3 +conv2d_bf16_params.h 705 0xf40 4 +conv2d_bf16_params.h 645 0xf4a x +conv2d_bf16_params.h 700 0xf4a 1 +conv2d_bf16_params.h 700 0xf4a 2 +conv2d_bf16_params.h 705 0xf4a 3 +conv2d_bf16_params.h 644 0xf54 +conv2d_bf16_params.h 649 0xf54 1 x +conv2d_bf16_params.h 674 0xf54 2 +conv2d_bf16_params.h 644 0xf5e x +conv2d_bf16_params.h 662 0xf5e 1 +conv2d_bf16_params.h 664 0xf5e 2 x +conv2d_bf16_params.h 127 0xf68 x +conv2d_bf16_params.h 663 0xf68 1 x +conv2d_bf16_params.h 664 0xf68 2 +conv2d_bf16_params.h 126 0xf6e x +conv2d_bf16_params.h 664 0xf6e 1 x +conv2d_bf16_params.h 126 0xf74 +conv2d_bf16_params.h 664 0xf74 1 +conv2d_bf16_params.h 127 0xf7a x +conv2d_bf16_params.h 127 0xf7a 1 x +conv2d_bf16_params.h 664 0xf7a 2 +conv2d_bf16_params.h 664 0xf7a 3 +conv2d_bf16_params.h 675 0xf7a 4 +conv2d_bf16_params.h 696 0xf7a 5 +conv2d_bf16_params.h 644 0xf84 x +conv2d_bf16_params.h 664 0xf84 1 x +conv2d_bf16_params.h 705 0xf84 2 +conv2d_bf16_params.h 664 0xf8e +conv2d_bf16_params.h 705 0xf8e 1 x +conv2d_bf16_params.h 705 0xf8e 2 x +conv2d_bf16_params.h 127 0xf94 +conv2d_bf16_params.h 674 0xf94 1 x +conv2d_bf16_params.h 675 0xf94 2 x +conv2d_bf16_params.h 682 0xf94 3 +conv2d_bf16_params.h 718 0xf94 4 +conv2d_bf16_params.h 720 0xf94 5 +conv2d_bf16_params.h 127 0xf9e x +conv2d_bf16_params.h 642 0xf9e 1 +conv2d_bf16_params.h 675 0xf9e 2 +conv2d_bf16_params.h 675 0xfa8 x +conv2d_bf16_params.h 707 0xfa8 1 x +conv2d_bf16_params.h 642 0xfae +conv2d_bf16_params.h 674 0xfae 1 x +conv2d_bf16_params.h 675 0xfae 2 +conv2d_bf16_params.h 642 0xfb8 x +conv2d_bf16_params.h 655 0xfb8 1 +conv2d_bf16_params.h 655 0xfb8 2 +conv2d_bf16_params.h 675 0xfb8 3 x +conv2d_bf16_params.h 679 0xfb8 4 +conv2d_bf16_params.h 679 0xfb8 5 +conv2d_bf16_params.h 655 0xfc4 x +conv2d_bf16_params.h 679 0xfc4 1 x +conv2d_bf16_params.h 713 0xfc4 2 +conv2d_bf16_params.h 691 0xfca x +conv2d_bf16_params.h 675 0xfce +conv2d_bf16_params.h 675 0xfce 1 x +conv2d_bf16_params.h 709 0xfce 2 x +conv2d_bf16_params.h 675 0xfd8 +conv2d_bf16_params.h 706 0xfd8 1 x +conv2d_bf16_params.h 706 0xfd8 2 +conv2d_bf16_params.h 709 0xfd8 3 +conv2d_bf16_params.h 682 0xfe4 x +conv2d_bf16_params.h 706 0xfe4 1 +conv2d_bf16_params.h 126 0xfea x +conv2d_bf16_params.h 696 0xfea 1 x +conv2d_bf16_params.h 127 0xff0 x +conv2d_bf16_params.h 127 0xff0 1 x +conv2d_bf16_params.h 696 0xff0 2 +conv2d_bf16_params.h 696 0xff6 x +conv2d_bf16_params.h 713 0xff6 1 x +conv2d_bf16_params.h 696 0xffc +conv2d_bf16_params.h 706 0xffc 1 +conv2d_bf16_params.h 706 0xffc 2 x +conv2d_bf16_params.h 706 0x1006 +conv2d_bf16_params.h 696 0x100a x +conv2d_bf16_params.h 707 0x100a 1 x +conv2d_bf16_params.h 696 0x1010 +conv2d_bf16_params.h 709 0x1010 1 x +conv2d_bf16_params.h 696 0x1016 x +conv2d_bf16_params.h 709 0x1016 1 +conv2d_bf16_params.h 707 0x1020 x +conv2d_bf16_params.h 708 0x1020 1 +conv2d_bf16_params.h 710 0x1020 2 x +conv2d_bf16_params.h 710 0x1020 3 x +conv2d_bf16_params.h 708 0x102c x +conv2d_bf16_params.h 713 0x102c 1 x +conv2d_bf16_params.h 709 0x1036 x +conv2d_bf16_params.h 800 0x1036 1 x +conv2d_bf16_params.h 710 0x103c x +conv2d_bf16_params.h 718 0x1044 x +conv2d_bf16_params.h 718 0x1048 +conv2d_bf16_params.h 720 0x104c x +conv2d_bf16_params.h 800 0x104c 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x1060 +utils.h 531 0x1060 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 689 0x1060 2 x +conv2d_bf16.h 698 0x1060 3 +conv2d_bf16.h 704 0x1060 4 +conv2d_bf16.h 707 0x1060 5 +conv2d_bf16.h 707 0x1060 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x106c +utils.h 526 0x106c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 698 0x106c 2 x +conv2d_bf16.h 704 0x106c 3 x +conv2d_bf16.h 707 0x106c 4 +conv2d_bf16.h 707 0x106c 5 +conv2d_bf16.h 698 0x107a +conv2d_bf16.h 702 0x107a 1 +conv2d_bf16.h 698 0x1084 +conv2d_bf16.h 702 0x1084 1 x +conv2d_bf16.h 699 0x108e x +conv2d_bf16.h 702 0x108e 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1098 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 702 0x1098 1 x +conv2d_bf16.h 702 0x109e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x10a6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x10a6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x10ac x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 704 0x10b0 x +conv2d_bf16.h 702 0x10b4 x +conv2d_bf16.h 705 0x10b4 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x10ba x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 707 0x10ba 1 +conv2d_bf16.h 707 0x10ba 2 +conv2d_bf16.h 704 0x10c0 x +conv2d_bf16.h 705 0x10c6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x10d0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x10d0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x10d0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x10e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x10e0 1 x +conv2d_bf16.h 704 0x10f0 x +conv2d_bf16.h 705 0x1100 x +conv2d_bf16.h 707 0x1100 1 x +conv2d_bf16.h 707 0x1100 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1110 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x1110 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1110 2 +conv2d_bf16.h 708 0x1110 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1120 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1120 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x1120 2 x +conv2d_bf16.h 707 0x1132 x +conv2d_bf16.h 707 0x1132 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1136 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1136 1 x +conv2d_bf16.h 708 0x1136 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x113e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x113e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1142 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1146 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1146 1 x +conv2d_bf16.h 707 0x1146 2 x +conv2d_bf16.h 707 0x1146 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x114e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x114e 1 x +conv2d_bf16.h 708 0x114e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1156 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 707 0x115a x +conv2d_bf16.h 707 0x115a 1 x +conv2d_bf16.h 723 0x115a 2 x +conv2d_bf16.h 708 0x1160 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1164 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1170 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1836 0x1170 1 +conv2d_bf16.h 1836 0x1170 2 x +conv2d_bf16.h 1836 0x1170 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 240 0x1170 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1836 0x117e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 241 0x117e 1 +conv2d_bf16_params.h 242 0x117e 2 +conv2d_bf16_params.h 250 0x117e 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 866 0x118a +conv2d_bf16.h 876 0x118a 1 +conv2d_bf16.h 876 0x118a 2 +conv2d_bf16.h 881 0x118a 3 +conv2d_bf16.h 1836 0x118a 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 242 0x118a 5 +conv2d_bf16_params.h 242 0x118a 6 +conv2d_bf16_params.h 242 0x118a 7 +conv2d_bf16_params.h 242 0x118a 8 +conv2d_bf16_params.h 242 0x118a 9 +conv2d_bf16_params.h 243 0x118a 10 +conv2d_bf16_params.h 245 0x118a 11 +conv2d_bf16_params.h 250 0x118a 12 +conv2d_bf16_params.h 250 0x118a 13 +conv2d_bf16_params.h 240 0x1196 +conv2d_bf16_params.h 240 0x1196 1 x +conv2d_bf16_params.h 242 0x11a2 +conv2d_bf16_params.h 245 0x11a2 1 +conv2d_bf16_params.h 242 0x11ae +conv2d_bf16_params.h 244 0x11ae 1 +conv2d_bf16_params.h 244 0x11ae 2 +conv2d_bf16_params.h 249 0x11ae 3 +conv2d_bf16_params.h 243 0x11ba +conv2d_bf16_params.h 244 0x11ba 1 +conv2d_bf16_params.h 250 0x11ba 2 +conv2d_bf16_params.h 244 0x11c6 +conv2d_bf16_params.h 240 0x11d4 +conv2d_bf16_params.h 240 0x11d8 +conv2d_bf16_params.h 241 0x11d8 1 x +conv2d_bf16_params.h 242 0x11de x +conv2d_bf16_params.h 242 0x11de 1 x +conv2d_bf16_params.h 245 0x11e4 x +conv2d_bf16_params.h 242 0x11f2 x +conv2d_bf16_params.h 242 0x11f6 +conv2d_bf16_params.h 242 0x11fa +conv2d_bf16_params.h 241 0x11fe x +conv2d_bf16_params.h 242 0x11fe 1 +conv2d_bf16_params.h 242 0x1204 x +conv2d_bf16_params.h 242 0x1208 +conv2d_bf16_params.h 242 0x120c +conv2d_bf16_params.h 242 0x1210 +conv2d_bf16_params.h 242 0x1210 1 +conv2d_bf16_params.h 242 0x1216 +conv2d_bf16_params.h 243 0x121a x +conv2d_bf16_params.h 242 0x121e x +conv2d_bf16_params.h 243 0x121e 1 +conv2d_bf16_params.h 244 0x1224 x +conv2d_bf16_params.h 245 0x1224 1 x +conv2d_bf16_params.h 244 0x1236 +conv2d_bf16_params.h 244 0x1236 1 +conv2d_bf16_params.h 245 0x123c +conv2d_bf16_params.h 244 0x1242 +conv2d_bf16_params.h 244 0x1246 +conv2d_bf16_params.h 244 0x124a +conv2d_bf16_params.h 244 0x124e +conv2d_bf16_params.h 244 0x1252 +conv2d_bf16_params.h 245 0x1256 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 876 0x1268 +conv2d_bf16.h 876 0x1268 1 +conv2d_bf16.h 1849 0x1276 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 250 0x1280 x +conv2d_bf16_params.h 250 0x1280 1 +conv2d_bf16_params.h 250 0x128c +conv2d_bf16_params.h 250 0x1290 +conv2d_bf16_params.h 250 0x1294 +conv2d_bf16_params.h 250 0x1298 +conv2d_bf16_params.h 250 0x1298 1 +conv2d_bf16_params.h 250 0x129e +conv2d_bf16_params.h 249 0x12a2 x +conv2d_bf16_params.h 249 0x12a6 +conv2d_bf16_params.h 250 0x12aa x +conv2d_bf16_params.h 258 0x12b0 x +conv2d_bf16_params.h 259 0x12c8 +conv2d_bf16_params.h 259 0x12ce x +conv2d_bf16_params.h 259 0x12d2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1841 0x12e0 x +conv2d_bf16.h 1849 0x12e0 1 +conv2d_bf16.h 1849 0x12e0 2 x +conv2d_bf16.h 876 0x12ea +conv2d_bf16.h 881 0x12ea 1 +conv2d_bf16.h 1841 0x12ea 2 +conv2d_bf16.h 1842 0x12ea 3 +conv2d_bf16.h 1842 0x12ea 4 +conv2d_bf16.h 1842 0x12ea 5 +conv2d_bf16.h 1845 0x12ea 6 +conv2d_bf16.h 1849 0x12ea 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x12ea 8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1842 0x12f4 x +conv2d_bf16.h 1842 0x12f4 1 +conv2d_bf16.h 1849 0x12f4 2 +conv2d_bf16.h 862 0x1300 +conv2d_bf16.h 1842 0x1300 1 +conv2d_bf16.h 1845 0x1300 2 +conv2d_bf16.h 1845 0x130c x +conv2d_bf16.h 862 0x1310 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1314 x +io_buffer_main.h 125 0x1318 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1841 0x1318 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x131e x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1842 0x1322 x +conv2d_bf16.h 1845 0x1328 x +conv2d_bf16.h 866 0x132c x +conv2d_bf16.h 866 0x1330 +conv2d_bf16.h 1842 0x1336 x +conv2d_bf16.h 1842 0x1336 1 x +conv2d_bf16.h 1842 0x133c +conv2d_bf16.h 1845 0x133c 1 x +conv2d_bf16.h 1841 0x1342 x +conv2d_bf16.h 881 0x134a +conv2d_bf16.h 885 0x134a 1 +conv2d_bf16.h 1845 0x134e x +conv2d_bf16.h 867 0x1352 +conv2d_bf16.h 867 0x1358 +conv2d_bf16.h 867 0x1358 1 x +conv2d_bf16.h 867 0x1360 +conv2d_bf16.h 867 0x1366 +conv2d_bf16.h 867 0x1372 +conv2d_bf16.h 867 0x1372 1 +conv2d_bf16.h 867 0x1378 +conv2d_bf16.h 867 0x137c +conv2d_bf16.h 867 0x1382 +conv2d_bf16.h 867 0x138a +conv2d_bf16.h 881 0x13a0 +conv2d_bf16.h 883 0x13a0 1 +conv2d_bf16.h 884 0x13a0 2 +conv2d_bf16.h 876 0x13ac x +conv2d_bf16.h 876 0x13ac 1 x +conv2d_bf16.h 881 0x13ac 2 x +conv2d_bf16.h 883 0x13ac 3 +conv2d_bf16.h 884 0x13ac 4 +conv2d_bf16.h 885 0x13b8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x13b8 1 +conv2d_bf16_params.h 243 0x13b8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 883 0x13c2 x +conv2d_bf16.h 884 0x13c8 x +conv2d_bf16.h 876 0x13ce x +conv2d_bf16.h 876 0x13d2 +conv2d_bf16.h 881 0x13d6 x +conv2d_bf16.h 881 0x13da + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x13da 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 881 0x13e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x13e0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 885 0x13f0 +conv2d_bf16.h 885 0x13f4 x +conv2d_bf16.h 885 0x13fe +conv2d_bf16.h 885 0x1402 +conv2d_bf16.h 885 0x1406 +conv2d_bf16.h 896 0x1410 +conv2d_bf16.h 1115 0x1410 1 +conv2d_bf16.h 1115 0x1410 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x141a +vector.hpp 1152 0x141a 1 +vector.hpp 1152 0x141a 2 +vector.hpp 1152 0x141a 3 +vector.hpp 1152 0x141a 4 +vector.hpp 1152 0x141a 5 +vector.hpp 1152 0x141a 6 +vector.hpp 1152 0x141a 7 +vector.hpp 1152 0x141a 8 +vector.hpp 1152 0x141a 9 +vector.hpp 1152 0x141a 10 +vector.hpp 1152 0x141a 11 +vector.hpp 1152 0x141a 12 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x141a 13 +accum.hpp 149 0x141a 14 +accum.hpp 149 0x141a 15 +accum.hpp 149 0x141a 16 +accum.hpp 149 0x141a 17 +accum.hpp 149 0x141a 18 +accum.hpp 149 0x141a 19 +accum.hpp 149 0x141a 20 +accum.hpp 149 0x141a 21 +accum.hpp 149 0x141a 22 +accum.hpp 149 0x141a 23 +accum.hpp 149 0x141a 24 +accum.hpp 149 0x141a 25 +accum.hpp 149 0x141a 26 +accum.hpp 149 0x141a 27 +accum.hpp 149 0x141a 28 +accum.hpp 1110 0x141a 29 +accum.hpp 1110 0x141a 30 +accum.hpp 1110 0x141a 31 +accum.hpp 1110 0x141a 32 +accum.hpp 1110 0x141a 33 +accum.hpp 1110 0x141a 34 +accum.hpp 1110 0x141a 35 +accum.hpp 1110 0x141a 36 +accum.hpp 1110 0x141a 37 +accum.hpp 1110 0x141a 38 +accum.hpp 1110 0x141a 39 +accum.hpp 1110 0x141a 40 +accum.hpp 1110 0x141a 41 +accum.hpp 1110 0x141a 42 +accum.hpp 1110 0x141a 43 +accum.hpp 1110 0x141a 44 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 886 0x141a 45 +conv2d_bf16.h 896 0x141a 46 x +conv2d_bf16.h 1123 0x141a 47 +conv2d_bf16.h 896 0x1420 +conv2d_bf16.h 896 0x1424 +conv2d_bf16.h 896 0x1428 +conv2d_bf16.h 896 0x142c +conv2d_bf16.h 896 0x1430 +conv2d_bf16.h 896 0x1434 +conv2d_bf16.h 897 0x1438 x +conv2d_bf16.h 897 0x143c +conv2d_bf16.h 897 0x1440 +conv2d_bf16.h 897 0x1444 +conv2d_bf16.h 897 0x1448 +conv2d_bf16.h 897 0x144c +conv2d_bf16.h 897 0x1450 +conv2d_bf16.h 898 0x1454 x +conv2d_bf16.h 898 0x1458 +conv2d_bf16.h 898 0x145c +conv2d_bf16.h 898 0x1460 +conv2d_bf16.h 898 0x1464 +conv2d_bf16.h 898 0x1468 +conv2d_bf16.h 1115 0x146c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1470 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 898 0x1474 x +conv2d_bf16.h 1115 0x1480 x +conv2d_bf16.h 1115 0x1484 +conv2d_bf16.h 886 0x148a +conv2d_bf16.h 886 0x1490 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1494 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x149c +conv2d_bf16.h 1123 0x149c 1 +conv2d_bf16.h 1123 0x149c 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14a6 +aie_core.h 100 0x14a6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14a6 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x14a6 3 +accum.hpp 946 0x14a6 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x14a6 5 +conv2d_bf16.h 1125 0x14a6 6 +conv2d_bf16.h 1154 0x14a6 7 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14b0 +aie_core.h 100 0x14b0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14b0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x14b0 3 +accum.hpp 946 0x14b0 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x14b0 5 +conv2d_bf16.h 1125 0x14b0 6 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14ba +aie_core.h 100 0x14ba 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14ba 2 +vector.hpp 1152 0x14ba 3 +vector.hpp 1152 0x14ba 4 +vector.hpp 1152 0x14ba 5 +vector.hpp 1152 0x14ba 6 +vector.hpp 1152 0x14ba 7 +vector.hpp 1152 0x14ba 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x14ba 9 +accum.hpp 149 0x14ba 10 +accum.hpp 149 0x14ba 11 +accum.hpp 149 0x14ba 12 +accum.hpp 149 0x14ba 13 +accum.hpp 149 0x14ba 14 +accum.hpp 149 0x14ba 15 +accum.hpp 149 0x14ba 16 +accum.hpp 578 0x14ba 17 +accum.hpp 946 0x14ba 18 +accum.hpp 1110 0x14ba 19 +accum.hpp 1110 0x14ba 20 +accum.hpp 1110 0x14ba 21 +accum.hpp 1110 0x14ba 22 +accum.hpp 1110 0x14ba 23 +accum.hpp 1110 0x14ba 24 +accum.hpp 1110 0x14ba 25 +accum.hpp 1110 0x14ba 26 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 746 0x14ba 27 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14c6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x14c6 1 +conv2d_bf16.h 1187 0x14c6 2 +conv2d_bf16.h 1199 0x14c6 3 +conv2d_bf16.h 1200 0x14c6 4 +conv2d_bf16.h 1201 0x14c6 5 +conv2d_bf16.h 1202 0x14c6 6 +conv2d_bf16.h 1143 0x14d2 +conv2d_bf16.h 1218 0x14d2 1 +conv2d_bf16.h 749 0x14dc +conv2d_bf16.h 750 0x14dc 1 +conv2d_bf16.h 751 0x14dc 2 +conv2d_bf16.h 752 0x14dc 3 +conv2d_bf16.h 1123 0x14dc 4 +conv2d_bf16.h 736 0x14e6 +conv2d_bf16.h 738 0x14e6 1 +conv2d_bf16.h 1123 0x14e6 2 +conv2d_bf16.h 1873 0x14e6 3 +conv2d_bf16.h 1125 0x14f2 x +conv2d_bf16.h 1125 0x14f6 +conv2d_bf16.h 1125 0x14fa +conv2d_bf16.h 1149 0x14fe x +conv2d_bf16.h 1154 0x1502 x +conv2d_bf16.h 743 0x1506 x +conv2d_bf16.h 745 0x150a x +conv2d_bf16.h 746 0x150e x +conv2d_bf16.h 1125 0x150e 1 x +conv2d_bf16.h 1143 0x1514 x +conv2d_bf16.h 1206 0x1518 x +conv2d_bf16.h 1149 0x151c +conv2d_bf16.h 1154 0x1524 +conv2d_bf16.h 1125 0x1528 x +conv2d_bf16.h 1149 0x152c x +conv2d_bf16.h 1154 0x1530 x +conv2d_bf16.h 1287 0x1536 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1540 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1540 1 x +accum.hpp 946 0x1540 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x1540 3 +conv2d_bf16.h 738 0x1540 4 +conv2d_bf16.h 1147 0x1540 5 x +conv2d_bf16.h 1187 0x1540 6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x154c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x154c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x154c 2 +accum.hpp 946 0x154c 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x154c 4 x +conv2d_bf16.h 738 0x154c 5 x +conv2d_bf16.h 1188 0x154c 6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1558 +aie_core.h 100 0x1558 1 +aie_core.h 100 0x1558 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1558 3 +vector.hpp 1139 0x1558 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1558 5 +accum.hpp 578 0x1558 6 +accum.hpp 946 0x1558 7 +accum.hpp 946 0x1558 8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 737 0x1558 9 x +conv2d_bf16.h 742 0x1558 10 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1564 +aie_core.h 100 0x1564 1 +aie_core.h 100 0x1564 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1564 3 +vector.hpp 1139 0x1564 4 +vector.hpp 1139 0x1564 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1564 6 +accum.hpp 578 0x1564 7 +accum.hpp 578 0x1564 8 x +accum.hpp 946 0x1564 9 +accum.hpp 946 0x1564 10 +accum.hpp 946 0x1564 11 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x1564 12 x +conv2d_bf16.h 1149 0x1564 13 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1570 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1570 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1570 2 +accum.hpp 946 0x1570 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1570 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x1570 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x157a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x157a 1 x +accum.hpp 946 0x157a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x157a 3 x +conv2d_bf16.h 1152 0x157a 4 x +conv2d_bf16.h 1206 0x157a 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1586 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1586 1 +accum.hpp 946 0x1586 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 737 0x1586 3 x +conv2d_bf16.h 1154 0x1586 4 x +conv2d_bf16.h 1206 0x1586 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1592 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1592 1 x +accum.hpp 946 0x1592 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x1592 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1598 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1598 1 +accum.hpp 946 0x1598 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1598 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1157 0x1598 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x159e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x159e 1 x +accum.hpp 946 0x159e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x159e 3 x +conv2d_bf16.h 1159 0x159e 4 x +conv2d_bf16.h 737 0x15a4 x +conv2d_bf16.h 738 0x15a4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15aa x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15aa 1 x +accum.hpp 946 0x15aa 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x15aa 3 x +conv2d_bf16.h 1192 0x15aa 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15b0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15b0 1 +accum.hpp 946 0x15b0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x15b0 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x15b0 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15ba +vector.hpp 1139 0x15ba 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15ba 2 +accum.hpp 578 0x15ba 3 x +accum.hpp 946 0x15ba 4 +accum.hpp 946 0x15ba 5 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x15ba 6 x +conv2d_bf16.h 746 0x15ba 7 x +conv2d_bf16.h 1162 0x15ba 8 +conv2d_bf16.h 737 0x15c6 x +conv2d_bf16.h 742 0x15c6 1 x +conv2d_bf16.h 749 0x15c6 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15d0 x +aie_core.h 143 0x15d0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15d0 2 x +vector.hpp 1152 0x15d0 3 +vector.hpp 1152 0x15d0 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15d0 5 x +accum.hpp 946 0x15d0 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 750 0x15d0 7 x +conv2d_bf16.h 1286 0x15d0 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15de + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15de 1 +vector.hpp 1139 0x15de 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15de 3 +accum.hpp 578 0x15de 4 +accum.hpp 946 0x15de 5 +accum.hpp 946 0x15de 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x15de 7 x +conv2d_bf16.h 751 0x15de 8 x +conv2d_bf16.h 1162 0x15de 9 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15ec + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15ec 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15ec 2 +accum.hpp 946 0x15ec 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x15ec 4 x +conv2d_bf16.h 746 0x15ec 5 x +conv2d_bf16.h 1199 0x15ec 6 x +conv2d_bf16.h 738 0x15fa x +conv2d_bf16.h 1200 0x15fa 1 x +conv2d_bf16.h 742 0x1602 x +conv2d_bf16.h 1201 0x1602 1 x +conv2d_bf16.h 743 0x160a x +conv2d_bf16.h 752 0x160a 1 x +conv2d_bf16.h 738 0x1612 x +conv2d_bf16.h 740 0x1612 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1618 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x161c x +conv2d_bf16.h 742 0x161c 1 x +conv2d_bf16.h 1202 0x161c 2 x +conv2d_bf16.h 1206 0x161c 3 x +conv2d_bf16.h 737 0x1628 x +conv2d_bf16.h 743 0x1628 1 x +conv2d_bf16.h 749 0x1628 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1632 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x1632 1 x +conv2d_bf16.h 740 0x1632 2 x +conv2d_bf16.h 751 0x1632 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1640 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x1640 1 x +conv2d_bf16.h 750 0x1640 2 x +conv2d_bf16.h 736 0x1650 x +conv2d_bf16.h 742 0x1650 1 x +conv2d_bf16.h 746 0x1650 2 x +conv2d_bf16.h 752 0x1650 3 x +conv2d_bf16.h 737 0x1660 x +conv2d_bf16.h 743 0x1660 1 x +conv2d_bf16.h 749 0x1660 2 x +conv2d_bf16.h 738 0x1670 x +conv2d_bf16.h 740 0x1670 1 x +conv2d_bf16.h 751 0x1670 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1680 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x1680 1 x +conv2d_bf16.h 750 0x1680 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1690 +aie_core.h 100 0x1690 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1690 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1690 3 +accum.hpp 946 0x1690 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 742 0x1690 5 x +conv2d_bf16.h 746 0x1690 6 x +conv2d_bf16.h 752 0x1690 7 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x169e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x169e 1 +vector.hpp 1152 0x169e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x169e 3 x +conv2d_bf16.h 749 0x169e 4 x +conv2d_bf16.h 1286 0x169e 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16ac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x16ac 1 +vector.hpp 1152 0x16ac 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x16ac 3 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16b6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x16b6 1 x +conv2d_bf16.h 750 0x16b6 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16c0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 746 0x16c0 1 x +conv2d_bf16.h 752 0x16c0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x16ca +vector.hpp 1152 0x16ca 1 +vector.hpp 1152 0x16ca 2 +vector.hpp 1152 0x16ca 3 +vector.hpp 1152 0x16ca 4 +vector.hpp 1152 0x16ca 5 +vector.hpp 1152 0x16ca 6 +vector.hpp 1152 0x16ca 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 749 0x16ca 8 x +conv2d_bf16.h 1285 0x16ca 9 x +conv2d_bf16.h 1286 0x16ca 10 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x16d6 +aie_core.h 100 0x16d6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x16d6 2 +vector.hpp 1152 0x16d6 3 +vector.hpp 1152 0x16d6 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x16d6 5 +accum.hpp 946 0x16d6 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x16d6 7 x +conv2d_bf16.h 746 0x16e0 x +conv2d_bf16.h 750 0x16e0 1 x +conv2d_bf16.h 745 0x16e8 x +conv2d_bf16.h 752 0x16e8 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16f0 +aie_core.h 143 0x16f4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 750 0x16f4 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16fc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 749 0x16fc 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1704 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 752 0x1704 1 x +conv2d_bf16.h 1286 0x1704 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x170e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x170e 1 +vector.hpp 1152 0x170e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x170e 3 x +conv2d_bf16.h 1286 0x170e 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x171a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x171a 1 +vector.hpp 1152 0x171a 2 +vector.hpp 1152 0x171a 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x171a 4 +accum.hpp 946 0x171a 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1722 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1287 0x1722 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x172a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x172a 1 x +accum.hpp 1110 0x172a 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1732 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1732 1 +accum.hpp 1110 0x1732 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x173a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x173a 1 +conv2d_bf16.h 1287 0x173a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1744 x +accum.hpp 1110 0x1744 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1287 0x1744 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x174c +accum.hpp 1110 0x174c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1218 0x174c 2 x +conv2d_bf16.h 1287 0x174c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1758 x +accum.hpp 1110 0x1758 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x1758 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1760 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1760 1 +accum.hpp 1110 0x1760 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1768 +vector.hpp 1152 0x1768 1 +vector.hpp 1152 0x1768 2 +vector.hpp 1152 0x1768 3 +vector.hpp 1152 0x1768 4 +vector.hpp 1152 0x1768 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1768 6 +accum.hpp 1110 0x1768 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1772 +vector.hpp 1152 0x1772 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1772 2 x +accum.hpp 1110 0x1772 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1286 0x1772 4 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x177a +aie_core.h 143 0x177a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x177a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x177a 3 +accum.hpp 946 0x177a 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1187 0x177a 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1782 x +max_min.hpp 20 0x1786 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x178a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x178a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1792 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1792 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x179a x +vector.hpp 1152 0x17a4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x17a4 1 x +max_min.hpp 20 0x17ac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17b0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x17b0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17b8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x17b8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17c0 x +vector.hpp 1152 0x17d0 +vector.hpp 1152 0x17d4 +vector.hpp 1152 0x17d8 +vector.hpp 1152 0x17dc +vector.hpp 1152 0x17e0 +vector.hpp 1152 0x17e4 +vector.hpp 1152 0x17e8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x17f0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17f0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1143 0x17f0 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x17fc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x17fc 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x17fc 2 +accum.hpp 946 0x17fc 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1800 +aie_core.h 100 0x1804 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1804 1 +vector.hpp 1152 0x1804 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x180a +aie_core.h 143 0x1820 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x1820 1 +conv2d_bf16.h 1364 0x1820 2 +conv2d_bf16.h 1364 0x1820 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x182c +aie_core.h 143 0x182c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x182c 2 +vector.hpp 1152 0x182c 3 +vector.hpp 1152 0x182c 4 +vector.hpp 1152 0x182c 5 +vector.hpp 1152 0x182c 6 +vector.hpp 1152 0x182c 7 +vector.hpp 1152 0x182c 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x182c 9 +accum.hpp 149 0x182c 10 +accum.hpp 149 0x182c 11 +accum.hpp 149 0x182c 12 +accum.hpp 149 0x182c 13 +accum.hpp 149 0x182c 14 +accum.hpp 149 0x182c 15 +accum.hpp 149 0x182c 16 +accum.hpp 1110 0x182c 17 +accum.hpp 1110 0x182c 18 +accum.hpp 1110 0x182c 19 +accum.hpp 1110 0x182c 20 +accum.hpp 1110 0x182c 21 +accum.hpp 1110 0x182c 22 +accum.hpp 1110 0x182c 23 +accum.hpp 1110 0x182c 24 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x182c 25 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1838 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 807 0x1838 1 +conv2d_bf16.h 808 0x1838 2 +conv2d_bf16.h 809 0x1838 3 +conv2d_bf16.h 810 0x1838 4 +conv2d_bf16.h 1436 0x1838 5 +conv2d_bf16.h 1437 0x1838 6 +conv2d_bf16.h 1438 0x1838 7 +conv2d_bf16.h 1439 0x1838 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1842 +aie_core.h 143 0x1842 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 802 0x1842 2 +conv2d_bf16.h 1428 0x1842 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x184e +aie_core.h 143 0x184e 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x184e 2 +conv2d_bf16.h 794 0x184e 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x185a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 794 0x185a 1 +conv2d_bf16.h 1455 0x185a 2 +conv2d_bf16.h 1337 0x1864 +conv2d_bf16.h 1364 0x186e x +conv2d_bf16.h 1873 0x186e 1 +conv2d_bf16.h 1364 0x1874 +conv2d_bf16.h 1369 0x1878 x +conv2d_bf16.h 799 0x187c x +conv2d_bf16.h 801 0x1880 x +conv2d_bf16.h 802 0x1884 x +conv2d_bf16.h 1337 0x1888 x +conv2d_bf16.h 1443 0x188c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1892 +vector.hpp 1152 0x1892 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x1892 2 +conv2d_bf16.h 1364 0x1896 +conv2d_bf16.h 1518 0x1896 1 +conv2d_bf16.h 1364 0x189a +conv2d_bf16.h 1364 0x189e x +conv2d_bf16.h 1369 0x18a2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x18a8 +vector.hpp 1152 0x18a8 1 +vector.hpp 1139 0x18b0 +vector.hpp 1139 0x18b0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18b0 2 +accum.hpp 578 0x18b0 3 +accum.hpp 578 0x18b0 4 x +accum.hpp 946 0x18b0 5 +accum.hpp 946 0x18b0 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x18b0 7 x +conv2d_bf16.h 1362 0x18b0 8 x +conv2d_bf16.h 1429 0x18b0 9 +conv2d_bf16.h 1443 0x18b0 10 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18be + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18be 1 +accum.hpp 946 0x18be 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 793 0x18be 3 x +conv2d_bf16.h 1364 0x18be 4 x +conv2d_bf16.h 1443 0x18be 5 +conv2d_bf16.h 794 0x18ca x +conv2d_bf16.h 795 0x18ca 1 x +conv2d_bf16.h 1428 0x18ca 2 x +conv2d_bf16.h 1443 0x18ca 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18d6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18d6 1 +accum.hpp 578 0x18d6 2 +accum.hpp 946 0x18d6 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 796 0x18d6 4 x +conv2d_bf16.h 799 0x18d6 5 x +conv2d_bf16.h 1429 0x18d6 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18e0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18e0 1 x +accum.hpp 946 0x18e0 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 797 0x18e0 3 x +conv2d_bf16.h 1367 0x18e0 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18e6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18e6 1 +accum.hpp 946 0x18e6 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x18e6 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x18e6 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18ec x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18ec 1 x +accum.hpp 946 0x18ec 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x18ec 3 x +conv2d_bf16.h 1372 0x18ec 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18f2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18f2 1 +accum.hpp 946 0x18f2 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 793 0x18f2 3 x +conv2d_bf16.h 1374 0x18f2 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18f8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18f8 1 x +accum.hpp 946 0x18f8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 795 0x18f8 3 x +conv2d_bf16.h 1377 0x18f8 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18fe + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18fe 1 +accum.hpp 946 0x18fe 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 796 0x18fe 3 x +conv2d_bf16.h 1379 0x18fe 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1904 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1904 1 x +accum.hpp 946 0x1904 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 797 0x1904 3 x +conv2d_bf16.h 1429 0x1904 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x190a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x190a 1 +accum.hpp 946 0x190a 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x190a 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 801 0x190a 4 x +conv2d_bf16.h 1429 0x190a 5 +conv2d_bf16.h 792 0x1914 x +conv2d_bf16.h 794 0x1914 1 x +conv2d_bf16.h 802 0x1914 2 x +conv2d_bf16.h 793 0x191e x +conv2d_bf16.h 799 0x191e 1 x +conv2d_bf16.h 803 0x191e 2 x +conv2d_bf16.h 807 0x191e 3 x +conv2d_bf16.h 794 0x192a x +conv2d_bf16.h 804 0x192a 1 x +conv2d_bf16.h 808 0x192a 2 x +conv2d_bf16.h 809 0x1934 x +conv2d_bf16.h 810 0x1938 x +conv2d_bf16.h 795 0x193c x +conv2d_bf16.h 802 0x193c 1 x +conv2d_bf16.h 1437 0x193c 2 x +conv2d_bf16.h 796 0x1946 x +conv2d_bf16.h 1436 0x1946 1 x +conv2d_bf16.h 797 0x194e x +conv2d_bf16.h 1438 0x194e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1956 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 799 0x1956 1 x +conv2d_bf16.h 1439 0x1956 2 x +conv2d_bf16.h 792 0x1960 x +conv2d_bf16.h 801 0x1960 1 x +conv2d_bf16.h 793 0x1966 x +conv2d_bf16.h 804 0x1966 1 x +conv2d_bf16.h 808 0x1966 2 x +conv2d_bf16.h 795 0x1970 x +conv2d_bf16.h 803 0x1970 1 x +conv2d_bf16.h 807 0x1970 2 x +conv2d_bf16.h 796 0x197a x +conv2d_bf16.h 810 0x197a 1 x +conv2d_bf16.h 794 0x1982 x +conv2d_bf16.h 797 0x1982 1 x +conv2d_bf16.h 809 0x1982 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1990 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 799 0x1990 1 x +conv2d_bf16.h 802 0x1990 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x19a0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x19a0 1 +vector.hpp 1152 0x19a0 2 +vector.hpp 1152 0x19a0 3 +vector.hpp 1152 0x19a0 4 +vector.hpp 1152 0x19a0 5 +vector.hpp 1152 0x19a0 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 801 0x19a0 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x19ac +vector.hpp 1152 0x19ac 1 +vector.hpp 1152 0x19ac 2 +vector.hpp 1152 0x19ac 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 804 0x19ac 4 x +conv2d_bf16.h 808 0x19ac 5 x +conv2d_bf16.h 1517 0x19ac 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x19b8 +vector.hpp 1152 0x19b8 1 +vector.hpp 1152 0x19b8 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 803 0x19b8 3 x +conv2d_bf16.h 807 0x19b8 4 x +conv2d_bf16.h 1518 0x19b8 5 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x19c4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 810 0x19c4 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x19cc x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 809 0x19cc 1 x +conv2d_bf16.h 1428 0x19cc 2 +conv2d_bf16.h 801 0x19d6 x +conv2d_bf16.h 802 0x19da x +conv2d_bf16.h 803 0x19de x +conv2d_bf16.h 807 0x19de 1 x +conv2d_bf16.h 804 0x19e6 x +conv2d_bf16.h 808 0x19e6 1 x +conv2d_bf16.h 809 0x19ee x +conv2d_bf16.h 810 0x19f2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x19fa x +accum.hpp 1110 0x19fa 1 x +accum.hpp 149 0x19fe +accum.hpp 1110 0x19fe 1 +accum.hpp 149 0x1a02 +accum.hpp 1110 0x1a02 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1455 0x1a02 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1a0c x +accum.hpp 1110 0x1a0c 1 x +accum.hpp 149 0x1a10 +accum.hpp 1110 0x1a10 1 +accum.hpp 149 0x1a14 +accum.hpp 1110 0x1a14 1 +accum.hpp 149 0x1a18 +accum.hpp 1110 0x1a18 1 +accum.hpp 149 0x1a1c +accum.hpp 1110 0x1a1c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a20 x +max_min.hpp 20 0x1a24 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a28 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a28 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a30 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a30 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a38 x +vector.hpp 1152 0x1a42 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a42 1 x +max_min.hpp 20 0x1a4a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a4e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a4e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a56 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a56 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a60 x +vector.hpp 1152 0x1a70 +vector.hpp 1152 0x1a74 +vector.hpp 1152 0x1a78 +vector.hpp 1152 0x1a7c +vector.hpp 1152 0x1a80 +vector.hpp 1152 0x1a84 +vector.hpp 1152 0x1a88 +vector.hpp 1152 0x1a90 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1337 0x1a90 1 x +conv2d_bf16.h 1873 0x1ac8 x +conv2d_bf16.h 1873 0x1acc + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 74 0x1ae0 x +superkernels.cpp 79 0x1ae0 1 +superkernels.cpp 81 0x1ae0 2 +superkernels.cpp 79 0x1aea x +superkernels.cpp 81 0x1aea 1 +superkernels.cpp 74 0x1af4 +superkernels.cpp 79 0x1b06 +superkernels.cpp 79 0x1b06 1 +superkernels.cpp 81 0x1b1c +superkernels.cpp 113 0x1b22 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1b22 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1b2c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1b2c 1 +tile.hpp 86 0x1b2c 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1b3c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1b44 +tile.hpp 74 0x1b48 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 113 0x1b4c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1b4c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1b54 +superkernels.cpp 81 0x1b60 +superkernels.cpp 87 0x1b64 +superkernels.cpp 87 0x1b64 1 x +superkernels.cpp 88 0x1b6e x +superkernels.cpp 89 0x1b6e 1 +superkernels.cpp 88 0x1b78 +superkernels.cpp 88 0x1b7e +superkernels.cpp 87 0x1b86 x +superkernels.cpp 113 0x1b86 1 +superkernels.cpp 88 0x1b8e x +superkernels.cpp 88 0x1b94 +superkernels.cpp 89 0x1b9a x +superkernels.cpp 89 0x1ba0 +superkernels.cpp 113 0x1ba0 1 +superkernels.cpp 106 0x1bb0 +superkernels.cpp 113 0x1bb0 1 +superkernels.cpp 117 0x1bb0 2 +superkernels.cpp 136 0x1bb0 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x1bb0 4 +io_buffer_main.h 324 0x1bb0 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 106 0x1bba x +superkernels.cpp 108 0x1bba 1 +superkernels.cpp 107 0x1bc4 +superkernels.cpp 108 0x1bc4 1 x +superkernels.cpp 139 0x1bc4 2 +superkernels.cpp 140 0x1bc4 3 +superkernels.cpp 107 0x1bce x +superkernels.cpp 110 0x1bda x +superkernels.cpp 110 0x1bda 1 x +superkernels.cpp 108 0x1be0 x +superkernels.cpp 107 0x1be4 x +superkernels.cpp 108 0x1be4 1 +superkernels.cpp 106 0x1bea x +superkernels.cpp 106 0x1bee +superkernels.cpp 107 0x1bf2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x1bf6 x +io_buffer_main.h 218 0x1bfa +io_buffer_main.h 218 0x1bfe +io_buffer_main.h 218 0x1c02 +io_buffer_main.h 235 0x1c08 x +io_buffer_main.h 218 0x1c14 x +io_buffer_main.h 218 0x1c14 1 x +io_buffer_main.h 218 0x1c18 +io_buffer_main.h 395 0x1c1c +io_buffer_main.h 395 0x1c26 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 113 0x1c30 x +superkernels.cpp 113 0x1c36 +superkernels.cpp 113 0x1c42 +superkernels.cpp 117 0x1c50 x +superkernels.cpp 117 0x1c50 1 +superkernels.cpp 117 0x1c5a +superkernels.cpp 117 0x1c6c +superkernels.cpp 117 0x1c70 +superkernels.cpp 136 0x1c76 +superkernels.cpp 140 0x1c76 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x1c82 x +io_buffer_main.h 327 0x1c82 1 +io_buffer_main.h 425 0x1c82 2 +io_buffer_main.h 324 0x1c88 +io_buffer_main.h 425 0x1c98 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 136 0x1c9c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x1c9c 1 x +io_buffer_main.h 327 0x1cae +io_buffer_main.h 327 0x1cb2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 139 0x1cc0 x +superkernels.cpp 139 0x1cc0 1 +superkernels.cpp 139 0x1cca +superkernels.cpp 142 0x1cd2 +superkernels.cpp 139 0x1cde +superkernels.cpp 139 0x1ce2 +superkernels.cpp 140 0x1cf4 x +superkernels.cpp 142 0x1d04 x +superkernels.cpp 142 0x1d08 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 141 0x1d20 x +elementwise_binary.h 142 0x1d20 1 +elementwise_binary.h 144 0x1d20 2 x +elementwise_binary.h 141 0x1d26 +elementwise_binary.h 141 0x1d2a +elementwise_binary.h 142 0x1d2e x +elementwise_binary.h 142 0x1d32 +elementwise_binary.h 130 0x1d40 x +elementwise_binary.h 133 0x1d40 1 x +elementwise_binary.h 130 0x1d44 +elementwise_binary.h 133 0x1d58 x +elementwise_binary.h 134 0x1d5c x +elementwise_binary.h 134 0x1d6c +elementwise_binary.h 135 0x1d70 x +elementwise_binary.h 135 0x1d80 +elementwise_binary.h 136 0x1d84 x +elementwise_binary.h 137 0x1d8c x +elementwise_binary.h 136 0x1d98 x +elementwise_binary.h 137 0x1d9c +elementwise_binary.h 137 0x1da0 +elementwise_binary.h 139 0x1da0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 146 0x1da0 2 +add_impl.h 146 0x1daa +add_impl.h 147 0x1daa 1 +add_impl.h 147 0x1daa 2 +add_impl.h 146 0x1db4 x +add_impl.h 147 0x1db4 1 +add_impl.h 147 0x1dbe x +add_impl.h 147 0x1dc6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x1dca x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x1dce + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x1dd2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x1dd8 x +add_impl.h 147 0x1ddc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 81 0x1df0 +elementwise_binary_broadcasting.h 81 0x1df0 1 x +elementwise_binary_broadcasting.h 82 0x1df0 2 +elementwise_binary_broadcasting.h 82 0x1df0 3 +elementwise_binary_broadcasting.h 83 0x1df0 4 +elementwise_binary_broadcasting.h 81 0x1dfa +elementwise_binary_broadcasting.h 82 0x1dfa 1 +elementwise_binary_broadcasting.h 82 0x1e00 x +elementwise_binary_broadcasting.h 84 0x1e0e x +elementwise_binary_broadcasting.h 82 0x1e12 x +elementwise_binary_broadcasting.h 83 0x1e16 x +elementwise_binary_broadcasting.h 82 0x1e1a x +elementwise_binary_broadcasting.h 83 0x1e1a 1 +elementwise_binary_broadcasting.h 82 0x1e20 +elementwise_binary_broadcasting.h 82 0x1e24 +elementwise_binary_broadcasting.h 76 0x1e30 +elementwise_binary_broadcasting.h 76 0x1e30 1 x +elementwise_binary_broadcasting.h 77 0x1e3a x +elementwise_binary_broadcasting.h 78 0x1e44 +elementwise_binary_broadcasting.h 78 0x1e54 +elementwise_binary_broadcasting.h 78 0x1e58 x +elementwise_binary_broadcasting.h 78 0x1e5e +elementwise_binary_broadcasting.h 79 0x1e62 x +elementwise_binary_broadcasting.h 89 0x1e70 x +elementwise_binary_broadcasting.h 96 0x1e70 1 x +elementwise_binary_broadcasting.h 102 0x1e70 2 +elementwise_binary_broadcasting.h 102 0x1e76 x +elementwise_binary_broadcasting.h 117 0x1e76 1 +elementwise_binary_broadcasting.h 102 0x1e88 +elementwise_binary_broadcasting.h 102 0x1e88 1 +elementwise_binary_broadcasting.h 96 0x1e8e +elementwise_binary_broadcasting.h 96 0x1e92 x +elementwise_binary_broadcasting.h 103 0x1e9c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1eb0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1eb6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 106 0x1ec0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1ed0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1ed6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1ee0 +add_accum.hpp 19 0x1ee0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 117 0x1ee0 2 x +elementwise_binary_broadcasting.h 117 0x1ee0 3 x +elementwise_binary_broadcasting.h 117 0x1eea +elementwise_binary_broadcasting.h 117 0x1eea 1 +elementwise_binary_broadcasting.h 117 0x1ef4 +elementwise_binary_broadcasting.h 117 0x1efa +elementwise_binary_broadcasting.h 117 0x1f00 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f08 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f08 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f08 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f0c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f0c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f0c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f10 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f10 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f10 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f14 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f14 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f14 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f18 x +vector.hpp 1159 0x1f18 1 +vector.hpp 1159 0x1f18 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f18 3 x +accum.hpp 1110 0x1f18 4 +accum.hpp 1110 0x1f18 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f18 6 x +elementwise_binary.h 195 0x1f18 7 +elementwise_binary.h 218 0x1f18 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f1e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f1e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f1e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f1e 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f26 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f26 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f26 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f2a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f2a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f2a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f2a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f32 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f32 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f32 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f36 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f36 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f36 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f36 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f3e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f3e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f3e 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f42 +vector.hpp 1159 0x1f42 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f42 2 +accum.hpp 1110 0x1f42 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f42 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f42 5 x +elementwise_binary.h 218 0x1f42 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f50 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f50 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f50 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f54 +vector.hpp 1159 0x1f54 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f54 2 +accum.hpp 1110 0x1f54 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f54 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f54 5 x +elementwise_binary.h 195 0x1f54 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f60 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f60 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f60 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f70 +vector.hpp 1159 0x1f70 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f70 2 +accum.hpp 1110 0x1f70 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f70 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f70 5 x +elementwise_binary.h 218 0x1f70 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f82 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f82 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f82 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x1f82 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f8c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f8c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f8c 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x1f8c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f96 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f96 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x1f96 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 121 0x1f96 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f9e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f9e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x1f9e 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1fa4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1fa4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x1fa4 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 82 0x1fb0 +ise_binary_attribute_broadcasting.h 82 0x1fb0 1 x +ise_binary_attribute_broadcasting.h 90 0x1fb6 +ise_binary_attribute_broadcasting.h 90 0x1fbe x +ise_binary_attribute_broadcasting.h 117 0x1fbe 1 +ise_binary_attribute_broadcasting.h 92 0x1fc6 x +ise_binary_attribute_broadcasting.h 92 0x1fc6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x1fd6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0x1fd6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 117 0x1fe2 x +ise_binary_attribute_broadcasting.h 92 0x1fe8 +ise_binary_attribute_broadcasting.h 92 0x1fee x +ise_binary_attribute_broadcasting.h 92 0x1ff2 +ise_binary_attribute_broadcasting.h 117 0x1ff2 1 +ise_binary_attribute_broadcasting.h 117 0x1ff8 +ise_binary_attribute_broadcasting.h 118 0x2000 +ise_binary_attribute_broadcasting.h 118 0x2010 x +ise_binary_attribute_broadcasting.h 118 0x2014 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 147 0x2030 x +superkernels.cpp 152 0x2030 1 +superkernels.cpp 152 0x2036 x +superkernels.cpp 147 0x203c +superkernels.cpp 149 0x204a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2054 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 152 0x205c +superkernels.cpp 152 0x205c 1 +superkernels.cpp 149 0x2062 x +superkernels.cpp 149 0x2066 +superkernels.cpp 149 0x206e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x206e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 155 0x2076 +superkernels.cpp 166 0x2076 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x207c +tile.hpp 74 0x2082 +tile.hpp 86 0x2082 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 155 0x208e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2098 +tile.hpp 74 0x209c +tile.hpp 74 0x20a0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 159 0x20b0 +superkernels.cpp 159 0x20b6 x +superkernels.cpp 159 0x20b6 1 +superkernels.cpp 157 0x20c0 +superkernels.cpp 159 0x20c0 1 +superkernels.cpp 166 0x20c0 2 +superkernels.cpp 157 0x20ca x +superkernels.cpp 159 0x20ca 1 +superkernels.cpp 164 0x20ca 2 +superkernels.cpp 157 0x20de +superkernels.cpp 159 0x20e6 x +superkernels.cpp 157 0x20ea x +superkernels.cpp 159 0x20f0 x +superkernels.cpp 164 0x2100 +superkernels.cpp 166 0x2100 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2110 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 163 0x2118 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2118 1 +io_buffer_main.h 218 0x2122 +io_buffer_main.h 218 0x2126 +io_buffer_main.h 235 0x212a x +io_buffer_main.h 218 0x2138 x +io_buffer_main.h 218 0x2138 1 x +io_buffer_main.h 218 0x213c +io_buffer_main.h 395 0x2140 +io_buffer_main.h 395 0x214a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0x214e +superkernels.cpp 163 0x2158 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x215c x +io_buffer_main.h 324 0x215c 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0x2162 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2166 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 166 0x216c x +superkernels.cpp 163 0x2174 x +superkernels.cpp 163 0x2178 +superkernels.cpp 164 0x217c x +superkernels.cpp 164 0x2180 +superkernels.cpp 168 0x2190 +superkernels.cpp 169 0x2190 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2190 2 x +io_buffer_main.h 327 0x219a +io_buffer_main.h 425 0x219a 1 +io_buffer_main.h 425 0x21a8 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x21ac + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x21ac 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x21b6 x +superkernels.cpp 168 0x21ba + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x21c6 x +io_buffer_main.h 327 0x21ca + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x21ce x +superkernels.cpp 168 0x21d2 +superkernels.cpp 169 0x21e2 +superkernels.cpp 169 0x21e6 x +superkernels.cpp 171 0x21f0 +superkernels.cpp 171 0x2204 x +superkernels.cpp 171 0x220c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 124 0x2220 x +elementwise_unary.h 126 0x2220 1 x +elementwise_unary.h 126 0x2230 x +elementwise_unary.h 127 0x2234 x +elementwise_unary.h 127 0x2244 +elementwise_unary.h 128 0x2248 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 113 0x224c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 128 0x225a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 113 0x225e x +clip_impl.h 114 0x226e x +clip_impl.h 114 0x2272 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 130 0x2276 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2290 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 136 0x2290 1 x +elementwise_unary.h 142 0x2290 2 +elementwise_unary.h 154 0x2290 3 x +elementwise_unary.h 171 0x2290 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x229c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x229c 1 +elementwise_unary.h 154 0x229c 2 x +elementwise_unary.h 190 0x229c 3 x +elementwise_unary.h 136 0x22a8 +elementwise_unary.h 136 0x22ac x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 103 0x22b0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x22b4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22b8 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 171 0x22b8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 104 0x22b8 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22c4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x22c4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22cc x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x22cc 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x22cc 2 x +elementwise_unary.h 171 0x22cc 3 x +elementwise_unary.h 154 0x22d6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x22de x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22e2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x22e2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x22e2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x22f0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x22f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2300 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2310 x +vector.hpp 1159 0x2310 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2310 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 171 0x2310 3 x +elementwise_unary.h 176 0x2310 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2320 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2320 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x2320 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2330 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2330 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x2330 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2340 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2350 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2350 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x2350 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2358 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x235c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x235c 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x235c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2364 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 158 0x2364 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x236a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x236a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x236a 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2372 x +max_min.hpp 21 0x2376 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x237a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x237a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x237e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x237e 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 176 0x2390 x +superkernels.cpp 181 0x2390 1 +superkernels.cpp 181 0x2396 x +superkernels.cpp 176 0x239c +superkernels.cpp 178 0x23aa + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x23b4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 181 0x23bc +superkernels.cpp 181 0x23bc 1 +superkernels.cpp 178 0x23c2 x +superkernels.cpp 178 0x23c6 +superkernels.cpp 178 0x23ce + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x23ce 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 184 0x23d6 +superkernels.cpp 195 0x23d6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x23dc +tile.hpp 74 0x23e2 +tile.hpp 86 0x23e2 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 184 0x23ee x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x23f8 +tile.hpp 74 0x23fc +tile.hpp 74 0x2400 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 188 0x2410 +superkernels.cpp 188 0x2416 x +superkernels.cpp 188 0x2416 1 +superkernels.cpp 186 0x2420 +superkernels.cpp 188 0x2420 1 +superkernels.cpp 195 0x2420 2 +superkernels.cpp 186 0x242a x +superkernels.cpp 188 0x242a 1 +superkernels.cpp 193 0x242a 2 +superkernels.cpp 186 0x243e +superkernels.cpp 188 0x2446 x +superkernels.cpp 186 0x244a x +superkernels.cpp 188 0x2450 x +superkernels.cpp 193 0x2460 +superkernels.cpp 195 0x2460 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2470 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 192 0x2478 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2478 1 +io_buffer_main.h 218 0x2482 +io_buffer_main.h 218 0x2486 +io_buffer_main.h 235 0x248a x +io_buffer_main.h 218 0x2498 x +io_buffer_main.h 218 0x2498 1 x +io_buffer_main.h 218 0x249c +io_buffer_main.h 395 0x24a0 +io_buffer_main.h 395 0x24aa x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 193 0x24ae +superkernels.cpp 192 0x24b8 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x24bc x +io_buffer_main.h 324 0x24bc 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 193 0x24c2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x24c6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 195 0x24cc x +superkernels.cpp 192 0x24d4 x +superkernels.cpp 192 0x24d8 +superkernels.cpp 193 0x24dc x +superkernels.cpp 193 0x24e0 +superkernels.cpp 197 0x24f0 +superkernels.cpp 198 0x24f0 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x24f0 2 x +io_buffer_main.h 327 0x24fa +io_buffer_main.h 425 0x24fa 1 +io_buffer_main.h 425 0x2508 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x250c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x250c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x2516 x +superkernels.cpp 197 0x251a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2526 x +io_buffer_main.h 327 0x252a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x252e x +superkernels.cpp 197 0x2532 +superkernels.cpp 198 0x2542 +superkernels.cpp 198 0x2546 x +superkernels.cpp 200 0x2550 +superkernels.cpp 200 0x2564 x +superkernels.cpp 200 0x256c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 219 0x2600 +elementwise_binary_shared.h 219 0x2600 1 x +elementwise_binary_shared.h 220 0x260a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x2614 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2620 +elementwise_binary_shared.h 222 0x2632 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x263c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2640 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x2640 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x2870 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 237 0x2870 1 x +elementwise_binary_shared.h 244 0x2870 2 +elementwise_binary_shared.h 245 0x2870 3 +elementwise_binary_shared.h 247 0x2870 4 +elementwise_binary_shared.h 250 0x2870 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x287a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 244 0x287a 1 x +elementwise_binary_shared.h 245 0x287a 2 +elementwise_binary_shared.h 247 0x287a 3 +elementwise_binary_shared.h 244 0x288c +elementwise_binary_shared.h 244 0x288c 1 +elementwise_binary_shared.h 237 0x2892 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x28a0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0x28a0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 247 0x28a6 x +elementwise_binary_shared.h 245 0x28d0 x +elementwise_binary_shared.h 245 0x28d6 +elementwise_binary_shared.h 245 0x28d6 1 +elementwise_binary_shared.h 250 0x28f0 +elementwise_binary_shared.h 250 0x28f4 x +elementwise_binary_shared.h 250 0x28f8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 205 0x2910 x +superkernels.cpp 210 0x2910 1 +superkernels.cpp 210 0x2916 x +superkernels.cpp 205 0x291c +superkernels.cpp 207 0x292a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2934 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 210 0x293c +superkernels.cpp 210 0x293c 1 +superkernels.cpp 207 0x2942 x +superkernels.cpp 207 0x2946 +superkernels.cpp 207 0x294e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x294e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 213 0x2956 +superkernels.cpp 224 0x2956 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x295c +tile.hpp 74 0x2962 +tile.hpp 86 0x2962 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 213 0x296e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2978 +tile.hpp 74 0x297c +tile.hpp 74 0x2980 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 217 0x2990 +superkernels.cpp 217 0x2996 x +superkernels.cpp 217 0x2996 1 +superkernels.cpp 215 0x29a0 +superkernels.cpp 217 0x29a0 1 +superkernels.cpp 224 0x29a0 2 +superkernels.cpp 215 0x29aa x +superkernels.cpp 217 0x29aa 1 +superkernels.cpp 222 0x29aa 2 +superkernels.cpp 215 0x29be +superkernels.cpp 217 0x29c6 x +superkernels.cpp 215 0x29ca x +superkernels.cpp 217 0x29d0 x +superkernels.cpp 222 0x29e0 +superkernels.cpp 224 0x29e0 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x29f0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 221 0x29f8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x29f8 1 +io_buffer_main.h 218 0x2a02 +io_buffer_main.h 218 0x2a06 +io_buffer_main.h 235 0x2a0a x +io_buffer_main.h 218 0x2a18 x +io_buffer_main.h 218 0x2a18 1 x +io_buffer_main.h 218 0x2a1c +io_buffer_main.h 395 0x2a20 +io_buffer_main.h 395 0x2a2a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x2a2e +superkernels.cpp 221 0x2a38 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2a3c x +io_buffer_main.h 324 0x2a3c 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x2a42 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2a46 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 224 0x2a4c x +superkernels.cpp 221 0x2a54 x +superkernels.cpp 221 0x2a58 +superkernels.cpp 222 0x2a5c x +superkernels.cpp 222 0x2a60 +superkernels.cpp 226 0x2a70 +superkernels.cpp 227 0x2a70 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2a70 2 x +io_buffer_main.h 327 0x2a7a +io_buffer_main.h 425 0x2a7a 1 +io_buffer_main.h 425 0x2a88 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2a8c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2a8c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2a96 x +superkernels.cpp 226 0x2a9a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2aa6 x +io_buffer_main.h 327 0x2aaa + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2aae x +superkernels.cpp 226 0x2ab2 +superkernels.cpp 227 0x2ac2 +superkernels.cpp 227 0x2ac6 x +superkernels.cpp 229 0x2ad0 +superkernels.cpp 229 0x2ae4 x +superkernels.cpp 229 0x2aec + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 219 0x2b80 +elementwise_binary_shared.h 219 0x2b80 1 x +elementwise_binary_shared.h 220 0x2b8a x +elementwise_binary_shared.h 220 0x2b98 +elementwise_binary_shared.h 220 0x2ba0 +elementwise_binary_shared.h 222 0x2ba0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 146 0x2ba0 2 +add_impl.h 146 0x2baa +add_impl.h 147 0x2baa 1 +add_impl.h 147 0x2baa 2 +add_impl.h 146 0x2bb4 x +add_impl.h 147 0x2bb4 1 +add_impl.h 147 0x2bbe x +add_impl.h 147 0x2bc6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2bca x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x2bce + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2bd2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x2bd8 x +add_impl.h 147 0x2bdc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 227 0x2bf0 x +elementwise_binary_shared.h 232 0x2bf0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 141 0x2c00 x +elementwise_binary.h 142 0x2c00 1 +elementwise_binary.h 144 0x2c00 2 x +elementwise_binary.h 141 0x2c06 +elementwise_binary.h 141 0x2c0a +elementwise_binary.h 142 0x2c0e x +elementwise_binary.h 142 0x2c12 +elementwise_binary.h 130 0x2c20 x +elementwise_binary.h 133 0x2c20 1 x +elementwise_binary.h 130 0x2c24 +elementwise_binary.h 133 0x2c36 x +elementwise_binary.h 134 0x2c3a x +elementwise_binary.h 134 0x2c4a +elementwise_binary.h 135 0x2c4e x +elementwise_binary.h 135 0x2c5e +elementwise_binary.h 136 0x2c62 x +elementwise_binary.h 137 0x2c6a x +elementwise_binary.h 136 0x2c78 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2c7c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x2c80 +elementwise_binary.h 139 0x2c92 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2c9c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x2ca0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2ca0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 149 0x2cb0 x +elementwise_binary.h 156 0x2cb0 1 +elementwise_binary.h 168 0x2cb0 2 x +elementwise_binary.h 156 0x2cba x +elementwise_binary.h 168 0x2cba 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2cc4 +mul_acc32_fp.hpp 36 0x2cc4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 156 0x2cc4 2 +elementwise_binary.h 156 0x2cc4 3 +elementwise_binary.h 156 0x2cce +elementwise_binary.h 156 0x2cce 1 +elementwise_binary.h 156 0x2cd8 +elementwise_binary.h 156 0x2ce2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ce6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 168 0x2ce6 1 +elementwise_binary.h 187 0x2ce6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2cec +vector.hpp 1139 0x2cec 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2cec 2 x +elementwise_binary.h 211 0x2cec 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2cf2 x +vector.hpp 1139 0x2cf2 1 x +vector.hpp 1159 0x2cf2 2 +vector.hpp 1159 0x2cf2 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2cf2 4 +accum.hpp 1110 0x2cf2 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2cf2 6 x +elementwise_binary.h 195 0x2cf2 7 +elementwise_binary.h 213 0x2cf2 8 x +elementwise_binary.h 218 0x2cf2 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2cfa +vector.hpp 1139 0x2cfa 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2cfa 2 x +elementwise_binary.h 211 0x2cfa 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d00 x +vector.hpp 1139 0x2d00 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d00 2 x +elementwise_binary.h 213 0x2d00 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d06 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2d06 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d0a x +vector.hpp 1139 0x2d0a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d0a 2 x +elementwise_binary.h 213 0x2d0a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d10 +vector.hpp 1139 0x2d10 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d10 2 x +elementwise_binary.h 189 0x2d10 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d16 x +vector.hpp 1139 0x2d16 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d16 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d16 3 x +elementwise_binary.h 213 0x2d16 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d20 +vector.hpp 1139 0x2d20 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d20 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d20 3 x +elementwise_binary.h 189 0x2d20 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d2a x +vector.hpp 1139 0x2d2a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d2a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d2a 3 x +elementwise_binary.h 213 0x2d2a 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d34 +vector.hpp 1139 0x2d34 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d34 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d34 3 x +elementwise_binary.h 189 0x2d34 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d3e x +vector.hpp 1139 0x2d3e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d3e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d3e 3 x +elementwise_binary.h 213 0x2d3e 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d48 +vector.hpp 1139 0x2d48 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d48 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d48 3 x +elementwise_binary.h 189 0x2d48 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d52 x +vector.hpp 1139 0x2d52 1 x +vector.hpp 1159 0x2d52 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d52 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d52 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d52 5 x +elementwise_binary.h 213 0x2d52 6 x +elementwise_binary.h 218 0x2d52 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d60 +vector.hpp 1139 0x2d60 1 +vector.hpp 1159 0x2d60 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d60 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d60 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2d60 5 x +elementwise_binary.h 189 0x2d60 6 x +elementwise_binary.h 195 0x2d60 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2d70 x +vector.hpp 1139 0x2d70 1 x +vector.hpp 1159 0x2d70 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d70 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d70 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2d70 5 x +elementwise_binary.h 213 0x2d70 6 x +elementwise_binary.h 218 0x2d70 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2d80 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d80 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d80 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2d80 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2d88 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d88 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d88 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2d88 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2d90 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d90 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d90 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2d90 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2d98 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2d98 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2d98 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2d98 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2da0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2da0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2da0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2da0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2da8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2da8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2da8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2da8 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2db0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2db0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2db0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2db0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2db8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2db8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2db8 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dbc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dbc 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0x2dbc 2 x +elementwise_binary.h 195 0x2dbc 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dc2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dc2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2dc2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dc6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dc6 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2dc6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dca x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dca 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2dca 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2dce + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2dce 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2dce 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 369 0x2de0 x +superkernels.cpp 374 0x2de0 1 +superkernels.cpp 374 0x2de6 x +superkernels.cpp 369 0x2dec +superkernels.cpp 371 0x2df2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2df2 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 374 0x2e0e x +superkernels.cpp 374 0x2e0e 1 x +superkernels.cpp 371 0x2e14 x +superkernels.cpp 371 0x2e18 +superkernels.cpp 371 0x2e1e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2e26 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 377 0x2e2a +superkernels.cpp 379 0x2e2a 1 +superkernels.cpp 381 0x2e2a 2 +superkernels.cpp 393 0x2e2a 3 +superkernels.cpp 377 0x2e34 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2e34 1 +tile.hpp 74 0x2e3e +tile.hpp 86 0x2e3e 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 377 0x2e4a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2e54 +tile.hpp 74 0x2e58 +tile.hpp 74 0x2e5c x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 381 0x2e60 +superkernels.cpp 381 0x2e60 1 x +superkernels.cpp 381 0x2e6a +superkernels.cpp 381 0x2e6a 1 +superkernels.cpp 390 0x2e6a 2 +superkernels.cpp 379 0x2e74 x +superkernels.cpp 382 0x2e74 1 +superkernels.cpp 391 0x2e74 2 +superkernels.cpp 379 0x2e8a +superkernels.cpp 381 0x2e90 x +superkernels.cpp 379 0x2e94 x +superkernels.cpp 381 0x2e98 x +superkernels.cpp 382 0x2e9c x +superkernels.cpp 390 0x2ea0 +superkernels.cpp 391 0x2ea6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2eb0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2eb4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2eb4 1 +io_buffer_main.h 218 0x2ebe +io_buffer_main.h 218 0x2ec2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2ec6 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 235 0x2eca x +io_buffer_main.h 218 0x2ed6 x +io_buffer_main.h 218 0x2ed6 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2eda x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2eda 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2ee0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 395 0x2ee4 +io_buffer_main.h 395 0x2ee4 1 +io_buffer_main.h 395 0x2eee x +io_buffer_main.h 218 0x2ef2 x +io_buffer_main.h 218 0x2efa +io_buffer_main.h 218 0x2efe +io_buffer_main.h 218 0x2f02 +io_buffer_main.h 235 0x2f06 x +io_buffer_main.h 218 0x2f14 x +io_buffer_main.h 218 0x2f14 1 x +io_buffer_main.h 218 0x2f18 +io_buffer_main.h 395 0x2f24 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x2f28 +superkernels.cpp 391 0x2f28 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2f28 2 +io_buffer_main.h 125 0x2f36 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x2f3a x +superkernels.cpp 391 0x2f40 x +superkernels.cpp 393 0x2f40 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2f46 x +io_buffer_main.h 125 0x2f4a +io_buffer_main.h 327 0x2f4e +io_buffer_main.h 327 0x2f4e 1 +io_buffer_main.h 125 0x2f54 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x2f5a x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2f60 +io_buffer_main.h 327 0x2f60 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x2f64 x +superkernels.cpp 391 0x2f68 x +superkernels.cpp 391 0x2f6c +superkernels.cpp 390 0x2f70 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2f80 x +io_buffer_main.h 327 0x2f80 1 +io_buffer_main.h 327 0x2f80 2 +io_buffer_main.h 327 0x2f80 3 +io_buffer_main.h 327 0x2f80 4 +io_buffer_main.h 425 0x2f80 5 +io_buffer_main.h 425 0x2f80 6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2f8a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 425 0x2f9a x +io_buffer_main.h 327 0x2f9e x +io_buffer_main.h 324 0x2fa2 +io_buffer_main.h 327 0x2fb0 +io_buffer_main.h 324 0x2fb4 x +io_buffer_main.h 327 0x2fb4 1 +io_buffer_main.h 425 0x2fc6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2fca +superkernels.cpp 398 0x2fca 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2fca 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2fd4 x +superkernels.cpp 397 0x2fd8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2fe4 x +io_buffer_main.h 327 0x2fe8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2fec x +superkernels.cpp 397 0x2ff0 +superkernels.cpp 398 0x3000 +superkernels.cpp 398 0x3004 x +superkernels.cpp 400 0x3010 +superkernels.cpp 400 0x3026 x +superkernels.cpp 400 0x302e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h: +conv2d_dw_bf16_params.h 211 0x3040 x +conv2d_dw_bf16_params.h 215 0x3040 1 +conv2d_dw_bf16_params.h 215 0x3040 2 x +conv2d_dw_bf16_params.h 215 0x304a x +conv2d_dw_bf16_params.h 218 0x304a 1 +conv2d_dw_bf16_params.h 218 0x304a 2 +conv2d_dw_bf16_params.h 211 0x3054 +conv2d_dw_bf16_params.h 218 0x305a +conv2d_dw_bf16_params.h 215 0x306e +conv2d_dw_bf16_params.h 215 0x3072 +conv2d_dw_bf16_params.h 215 0x3076 +conv2d_dw_bf16_params.h 215 0x307a +conv2d_dw_bf16_params.h 215 0x3088 +conv2d_dw_bf16_params.h 215 0x308c +conv2d_dw_bf16_params.h 218 0x3090 x +conv2d_dw_bf16_params.h 218 0x3094 +conv2d_dw_bf16_params.h 218 0x3098 +conv2d_dw_bf16_params.h 218 0x30a4 +conv2d_dw_bf16_params.h 218 0x30aa +conv2d_dw_bf16_params.h 218 0x30b0 +conv2d_dw_bf16_params.h 218 0x30b6 +conv2d_dw_bf16_params.h 218 0x30bc +conv2d_dw_bf16_params.h 218 0x30c0 +conv2d_dw_bf16_params.h 218 0x30d0 +conv2d_dw_bf16_params.h 218 0x30d0 1 +conv2d_dw_bf16_params.h 219 0x30d0 2 +conv2d_dw_bf16_params.h 218 0x30d6 +conv2d_dw_bf16_params.h 219 0x30d6 1 x +conv2d_dw_bf16_params.h 219 0x30dc +conv2d_dw_bf16_params.h 219 0x30e0 +conv2d_dw_bf16_params.h 218 0x30ea x +conv2d_dw_bf16_params.h 218 0x30ee +conv2d_dw_bf16_params.h 219 0x30f2 x +conv2d_dw_bf16_params.h 219 0x30f8 +conv2d_dw_bf16_params.h 218 0x3102 x +conv2d_dw_bf16_params.h 219 0x3106 x +conv2d_dw_bf16_params.h 219 0x310a +conv2d_dw_bf16_params.h 218 0x310e x +conv2d_dw_bf16_params.h 218 0x3112 +conv2d_dw_bf16_params.h 219 0x3112 1 x +conv2d_dw_bf16_params.h 219 0x3120 +conv2d_dw_bf16_params.h 226 0x3120 1 +conv2d_dw_bf16_params.h 231 0x3120 2 +conv2d_dw_bf16_params.h 219 0x312a +conv2d_dw_bf16_params.h 219 0x312a 1 +conv2d_dw_bf16_params.h 220 0x312a 2 +conv2d_dw_bf16_params.h 220 0x312a 3 +conv2d_dw_bf16_params.h 232 0x312a 4 +conv2d_dw_bf16_params.h 234 0x312a 5 +conv2d_dw_bf16_params.h 234 0x312a 6 +conv2d_dw_bf16_params.h 243 0x312a 7 +conv2d_dw_bf16_params.h 250 0x312a 8 +conv2d_dw_bf16_params.h 253 0x312a 9 +conv2d_dw_bf16_params.h 260 0x312a 10 +conv2d_dw_bf16_params.h 264 0x312a 11 +conv2d_dw_bf16_params.h 220 0x3134 +conv2d_dw_bf16_params.h 234 0x3134 1 +conv2d_dw_bf16_params.h 246 0x3134 2 +conv2d_dw_bf16_params.h 253 0x3134 3 +conv2d_dw_bf16_params.h 226 0x313e x +conv2d_dw_bf16_params.h 234 0x313e 1 +conv2d_dw_bf16_params.h 234 0x313e 2 +conv2d_dw_bf16_params.h 231 0x3148 +conv2d_dw_bf16_params.h 232 0x3148 1 +conv2d_dw_bf16_params.h 232 0x3148 2 +conv2d_dw_bf16_params.h 235 0x3152 +conv2d_dw_bf16_params.h 235 0x3152 1 +conv2d_dw_bf16_params.h 242 0x3152 2 +conv2d_dw_bf16_params.h 242 0x3152 3 +conv2d_dw_bf16_params.h 243 0x3152 4 +conv2d_dw_bf16_params.h 250 0x3152 5 +conv2d_dw_bf16_params.h 255 0x3152 6 +conv2d_dw_bf16_params.h 260 0x3152 7 +conv2d_dw_bf16_params.h 264 0x3152 8 +conv2d_dw_bf16_params.h 234 0x315c +conv2d_dw_bf16_params.h 239 0x315c 1 +conv2d_dw_bf16_params.h 242 0x315c 2 +conv2d_dw_bf16_params.h 248 0x315c 3 +conv2d_dw_bf16_params.h 253 0x315c 4 +conv2d_dw_bf16_params.h 264 0x315c 5 +conv2d_dw_bf16_params.h 219 0x3166 x +conv2d_dw_bf16_params.h 219 0x316a +conv2d_dw_bf16_params.h 219 0x316e +conv2d_dw_bf16_params.h 220 0x316e 1 +conv2d_dw_bf16_params.h 219 0x3174 +conv2d_dw_bf16_params.h 243 0x3174 1 +conv2d_dw_bf16_params.h 247 0x3174 2 +conv2d_dw_bf16_params.h 220 0x317a x +conv2d_dw_bf16_params.h 250 0x317a 1 +conv2d_dw_bf16_params.h 219 0x3180 x +conv2d_dw_bf16_params.h 220 0x3184 x +conv2d_dw_bf16_params.h 231 0x3184 1 +conv2d_dw_bf16_params.h 219 0x318a x +conv2d_dw_bf16_params.h 231 0x318a 1 x +conv2d_dw_bf16_params.h 220 0x3190 x +conv2d_dw_bf16_params.h 253 0x3190 1 x +conv2d_dw_bf16_params.h 240 0x3196 +conv2d_dw_bf16_params.h 246 0x3196 1 x +conv2d_dw_bf16_params.h 232 0x319c x +conv2d_dw_bf16_params.h 226 0x31a0 x +conv2d_dw_bf16_params.h 231 0x31a4 x +conv2d_dw_bf16_params.h 238 0x31a4 1 +conv2d_dw_bf16_params.h 234 0x31aa x +conv2d_dw_bf16_params.h 231 0x31ae x +conv2d_dw_bf16_params.h 232 0x31ae 1 x +conv2d_dw_bf16_params.h 234 0x31b4 x +conv2d_dw_bf16_params.h 232 0x31b8 x +conv2d_dw_bf16_params.h 227 0x31bc x +conv2d_dw_bf16_params.h 232 0x31bc 1 +conv2d_dw_bf16_params.h 234 0x31c2 x +conv2d_dw_bf16_params.h 235 0x31c2 1 x +conv2d_dw_bf16_params.h 235 0x31c8 +conv2d_dw_bf16_params.h 243 0x31c8 1 x +conv2d_dw_bf16_params.h 238 0x31ce x +conv2d_dw_bf16_params.h 242 0x31ce 1 x +conv2d_dw_bf16_params.h 242 0x31d4 +conv2d_dw_bf16_params.h 243 0x31d4 1 x +conv2d_dw_bf16_params.h 239 0x31da x +conv2d_dw_bf16_params.h 242 0x31da 1 x +conv2d_dw_bf16_params.h 243 0x31e0 x +conv2d_dw_bf16_params.h 250 0x31e0 1 x +conv2d_dw_bf16_params.h 234 0x31e6 x +conv2d_dw_bf16_params.h 240 0x31e6 1 x +conv2d_dw_bf16_params.h 253 0x31e6 2 x +conv2d_dw_bf16_params.h 247 0x31ec x +conv2d_dw_bf16_params.h 242 0x31f0 x +conv2d_dw_bf16_params.h 247 0x31f0 1 +conv2d_dw_bf16_params.h 241 0x31f6 x +conv2d_dw_bf16_params.h 243 0x31f6 1 x +conv2d_dw_bf16_params.h 243 0x31fc +conv2d_dw_bf16_params.h 245 0x31fc 1 x +conv2d_dw_bf16_params.h 243 0x3202 x +conv2d_dw_bf16_params.h 248 0x3202 1 x +conv2d_dw_bf16_params.h 245 0x3208 x +conv2d_dw_bf16_params.h 250 0x3208 1 x +conv2d_dw_bf16_params.h 246 0x320e x +conv2d_dw_bf16_params.h 250 0x320e 1 +conv2d_dw_bf16_params.h 247 0x3214 x +conv2d_dw_bf16_params.h 248 0x3214 1 x +conv2d_dw_bf16_params.h 250 0x321a x +conv2d_dw_bf16_params.h 250 0x321a 1 x +conv2d_dw_bf16_params.h 248 0x3220 x +conv2d_dw_bf16_params.h 250 0x3220 1 +conv2d_dw_bf16_params.h 249 0x3226 x +conv2d_dw_bf16_params.h 255 0x3226 1 x +conv2d_dw_bf16_params.h 258 0x3226 2 +conv2d_dw_bf16_params.h 258 0x3226 3 +conv2d_dw_bf16_params.h 252 0x3230 x +conv2d_dw_bf16_params.h 253 0x3230 1 x +conv2d_dw_bf16_params.h 253 0x3236 +conv2d_dw_bf16_params.h 255 0x3236 1 x +conv2d_dw_bf16_params.h 254 0x323c x +conv2d_dw_bf16_params.h 255 0x323c 1 +conv2d_dw_bf16_params.h 256 0x323c 2 +conv2d_dw_bf16_params.h 258 0x323c 3 x +conv2d_dw_bf16_params.h 258 0x323c 4 x +conv2d_dw_bf16_params.h 259 0x323c 5 +conv2d_dw_bf16_params.h 263 0x323c 6 +conv2d_dw_bf16_params.h 255 0x3248 x +conv2d_dw_bf16_params.h 256 0x324c x +conv2d_dw_bf16_params.h 260 0x324c 1 x +conv2d_dw_bf16_params.h 258 0x3252 x +conv2d_dw_bf16_params.h 260 0x3252 1 +conv2d_dw_bf16_params.h 259 0x3258 x +conv2d_dw_bf16_params.h 264 0x3258 1 x +conv2d_dw_bf16_params.h 260 0x325e x +conv2d_dw_bf16_params.h 264 0x325e 1 +conv2d_dw_bf16_params.h 262 0x3264 x +conv2d_dw_bf16_params.h 263 0x3268 x +conv2d_dw_bf16_params.h 264 0x326c x +conv2d_dw_bf16_params.h 266 0x3270 x +conv2d_dw_bf16_params.h 266 0x3280 +conv2d_dw_bf16_params.h 266 0x3280 1 +conv2d_dw_bf16_params.h 266 0x3286 +conv2d_dw_bf16_params.h 266 0x328a +conv2d_dw_bf16_params.h 266 0x3296 +conv2d_dw_bf16_params.h 266 0x32a0 +conv2d_dw_bf16_params.h 267 0x32a0 1 +conv2d_dw_bf16_params.h 266 0x32aa +conv2d_dw_bf16_params.h 266 0x32aa 1 +conv2d_dw_bf16_params.h 266 0x32b0 +conv2d_dw_bf16_params.h 266 0x32b6 +conv2d_dw_bf16_params.h 267 0x32bc x +conv2d_dw_bf16_params.h 266 0x32c6 x +conv2d_dw_bf16_params.h 266 0x32ca +conv2d_dw_bf16_params.h 267 0x32ca 1 x +conv2d_dw_bf16_params.h 266 0x32d0 x +conv2d_dw_bf16_params.h 266 0x32d8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 444 0x35c0 x +superkernels.cpp 449 0x35c0 1 +superkernels.cpp 449 0x35c6 x +superkernels.cpp 444 0x35cc +superkernels.cpp 467 0x35da +superkernels.cpp 452 0x35ea +superkernels.cpp 449 0x35f2 +superkernels.cpp 449 0x35f2 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x35f8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 446 0x35fc x +superkernels.cpp 446 0x3600 +superkernels.cpp 446 0x3604 +superkernels.cpp 446 0x360a +superkernels.cpp 461 0x360e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x360e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 451 0x3618 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3618 1 +tile.hpp 86 0x3618 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 451 0x3626 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x3630 +tile.hpp 74 0x3634 +tile.hpp 74 0x3638 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 452 0x3640 +superkernels.cpp 461 0x3640 1 +superkernels.cpp 452 0x3648 x +superkernels.cpp 453 0x364c +superkernels.cpp 453 0x364c 1 x +superkernels.cpp 452 0x365e +superkernels.cpp 457 0x365e 1 +superkernels.cpp 452 0x3668 x +superkernels.cpp 453 0x366c x +superkernels.cpp 457 0x3670 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3680 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 457 0x3684 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3684 1 +io_buffer_main.h 218 0x368e +io_buffer_main.h 218 0x3692 +io_buffer_main.h 235 0x3696 x +io_buffer_main.h 218 0x36a4 x +io_buffer_main.h 218 0x36a4 1 x +io_buffer_main.h 218 0x36a8 +io_buffer_main.h 395 0x36ac +io_buffer_main.h 395 0x36b6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 456 0x36ba +superkernels.cpp 459 0x36ba 1 +superkernels.cpp 464 0x36ba 2 +superkernels.cpp 465 0x36ba 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x36ba 4 +io_buffer_main.h 425 0x36ba 5 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 52 0x36c4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x36ce +io_buffer_main.h 324 0x36ce 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 457 0x36d4 x +superkernels.cpp 457 0x36d8 +superkernels.cpp 461 0x36d8 1 +superkernels.cpp 456 0x36e2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x36ec x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 457 0x36f2 x +superkernels.cpp 456 0x36f6 x +superkernels.cpp 459 0x36fa x +superkernels.cpp 461 0x36fe x +superkernels.cpp 456 0x3704 x +superkernels.cpp 459 0x3708 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 201 0x370c x +io_buffer_impl.h 52 0x3710 x +io_buffer_impl.h 52 0x3714 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x3720 +io_buffer_main.h 324 0x3724 x +io_buffer_main.h 425 0x3734 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 464 0x3738 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3738 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 464 0x3742 x +superkernels.cpp 464 0x3746 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3756 x +io_buffer_main.h 327 0x375a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 464 0x375e x +superkernels.cpp 464 0x3762 +superkernels.cpp 465 0x3768 +superkernels.cpp 465 0x3774 x +superkernels.cpp 467 0x3780 +superkernels.cpp 467 0x378a x +superkernels.cpp 467 0x378e +superkernels.cpp 578 0x37a0 +superkernels.cpp 578 0x37a0 1 x +superkernels.cpp 583 0x37a6 +superkernels.cpp 583 0x37b0 x +superkernels.cpp 587 0x37c2 +superkernels.cpp 590 0x37c2 1 +superkernels.cpp 599 0x37c2 2 +superkernels.cpp 629 0x37c2 3 +superkernels.cpp 583 0x37d0 +superkernels.cpp 583 0x37d0 1 +superkernels.cpp 580 0x37da x +superkernels.cpp 580 0x37de +superkernels.cpp 580 0x37e2 +superkernels.cpp 580 0x37e8 +superkernels.cpp 587 0x37ec + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x37ec 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 621 0x37f6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x37f6 1 +tile.hpp 86 0x37f6 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 587 0x3802 +superkernels.cpp 587 0x3802 1 +superkernels.cpp 587 0x380c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x3816 +tile.hpp 74 0x381a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 587 0x3820 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3820 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 590 0x3830 +superkernels.cpp 591 0x3830 1 +superkernels.cpp 590 0x3836 x +superkernels.cpp 591 0x3846 x +superkernels.cpp 591 0x384a +superkernels.cpp 599 0x3850 +superkernels.cpp 599 0x3854 x +superkernels.cpp 591 0x385e x +superkernels.cpp 611 0x386a +superkernels.cpp 594 0x3874 x +superkernels.cpp 595 0x387e +superkernels.cpp 594 0x3884 +superkernels.cpp 594 0x388a +superkernels.cpp 595 0x38a0 x +superkernels.cpp 621 0x38aa +superkernels.cpp 621 0x38c0 +superkernels.cpp 599 0x38d0 x +superkernels.cpp 600 0x38da +superkernels.cpp 599 0x38e0 +superkernels.cpp 599 0x38e6 +superkernels.cpp 600 0x38f0 x +superkernels.cpp 621 0x38fa +superkernels.cpp 606 0x3904 x +superkernels.cpp 611 0x3904 1 +superkernels.cpp 611 0x390e x +superkernels.cpp 607 0x3912 x +superkernels.cpp 607 0x3916 +superkernels.cpp 607 0x391c +superkernels.cpp 606 0x3924 +superkernels.cpp 607 0x392a +superkernels.cpp 606 0x392e x +superkernels.cpp 611 0x392e 1 +superkernels.cpp 607 0x3938 x +superkernels.cpp 611 0x393c x +superkernels.cpp 608 0x3940 x +superkernels.cpp 608 0x3944 +superkernels.cpp 611 0x3944 1 x +superkernels.cpp 608 0x3950 x +superkernels.cpp 614 0x3960 +superkernels.cpp 614 0x3966 x +superkernels.cpp 616 0x3966 1 +superkernels.cpp 615 0x3970 +superkernels.cpp 616 0x3970 1 x +superkernels.cpp 615 0x397a x +superkernels.cpp 618 0x3986 x +superkernels.cpp 618 0x3986 1 x +superkernels.cpp 614 0x398c x +superkernels.cpp 616 0x398c 1 x +superkernels.cpp 615 0x3992 x +superkernels.cpp 616 0x3996 x +superkernels.cpp 615 0x399a x +superkernels.cpp 614 0x399e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x39a2 +io_buffer_main.h 218 0x39b2 x +io_buffer_main.h 218 0x39b6 +io_buffer_main.h 218 0x39ba +io_buffer_main.h 218 0x39be +io_buffer_main.h 235 0x39c4 x +io_buffer_main.h 218 0x39d0 x +io_buffer_main.h 218 0x39d0 1 x +io_buffer_main.h 218 0x39d4 +io_buffer_main.h 395 0x39d4 1 +io_buffer_main.h 395 0x39e2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 621 0x39f6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x39f6 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 621 0x39fa +superkernels.cpp 621 0x39fe x +superkernels.cpp 621 0x3a04 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x3a10 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 623 0x3a20 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3a20 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 623 0x3a2a x +superkernels.cpp 623 0x3a2a 1 +superkernels.cpp 623 0x3a34 +superkernels.cpp 623 0x3a44 +superkernels.cpp 623 0x3a48 +superkernels.cpp 629 0x3a58 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3a58 1 x +io_buffer_main.h 395 0x3a58 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3a62 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3a62 1 +io_buffer_main.h 218 0x3a6c +io_buffer_main.h 218 0x3a70 +io_buffer_main.h 235 0x3a74 x +io_buffer_main.h 218 0x3a82 x +io_buffer_main.h 218 0x3a82 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3a86 +superkernels.cpp 630 0x3a86 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3a86 2 +io_buffer_main.h 395 0x3a94 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3a9e x +superkernels.cpp 629 0x3aa2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x3aaa x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3ab2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x3ab2 1 +io_buffer_main.h 324 0x3ab2 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 630 0x3abc x +superkernels.cpp 630 0x3ac0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x3ac6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 626 0x3ad0 x +superkernels.cpp 630 0x3ad8 x +superkernels.cpp 633 0x3ae8 x +superkernels.cpp 633 0x3aee +superkernels.cpp 633 0x3afa +superkernels.cpp 637 0x3b10 x +superkernels.cpp 637 0x3b16 +superkernels.cpp 637 0x3b1c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3b30 +io_buffer_main.h 327 0x3b30 1 +io_buffer_main.h 324 0x3b34 +io_buffer_main.h 327 0x3b34 1 +io_buffer_main.h 327 0x3b34 2 +io_buffer_main.h 425 0x3b34 3 +io_buffer_main.h 425 0x3b34 4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 645 0x3b3a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x3b3a 1 x +io_buffer_main.h 425 0x3b50 x +io_buffer_main.h 327 0x3b54 x +io_buffer_main.h 324 0x3b58 x +io_buffer_main.h 327 0x3b66 x +io_buffer_main.h 327 0x3b6a +io_buffer_main.h 425 0x3b76 x +io_buffer_main.h 327 0x3b7a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 645 0x3b8c +superkernels.cpp 649 0x3b8c 1 +superkernels.cpp 645 0x3b90 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3b90 1 +io_buffer_main.h 327 0x3b96 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 649 0x3ba0 +superkernels.cpp 648 0x3bb0 +superkernels.cpp 651 0x3bb0 1 +superkernels.cpp 648 0x3bba +superkernels.cpp 648 0x3bba 1 x +superkernels.cpp 649 0x3bba 2 +superkernels.cpp 648 0x3bc4 +superkernels.cpp 648 0x3bd4 +superkernels.cpp 648 0x3bd8 +superkernels.cpp 649 0x3bea x +superkernels.cpp 651 0x3bf4 x +superkernels.cpp 651 0x3bf8 +superkernels.cpp - 0x3bf9 + + +superkernels.cpp: +File name Line number Starting address View Stmt + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 21 0x3c10 x +0_0_reloadable5.cc 23 0x3c10 1 +0_0_reloadable5.cc 23 0x3c14 x +0_0_reloadable5.cc 24 0x3c18 x +0_0_reloadable5.cc 26 0x3c1c x +0_0_reloadable5.cc 25 0x3c20 x +0_0_reloadable5.cc 22 0x3c24 x +0_0_reloadable5.cc 30 0x3c40 x +0_0_reloadable5.cc 32 0x3c40 1 +0_0_reloadable5.cc 32 0x3c44 x +0_0_reloadable5.cc 34 0x3c48 x +0_0_reloadable5.cc 33 0x3c4c x +0_0_reloadable5.cc 31 0x3c50 x +0_0_reloadable5.cc 38 0x3c60 x +0_0_reloadable5.cc 40 0x3c60 1 +0_0_reloadable5.cc 40 0x3c64 x +0_0_reloadable5.cc 42 0x3c68 x +0_0_reloadable5.cc 41 0x3c6c x +0_0_reloadable5.cc 39 0x3c70 x +0_0_reloadable5.cc 46 0x3c80 x +0_0_reloadable5.cc 48 0x3c80 1 +0_0_reloadable5.cc 48 0x3c84 x +0_0_reloadable5.cc 50 0x3c88 x +0_0_reloadable5.cc 49 0x3c8c x +0_0_reloadable5.cc 47 0x3c90 x +0_0_reloadable5.cc 54 0x3ca0 x +0_0_reloadable5.cc 56 0x3ca0 1 +0_0_reloadable5.cc 56 0x3ca4 x +0_0_reloadable5.cc 57 0x3ca8 x +0_0_reloadable5.cc 59 0x3cac x +0_0_reloadable5.cc 58 0x3cb0 x +0_0_reloadable5.cc 55 0x3cb4 x +0_0_reloadable5.cc 63 0x3cd0 x +0_0_reloadable5.cc 65 0x3cd0 1 +0_0_reloadable5.cc 65 0x3cd4 x +0_0_reloadable5.cc 66 0x3cd8 x +0_0_reloadable5.cc 67 0x3cdc x +0_0_reloadable5.cc 69 0x3ce0 x +0_0_reloadable5.cc 68 0x3ce4 x +0_0_reloadable5.cc 64 0x3ce8 x +0_0_reloadable5.cc 73 0x3d00 x +0_0_reloadable5.cc 75 0x3d00 1 +0_0_reloadable5.cc 75 0x3d04 x +0_0_reloadable5.cc 76 0x3d08 x +0_0_reloadable5.cc 78 0x3d0c x +0_0_reloadable5.cc 77 0x3d10 x +0_0_reloadable5.cc 74 0x3d14 x +0_0_reloadable5.cc 94 0x930 x +0_0_reloadable5.cc 96 0x930 1 x +0_0_reloadable5.cc 96 0x930 2 +0_0_reloadable5.cc 98 0x930 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x930 4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 94 0x936 +0_0_reloadable5.cc 96 0x944 +0_0_reloadable5.cc 98 0x944 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x944 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 96 0x94c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x952 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0x958 x +io_buffer_compiler.h 590 0x95c +io_buffer_compiler.h 590 0x960 +io_buffer_compiler.h 590 0x964 +io_buffer_compiler.h 590 0x968 +io_buffer_compiler.h 195 0x978 x +io_buffer_compiler.h 195 0x978 1 x +io_buffer_compiler.h 194 0x97c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x980 +io_buffer_main.h 410 0x980 1 +io_buffer_main.h 410 0x98a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 98 0x98e +0_0_reloadable5.cc 102 0x98e 1 +0_0_reloadable5.cc 98 0x992 x +0_0_reloadable5.cc 98 0x996 +0_0_reloadable5.cc 98 0x99a +0_0_reloadable5.cc 98 0x9a8 +0_0_reloadable5.cc 98 0x9ac + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0x9b0 x +io_buffer_compiler.h 590 0x9b8 +io_buffer_compiler.h 590 0x9bc +io_buffer_compiler.h 590 0x9c0 +io_buffer_compiler.h 590 0x9c4 +io_buffer_compiler.h 195 0x9d4 x +io_buffer_compiler.h 195 0x9d4 1 x +io_buffer_compiler.h 194 0x9d8 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x9e4 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 102 0x9e8 x +0_0_reloadable5.cc 102 0x9ec +0_0_reloadable5.cc 102 0x9f0 +0_0_reloadable5.cc 102 0x9f6 +0_0_reloadable5.cc 102 0xa08 +0_0_reloadable5.cc 105 0xa0c +0_0_reloadable5.cc 107 0xa0c 1 +0_0_reloadable5.cc 105 0xa20 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa20 1 +io_buffer_compiler.h 606 0xa20 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa20 3 +io_buffer_main.h 440 0xa20 4 +io_buffer_main.h 440 0xa26 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 107 0xa2a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa2e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa2e 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 605 0xa38 x +io_buffer_compiler.h 605 0xa3c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa4a +io_buffer_main.h 440 0xa4e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa52 +io_buffer_compiler.h 606 0xa52 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 107 0xa58 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa66 x +io_buffer_compiler.h 605 0xa6a x +io_buffer_compiler.h 606 0xa6a 1 +io_buffer_compiler.h 605 0xa70 +io_buffer_compiler.h 606 0xa70 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa82 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 110 0xa86 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa8a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable5/src/0_0_reloadable5.cc: +0_0_reloadable5.cc 110 0xa96 x +0_0_reloadable5.cc 110 0xaa0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xaa4 +io_buffer_compiler.h 606 0xaa8 x +io_buffer_compiler.h 606 0xaac +io_buffer_compiler.h 606 0xab0 +io_buffer_compiler.h - 0xab1 + + +CU: me_div.c: +File name Line number Starting address View Stmt + +./me_div.c:[++] +me_div.c 108 0x3d30 +me_div.c 108 0x3d30 1 +me_div.c 115 0x3d30 2 x +me_div.c 108 0x3d36 +me_div.c 108 0x3d3a +me_div.c 108 0x3d3e +me_div.c 108 0x3d42 +me_div.c 108 0x3d46 +me_div.c 108 0x3d4a +me_div.c 108 0x3d4e +me_div.c 108 0x3d52 +me_div.c 108 0x3d56 +me_div.c 108 0x3d5a +me_div.c 108 0x3d5e +me_div.c 108 0x3d62 +me_div.c 108 0x3d66 +me_div.c 108 0x3d6a +me_div.c 108 0x3d6e +me_div.c 108 0x3d72 +me_div.c 108 0x3d76 +me_div.c 108 0x3d7a +me_div.c 108 0x3d7e +me_div.c 108 0x3d82 +me_div.c 108 0x3d86 +me_div.c 108 0x3d8a +me_div.c 108 0x3d8e +me_div.c 108 0x3d92 +me_div.c 108 0x3d96 +me_div.c 108 0x3d9a +me_div.c 108 0x3d9e +me_div.c 108 0x3da2 +me_div.c 119 0x3da6 x +me_div.c 108 0x3daa x +me_div.c 108 0x3dae +me_div.c 108 0x3db2 +me_div.c 108 0x3db6 +me_div.c - 0x3db7 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/scripts/0_2_reloadable7.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/scripts/0_2_reloadable7.bcf new file mode 100644 index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/scripts/0_2_reloadable7.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x930 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x930 + +_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7ba80 0x40 //reserved for sync buffer +_stack DM_stack 0x7bac0 0x940 //stack for core +_reserved DMb 0x7c400 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c440 0x880//heap +_reserved DMb 0x40000 0x3b280 + +_reserved DMb 0x7ccc0 0x3340 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/scripts/0_2_reloadable7.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/scripts/0_2_reloadable7.prx new file mode 100644 index 0000000000000000000000000000000000000000..41aacceae3bcb3202d8b36c8c9e1c8142cf346fd --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/scripts/0_2_reloadable7.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/src/0_2_reloadable7.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/src/0_2_reloadable7.cc new file mode 100644 index 0000000000000000000000000000000000000000..ed266062f542d5fd9e7d7b554216254c298b9574 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable7/src/0_2_reloadable7.cc @@ -0,0 +1,110 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void conv2d_maxpool(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_add1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_clip1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_conv_eltbinary(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict,const unsigned int (&)[17],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_conv2d_dwc(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); + +// Declare Kernel objects and external arrays + + +void _b896_wrapper(void* args[]) +{ + conv2d_maxpool( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +void _b901_wrapper(void* args[]) +{ + superkernel_add1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b906_wrapper(void* args[]) +{ + superkernel_clip1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b881_wrapper(void* args[]) +{ + superkernel_mul1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b891_wrapper(void* args[]) +{ + superkernel_mul1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b924_wrapper(void* args[]) +{ + superkernel_conv_eltbinary( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2]), + *reinterpret_cast(args[4]), + *reinterpret_cast*>(args[3])); +} + +void _b919_wrapper(void* args[]) +{ + superkernel_conv2d_dwc( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[7] = { + _b896_wrapper, + _b901_wrapper, + _b906_wrapper, + _b881_wrapper, + _b891_wrapper, + _b924_wrapper, + _b919_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->release(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; +} diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.calltree new file mode 100644 index 0000000000000000000000000000000000000000..0d87486df8d685214c85a56d2c420e80fd5d49bc --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.calltree @@ -0,0 +1,54 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:39 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t + _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _ZN12me_primitive10udiv_dstepEjjRjS0_ (*) + int32_to_float32 + _ZL28normalizeRoundAndPackFloat32iij + _ZL19roundAndPackFloat32iij + float32_add + _ZL14addFloat32Sigsjji + _ZL19propagateFloat32NaNjj + _ZL19roundAndPackFloat32iij (*) + _ZL14subFloat32Sigsjji + _ZL19propagateFloat32NaNjj (*) + _ZL28normalizeRoundAndPackFloat32iij (*) + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 448 0 0 220 10058 _Z13kernelWrapperPPvjjjj + 128 384 1 1 2676 9838 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 2 1588 1588 _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 0 0 2 2 670 670 _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t + 256 256 2 2 2680 2822 _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 0 0 3 3 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 0 2 2 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 0 2 2 114 530 int32_to_float32 + 0 0 2 3 24 416 _ZL28normalizeRoundAndPackFloat32iij + 0 0 2 4 392 392 _ZL19roundAndPackFloat32iij + 0 0 2 2 64 1968 float32_add + 0 0 3 3 624 1128 _ZL14addFloat32Sigsjji + 0 0 4 4 112 112 _ZL19propagateFloat32NaNjj + 0 0 3 4 392 392 _ZL19roundAndPackFloat32iij + 0 0 2 3 752 1280 _ZL14subFloat32Sigsjji + 0 0 3 4 112 112 _ZL19propagateFloat32NaNjj + 0 0 2 4 24 416 _ZL28normalizeRoundAndPackFloat32iij (*) + + +Maximum call level : 4 +Maximum stack level: 4 +Maximum stack size : 448 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..d037f49ea23915d17f1d140dbcf225735acc1af1 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.cmic2 @@ -0,0 +1,14427 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:41 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable2.cc" 29 first +.src_ref 0 "0_0_reloadable2.cc" 31 60 first +.function_start + 2352 "11010100" // LDA r16, [p0]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00101111" // /* MW 4 */ + 2355 "11010000" // /* MW 3 */ + 2356 "11000010" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 29 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 31 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2364 "00000010" // ST p7, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "01010000" // /* MW 6 */ + 2367 "11101000" // /* MW 5 */ + 2368 "00000001" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "01110011" // /* MW 2 */ + 2371 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 17 79 +.src_ref 0 "0_0_reloadable2.cc" 31 110 first + 2372 "00111010" // ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2373 "01111001" // /* MW 9 */ + 2374 "01100000" // /* MW 8 */ + 2375 "10110000" // /* MW 7 */ + 2376 "10000011" // /* MW 6 */ + 2377 "10100111" // /* MW 5 */ + 2378 "00011111" // /* MW 4 */ + 2379 "10110000" // /* MW 3 */ + 2380 "10000010" // /* MW 2 */ + 2381 "11111111" // /* MW 1 */ + 2382 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2383 "00111101" // /* MW 3 */ + 2384 "11110100" // /* MW 2 */ + 2385 "00001111" // /* MW 1 */ + 2386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2387 "00000000" // /* MW 1 */ + 2388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2389 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2390 "00011000" // ADD.NC p0, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2391 "00000010" // /* MW 3 */ + 2392 "01101000" // /* MW 2 */ + 2393 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2394 "10011000" // LDA r16, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2395 "00010110" // /* MW 3 */ + 2396 "00011110" // /* MW 2 */ + 2397 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2398 "10011000" // LDA r18, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2399 "01010110" // /* MW 3 */ + 2400 "00111110" // /* MW 2 */ + 2401 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2402 "10011000" // LDA r17, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2403 "00110110" // /* MW 3 */ + 2404 "11101110" // /* MW 2 */ + 2405 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2406 "10011000" // LDA r27, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2407 "01110110" // /* MW 3 */ + 2408 "00000111" // /* MW 2 */ + 2409 "00000000" // /* MW 1 */ + 2410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2411 "00000000" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2422 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2423 "00100010" // /* MW 3 */ + 2424 "00100001" // /* MW 2 */ + 2425 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2426 "10011000" // ST r16, [p0, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2427 "00010001" // /* MW 3 */ + 2428 "11010110" // /* MW 2 */ + 2429 "00001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2430 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2431 "11111101" // /* MW 3 */ + 2432 "11100000" // /* MW 2 */ + 2433 "00010111" // /* MW 1 */ + 2434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2435 "00000000" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2440 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2441 "00001000" // /* MW 3 */ + 2442 "01010111" // /* MW 2 */ + 2443 "00010100" // /* MW 1 */ + 2444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2445 "00000000" // /* MW 1 */ + 2446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2447 "00000000" // /* MW 1 */ + 2448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2449 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 17 79 first + 2450 "10011000" // LDA p0, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00011110" // /* MW 3 */ + 2452 "00101100" // /* MW 2 */ + 2453 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 18 47 first + 2454 "10011000" // LDA p1, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "10011110" // /* MW 3 */ + 2456 "11111100" // /* MW 2 */ + 2457 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 19 81 first + 2458 "10011000" // LDA p2, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2459 "00011110" // /* MW 3 */ + 2460 "00000101" // /* MW 2 */ + 2461 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 16 4 first +.no_stack_arguments + 2462 "00000100" // JL #7536 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7536 delay_slots=5 */ + 2463 "00000001" // /* MW 5 */ + 2464 "00000000" // /* MW 4 */ + 2465 "10111000" // /* MW 3 */ + 2466 "00001110" // /* MW 2 */ + 2467 "00000000" // /* MW 1 */ +.delay_slot + 2468 "10011000" // ST r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2469 "01010101" // /* MW 3 */ + 2470 "11110011" // /* MW 2 */ + 2471 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2479 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 38 60 first +.return_address + 2480 "10011000" // LDA r16, [p7, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2481 "00010110" // /* MW 3 */ + 2482 "11110110" // /* MW 2 */ + 2483 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2484 "00011000" // LDA r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2485 "01010001" // /* MW 3 */ + 2486 "11110011" // /* MW 2 */ + 2487 "00000111" // /* MW 1 */ + 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2489 "00000000" // /* MW 1 */ + 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2491 "00000000" // /* MW 1 */ + 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2493 "00000000" // /* MW 1 */ + 2494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2495 "00000000" // /* MW 1 */ + 2496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2497 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2498 "00011000" // ADD.NC p0, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2499 "00001000" // /* MW 3 */ + 2500 "01101000" // /* MW 2 */ + 2501 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2502 "10011000" // LDA r16, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2503 "00010110" // /* MW 3 */ + 2504 "00000110" // /* MW 2 */ + 2505 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2506 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2507 "00000101" // /* MW 3 */ + 2508 "00100010" // /* MW 2 */ + 2509 "00010000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ + 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2517 "00000000" // /* MW 1 */ + 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2519 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2520 "00011000" // REL.COND r16, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "00011000" // /* MW 3 */ + 2522 "00010101" // /* MW 2 */ + 2523 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2524 "11010100" // LDA lr, [sp, #-12]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2525 "01000001" // /* MW 5 */ + 2526 "10101111" // /* MW 4 */ + 2527 "00101101" // /* MW 3 */ + 2528 "10000111" // /* MW 2 */ + 2529 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2530 "10011000" // LDA r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2531 "00010110" // /* MW 3 */ + 2532 "11110110" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ + 2534 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2535 "10011001" // /* MW 3 */ + 2536 "11111011" // /* MW 2 */ + 2537 "00000111" // /* MW 1 */ + 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2539 "00000000" // /* MW 1 */ + 2540 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "11110001" // /* MW 3 */ + 2542 "11111101" // /* MW 2 */ + 2543 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 first + 2544 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000001" // /* MW 5 */ + 2546 "00000000" // /* MW 4 */ + 2547 "00000000" // /* MW 3 */ + 2548 "11111000" // /* MW 2 */ + 2549 "11111111" // /* MW 1 */ + 2550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2551 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 + 2552 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2553 "00000000" // /* MW 3 */ + 2554 "00101000" // /* MW 2 */ + 2555 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2556 "10011000" // SUB r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2557 "00000001" // /* MW 3 */ + 2558 "01100011" // /* MW 2 */ + 2559 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2560 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "00010010" // /* MW 3 */ + 2562 "00100001" // /* MW 2 */ + 2563 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2564 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00010001" // /* MW 3 */ + 2566 "11110110" // /* MW 2 */ + 2567 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2571 "00000000" // /* MW 1 */ +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.function setup _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.src_ref 2 "reduce_base_c8.h" 218 first +.src_ref 2 "reduce_base_c8.h" 220 27 first +.src_ref 2 "reduce_base_c8.h" 290 63 +.src_ref 2 "reduce_base_c8.h" 348 46 +.function_start + 2576 "01110110" // LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2577 "01111000" // /* MW 11 */ + 2578 "01100000" // /* MW 10 */ + 2579 "00001001" // /* MW 9 */ + 2580 "01101000" // /* MW 8 */ + 2581 "01100111" // /* MW 7 */ + 2582 "00111110" // /* MW 6 */ + 2583 "10001011" // /* MW 5 */ + 2584 "10000000" // /* MW 4 */ + 2585 "11010011" // /* MW 3 */ + 2586 "10001110" // /* MW 2 */ + 2587 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 287 40 +.src_ref 2 "reduce_base_c8.h" 287 40 +.src_ref 2 "reduce_base_c8.h" 348 46 first + 2588 "10111010" // MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2589 "00001000" // /* MW 9 */ + 2590 "00000111" // /* MW 8 */ + 2591 "00110000" // /* MW 7 */ + 2592 "00001001" // /* MW 6 */ + 2593 "00100101" // /* MW 5 */ + 2594 "00111110" // /* MW 4 */ + 2595 "00000000" // /* MW 3 */ + 2596 "00000111" // /* MW 2 */ + 2597 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 293 77 +.src_ref 2 "reduce_base_c8.h" 298 44 +.src_ref 2 "reduce_base_c8.h" 299 40 +.src_ref 2 "reduce_base_c8.h" 300 59 +.src_ref 2 "reduce_base_c8.h" 326 79 + 2598 "10111010" // MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2599 "01111000" // /* MW 9 */ + 2600 "01100000" // /* MW 8 */ + 2601 "00001000" // /* MW 7 */ + 2602 "10101000" // /* MW 6 */ + 2603 "00010111" // /* MW 5 */ + 2604 "00111110" // /* MW 4 */ + 2605 "00000000" // /* MW 3 */ + 2606 "01111110" // /* MW 2 */ + 2607 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 57 +.src_ref 2 "reduce_base_c8.h" 301 81 +.src_ref 2 "reduce_base_c8.h" 305 77 + 2608 "10111010" // MOVA r5, #-1; MOVXM r4, #65528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2609 "00010000" // /* MW 9 */ + 2610 "11111100" // /* MW 8 */ + 2611 "10001111" // /* MW 7 */ + 2612 "00111100" // /* MW 6 */ + 2613 "00000000" // /* MW 5 */ + 2614 "00000000" // /* MW 4 */ + 2615 "00000000" // /* MW 3 */ + 2616 "11100101" // /* MW 2 */ + 2617 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 218 +.src_ref 2 "reduce_base_c8.h" 280 76 +.src_ref 2 "reduce_base_c8.h" 312 98 + 2618 "10111010" // MOVA r16, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2619 "01110000" // /* MW 9 */ + 2620 "00000000" // /* MW 8 */ + 2621 "00000000" // /* MW 7 */ + 2622 "00000000" // /* MW 6 */ + 2623 "00000010" // /* MW 5 */ + 2624 "00000000" // /* MW 4 */ + 2625 "00000000" // /* MW 3 */ + 2626 "10010000" // /* MW 2 */ + 2627 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 298 44 first + 2628 "00011000" // ADD.NC p4, r0, #46 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2629 "00010111" // /* MW 3 */ + 2630 "01100000" // /* MW 2 */ + 2631 "00011100" // /* MW 1 */ + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 220 25 first + 2634 "10011000" // ST r3, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "01110001" // /* MW 3 */ + 2636 "00011100" // /* MW 2 */ + 2637 "00001000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 221 28 first + 2638 "10011000" // LDA r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "01010110" // /* MW 3 */ + 2640 "00011111" // /* MW 2 */ + 2641 "00000001" // /* MW 1 */ + 2642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2643 "00000000" // /* MW 1 */ + 2644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2645 "00000000" // /* MW 1 */ + 2646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2647 "00000000" // /* MW 1 */ + 2648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2649 "00000000" // /* MW 1 */ + 2650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2651 "00000000" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 221 26 +.src_ref 2 "reduce_base_c8.h" 301 81 first + 2654 "01011100" // ST r26, [p0], #4; AND r17, r26, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2655 "10001001" // /* MW 5 */ + 2656 "01000100" // /* MW 4 */ + 2657 "00111101" // /* MW 3 */ + 2658 "11101010" // /* MW 2 */ + 2659 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 222 26 first +.src_ref 2 "reduce_base_c8.h" 293 58 first +.src_ref 2 "reduce_base_c8.h" 301 81 + 2660 "10111010" // LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2661 "10101000" // /* MW 9 */ + 2662 "01001000" // /* MW 8 */ + 2663 "11001100" // /* MW 7 */ + 2664 "01111110" // /* MW 6 */ + 2665 "01001101" // /* MW 5 */ + 2666 "00000110" // /* MW 4 */ + 2667 "11010000" // /* MW 3 */ + 2668 "11110110" // /* MW 2 */ + 2669 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 63 first + 2670 "10011000" // LSHL r18, r26, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "01101101" // /* MW 3 */ + 2672 "10100100" // /* MW 2 */ + 2673 "00010110" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 293 77 first + 2674 "10011000" // LSHL r6, r4, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "00011101" // /* MW 3 */ + 2676 "00001100" // /* MW 2 */ + 2677 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 41 first +.src_ref 2 "reduce_base_c8.h" 300 59 first + 2678 "00100100" // LSHL r17, r26, r1; ADD.NC r1, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2679 "11111111" // /* MW 5 */ + 2680 "10110010" // /* MW 4 */ + 2681 "10110000" // /* MW 3 */ + 2682 "01000011" // /* MW 2 */ + 2683 "11010100" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 222 24 first +.src_ref 2 "reduce_base_c8.h" 287 40 first + 2690 "01011100" // ST r29, [p0], #4; MAC r7, r7, r29, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2691 "01001100" // /* MW 5 */ + 2692 "10011100" // /* MW 4 */ + 2693 "00111110" // /* MW 3 */ + 2694 "11110110" // /* MW 2 */ + 2695 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 223 29 first +.src_ref 2 "reduce_base_c8.h" 312 60 first + 2696 "11111010" // LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2697 "10101111" // /* MW 9 */ + 2698 "01001001" // /* MW 8 */ + 2699 "00000111" // /* MW 7 */ + 2700 "10000000" // /* MW 6 */ + 2701 "10110101" // /* MW 5 */ + 2702 "11111111" // /* MW 4 */ + 2703 "11010111" // /* MW 3 */ + 2704 "10001010" // /* MW 2 */ + 2705 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 305 57 first + 2706 "10011000" // MUL r20, r3, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11011111" // /* MW 3 */ + 2708 "11101001" // /* MW 2 */ + 2709 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 78 first + 2710 "10011000" // MUL r28, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2711 "01001111" // /* MW 3 */ + 2712 "11111000" // /* MW 2 */ + 2713 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 299 40 first + 2714 "10011000" // LSHL r21, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2715 "11101101" // /* MW 3 */ + 2716 "01101011" // /* MW 2 */ + 2717 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 57 first +.src_ref 2 "reduce_base_c8.h" 299 40 + 2718 "00100100" // LSHL r18, r29, r5; ADD.NC r27, r21, #-48 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2719 "11010000" // /* MW 5 */ + 2720 "10110101" // /* MW 4 */ + 2721 "10111101" // /* MW 3 */ + 2722 "10001011" // /* MW 2 */ + 2723 "11101100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 41 + 2724 "00011000" // ADD r23, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "11111111" // /* MW 3 */ + 2726 "10101111" // /* MW 2 */ + 2727 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 301 85 first + 2728 "10011000" // MUL r29, r29, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "01101111" // /* MW 3 */ + 2730 "01111011" // /* MW 2 */ + 2731 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 223 27 first +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2732 "01011100" // ST r2, [p0], #4; LT r24, r30, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "01010101" // /* MW 5 */ + 2734 "01100000" // /* MW 4 */ + 2735 "00111111" // /* MW 3 */ + 2736 "10001010" // /* MW 2 */ + 2737 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 224 33 first + 2738 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2739 "00101110" // /* MW 3 */ + 2740 "00011100" // /* MW 2 */ + 2741 "00000001" // /* MW 1 */ + 2742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2743 "00000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 224 31 +.src_ref 2 "reduce_base_c8.h" 318 64 + 2754 "00000010" // ST el0, [p0], #4; MOV r31, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2755 "01110000" // /* MW 7 */ + 2756 "00001110" // /* MW 6 */ + 2757 "11110000" // /* MW 5 */ + 2758 "00000011" // /* MW 4 */ + 2759 "00110000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 225 34 first + 2762 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2763 "00001110" // /* MW 3 */ + 2764 "00000100" // /* MW 2 */ + 2765 "00000001" // /* MW 1 */ + 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2767 "00000000" // /* MW 1 */ + 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2769 "00000000" // /* MW 1 */ + 2770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2771 "00000000" // /* MW 1 */ + 2772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2773 "00000000" // /* MW 1 */ + 2774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2775 "00000000" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 225 32 +.src_ref 2 "reduce_base_c8.h" 318 64 + 2778 "00000010" // ST eh0, [p0]; MOV r25, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2779 "01110000" // /* MW 7 */ + 2780 "10001110" // /* MW 6 */ + 2781 "00110000" // /* MW 5 */ + 2782 "00000011" // /* MW 4 */ + 2783 "00110000" // /* MW 3 */ + 2784 "10000001" // /* MW 2 */ + 2785 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 226 32 first + 2786 "10011000" // LDA r30, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2787 "11010110" // /* MW 3 */ + 2788 "00010111" // /* MW 2 */ + 2789 "00000001" // /* MW 1 */ + 2790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2791 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2792 "10000100" // JNZ r24, #2912 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=2912 delay_slots=5 */ + 2793 "00000001" // /* MW 5 */ + 2794 "01000000" // /* MW 4 */ + 2795 "10110000" // /* MW 3 */ + 2796 "00000101" // /* MW 2 */ + 2797 "11000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 98 first +.delay_slot + 2798 "10011000" // LSHL r19, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2799 "00001101" // /* MW 3 */ + 2800 "00100111" // /* MW 2 */ + 2801 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 318 64 first +.delay_slot + 2802 "10011000" // MUL r25, r31, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2803 "10011111" // /* MW 3 */ + 2804 "11110011" // /* MW 2 */ + 2805 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 300 41 first +.src_ref 2 "reduce_base_c8.h" 305 77 first +.delay_slot + 2806 "00100100" // LSHL r20, r20, r5; ADD.NC r5, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "11111111" // /* MW 5 */ + 2808 "10110001" // /* MW 4 */ + 2809 "10110010" // /* MW 3 */ + 2810 "00001011" // /* MW 2 */ + 2811 "10100101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 280 76 first +.delay_slot + 2812 "10011000" // LSHL r16, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00001101" // /* MW 3 */ + 2814 "00100001" // /* MW 2 */ + 2815 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 226 30 first +.src_ref 2 "reduce_base_c8.h" 318 88 first +.delay_slot + 2816 "01011100" // ST r30, [p0, #4]; MUL r31, r25, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "11011111" // /* MW 5 */ + 2818 "11111111" // /* MW 4 */ + 2819 "00111100" // /* MW 3 */ + 2820 "11111010" // /* MW 2 */ + 2821 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2822 "00011000" // MOVX r28, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00000101" // /* MW 3 */ + 2824 "00111000" // /* MW 2 */ + 2825 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2826 "10011000" // EQ r28, r2, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "11000111" // /* MW 3 */ + 2828 "10111001" // /* MW 2 */ + 2829 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2830 "10000100" // JNZ r28, #4032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4032 delay_slots=5 */ + 2831 "00000001" // /* MW 5 */ + 2832 "01000000" // /* MW 4 */ + 2833 "11100000" // /* MW 3 */ + 2834 "00000111" // /* MW 2 */ + 2835 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2841 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2843 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 242 41 first +.delay_slot + 2844 "00011000" // ADD r22, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11111111" // /* MW 3 */ + 2846 "11101101" // /* MW 2 */ + 2847 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2848 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00001001" // /* MW 3 */ + 2850 "00100010" // /* MW 2 */ + 2851 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2852 "10011000" // EQ r17, r17, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00100111" // /* MW 3 */ + 2854 "01100010" // /* MW 2 */ + 2855 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2856 "10000100" // JNZ r17, #3904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3904 delay_slots=5 */ + 2857 "00000001" // /* MW 5 */ + 2858 "01000000" // /* MW 4 */ + 2859 "10100000" // /* MW 3 */ + 2860 "00000111" // /* MW 2 */ + 2861 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2869 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.delay_slot + 2870 "00011000" // MOVX r7, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001101" // /* MW 3 */ + 2872 "00001110" // /* MW 2 */ + 2873 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2874 "10011000" // EQ r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00100111" // /* MW 3 */ + 2876 "11000100" // /* MW 2 */ + 2877 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2878 "10000100" // JNZ r2, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */ + 2879 "00000001" // /* MW 5 */ + 2880 "01000000" // /* MW 4 */ + 2881 "01010000" // /* MW 3 */ + 2882 "00000111" // /* MW 2 */ + 2883 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2893 "00000000" // /* MW 1 */ + 2894 "10000100" // J #3552 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3552 delay_slots=5 */ + 2895 "00000000" // /* MW 5 */ + 2896 "00000000" // /* MW 4 */ + 2897 "11110000" // /* MW 3 */ + 2898 "00000110" // /* MW 2 */ + 2899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 2900 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "00010001" // /* MW 3 */ + 2902 "00110100" // /* MW 2 */ + 2903 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2911 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336 +.src_ref 2 "reduce_base_c8.h" 236 8 + 2912 "00011000" // MOVX r29, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "00010101" // /* MW 3 */ + 2914 "00111010" // /* MW 2 */ + 2915 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2916 "10011000" // LT r24, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "00101010" // /* MW 3 */ + 2918 "01110000" // /* MW 2 */ + 2919 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2920 "10000100" // JNZ r24, #3232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3232 delay_slots=5 */ + 2921 "00000001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "01010000" // /* MW 3 */ + 2924 "00000110" // /* MW 2 */ + 2925 "11000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 316 38 +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 2934 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2935 "00010001" // /* MW 3 */ + 2936 "00110100" // /* MW 2 */ + 2937 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2938 "10011000" // EQ r17, r26, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2939 "00100111" // /* MW 3 */ + 2940 "10100010" // /* MW 2 */ + 2941 "00010110" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2942 "10000100" // JNZ r17, #3104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3104 delay_slots=5 */ + 2943 "00000001" // /* MW 5 */ + 2944 "01000000" // /* MW 4 */ + 2945 "00010000" // /* MW 3 */ + 2946 "00000110" // /* MW 2 */ + 2947 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2949 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2958 "10011000" // NE r2, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00101000" // /* MW 3 */ + 2960 "01000100" // /* MW 2 */ + 2961 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2962 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */ + 2963 "00000001" // /* MW 5 */ + 2964 "01000000" // /* MW 4 */ + 2965 "11110000" // /* MW 3 */ + 2966 "00000110" // /* MW 2 */ + 2967 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2969 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2977 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 286 44 first +.src_ref 2 "reduce_base_c8.h" 289 38 +.src_ref 2 "reduce_base_c8.h" 291 40 +.src_ref 2 "reduce_base_c8.h" 291 40 + 2978 "10111010" // ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2979 "01011000" // /* MW 9 */ + 2980 "11101100" // /* MW 8 */ + 2981 "00000111" // /* MW 7 */ + 2982 "00001000" // /* MW 6 */ + 2983 "00100010" // /* MW 5 */ + 2984 "00000000" // /* MW 4 */ + 2985 "11100000" // /* MW 3 */ + 2986 "11010110" // /* MW 2 */ + 2987 "10000011" // /* MW 1 */ + 2988 "11111000" // MOV r30, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "10100000" // /* MW 3 */ + 2990 "10011100" // /* MW 2 */ + 2991 "00011111" // /* MW 1 */ + 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2993 "00000000" // /* MW 1 */ + 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2995 "00000000" // /* MW 1 */ + 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2997 "00000000" // /* MW 1 */ + 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2999 "00000000" // /* MW 1 */ + 3000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3001 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 287 38 first + 3002 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3003 "11110111" // /* MW 3 */ + 3004 "00011100" // /* MW 2 */ + 3005 "00000100" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 288 39 first + 3018 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "11110111" // /* MW 3 */ + 3020 "00011110" // /* MW 2 */ + 3021 "00000100" // /* MW 1 */ + 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3023 "00000000" // /* MW 1 */ + 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3025 "00000000" // /* MW 1 */ + 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3027 "00000000" // /* MW 1 */ + 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3031 "00000000" // /* MW 1 */ + 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3033 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 289 38 first + 3034 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "01010111" // /* MW 3 */ + 3036 "00011100" // /* MW 2 */ + 3037 "00000100" // /* MW 1 */ + 3038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3039 "00000000" // /* MW 1 */ + 3040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3041 "00000000" // /* MW 1 */ + 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3043 "00000000" // /* MW 1 */ + 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3045 "00000000" // /* MW 1 */ + 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3047 "00000000" // /* MW 1 */ + 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3049 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 39 first + 3050 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3051 "00110111" // /* MW 3 */ + 3052 "00011100" // /* MW 2 */ + 3053 "00000100" // /* MW 1 */ + 3054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3055 "00000000" // /* MW 1 */ + 3056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3057 "00000000" // /* MW 1 */ + 3058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3059 "00000000" // /* MW 1 */ + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ + 3062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3063 "00000000" // /* MW 1 */ + 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3065 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 291 40 first + 3066 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "01010111" // /* MW 3 */ + 3068 "00001000" // /* MW 2 */ + 3069 "00000100" // /* MW 1 */ + 3070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3071 "00000000" // /* MW 1 */ + 3072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3073 "00000000" // /* MW 1 */ + 3074 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3075 "00000000" // /* MW 5 */ + 3076 "00000000" // /* MW 4 */ + 3077 "11101000" // /* MW 3 */ + 3078 "00000110" // /* MW 2 */ + 3079 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3081 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3085 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 292 38 first +.delay_slot + 3086 "10011000" // ST r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3087 "01010001" // /* MW 3 */ + 3088 "00000110" // /* MW 2 */ + 3089 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 293 38 first +.delay_slot + 3090 "00101110" // NOPA; ST r6, [p4, #4]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3091 "00011100" // /* MW 13 */ + 3092 "00000000" // /* MW 12 */ + 3093 "00000000" // /* MW 11 */ + 3094 "01010111" // /* MW 10 */ + 3095 "00011010" // /* MW 9 */ + 3096 "01000000" // /* MW 8 */ + 3097 "00000000" // /* MW 7 */ + 3098 "00000000" // /* MW 6 */ + 3099 "10100011" // /* MW 5 */ + 3100 "00101001" // /* MW 4 */ + 3101 "11111000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528 +.src_ref 2 "reduce_base_c8.h" 274 44 first +.src_ref 2 "reduce_base_c8.h" 275 40 +.src_ref 2 "reduce_base_c8.h" 275 40 + 3104 "10111010" // ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00010000" // /* MW 8 */ + 3107 "01001000" // /* MW 7 */ + 3108 "10101000" // /* MW 6 */ + 3109 "01100111" // /* MW 5 */ + 3110 "00111110" // /* MW 4 */ + 3111 "11100000" // /* MW 3 */ + 3112 "10010010" // /* MW 2 */ + 3113 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 40 first +.src_ref 2 "reduce_base_c8.h" 279 40 + 3114 "10111010" // MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01111000" // /* MW 9 */ + 3116 "00001110" // /* MW 8 */ + 3117 "11010000" // /* MW 7 */ + 3118 "00110011" // /* MW 6 */ + 3119 "00100010" // /* MW 5 */ + 3120 "00001100" // /* MW 4 */ + 3121 "10000000" // /* MW 3 */ + 3122 "10000000" // /* MW 2 */ + 3123 "11111101" // /* MW 1 */ + 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3125 "00000000" // /* MW 1 */ + 3126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3127 "00000000" // /* MW 1 */ + 3128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3129 "00000000" // /* MW 1 */ + 3130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3131 "00000000" // /* MW 1 */ + 3132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 38 + 3134 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3135 "01010111" // /* MW 3 */ + 3136 "00011100" // /* MW 2 */ + 3137 "00000100" // /* MW 1 */ + 3138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3139 "00000000" // /* MW 1 */ + 3140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3141 "00000000" // /* MW 1 */ + 3142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3143 "00000000" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 39 first + 3150 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "11110111" // /* MW 3 */ + 3152 "00011110" // /* MW 2 */ + 3153 "00000100" // /* MW 1 */ + 3154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3155 "00000000" // /* MW 1 */ + 3156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3157 "00000000" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 38 first +.src_ref 2 "reduce_base_c8.h" 277 38 first + 3166 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3167 "01010111" // /* MW 3 */ + 3168 "00011100" // /* MW 2 */ + 3169 "00000100" // /* MW 1 */ + 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3171 "00000000" // /* MW 1 */ + 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3173 "00000000" // /* MW 1 */ + 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3175 "00000000" // /* MW 1 */ + 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3177 "00000000" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 278 39 first + 3182 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3183 "10110111" // /* MW 3 */ + 3184 "00011100" // /* MW 2 */ + 3185 "00000100" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ + 3190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3191 "00000000" // /* MW 1 */ + 3192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3193 "00000000" // /* MW 1 */ + 3194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3195 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3197 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 279 40 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3198 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3199 "00110111" // /* MW 3 */ + 3200 "00001000" // /* MW 2 */ + 3201 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3203 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3205 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3206 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3207 "00000000" // /* MW 5 */ + 3208 "00000000" // /* MW 4 */ + 3209 "11101000" // /* MW 3 */ + 3210 "00000110" // /* MW 2 */ + 3211 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 279 40 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3212 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3213 "01000001" // /* MW 3 */ + 3214 "00000010" // /* MW 2 */ + 3215 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3217 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3219 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 281 38 first +.delay_slot + 3220 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3221 "01110001" // /* MW 3 */ + 3222 "00010100" // /* MW 2 */ + 3223 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 280 38 first +.delay_slot + 3224 "00000010" // ST r16, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3225 "01110000" // /* MW 7 */ + 3226 "10100101" // /* MW 6 */ + 3227 "00000001" // /* MW 5 */ + 3228 "00000000" // /* MW 4 */ + 3229 "00110000" // /* MW 3 */ + 3230 "11000010" // /* MW 2 */ + 3231 "10000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656 +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 301 40 +.src_ref 2 "reduce_base_c8.h" 302 76 + 3232 "00101100" // LDA r3, [sp, #-4]; MOVX r4, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3233 "00110010" // /* MW 5 */ + 3234 "00010000" // /* MW 4 */ + 3235 "00100000" // /* MW 3 */ + 3236 "10001110" // /* MW 2 */ + 3237 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 3238 "10011000" // EQ r4, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3239 "01000111" // /* MW 3 */ + 3240 "10001000" // /* MW 2 */ + 3241 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3242 "10000100" // JNZ r4, #3408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3408 delay_slots=5 */ + 3243 "00000001" // /* MW 5 */ + 3244 "01000000" // /* MW 4 */ + 3245 "10101000" // /* MW 3 */ + 3246 "00000110" // /* MW 2 */ + 3247 "00100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 298 44 +.src_ref 2 "reduce_base_c8.h" 303 40 +.src_ref 2 "reduce_base_c8.h" 310 44 +.src_ref 2 "reduce_base_c8.h" 311 38 +.delay_slot + 3248 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "01000001" // /* MW 3 */ + 3250 "00000010" // /* MW 2 */ + 3251 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3259 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3260 "00011000" // MOVX r3, #7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3261 "00011101" // /* MW 3 */ + 3262 "00000110" // /* MW 2 */ + 3263 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3264 "10011000" // NE r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3265 "00101000" // /* MW 3 */ + 3266 "11000100" // /* MW 2 */ + 3267 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3268 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */ + 3269 "00000001" // /* MW 5 */ + 3270 "01000000" // /* MW 4 */ + 3271 "11110000" // /* MW 3 */ + 3272 "00000110" // /* MW 2 */ + 3273 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3275 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3283 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 310 44 first +.src_ref 2 "reduce_base_c8.h" 312 41 first +.src_ref 2 "reduce_base_c8.h" 315 40 + 3284 "10111010" // ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3285 "01011000" // /* MW 9 */ + 3286 "11101100" // /* MW 8 */ + 3287 "00000111" // /* MW 7 */ + 3288 "11111000" // /* MW 6 */ + 3289 "00101111" // /* MW 5 */ + 3290 "00100110" // /* MW 4 */ + 3291 "11100000" // /* MW 3 */ + 3292 "10000110" // /* MW 2 */ + 3293 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 313 38 +.src_ref 2 "reduce_base_c8.h" 317 97 + 3294 "10111010" // MOVA r3, #-6; MOVXM dj0, #65536 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3295 "00010000" // /* MW 9 */ + 3296 "00000000" // /* MW 8 */ + 3297 "01000000" // /* MW 7 */ + 3298 "01000000" // /* MW 6 */ + 3299 "00000000" // /* MW 5 */ + 3300 "00000000" // /* MW 4 */ + 3301 "00000000" // /* MW 3 */ + 3302 "01000011" // /* MW 2 */ + 3303 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 315 40 +.src_ref 2 "reduce_base_c8.h" 317 97 first + 3304 "01100100" // LSHL r3, r28, r3; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3305 "00000001" // /* MW 5 */ + 3306 "00100000" // /* MW 4 */ + 3307 "10111100" // /* MW 3 */ + 3308 "11000111" // /* MW 2 */ + 3309 "11100000" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ + 3316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3317 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 311 38 first + 3318 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3319 "00110111" // /* MW 3 */ + 3320 "00011100" // /* MW 2 */ + 3321 "00000100" // /* MW 1 */ + 3322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3323 "00000000" // /* MW 1 */ + 3324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3325 "00000000" // /* MW 1 */ + 3326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3327 "00000000" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 39 first + 3334 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3335 "01010111" // /* MW 3 */ + 3336 "00011100" // /* MW 2 */ + 3337 "00000100" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ + 3342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3343 "00000000" // /* MW 1 */ + 3344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3345 "00000000" // /* MW 1 */ + 3346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3347 "00000000" // /* MW 1 */ + 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3349 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 313 38 first + 3350 "10011000" // ST dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "01000001" // /* MW 3 */ + 3352 "00011100" // /* MW 2 */ + 3353 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 315 40 first + 3354 "00011000" // ST.s16 r24, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00010111" // /* MW 3 */ + 3356 "00001011" // /* MW 2 */ + 3357 "00000100" // /* MW 1 */ + 3358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3359 "00000000" // /* MW 1 */ + 3360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3361 "00000000" // /* MW 1 */ + 3362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3363 "00000000" // /* MW 1 */ + 3364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3365 "00000000" // /* MW 1 */ + 3366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3367 "00000000" // /* MW 1 */ + 3368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3369 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 316 38 first + 3370 "10011000" // ST r26, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3371 "01010001" // /* MW 3 */ + 3372 "00000111" // /* MW 2 */ + 3373 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 317 38 first + 3374 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3375 "01110001" // /* MW 3 */ + 3376 "00010100" // /* MW 2 */ + 3377 "00001100" // /* MW 1 */ + 3378 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3379 "00000000" // /* MW 5 */ + 3380 "00000000" // /* MW 4 */ + 3381 "11101000" // /* MW 3 */ + 3382 "00000110" // /* MW 2 */ + 3383 "00000000" // /* MW 1 */ +.delay_slot + 3384 "11111000" // MOV r30, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3385 "10100000" // /* MW 3 */ + 3386 "10011111" // /* MW 2 */ + 3387 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3394 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3395 "00011100" // /* MW 13 */ + 3396 "00000000" // /* MW 12 */ + 3397 "00000000" // /* MW 11 */ + 3398 "01010111" // /* MW 10 */ + 3399 "00011010" // /* MW 9 */ + 3400 "01000000" // /* MW 8 */ + 3401 "00000000" // /* MW 7 */ + 3402 "00000000" // /* MW 6 */ + 3403 "10110110" // /* MW 5 */ + 3404 "00000010" // /* MW 4 */ + 3405 "11110000" // /* MW 3 */ + 3406 "00101100" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832 +.src_ref 2 "reduce_base_c8.h" 298 44 first +.src_ref 2 "reduce_base_c8.h" 301 40 +.src_ref 2 "reduce_base_c8.h" 301 40 first + 3408 "10111010" // ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3409 "01011000" // /* MW 9 */ + 3410 "00010000" // /* MW 8 */ + 3411 "01001000" // /* MW 7 */ + 3412 "01110000" // /* MW 6 */ + 3413 "00101011" // /* MW 5 */ + 3414 "00000110" // /* MW 4 */ + 3415 "11100000" // /* MW 3 */ + 3416 "10000110" // /* MW 2 */ + 3417 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 76 +.src_ref 2 "reduce_base_c8.h" 303 40 +.src_ref 2 "reduce_base_c8.h" 306 62 + 3418 "10111010" // MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3419 "01111000" // /* MW 9 */ + 3420 "00001110" // /* MW 8 */ + 3421 "11010000" // /* MW 7 */ + 3422 "10101000" // /* MW 6 */ + 3423 "01000111" // /* MW 5 */ + 3424 "00111110" // /* MW 4 */ + 3425 "10000000" // /* MW 3 */ + 3426 "10000000" // /* MW 2 */ + 3427 "11111101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 76 first + 3428 "10011000" // LSHL r4, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3429 "01001101" // /* MW 3 */ + 3430 "11001000" // /* MW 2 */ + 3431 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 41 +.src_ref 2 "reduce_base_c8.h" 306 62 first + 3432 "00100100" // MUL r30, r30, r6; ADD.NC r3, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3433 "11111111" // /* MW 5 */ + 3434 "10100100" // /* MW 4 */ + 3435 "11110001" // /* MW 3 */ + 3436 "10001101" // /* MW 2 */ + 3437 "11110111" // /* MW 1 */ + 3438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3439 "00000000" // /* MW 1 */ + 3440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3441 "00000000" // /* MW 1 */ + 3442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3443 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 299 38 first + 3444 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3445 "01110111" // /* MW 3 */ + 3446 "00011111" // /* MW 2 */ + 3447 "00000100" // /* MW 1 */ + 3448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3449 "00000000" // /* MW 1 */ + 3450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3451 "00000000" // /* MW 1 */ + 3452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3453 "00000000" // /* MW 1 */ + 3454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3455 "00000000" // /* MW 1 */ + 3456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3457 "00000000" // /* MW 1 */ + 3458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3459 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 300 39 first + 3460 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3461 "10110111" // /* MW 3 */ + 3462 "00011100" // /* MW 2 */ + 3463 "00000100" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ + 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3473 "00000000" // /* MW 1 */ + 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3475 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 301 38 first + 3476 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "01010111" // /* MW 3 */ + 3478 "00011100" // /* MW 2 */ + 3479 "00000100" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 39 first + 3492 "00011000" // ST.s16 r3, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "01110111" // /* MW 3 */ + 3494 "00011100" // /* MW 2 */ + 3495 "00000100" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ + 3506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3507 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 303 40 first + 3508 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3509 "00110111" // /* MW 3 */ + 3510 "00001000" // /* MW 2 */ + 3511 "00000100" // /* MW 1 */ + 3512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3513 "00000000" // /* MW 1 */ + 3514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3515 "00000000" // /* MW 1 */ + 3516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3517 "00000000" // /* MW 1 */ + 3518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3519 "00000000" // /* MW 1 */ + 3520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3521 "00000000" // /* MW 1 */ + 3522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3523 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 304 38 first + 3524 "10011000" // ST r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3525 "00110001" // /* MW 3 */ + 3526 "00000110" // /* MW 2 */ + 3527 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 305 38 first + 3528 "00000010" // ST r20, [p4, #4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3529 "01110000" // /* MW 7 */ + 3530 "10100101" // /* MW 6 */ + 3531 "00000001" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "00110000" // /* MW 3 */ + 3534 "11010010" // /* MW 2 */ + 3535 "10000010" // /* MW 1 */ +.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 3536 "10111000" // MOV dj0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "01000000" // /* MW 3 */ + 3538 "10000000" // /* MW 2 */ + 3539 "00011000" // /* MW 1 */ + 3540 "00110110" // ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3541 "10000001" // /* MW 11 */ + 3542 "10101101" // /* MW 10 */ + 3543 "00000000" // /* MW 9 */ + 3544 "00000000" // /* MW 8 */ + 3545 "00000000" // /* MW 7 */ + 3546 "00000000" // /* MW 6 */ + 3547 "00100000" // /* MW 5 */ + 3548 "00000000" // /* MW 4 */ + 3549 "11100000" // /* MW 3 */ + 3550 "01111010" // /* MW 2 */ + 3551 "01100000" // /* MW 1 */ +.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.src_ref 2 "reduce_base_c8.h" 326 79 first +.src_ref 2 "reduce_base_c8.h" 329 51 + 3552 "00010100" // MOVA m2, #24; ADD.NC p0, r0, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3553 "00011110" // /* MW 5 */ + 3554 "11000000" // /* MW 4 */ + 3555 "10000000" // /* MW 3 */ + 3556 "00001000" // /* MW 2 */ + 3557 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.src_ref 2 "reduce_base_c8.h" 330 26 +.src_ref 3 "reduce_mean_c8_impl.h" 139 51 first + 3558 "10111010" // LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "01011000" // /* MW 9 */ + 3560 "11100010" // /* MW 8 */ + 3561 "00000111" // /* MW 7 */ + 3562 "00001000" // /* MW 6 */ + 3563 "00000010" // /* MW 5 */ + 3564 "00000000" // /* MW 4 */ + 3565 "11010000" // /* MW 3 */ + 3566 "10001010" // /* MW 2 */ + 3567 "01000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 331 24 +.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first + 3568 "01010100" // LDA.s16 r3, [p2]; MOV m1, #38 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3569 "10011001" // /* MW 5 */ + 3570 "00000000" // /* MW 4 */ + 3571 "01010010" // /* MW 3 */ + 3572 "10001110" // /* MW 2 */ + 3573 "01000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 141 49 first + 3574 "10011000" // LDA r1, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3575 "00110110" // /* MW 3 */ + 3576 "00010100" // /* MW 2 */ + 3577 "00000010" // /* MW 1 */ + 3578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3579 "00000000" // /* MW 1 */ + 3580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3581 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 326 28 first + 3582 "00011000" // ST.s16 r31, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3583 "11110111" // /* MW 3 */ + 3584 "00101111" // /* MW 2 */ + 3585 "00000000" // /* MW 1 */ + 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3587 "00000000" // /* MW 1 */ + 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3589 "00000000" // /* MW 1 */ + 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3591 "00000000" // /* MW 1 */ + 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3593 "00000000" // /* MW 1 */ + 3594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3595 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3597 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 327 31 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3598 "00011000" // ST.s16 r24, [p0], #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3599 "00010111" // /* MW 3 */ + 3600 "01011111" // /* MW 2 */ + 3601 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3603 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3609 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 327 31 +.src_ref 2 "reduce_base_c8.h" 328 23 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3610 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00000001" // /* MW 3 */ + 3612 "00110000" // /* MW 2 */ + 3613 "00010000" // /* MW 1 */ + 3614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3615 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 328 23 first + 3616 "00011000" // ST.s16 r24, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00010111" // /* MW 3 */ + 3618 "11001111" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 51 first + 3620 "10011000" // LDA.u16 r4, [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "10011010" // /* MW 3 */ + 3622 "01001000" // /* MW 2 */ + 3623 "00000000" // /* MW 1 */ + 3624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3625 "00000000" // /* MW 1 */ + 3626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3627 "00000000" // /* MW 1 */ + 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3629 "00000000" // /* MW 1 */ + 3630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3633 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 28 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3634 "00011000" // ST.s16 r0, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3635 "00010111" // /* MW 3 */ + 3636 "11111100" // /* MW 2 */ + 3637 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.src_ref 2 "reduce_base_c8.h" 330 28 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3638 "00100100" // LSHL r4, r4, r26; ADD.NC r5, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3639 "11111111" // /* MW 5 */ + 3640 "10100100" // /* MW 4 */ + 3641 "10110010" // /* MW 3 */ + 3642 "00110101" // /* MW 2 */ + 3643 "00100001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3644 "10011000" // SUB r0, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3645 "01000001" // /* MW 3 */ + 3646 "00000000" // /* MW 2 */ + 3647 "00010000" // /* MW 1 */ + 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3649 "00000000" // /* MW 1 */ + 3650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3651 "00000000" // /* MW 1 */ + 3652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3653 "00000000" // /* MW 1 */ + 3654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3655 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 330 26 first + 3656 "00011000" // ST.s16 r5, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3657 "10110111" // /* MW 3 */ + 3658 "00001000" // /* MW 2 */ + 3659 "00000000" // /* MW 1 */ + 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3661 "00000000" // /* MW 1 */ + 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3663 "00000000" // /* MW 1 */ + 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3665 "00000000" // /* MW 1 */ + 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3667 "00000000" // /* MW 1 */ + 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3669 "00000000" // /* MW 1 */ + 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3671 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 331 24 first + 3672 "00011000" // ST.s16 r19, [p0], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3673 "01110111" // /* MW 3 */ + 3674 "00101010" // /* MW 2 */ + 3675 "00000000" // /* MW 1 */ + 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3677 "00000000" // /* MW 1 */ + 3678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3679 "00000000" // /* MW 1 */ + 3680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3681 "00000000" // /* MW 1 */ + 3682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3683 "00000000" // /* MW 1 */ + 3684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3685 "00000000" // /* MW 1 */ + 3686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3687 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 139 40 first + 3688 "00011000" // ST.s8 r2, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3689 "01000111" // /* MW 3 */ + 3690 "11101100" // /* MW 2 */ + 3691 "00000000" // /* MW 1 */ + 3692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3693 "00000000" // /* MW 1 */ + 3694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3695 "00000000" // /* MW 1 */ + 3696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3697 "00000000" // /* MW 1 */ + 3698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3699 "00000000" // /* MW 1 */ + 3700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3701 "00000000" // /* MW 1 */ + 3702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3703 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first + 3704 "00011000" // ST.s16 r3, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3705 "01110111" // /* MW 3 */ + 3706 "00000100" // /* MW 2 */ + 3707 "00000000" // /* MW 1 */ + 3708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3709 "00000000" // /* MW 1 */ + 3710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3711 "00000000" // /* MW 1 */ + 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3713 "00000000" // /* MW 1 */ + 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3715 "00000000" // /* MW 1 */ + 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3717 "00000000" // /* MW 1 */ + 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3719 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 141 38 first + 3720 "00011000" // ST.s8 r1, [p0, #-2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3721 "00100111" // /* MW 3 */ + 3722 "11100100" // /* MW 2 */ + 3723 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 349 4 first + 3724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3725 "00000000" // /* MW 3 */ + 3726 "00101000" // /* MW 2 */ + 3727 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 349 4 +.delay_slot + 3728 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3729 "00000001" // /* MW 5 */ + 3730 "00000000" // /* MW 4 */ + 3731 "00000000" // /* MW 3 */ + 3732 "11111000" // /* MW 2 */ + 3733 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3735 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3737 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3740 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3741 "01100111" // /* MW 3 */ + 3742 "00000001" // /* MW 2 */ + 3743 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168 +.src_ref 2 "reduce_base_c8.h" 262 44 first +.src_ref 2 "reduce_base_c8.h" 263 77 + 3744 "10111010" // ST.s16 r21, [p4], #2; MOVXM r5, #65512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3745 "00010000" // /* MW 9 */ + 3746 "11110100" // /* MW 8 */ + 3747 "10101111" // /* MW 7 */ + 3748 "00111100" // /* MW 6 */ + 3749 "00000000" // /* MW 5 */ + 3750 "00000000" // /* MW 4 */ + 3751 "11100000" // /* MW 3 */ + 3752 "11010110" // /* MW 2 */ + 3753 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 56 +.src_ref 2 "reduce_base_c8.h" 263 77 first +.src_ref 2 "reduce_base_c8.h" 267 40 + 3754 "10111010" // LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3755 "01011000" // /* MW 9 */ + 3756 "11101100" // /* MW 8 */ + 3757 "00000111" // /* MW 7 */ + 3758 "00000100" // /* MW 6 */ + 3759 "01111101" // /* MW 5 */ + 3760 "00001010" // /* MW 4 */ + 3761 "00100000" // /* MW 3 */ + 3762 "10001010" // /* MW 2 */ + 3763 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 118 +.src_ref 2 "reduce_base_c8.h" 329 30 + 3764 "10111010" // MOVA r26, #4; MOVXM r6, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3765 "10010000" // /* MW 9 */ + 3766 "11111111" // /* MW 8 */ + 3767 "11001111" // /* MW 7 */ + 3768 "00111100" // /* MW 6 */ + 3769 "00000000" // /* MW 5 */ + 3770 "00000000" // /* MW 4 */ + 3771 "00000000" // /* MW 3 */ + 3772 "10011010" // /* MW 2 */ + 3773 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 118 first + 3774 "10011000" // ADD r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3775 "01100000" // /* MW 3 */ + 3776 "11100010" // /* MW 2 */ + 3777 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 98 +.src_ref 2 "reduce_base_c8.h" 267 116 first + 3778 "00011000" // MAC r29, r29, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3779 "01000110" // /* MW 3 */ + 3780 "01111010" // /* MW 2 */ + 3781 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 60 first +.src_ref 2 "reduce_base_c8.h" 265 98 first + 3782 "00011000" // MSC r21, r21, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3783 "01001110" // /* MW 3 */ + 3784 "01101010" // /* MW 2 */ + 3785 "00010100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3787 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 38 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3788 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3789 "01010111" // /* MW 3 */ + 3790 "00011100" // /* MW 2 */ + 3791 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 56 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3792 "10011000" // MUL r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3793 "00101111" // /* MW 3 */ + 3794 "11000100" // /* MW 2 */ + 3795 "00010001" // /* MW 1 */ + 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3797 "00000000" // /* MW 1 */ + 3798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3799 "00000000" // /* MW 1 */ + 3800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3801 "00000000" // /* MW 1 */ + 3802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3803 "00000000" // /* MW 1 */ + 3804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3805 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 264 39 first + 3806 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3807 "11010111" // /* MW 3 */ + 3808 "00011110" // /* MW 2 */ + 3809 "00000100" // /* MW 1 */ + 3810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3811 "00000000" // /* MW 1 */ + 3812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3813 "00000000" // /* MW 1 */ + 3814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3815 "00000000" // /* MW 1 */ + 3816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3817 "00000000" // /* MW 1 */ + 3818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 38 first + 3822 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3823 "10110111" // /* MW 3 */ + 3824 "00011110" // /* MW 2 */ + 3825 "00000100" // /* MW 1 */ + 3826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3827 "00000000" // /* MW 1 */ + 3828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3829 "00000000" // /* MW 1 */ + 3830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3831 "00000000" // /* MW 1 */ + 3832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3833 "00000000" // /* MW 1 */ + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ + 3836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3837 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 266 39 first + 3838 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3839 "00110111" // /* MW 3 */ + 3840 "00011100" // /* MW 2 */ + 3841 "00000100" // /* MW 1 */ + 3842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3843 "00000000" // /* MW 1 */ + 3844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3845 "00000000" // /* MW 1 */ + 3846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3847 "00000000" // /* MW 1 */ + 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3849 "00000000" // /* MW 1 */ + 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3851 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3853 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 40 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3854 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3855 "01010111" // /* MW 3 */ + 3856 "00001000" // /* MW 2 */ + 3857 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3859 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3862 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3863 "00000000" // /* MW 5 */ + 3864 "00000000" // /* MW 4 */ + 3865 "11101000" // /* MW 3 */ + 3866 "00000110" // /* MW 2 */ + 3867 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 42 +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3868 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3869 "01000001" // /* MW 3 */ + 3870 "00001010" // /* MW 2 */ + 3871 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 42 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3872 "10011000" // SUB r2, r5, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3873 "11010001" // /* MW 3 */ + 3874 "01000101" // /* MW 2 */ + 3875 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 270 64 +.delay_slot + 3876 "11111000" // MOV r6, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3877 "00011100" // /* MW 3 */ + 3878 "10100001" // /* MW 2 */ + 3879 "00011001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 268 38 first +.delay_slot + 3880 "00000010" // ST r3, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3881 "01110000" // /* MW 7 */ + 3882 "10100101" // /* MW 6 */ + 3883 "00000001" // /* MW 5 */ + 3884 "00000000" // /* MW 4 */ + 3885 "00110000" // /* MW 3 */ + 3886 "10001110" // /* MW 2 */ + 3887 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 269 38 first +.src_ref 2 "reduce_base_c8.h" 270 64 first +.delay_slot + 3888 "11100001" // NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3889 "00000000" // /* MW 15 */ + 3890 "00000000" // /* MW 14 */ + 3891 "01111000" // /* MW 13 */ + 3892 "10100101" // /* MW 12 */ + 3893 "00000001" // /* MW 11 */ + 3894 "01111100" // /* MW 10 */ + 3895 "11100011" // /* MW 9 */ + 3896 "10111101" // /* MW 8 */ + 3897 "00010001" // /* MW 7 */ + 3898 "00010110" // /* MW 6 */ + 3899 "00100100" // /* MW 5 */ + 3900 "00000000" // /* MW 4 */ + 3901 "11110000" // /* MW 3 */ + 3902 "00101100" // /* MW 2 */ + 3903 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328 +.src_ref 2 "reduce_base_c8.h" 250 44 +.src_ref 2 "reduce_base_c8.h" 250 44 first +.src_ref 2 "reduce_base_c8.h" 255 40 + 3904 "10111010" // ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "01011000" // /* MW 9 */ + 3906 "11101100" // /* MW 8 */ + 3907 "00000111" // /* MW 7 */ + 3908 "00001000" // /* MW 6 */ + 3909 "01000010" // /* MW 5 */ + 3910 "00000000" // /* MW 4 */ + 3911 "11100000" // /* MW 3 */ + 3912 "10010010" // /* MW 2 */ + 3913 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 255 113 first + 3914 "10111010" // LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "01011000" // /* MW 9 */ + 3916 "00001000" // /* MW 8 */ + 3917 "01001000" // /* MW 7 */ + 3918 "01110000" // /* MW 6 */ + 3919 "00101101" // /* MW 5 */ + 3920 "00000110" // /* MW 4 */ + 3921 "00100000" // /* MW 3 */ + 3922 "10000110" // /* MW 2 */ + 3923 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 329 30 + 3924 "01100100" // MOVX r3, #16; MOV r26, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3925 "00010001" // /* MW 5 */ + 3926 "00100000" // /* MW 4 */ + 3927 "00101101" // /* MW 3 */ + 3928 "11001000" // /* MW 2 */ + 3929 "00000000" // /* MW 1 */ + 3930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3931 "00000000" // /* MW 1 */ + 3932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3933 "00000000" // /* MW 1 */ + 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3935 "00000000" // /* MW 1 */ + 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3937 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 251 38 first + 3938 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3939 "01110111" // /* MW 3 */ + 3940 "00011111" // /* MW 2 */ + 3941 "00000100" // /* MW 1 */ + 3942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3943 "00000000" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ + 3948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3949 "00000000" // /* MW 1 */ + 3950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3951 "00000000" // /* MW 1 */ + 3952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 252 39 first + 3954 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10110111" // /* MW 3 */ + 3956 "00011100" // /* MW 2 */ + 3957 "00000100" // /* MW 1 */ + 3958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3959 "00000000" // /* MW 1 */ + 3960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3961 "00000000" // /* MW 1 */ + 3962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3963 "00000000" // /* MW 1 */ + 3964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3965 "00000000" // /* MW 1 */ + 3966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3967 "00000000" // /* MW 1 */ + 3968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3969 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 253 38 first + 3970 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3971 "01110111" // /* MW 3 */ + 3972 "00011111" // /* MW 2 */ + 3973 "00000100" // /* MW 1 */ + 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3975 "00000000" // /* MW 1 */ + 3976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3977 "00000000" // /* MW 1 */ + 3978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3979 "00000000" // /* MW 1 */ + 3980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3981 "00000000" // /* MW 1 */ + 3982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3983 "00000000" // /* MW 1 */ + 3984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 254 39 first + 3986 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3987 "11010111" // /* MW 3 */ + 3988 "00011110" // /* MW 2 */ + 3989 "00000100" // /* MW 1 */ + 3990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3991 "00000000" // /* MW 1 */ + 3992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3993 "00000000" // /* MW 1 */ + 3994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3995 "00000000" // /* MW 1 */ + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ + 3998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3999 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 4000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4001 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 40 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 4002 "00011000" // ST.s16 r3, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4003 "01110111" // /* MW 3 */ + 4004 "00001000" // /* MW 2 */ + 4005 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4009 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4010 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 4011 "00000000" // /* MW 5 */ + 4012 "00000000" // /* MW 4 */ + 4013 "11101000" // /* MW 3 */ + 4014 "00000110" // /* MW 2 */ + 4015 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.delay_slot +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4016 "00011000" // MAC r3, r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4017 "00100110" // /* MW 3 */ + 4018 "01000110" // /* MW 2 */ + 4019 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4023 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 256 38 first +.delay_slot + 4024 "10011000" // ST r6, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4025 "11010001" // /* MW 3 */ + 4026 "00000100" // /* MW 2 */ + 4027 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 257 38 first +.delay_slot + 4028 "10011000" // ST r18, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4029 "01010001" // /* MW 3 */ + 4030 "00010110" // /* MW 2 */ + 4031 "00001100" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456 +.src_ref 2 "reduce_base_c8.h" 238 44 first + 4032 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4033 "10110111" // /* MW 3 */ + 4034 "00011110" // /* MW 2 */ + 4035 "00000100" // /* MW 1 */ + 4036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4037 "00000000" // /* MW 1 */ + 4038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4039 "00000000" // /* MW 1 */ + 4040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4041 "00000000" // /* MW 1 */ + 4042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4043 "00000000" // /* MW 1 */ + 4044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4045 "00000000" // /* MW 1 */ + 4046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4047 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 239 38 first + 4048 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4049 "11110111" // /* MW 3 */ + 4050 "00011100" // /* MW 2 */ + 4051 "00000100" // /* MW 1 */ + 4052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4053 "00000000" // /* MW 1 */ + 4054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4055 "00000000" // /* MW 1 */ + 4056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4057 "00000000" // /* MW 1 */ + 4058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4059 "00000000" // /* MW 1 */ + 4060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4061 "00000000" // /* MW 1 */ + 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4063 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 240 39 first + 4064 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4065 "11110111" // /* MW 3 */ + 4066 "00011110" // /* MW 2 */ + 4067 "00000100" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4073 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 94 + 4074 "00011000" // LDA r3, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4075 "01110001" // /* MW 3 */ + 4076 "11111100" // /* MW 2 */ + 4077 "00000111" // /* MW 1 */ + 4078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4079 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 4080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4081 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 38 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 4082 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4083 "00110111" // /* MW 3 */ + 4084 "00011100" // /* MW 2 */ + 4085 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4087 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4088 "01000100" // MOVXM r1, #65504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4089 "11000000" // /* MW 5 */ + 4090 "10111111" // /* MW 4 */ + 4091 "11110000" // /* MW 3 */ + 4092 "00000000" // /* MW 2 */ + 4093 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4094 "10011000" // ADD r2, r1, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4095 "10100000" // /* MW 3 */ + 4096 "01000101" // /* MW 2 */ + 4097 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 94 +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4098 "01100100" // MAC r1, r1, r3, r2; MOV r1, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4099 "01000001" // /* MW 5 */ + 4100 "10100000" // /* MW 4 */ + 4101 "11000000" // /* MW 3 */ + 4102 "01000100" // /* MW 2 */ + 4103 "00011000" // /* MW 1 */ + 4104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4105 "00000000" // /* MW 1 */ + 4106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4107 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 242 39 first + 4108 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4109 "11010111" // /* MW 3 */ + 4110 "00011110" // /* MW 2 */ + 4111 "00000100" // /* MW 1 */ + 4112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4113 "00000000" // /* MW 1 */ + 4114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4115 "00000000" // /* MW 1 */ + 4116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4117 "00000000" // /* MW 1 */ + 4118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4119 "00000000" // /* MW 1 */ + 4120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4121 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 40 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 4122 "10111000" // MOV m0, #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4123 "11011000" // /* MW 3 */ + 4124 "00001111" // /* MW 2 */ + 4125 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 40 first +.aggressive_scheduled_block_id 8 +.noswbrkpt + 4126 "00011000" // ST.s16 r5, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4127 "10110111" // /* MW 3 */ + 4128 "00001000" // /* MW 2 */ + 4129 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4131 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4133 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4134 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 4135 "00000000" // /* MW 5 */ + 4136 "00000000" // /* MW 4 */ + 4137 "11101000" // /* MW 3 */ + 4138 "00000110" // /* MW 2 */ + 4139 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 42 +.src_ref 2 "reduce_base_c8.h" 243 42 +.src_ref 2 "reduce_base_c8.h" 243 91 +.src_ref 2 "reduce_base_c8.h" 243 91 +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4140 "01100100" // MSC r5, r5, r22, r4; MOV r5, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4141 "01000001" // /* MW 5 */ + 4142 "10100000" // /* MW 4 */ + 4143 "11000010" // /* MW 3 */ + 4144 "01001001" // /* MW 2 */ + 4145 "10110001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4149 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 244 38 first +.delay_slot + 4150 "10011000" // ST r20, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "10010001" // /* MW 3 */ + 4152 "00000110" // /* MW 2 */ + 4153 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 245 38 first +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 4154 "00111010" // ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4155 "01111001" // /* MW 9 */ + 4156 "10001110" // /* MW 8 */ + 4157 "11010000" // /* MW 7 */ + 4158 "10001011" // /* MW 6 */ + 4159 "10100000" // /* MW 5 */ + 4160 "00000001" // /* MW 4 */ + 4161 "00110000" // /* MW 3 */ + 4162 "11000110" // /* MW 2 */ +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0 + 4163 "10000010" // /* MW 1 */ +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0 +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.function pad_3d<(pad_3d_mode)0, bfloat16, 1> _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.src_ref 3 "pad_3d.h" 266 first +.src_ref 3 "pad_3d.h" 465 37 first +.src_ref 3 "pad_3d.h" 468 21 first +.src_ref 3 "pad_3d.h" 471 29 +.src_ref 3 "pad_3d.h" 479 21 +.function_start + 4176 "10111010" // LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4177 "01011000" // /* MW 9 */ + 4178 "11101000" // /* MW 8 */ + 4179 "10000111" // /* MW 7 */ + 4180 "11001000" // /* MW 6 */ + 4181 "01000111" // /* MW 5 */ + 4182 "00111110" // /* MW 4 */ + 4183 "11010000" // /* MW 3 */ + 4184 "10000010" // /* MW 2 */ + 4185 "01000010" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 469 21 first +.src_ref 3 "pad_3d.h" 478 21 +.src_ref 3 "pad_3d.h" 499 52 +.src_ref 3 "pad_3d.h" 511 25 + 4186 "10111010" // LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4187 "01011000" // /* MW 9 */ + 4188 "00000110" // /* MW 8 */ + 4189 "00001000" // /* MW 7 */ + 4190 "10101010" // /* MW 6 */ + 4191 "00100111" // /* MW 5 */ + 4192 "00111110" // /* MW 4 */ + 4193 "11010000" // /* MW 3 */ + 4194 "10000110" // /* MW 2 */ + 4195 "01000101" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 470 21 first +.src_ref 3 "pad_3d.h" 486 26 +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 26 +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 517 22 + 4196 "10111010" // LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4197 "01111000" // /* MW 9 */ + 4198 "01100000" // /* MW 8 */ + 4199 "01101000" // /* MW 7 */ + 4200 "00001000" // /* MW 6 */ + 4201 "10000000" // /* MW 5 */ + 4202 "00000001" // /* MW 4 */ + 4203 "11010000" // /* MW 3 */ + 4204 "10010110" // /* MW 2 */ + 4205 "01001111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 471 29 first + 4206 "10011000" // LDA.s16 r18, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4207 "01010010" // /* MW 3 */ + 4208 "00101010" // /* MW 2 */ + 4209 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 472 25 first + 4210 "10011000" // LDA r6, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "11010110" // /* MW 3 */ + 4212 "00011100" // /* MW 2 */ + 4213 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 473 26 first + 4214 "10011000" // LDA r7, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "11110110" // /* MW 3 */ + 4216 "00101100" // /* MW 2 */ + 4217 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 475 24 first + 4218 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4219 "00110110" // /* MW 3 */ + 4220 "00000110" // /* MW 2 */ + 4221 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 479 21 first + 4222 "10011000" // ASHL r19, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4223 "01001110" // /* MW 3 */ + 4224 "00100110" // /* MW 2 */ + 4225 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 477 23 first + 4226 "10011000" // LDA r4, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4227 "10010110" // /* MW 3 */ + 4228 "00100100" // /* MW 2 */ + 4229 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 478 21 first + 4230 "10011000" // ASHL r20, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4231 "00101110" // /* MW 3 */ + 4232 "01101000" // /* MW 2 */ + 4233 "00010001" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 56 25 first + 4234 "11111000" // VBCST.16 x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4235 "01110010" // /* MW 3 */ + 4236 "01001001" // /* MW 2 */ + 4237 "00011000" // /* MW 1 */ + 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4239 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 485 45 first + 4240 "10011000" // MUL r18, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4241 "01001111" // /* MW 3 */ + 4242 "11100101" // /* MW 2 */ + 4243 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 485 34 + 4244 "10011000" // SUB r19, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4245 "00010001" // /* MW 3 */ + 4246 "01100111" // /* MW 2 */ + 4247 "00010000" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 998 25 first + 4248 "10011000" // MUL r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4249 "00101111" // /* MW 3 */ + 4250 "11100111" // /* MW 2 */ + 4251 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 43 first + 4252 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4253 "00101111" // /* MW 3 */ + 4254 "01100011" // /* MW 2 */ + 4255 "00010100" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 first + 4256 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4257 "00001101" // /* MW 3 */ + 4258 "11100001" // /* MW 2 */ + 4259 "00010100" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 486 26 first + 4260 "10100100" // GE r16, r24, r17; ADD.NC p2, r3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4261 "10000010" // /* MW 5 */ + 4262 "11000011" // /* MW 4 */ + 4263 "00110100" // /* MW 3 */ + 4264 "00100011" // /* MW 2 */ + 4265 "11000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4266 "10000100" // JNZ r16, #4416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4416 delay_slots=5 */ + 4267 "00000001" // /* MW 5 */ + 4268 "01000000" // /* MW 4 */ + 4269 "10100000" // /* MW 3 */ + 4270 "00001000" // /* MW 2 */ + 4271 "10000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 487 22 +.delay_slot + 4272 "11111000" // VMOV bmll0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "10010010" // /* MW 3 */ + 4274 "00000000" // /* MW 2 */ + 4275 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4283 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4284 "01000100" // MOVXM ls, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4285 "01100000" // /* MW 5 */ + 4286 "11100010" // /* MW 4 */ + 4287 "00010001" // /* MW 3 */ + 4288 "00000000" // /* MW 2 */ + 4289 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4290 "01000100" // MOVXM le, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4291 "01100000" // /* MW 5 */ + 4292 "11100010" // /* MW 4 */ + 4293 "00010110" // /* MW 3 */ + 4294 "00000000" // /* MW 2 */ + 4295 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4296 "00000010" // NOPS; MOV lc, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4297 "01110000" // /* MW 7 */ + 4298 "01010000" // /* MW 6 */ + 4299 "10111100" // /* MW 5 */ + 4300 "00000010" // /* MW 4 */ + 4301 "01100000" // /* MW 3 */ + 4302 "00101011" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ + 4304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "10100101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00100000" // /* MW 5 */ + 4316 "00000000" // /* MW 4 */ + 4317 "11110000" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "10100101" // /* MW 12 */ + 4325 "00000001" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ + 4336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00100000" // /* MW 5 */ + 4348 "00000000" // /* MW 4 */ + 4349 "11110000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ + 4352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "01011011" // /* MW 7 */ + 4362 "00000001" // /* MW 6 */ + 4363 "00100000" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ + 4368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "10100101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "01011011" // /* MW 7 */ + 4378 "00000001" // /* MW 6 */ + 4379 "00100000" // /* MW 5 */ + 4380 "00000000" // /* MW 4 */ + 4381 "11110000" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ + 4384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "10100101" // /* MW 12 */ + 4389 "00000001" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "01011011" // /* MW 7 */ + 4394 "00000001" // /* MW 6 */ + 4395 "00100000" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224 +.src_ref 3 "pad_3d.h" 487 22 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4400 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4401 "00000000" // /* MW 15 */ + 4402 "00000000" // /* MW 14 */ + 4403 "01111000" // /* MW 13 */ + 4404 "10100101" // /* MW 12 */ + 4405 "00000001" // /* MW 11 */ + 4406 "00000000" // /* MW 10 */ + 4407 "00000000" // /* MW 9 */ + 4408 "10000000" // /* MW 8 */ + 4409 "00000110" // /* MW 7 */ + 4410 "00011100" // /* MW 6 */ + 4411 "00100010" // /* MW 5 */ + 4412 "00000000" // /* MW 4 */ + 4413 "11110000" // /* MW 3 */ + 4414 "00101100" // /* MW 2 */ + 4415 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240 +.src_ref 3 "pad_3d.h" 495 21 +.src_ref 3 "pad_3d.h" 495 40 first +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 38 first +.loop_nesting 0 + 4416 "10111010" // MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4417 "10101000" // /* MW 9 */ + 4418 "11001100" // /* MW 8 */ + 4419 "00101001" // /* MW 7 */ + 4420 "11111110" // /* MW 6 */ + 4421 "00000000" // /* MW 5 */ + 4422 "00001011" // /* MW 4 */ + 4423 "00000000" // /* MW 3 */ + 4424 "10000110" // /* MW 2 */ + 4425 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 40 +.src_ref 3 "pad_3d.h" 496 29 first + 4426 "00100100" // SUB r17, r0, r17; ADD.NC dn1, r7, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4427 "11111111" // /* MW 5 */ + 4428 "10000111" // /* MW 4 */ + 4429 "00110010" // /* MW 3 */ + 4430 "01100010" // /* MW 2 */ + 4431 "00000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 21 first + 4432 "10011000" // LSHL r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4433 "01101101" // /* MW 3 */ + 4434 "01100010" // /* MW 2 */ + 4435 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 58 +.src_ref 3 "pad_3d.h" 498 23 first + 4436 "00100100" // SUB r17, r0, r7; ADD.NC m1, r17, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4437 "00010000" // /* MW 5 */ + 4438 "00010001" // /* MW 4 */ + 4439 "00110010" // /* MW 3 */ + 4440 "01001110" // /* MW 2 */ + 4441 "00000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 45 first + 4442 "10011000" // MUL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4443 "00001111" // /* MW 3 */ + 4444 "11100001" // /* MW 2 */ + 4445 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 498 10 first + 4446 "10011000" // LSHL r6, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4447 "01101101" // /* MW 3 */ + 4448 "01001100" // /* MW 2 */ + 4449 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 52 first + 4450 "10100100" // ASHL r6, r16, r2; ADD.NC p2, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4451 "00110010" // /* MW 5 */ + 4452 "11000011" // /* MW 4 */ + 4453 "11010100" // /* MW 3 */ + 4454 "10000101" // /* MW 2 */ + 4455 "10000001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 26 + 4456 "10011000" // GE r7, r24, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4457 "01101001" // /* MW 3 */ + 4458 "00001110" // /* MW 2 */ + 4459 "00010110" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 4 + 4460 "10000100" // JNZ r7, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */ + 4461 "00000001" // /* MW 5 */ + 4462 "01000000" // /* MW 4 */ + 4463 "00001000" // /* MW 3 */ + 4464 "00001001" // /* MW 2 */ + 4465 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4475 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 499 4 + 4476 "10111010" // MOVA dc1, #0; MOVXM ls, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4477 "00010000" // /* MW 9 */ + 4478 "00000000" // /* MW 8 */ + 4479 "01111001" // /* MW 7 */ + 4480 "00000100" // /* MW 6 */ + 4481 "00000000" // /* MW 5 */ + 4482 "00000000" // /* MW 4 */ + 4483 "10000000" // /* MW 3 */ + 4484 "00000111" // /* MW 2 */ + 4485 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 499 4 + 4486 "10111010" // MOVA dj1, #16; MOVXM le, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4487 "00010000" // /* MW 9 */ + 4488 "00000000" // /* MW 8 */ + 4489 "10111001" // /* MW 7 */ + 4490 "00000101" // /* MW 6 */ + 4491 "00000000" // /* MW 5 */ + 4492 "00000000" // /* MW 4 */ + 4493 "10000000" // /* MW 3 */ + 4494 "00000110" // /* MW 2 */ + 4495 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 4 + 4496 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4497 "00000000" // /* MW 15 */ + 4498 "00000000" // /* MW 14 */ + 4499 "01111000" // /* MW 13 */ + 4500 "10010000" // /* MW 12 */ + 4501 "10111001" // /* MW 11 */ + 4502 "00000010" // /* MW 10 */ + 4503 "00000000" // /* MW 9 */ + 4504 "00000000" // /* MW 8 */ + 4505 "01011011" // /* MW 7 */ + 4506 "00000001" // /* MW 6 */ + 4507 "00100000" // /* MW 5 */ + 4508 "00000000" // /* MW 4 */ + 4509 "11110000" // /* MW 3 */ + 4510 "00101100" // /* MW 2 */ + 4511 "00000000" // /* MW 1 */ + 4512 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4513 "00000000" // /* MW 15 */ + 4514 "00000000" // /* MW 14 */ + 4515 "01111000" // /* MW 13 */ + 4516 "10100101" // /* MW 12 */ + 4517 "00000001" // /* MW 11 */ + 4518 "00000000" // /* MW 10 */ + 4519 "00000000" // /* MW 9 */ + 4520 "00000000" // /* MW 8 */ + 4521 "01011011" // /* MW 7 */ + 4522 "00000001" // /* MW 6 */ + 4523 "00100000" // /* MW 5 */ + 4524 "00000000" // /* MW 4 */ + 4525 "11110000" // /* MW 3 */ + 4526 "00101100" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ + 4528 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4529 "00000000" // /* MW 15 */ + 4530 "00000000" // /* MW 14 */ + 4531 "01111000" // /* MW 13 */ + 4532 "10100101" // /* MW 12 */ + 4533 "00000001" // /* MW 11 */ + 4534 "00000000" // /* MW 10 */ + 4535 "00000000" // /* MW 9 */ + 4536 "00000000" // /* MW 8 */ + 4537 "01011011" // /* MW 7 */ + 4538 "00000001" // /* MW 6 */ + 4539 "00100000" // /* MW 5 */ + 4540 "00000000" // /* MW 4 */ + 4541 "11110000" // /* MW 3 */ + 4542 "00101100" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ + 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4545 "00000000" // /* MW 15 */ + 4546 "00000000" // /* MW 14 */ + 4547 "01111000" // /* MW 13 */ + 4548 "10100101" // /* MW 12 */ + 4549 "00000001" // /* MW 11 */ + 4550 "00000000" // /* MW 10 */ + 4551 "00000000" // /* MW 9 */ + 4552 "00000000" // /* MW 8 */ + 4553 "01011011" // /* MW 7 */ + 4554 "00000001" // /* MW 6 */ + 4555 "00100000" // /* MW 5 */ + 4556 "00000000" // /* MW 4 */ + 4557 "11110000" // /* MW 3 */ + 4558 "00101100" // /* MW 2 */ + 4559 "00000000" // /* MW 1 */ + 4560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4561 "00000000" // /* MW 15 */ + 4562 "00000000" // /* MW 14 */ + 4563 "01111000" // /* MW 13 */ + 4564 "10100101" // /* MW 12 */ + 4565 "00000001" // /* MW 11 */ + 4566 "00000000" // /* MW 10 */ + 4567 "00000000" // /* MW 9 */ + 4568 "00000000" // /* MW 8 */ + 4569 "01011011" // /* MW 7 */ + 4570 "00000001" // /* MW 6 */ + 4571 "00100000" // /* MW 5 */ + 4572 "00000000" // /* MW 4 */ + 4573 "11110000" // /* MW 3 */ + 4574 "00101100" // /* MW 2 */ + 4575 "00000000" // /* MW 1 */ + 4576 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4577 "00000000" // /* MW 15 */ + 4578 "00000000" // /* MW 14 */ + 4579 "01111000" // /* MW 13 */ + 4580 "10100101" // /* MW 12 */ + 4581 "00000001" // /* MW 11 */ + 4582 "00000000" // /* MW 10 */ + 4583 "00000000" // /* MW 9 */ + 4584 "00000000" // /* MW 8 */ + 4585 "01011011" // /* MW 7 */ + 4586 "00000001" // /* MW 6 */ + 4587 "00100000" // /* MW 5 */ + 4588 "00000000" // /* MW 4 */ + 4589 "11110000" // /* MW 3 */ + 4590 "00101100" // /* MW 2 */ + 4591 "00000000" // /* MW 1 */ + 4592 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4593 "00000000" // /* MW 15 */ + 4594 "00000000" // /* MW 14 */ + 4595 "01111000" // /* MW 13 */ + 4596 "10100101" // /* MW 12 */ + 4597 "00000001" // /* MW 11 */ + 4598 "00000000" // /* MW 10 */ + 4599 "00000000" // /* MW 9 */ + 4600 "00000000" // /* MW 8 */ + 4601 "01011011" // /* MW 7 */ + 4602 "00000001" // /* MW 6 */ + 4603 "00100000" // /* MW 5 */ + 4604 "00000000" // /* MW 4 */ + 4605 "11110000" // /* MW 3 */ + 4606 "00101100" // /* MW 2 */ + 4607 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4608 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4609 "00000000" // /* MW 15 */ + 4610 "00000000" // /* MW 14 */ + 4611 "01111000" // /* MW 13 */ + 4612 "10100101" // /* MW 12 */ + 4613 "00000001" // /* MW 11 */ + 4614 "00000000" // /* MW 10 */ + 4615 "00000000" // /* MW 9 */ + 4616 "00000000" // /* MW 8 */ + 4617 "00101110" // /* MW 7 */ + 4618 "00110000" // /* MW 6 */ + 4619 "00100010" // /* MW 5 */ + 4620 "00000000" // /* MW 4 */ + 4621 "11110000" // /* MW 3 */ + 4622 "00101100" // /* MW 2 */ + 4623 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448 +.src_ref 3 "pad_3d.h" 514 39 +.loop_nesting 0 + 4624 "01000100" // MOVXM r7, #2147483640 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "11110000" // /* MW 5 */ + 4626 "10111111" // /* MW 4 */ + 4627 "11110011" // /* MW 3 */ + 4628 "11111111" // /* MW 2 */ + 4629 "01111111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 39 first + 4630 "10011000" // AND r7, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "01000100" // /* MW 3 */ + 4632 "11001110" // /* MW 2 */ + 4633 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 35 + 4634 "10011000" // SUB r7, r5, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "01110001" // /* MW 3 */ + 4636 "01001110" // /* MW 2 */ + 4637 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 + 4638 "10011000" // MUL r7, r7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4639 "00001111" // /* MW 3 */ + 4640 "11001110" // /* MW 2 */ + 4641 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 511 25 first + 4642 "10011000" // ASHL r2, r4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4643 "00101110" // /* MW 3 */ + 4644 "00000100" // /* MW 2 */ + 4645 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 36 first + 4646 "10011000" // SUB r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4647 "01000001" // /* MW 3 */ + 4648 "01001000" // /* MW 2 */ + 4649 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 515 30 first + 4650 "10011000" // MUL r2, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4651 "00001111" // /* MW 3 */ + 4652 "10000100" // /* MW 2 */ + 4653 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 28 first + 4654 "10011000" // MUL r0, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4655 "00001111" // /* MW 3 */ + 4656 "00000000" // /* MW 2 */ + 4657 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 514 55 +.src_ref 3 "pad_3d.h" 517 39 first + 4658 "01100100" // MUL r1, r1, r2; MOV r6, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4659 "00000101" // /* MW 5 */ + 4660 "00100000" // /* MW 4 */ + 4661 "11110011" // /* MW 3 */ + 4662 "01000101" // /* MW 2 */ + 4663 "00001000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 first + 4664 "10011000" // LSHL r0, r0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4665 "01101101" // /* MW 3 */ + 4666 "00000000" // /* MW 2 */ + 4667 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 517 22 first + 4668 "10100100" // GE r0, r24, r1; ADD.NC p2, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4669 "00000010" // /* MW 5 */ + 4670 "11000011" // /* MW 4 */ + 4671 "00110100" // /* MW 3 */ + 4672 "00000011" // /* MW 2 */ + 4673 "11000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 517 4 + 4674 "10000100" // JNZ r0, #4832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4675 "00000001" // /* MW 5 */ + 4676 "01000000" // /* MW 4 */ + 4677 "01110000" // /* MW 3 */ + 4678 "00001001" // /* MW 2 */ + 4679 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4683 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 first +.delay_slot + 4684 "10011000" // LSHL r4, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4685 "01101101" // /* MW 3 */ + 4686 "11001000" // /* MW 2 */ + 4687 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 +.delay_slot + 4688 "00011000" // ADD.NC m0, r4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4689 "00001000" // /* MW 3 */ + 4690 "00000010" // /* MW 2 */ + 4691 "00011000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 515 37 first +.delay_slot + 4692 "10011000" // ADD.NC dn0, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "01111111" // /* MW 3 */ + 4694 "01000001" // /* MW 2 */ + 4695 "00011000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 517 4 first + 4696 "10111010" // MOVA dc0, #0; MOVXM ls, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4697 "00010000" // /* MW 9 */ + 4698 "01101000" // /* MW 8 */ + 4699 "01111001" // /* MW 7 */ + 4700 "00000100" // /* MW 6 */ + 4701 "00000000" // /* MW 5 */ + 4702 "00000000" // /* MW 4 */ + 4703 "10000000" // /* MW 3 */ + 4704 "00000011" // /* MW 2 */ + 4705 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 517 4 + 4706 "10111010" // MOVA dj0, #16; MOVXM le, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4707 "00010000" // /* MW 9 */ + 4708 "01101000" // /* MW 8 */ + 4709 "10111001" // /* MW 7 */ + 4710 "00000101" // /* MW 6 */ + 4711 "00000000" // /* MW 5 */ + 4712 "00000000" // /* MW 4 */ + 4713 "10000000" // /* MW 3 */ + 4714 "00000010" // /* MW 2 */ + 4715 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 517 4 + 4716 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4717 "10100000" // /* MW 3 */ + 4718 "01110000" // /* MW 2 */ + 4719 "00011101" // /* MW 1 */ + 4720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4721 "00000000" // /* MW 15 */ + 4722 "00000000" // /* MW 14 */ + 4723 "01111000" // /* MW 13 */ + 4724 "10100101" // /* MW 12 */ + 4725 "00000001" // /* MW 11 */ + 4726 "00000000" // /* MW 10 */ + 4727 "00000000" // /* MW 9 */ + 4728 "00000000" // /* MW 8 */ + 4729 "01011011" // /* MW 7 */ + 4730 "00000001" // /* MW 6 */ + 4731 "00100000" // /* MW 5 */ + 4732 "00000000" // /* MW 4 */ + 4733 "11110000" // /* MW 3 */ + 4734 "00101100" // /* MW 2 */ + 4735 "00000000" // /* MW 1 */ + 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4737 "00000000" // /* MW 15 */ + 4738 "00000000" // /* MW 14 */ + 4739 "01111000" // /* MW 13 */ + 4740 "10100101" // /* MW 12 */ + 4741 "00000001" // /* MW 11 */ + 4742 "00000000" // /* MW 10 */ + 4743 "00000000" // /* MW 9 */ + 4744 "00000000" // /* MW 8 */ + 4745 "01011011" // /* MW 7 */ + 4746 "00000001" // /* MW 6 */ + 4747 "00100000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "11110000" // /* MW 3 */ + 4750 "00101100" // /* MW 2 */ + 4751 "00000000" // /* MW 1 */ + 4752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "00000000" // /* MW 15 */ + 4754 "00000000" // /* MW 14 */ + 4755 "01111000" // /* MW 13 */ + 4756 "10100101" // /* MW 12 */ + 4757 "00000001" // /* MW 11 */ + 4758 "00000000" // /* MW 10 */ + 4759 "00000000" // /* MW 9 */ + 4760 "00000000" // /* MW 8 */ + 4761 "01011011" // /* MW 7 */ + 4762 "00000001" // /* MW 6 */ + 4763 "00100000" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ + 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00000000" // /* MW 15 */ + 4770 "00000000" // /* MW 14 */ + 4771 "01111000" // /* MW 13 */ + 4772 "10100101" // /* MW 12 */ + 4773 "00000001" // /* MW 11 */ + 4774 "00000000" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ + 4784 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4785 "00000000" // /* MW 15 */ + 4786 "00000000" // /* MW 14 */ + 4787 "01111000" // /* MW 13 */ + 4788 "10100101" // /* MW 12 */ + 4789 "00000001" // /* MW 11 */ + 4790 "00000000" // /* MW 10 */ + 4791 "00000000" // /* MW 9 */ + 4792 "00000000" // /* MW 8 */ + 4793 "01011011" // /* MW 7 */ + 4794 "00000001" // /* MW 6 */ + 4795 "00100000" // /* MW 5 */ + 4796 "00000000" // /* MW 4 */ + 4797 "11110000" // /* MW 3 */ + 4798 "00101100" // /* MW 2 */ + 4799 "00000000" // /* MW 1 */ + 4800 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4801 "00000000" // /* MW 15 */ + 4802 "00000000" // /* MW 14 */ + 4803 "01111000" // /* MW 13 */ + 4804 "10100101" // /* MW 12 */ + 4805 "00000001" // /* MW 11 */ + 4806 "00000000" // /* MW 10 */ + 4807 "00000000" // /* MW 9 */ + 4808 "00000000" // /* MW 8 */ + 4809 "01011011" // /* MW 7 */ + 4810 "00000001" // /* MW 6 */ + 4811 "00100000" // /* MW 5 */ + 4812 "00000000" // /* MW 4 */ + 4813 "11110000" // /* MW 3 */ + 4814 "00101100" // /* MW 2 */ + 4815 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4816 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4817 "00000000" // /* MW 15 */ + 4818 "00000000" // /* MW 14 */ + 4819 "01111000" // /* MW 13 */ + 4820 "10100101" // /* MW 12 */ + 4821 "00000001" // /* MW 11 */ + 4822 "00000000" // /* MW 10 */ + 4823 "00000000" // /* MW 9 */ + 4824 "00000000" // /* MW 8 */ + 4825 "00101110" // /* MW 7 */ + 4826 "00010000" // /* MW 6 */ + 4827 "00100010" // /* MW 5 */ + 4828 "00000000" // /* MW 4 */ + 4829 "11110000" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656 +.src_ref 3 "pad_3d.h" 282 first +.loop_nesting 0 + 4832 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4833 "00000000" // /* MW 3 */ + 4834 "00101000" // /* MW 2 */ + 4835 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4837 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4839 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4841 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4843 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0 + 4845 "00000000" // /* MW 1 */ +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.function run _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 2 "reduce_base_c8.h" 352 30 +.src_ref 2 "reduce_base_c8.h" 362 first +.src_ref 2 "reduce_base_c8.h" 365 18 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 +.function_start + 4848 "11111000" // MOV r3, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "11000000" // /* MW 3 */ + 4850 "11010100" // /* MW 2 */ + 4851 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 365 18 first + 4852 "00000010" // MOVS dn3, p7; ADD.NC p7, r3, #44 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4853 "00000000" // /* MW 7 */ + 4854 "11001011" // /* MW 6 */ + 4855 "10110000" // /* MW 5 */ + 4856 "00000011" // /* MW 4 */ + 4857 "01100000" // /* MW 3 */ + 4858 "10010001" // /* MW 2 */ + 4859 "01101011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 367 19 first + 4860 "10011000" // LDA.u16 r0, [p7], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4861 "00011010" // /* MW 3 */ + 4862 "10001100" // /* MW 2 */ + 4863 "00000111" // /* MW 1 */ + 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4865 "00000000" // /* MW 1 */ + 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4867 "00000000" // /* MW 1 */ + 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4869 "00000000" // /* MW 1 */ + 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4871 "00000000" // /* MW 1 */ + 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4873 "00000000" // /* MW 1 */ + 4874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4875 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 367 12 +.src_ref 2 "reduce_base_c8.h" 367 19 + 4876 "10000100" // JNZ r0, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */ + 4877 "00000001" // /* MW 5 */ + 4878 "01000000" // /* MW 4 */ + 4879 "11110000" // /* MW 3 */ + 4880 "00001001" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 101 18 +.src_ref 5 "broadcast.hpp" 80 25 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 2 "reduce_base_c8.h" 372 34 +.delay_slot + 4882 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4883 "00000001" // /* MW 3 */ + 4884 "00100000" // /* MW 2 */ + 4885 "00010000" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first +.delay_slot + 4886 "11111000" // VBCST.32 x1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4887 "01110010" // /* MW 3 */ + 4888 "11000010" // /* MW 2 */ + 4889 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 362 +.delay_slot + 4894 "11000100" // PADDXM [sp], #256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4895 "00000001" // /* MW 5 */ + 4896 "00000000" // /* MW 4 */ + 4897 "00000000" // /* MW 3 */ + 4898 "00100000" // /* MW 2 */ + 4899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 43 + 4900 "10111000" // MOV dj2, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "01001000" // /* MW 3 */ + 4902 "10000000" // /* MW 2 */ + 4903 "00011010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 43 first + 4904 "10011000" // LDA r1, [p2, dj2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00110110" // /* MW 3 */ + 4906 "01000000" // /* MW 2 */ + 4907 "00000010" // /* MW 1 */ + 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4909 "00000000" // /* MW 1 */ + 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4911 "00000000" // /* MW 1 */ + 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4913 "00000000" // /* MW 1 */ + 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4915 "00000000" // /* MW 1 */ + 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4917 "00000000" // /* MW 1 */ + 4918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4919 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 34 + 4920 "10011000" // GE r2, r16, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4921 "00011001" // /* MW 3 */ + 4922 "00000100" // /* MW 2 */ + 4923 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4924 "10000100" // JNZ r2, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */ + 4925 "00000001" // /* MW 5 */ + 4926 "01000000" // /* MW 4 */ + 4927 "11110000" // /* MW 3 */ + 4928 "00001001" // /* MW 2 */ + 4929 "00010000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base_c8.h" 374 29 +.delay_slot + 4930 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4931 "10010010" // /* MW 3 */ + 4932 "00000010" // /* MW 2 */ + 4933 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4941 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base_c8.h" 372 12 +.src_ref 2 "reduce_base_c8.h" 374 29 + 4942 "01110110" // NOPA; MOVS p3, p1; MOVXM ls, #5072 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4943 "00010000" // /* MW 11 */ + 4944 "11101000" // /* MW 10 */ + 4945 "01111001" // /* MW 9 */ + 4946 "00000100" // /* MW 8 */ + 4947 "00000000" // /* MW 7 */ + 4948 "00000000" // /* MW 6 */ + 4949 "10001011" // /* MW 5 */ + 4950 "10000100" // /* MW 4 */ + 4951 "11110011" // /* MW 3 */ + 4952 "00101100" // /* MW 2 */ + 4953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4954 "01000100" // MOVXM le, #5072 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4955 "10100000" // /* MW 5 */ + 4956 "11100111" // /* MW 4 */ + 4957 "00010110" // /* MW 3 */ + 4958 "00000000" // /* MW 2 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4960 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4961 "00000000" // /* MW 15 */ + 4962 "00000000" // /* MW 14 */ + 4963 "01111000" // /* MW 13 */ + 4964 "01010000" // /* MW 12 */ + 4965 "10111000" // /* MW 11 */ + 4966 "00000010" // /* MW 10 */ + 4967 "00000000" // /* MW 9 */ + 4968 "00000000" // /* MW 8 */ + 4969 "01011011" // /* MW 7 */ + 4970 "00000001" // /* MW 6 */ + 4971 "00100000" // /* MW 5 */ + 4972 "00000000" // /* MW 4 */ + 4973 "11110000" // /* MW 3 */ + 4974 "00101100" // /* MW 2 */ + 4975 "00000000" // /* MW 1 */ + 4976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4977 "00000000" // /* MW 15 */ + 4978 "00000000" // /* MW 14 */ + 4979 "01111000" // /* MW 13 */ + 4980 "10100101" // /* MW 12 */ + 4981 "00000001" // /* MW 11 */ + 4982 "00000000" // /* MW 10 */ + 4983 "00000000" // /* MW 9 */ + 4984 "00000000" // /* MW 8 */ + 4985 "01011011" // /* MW 7 */ + 4986 "00000001" // /* MW 6 */ + 4987 "00100000" // /* MW 5 */ + 4988 "00000000" // /* MW 4 */ + 4989 "11110000" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ + 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4993 "00000000" // /* MW 15 */ + 4994 "00000000" // /* MW 14 */ + 4995 "01111000" // /* MW 13 */ + 4996 "10100101" // /* MW 12 */ + 4997 "00000001" // /* MW 11 */ + 4998 "00000000" // /* MW 10 */ + 4999 "00000000" // /* MW 9 */ + 5000 "00000000" // /* MW 8 */ + 5001 "01011011" // /* MW 7 */ + 5002 "00000001" // /* MW 6 */ + 5003 "00100000" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11110000" // /* MW 3 */ + 5006 "00101100" // /* MW 2 */ + 5007 "00000000" // /* MW 1 */ + 5008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5009 "00000000" // /* MW 15 */ + 5010 "00000000" // /* MW 14 */ + 5011 "01111000" // /* MW 13 */ + 5012 "10100101" // /* MW 12 */ + 5013 "00000001" // /* MW 11 */ + 5014 "00000000" // /* MW 10 */ + 5015 "00000000" // /* MW 9 */ + 5016 "00000000" // /* MW 8 */ + 5017 "01011011" // /* MW 7 */ + 5018 "00000001" // /* MW 6 */ + 5019 "00100000" // /* MW 5 */ + 5020 "00000000" // /* MW 4 */ + 5021 "11110000" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ + 5024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5025 "00000000" // /* MW 15 */ + 5026 "00000000" // /* MW 14 */ + 5027 "01111000" // /* MW 13 */ + 5028 "10100101" // /* MW 12 */ + 5029 "00000001" // /* MW 11 */ + 5030 "00000000" // /* MW 10 */ + 5031 "00000000" // /* MW 9 */ + 5032 "00000000" // /* MW 8 */ + 5033 "01011011" // /* MW 7 */ + 5034 "00000001" // /* MW 6 */ + 5035 "00100000" // /* MW 5 */ + 5036 "00000000" // /* MW 4 */ + 5037 "11110000" // /* MW 3 */ + 5038 "00101100" // /* MW 2 */ + 5039 "00000000" // /* MW 1 */ + 5040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5041 "00000000" // /* MW 15 */ + 5042 "00000000" // /* MW 14 */ + 5043 "01111000" // /* MW 13 */ + 5044 "10100101" // /* MW 12 */ + 5045 "00000001" // /* MW 11 */ + 5046 "00000000" // /* MW 10 */ + 5047 "00000000" // /* MW 9 */ + 5048 "00000000" // /* MW 8 */ + 5049 "01011011" // /* MW 7 */ + 5050 "00000001" // /* MW 6 */ + 5051 "00100000" // /* MW 5 */ + 5052 "00000000" // /* MW 4 */ + 5053 "11110000" // /* MW 3 */ + 5054 "00101100" // /* MW 2 */ + 5055 "00000000" // /* MW 1 */ + 5056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5057 "00000000" // /* MW 15 */ + 5058 "00000000" // /* MW 14 */ + 5059 "01111000" // /* MW 13 */ + 5060 "10100101" // /* MW 12 */ + 5061 "00000001" // /* MW 11 */ + 5062 "00000000" // /* MW 10 */ + 5063 "00000000" // /* MW 9 */ + 5064 "00000000" // /* MW 8 */ + 5065 "01011011" // /* MW 7 */ + 5066 "00000001" // /* MW 6 */ + 5067 "00100000" // /* MW 5 */ + 5068 "00000000" // /* MW 4 */ + 5069 "11110000" // /* MW 3 */ + 5070 "00101100" // /* MW 2 */ + 5071 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224 +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base_c8.h" 374 29 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 5072 "11100001" // NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5073 "00000000" // /* MW 15 */ + 5074 "00000000" // /* MW 14 */ + 5075 "01111000" // /* MW 13 */ + 5076 "10100101" // /* MW 12 */ + 5077 "00000001" // /* MW 11 */ + 5078 "00000000" // /* MW 10 */ + 5079 "00000000" // /* MW 9 */ + 5080 "10000000" // /* MW 8 */ + 5081 "00000110" // /* MW 7 */ + 5082 "00011101" // /* MW 6 */ + 5083 "00100011" // /* MW 5 */ + 5084 "00000000" // /* MW 4 */ + 5085 "11110000" // /* MW 3 */ + 5086 "00101100" // /* MW 2 */ + 5087 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 412 41 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.loop_nesting 0 + 5088 "10111000" // MOV m4, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5089 "01000000" // /* MW 3 */ + 5090 "00000000" // /* MW 2 */ + 5091 "00011100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 first + 5092 "10011000" // LDA.u16 r17, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5093 "00111010" // /* MW 3 */ + 5094 "10001010" // /* MW 2 */ + 5095 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 388 28 + 5096 "01010100" // LDA.s16 r22, [p7], #-2; MOV m5, #-58 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5097 "00011001" // /* MW 5 */ + 5098 "00011111" // /* MW 4 */ + 5099 "01011010" // /* MW 3 */ + 5100 "11011010" // /* MW 2 */ + 5101 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 570 33 + 5102 "01010100" // LDA.u16 r26, [p7], m5; MOV dj0, #46 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5103 "10111001" // /* MW 5 */ + 5104 "00000000" // /* MW 4 */ + 5105 "01010001" // /* MW 3 */ + 5106 "01101011" // /* MW 2 */ + 5107 "11110101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 570 33 first +.src_ref 2 "reduce_base_c8.h" 594 43 + 5108 "11010100" // LDA.s16 r20, [p7, dj0]; MOV r19, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5109 "10000001" // /* MW 5 */ + 5110 "10111101" // /* MW 4 */ + 5111 "01011001" // /* MW 3 */ + 5112 "01010010" // /* MW 2 */ + 5113 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 594 43 first + 5114 "00010100" // LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5115 "00111000" // /* MW 5 */ + 5116 "11010011" // /* MW 4 */ + 5117 "01010110" // /* MW 3 */ + 5118 "01001110" // /* MW 2 */ + 5119 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 594 43 + 5120 "10011000" // LDA.s16 r21, [p3], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5121 "10110010" // /* MW 3 */ + 5122 "11011110" // /* MW 2 */ + 5123 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 594 64 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5124 "10011000" // LDA.u16 r28, [p3], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5125 "10011010" // /* MW 3 */ + 5126 "11111111" // /* MW 2 */ + 5127 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 595 56 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5128 "00101100" // LDA.s16 r17, [p3], #6; MOVX r7, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5129 "00010010" // /* MW 5 */ + 5130 "00011100" // /* MW 4 */ + 5131 "01010000" // /* MW 3 */ + 5132 "11000110" // /* MW 2 */ + 5133 "01100111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 596 56 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5134 "10111010" // LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5135 "01011000" // /* MW 9 */ + 5136 "00000000" // /* MW 8 */ + 5137 "01100000" // /* MW 7 */ + 5138 "11001010" // /* MW 6 */ + 5139 "00100111" // /* MW 5 */ + 5140 "00111111" // /* MW 4 */ + 5141 "01010000" // /* MW 3 */ + 5142 "11001010" // /* MW 2 */ + 5143 "01111110" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 2 "reduce_base_c8.h" 388 28 first +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 33 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5144 "01110110" // LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5145 "01011000" // /* MW 11 */ + 5146 "00000001" // /* MW 10 */ + 5147 "11001000" // /* MW 9 */ + 5148 "01101100" // /* MW 8 */ + 5149 "00101001" // /* MW 7 */ + 5150 "00100011" // /* MW 6 */ + 5151 "01001011" // /* MW 5 */ + 5152 "00010000" // /* MW 4 */ + 5153 "01010010" // /* MW 3 */ + 5154 "00011110" // /* MW 2 */ + 5155 "11100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 595 75 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5156 "01110110" // LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5157 "01011000" // /* MW 11 */ + 5158 "00111100" // /* MW 10 */ + 5159 "01001000" // /* MW 9 */ + 5160 "11101100" // /* MW 8 */ + 5161 "01110011" // /* MW 7 */ + 5162 "00101100" // /* MW 6 */ + 5163 "00001011" // /* MW 5 */ + 5164 "01011010" // /* MW 4 */ + 5165 "01010010" // /* MW 3 */ + 5166 "11101111" // /* MW 2 */ + 5167 "01100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5168 "01110110" // MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5169 "01111000" // /* MW 11 */ + 5170 "11010000" // /* MW 10 */ + 5171 "00000001" // /* MW 9 */ + 5172 "01101101" // /* MW 8 */ + 5173 "01000011" // /* MW 7 */ + 5174 "00101001" // /* MW 6 */ + 5175 "10001011" // /* MW 5 */ + 5176 "10000100" // /* MW 4 */ + 5177 "10000011" // /* MW 3 */ + 5178 "00001010" // /* MW 2 */ + 5179 "00001000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5180 "10111010" // VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5181 "01111000" // /* MW 9 */ + 5182 "00010000" // /* MW 8 */ + 5183 "10000101" // /* MW 7 */ + 5184 "01101110" // /* MW 6 */ + 5185 "00110011" // /* MW 5 */ + 5186 "00100111" // /* MW 4 */ + 5187 "10110000" // /* MW 3 */ + 5188 "00010010" // /* MW 2 */ + 5189 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5190 "10111010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5191 "01111000" // /* MW 9 */ + 5192 "11010000" // /* MW 8 */ + 5193 "00000100" // /* MW 7 */ + 5194 "01101111" // /* MW 6 */ + 5195 "00110011" // /* MW 5 */ + 5196 "00101011" // /* MW 4 */ + 5197 "00110000" // /* MW 3 */ + 5198 "01000001" // /* MW 2 */ + 5199 "00010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 2 "reduce_base_c8.h" 391 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5200 "00100100" // LSHL r17, r17, r6; ADD.NC lc, r18, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5201 "11111110" // /* MW 5 */ + 5202 "11110010" // /* MW 4 */ + 5203 "10111010" // /* MW 3 */ + 5204 "01001101" // /* MW 2 */ + 5205 "10001100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 + 5206 "11100100" // LSHL r17, r18, r6; MOV dj0, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5207 "01000001" // /* MW 5 */ + 5208 "00010001" // /* MW 4 */ + 5209 "10110001" // /* MW 3 */ + 5210 "01001101" // /* MW 2 */ + 5211 "10010100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.src_ref 2 "reduce_base_c8.h" 570 24 first + 5212 "01110110" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5213 "01111000" // /* MW 11 */ + 5214 "11010000" // /* MW 10 */ + 5215 "00000100" // /* MW 9 */ + 5216 "01101100" // /* MW 8 */ + 5217 "01100011" // /* MW 7 */ + 5218 "00001110" // /* MW 6 */ + 5219 "01001011" // /* MW 5 */ + 5220 "00010000" // /* MW 4 */ + 5221 "00110000" // /* MW 3 */ + 5222 "00000001" // /* MW 2 */ + 5223 "00011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 570 24 + 5224 "01001010" // MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5225 "00111101" // /* MW 9 */ + 5226 "00110000" // /* MW 8 */ + 5227 "00010100" // /* MW 7 */ + 5228 "11100100" // /* MW 6 */ + 5229 "00100000" // /* MW 5 */ + 5230 "00000011" // /* MW 4 */ + 5231 "01100111" // /* MW 3 */ + 5232 "10000001" // /* MW 2 */ + 5233 "00001011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first + 5234 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5235 "01110010" // /* MW 9 */ + 5236 "01010000" // /* MW 8 */ + 5237 "01000100" // /* MW 7 */ + 5238 "00000010" // /* MW 6 */ + 5239 "00001011" // /* MW 5 */ + 5240 "01011011" // /* MW 4 */ + 5241 "00110100" // /* MW 3 */ + 5242 "00100001" // /* MW 2 */ + 5243 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 + 5244 "11010100" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5245 "00000001" // /* MW 5 */ + 5246 "10010011" // /* MW 4 */ + 5247 "00110011" // /* MW 3 */ + 5248 "00110001" // /* MW 2 */ + 5249 "00000011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5250 "01100010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5251 "00111101" // /* MW 7 */ + 5252 "10000000" // /* MW 6 */ + 5253 "00010001" // /* MW 5 */ + 5254 "00000100" // /* MW 4 */ + 5255 "00110000" // /* MW 3 */ + 5256 "01000001" // /* MW 2 */ + 5257 "00010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5258 "10011000" // VLDA.2D bmll1, [p3], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5259 "10010101" // /* MW 3 */ + 5260 "01010000" // /* MW 2 */ + 5261 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 391 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5264 "01011010" // MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5265 "00111101" // /* MW 9 */ + 5266 "00101000" // /* MW 8 */ + 5267 "00010000" // /* MW 7 */ + 5268 "00000010" // /* MW 6 */ + 5269 "01001100" // /* MW 5 */ + 5270 "10001111" // /* MW 4 */ + 5271 "00000000" // /* MW 3 */ + 5272 "00000000" // /* MW 2 */ + 5273 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 412 41 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5274 "11010100" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5275 "00000001" // /* MW 5 */ + 5276 "00010000" // /* MW 4 */ + 5277 "00110111" // /* MW 3 */ + 5278 "00000001" // /* MW 2 */ + 5279 "00011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5280 "11101011" // MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5281 "10000001" // /* MW 15 */ + 5282 "10100001" // /* MW 14 */ + 5283 "01111000" // /* MW 13 */ + 5284 "00000000" // /* MW 12 */ + 5285 "10000010" // /* MW 11 */ + 5286 "00001000" // /* MW 10 */ + 5287 "01000100" // /* MW 9 */ + 5288 "00000000" // /* MW 8 */ + 5289 "10001011" // /* MW 7 */ + 5290 "10000100" // /* MW 6 */ + 5291 "00100100" // /* MW 5 */ + 5292 "00000000" // /* MW 4 */ + 5293 "10000000" // /* MW 3 */ + 5294 "00000110" // /* MW 2 */ + 5295 "00001000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 391 8 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5296 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5297 "01100001" // /* MW 15 */ + 5298 "10010000" // /* MW 14 */ + 5299 "00010000" // /* MW 13 */ + 5300 "10010000" // /* MW 12 */ + 5301 "10111010" // /* MW 11 */ + 5302 "00000101" // /* MW 10 */ + 5303 "00000000" // /* MW 9 */ + 5304 "00000000" // /* MW 8 */ + 5305 "00001011" // /* MW 7 */ + 5306 "01011010" // /* MW 6 */ + 5307 "00100001" // /* MW 5 */ + 5308 "00000000" // /* MW 4 */ + 5309 "00110000" // /* MW 3 */ + 5310 "00100001" // /* MW 2 */ + 5311 "00011101" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5312 "10011000" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "10001001" // /* MW 3 */ + 5314 "00011001" // /* MW 2 */ + 5315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5316 "01100110" // VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5317 "00111101" // /* MW 11 */ + 5318 "10000000" // /* MW 10 */ + 5319 "00010001" // /* MW 9 */ + 5320 "10001110" // /* MW 8 */ + 5321 "10101101" // /* MW 7 */ + 5322 "00000000" // /* MW 6 */ + 5323 "00100000" // /* MW 5 */ + 5324 "00000000" // /* MW 4 */ + 5325 "10110000" // /* MW 3 */ + 5326 "00010010" // /* MW 2 */ + 5327 "01101010" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5328 "11100001" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5329 "00000000" // /* MW 15 */ + 5330 "00000000" // /* MW 14 */ + 5331 "01111000" // /* MW 13 */ + 5332 "10100101" // /* MW 12 */ + 5333 "00000001" // /* MW 11 */ + 5334 "00000000" // /* MW 10 */ + 5335 "00000000" // /* MW 9 */ + 5336 "00000000" // /* MW 8 */ + 5337 "01011011" // /* MW 7 */ + 5338 "00000001" // /* MW 6 */ + 5339 "00100000" // /* MW 5 */ + 5340 "00000000" // /* MW 4 */ + 5341 "00110000" // /* MW 3 */ + 5342 "01000001" // /* MW 2 */ + 5343 "00010101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5345 "00000000" // /* MW 15 */ + 5346 "00000000" // /* MW 14 */ + 5347 "01111000" // /* MW 13 */ + 5348 "10100101" // /* MW 12 */ + 5349 "00000001" // /* MW 11 */ + 5350 "00000000" // /* MW 10 */ + 5351 "00000000" // /* MW 9 */ + 5352 "00000000" // /* MW 8 */ + 5353 "01011011" // /* MW 7 */ + 5354 "00000001" // /* MW 6 */ + 5355 "00100000" // /* MW 5 */ + 5356 "00000000" // /* MW 4 */ + 5357 "11110000" // /* MW 3 */ + 5358 "00101100" // /* MW 2 */ + 5359 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5360 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5361 "01000001" // /* MW 15 */ + 5362 "10000001" // /* MW 14 */ + 5363 "01111000" // /* MW 13 */ + 5364 "10100101" // /* MW 12 */ + 5365 "00000001" // /* MW 11 */ + 5366 "00000000" // /* MW 10 */ + 5367 "00000000" // /* MW 9 */ + 5368 "00000000" // /* MW 8 */ + 5369 "01011011" // /* MW 7 */ + 5370 "00000001" // /* MW 6 */ + 5371 "00100000" // /* MW 5 */ + 5372 "00000000" // /* MW 4 */ + 5373 "11110000" // /* MW 3 */ + 5374 "00101100" // /* MW 2 */ + 5375 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5376 "11100001" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5377 "00000000" // /* MW 15 */ + 5378 "00000000" // /* MW 14 */ + 5379 "01111000" // /* MW 13 */ + 5380 "10100101" // /* MW 12 */ + 5381 "00000001" // /* MW 11 */ + 5382 "00000000" // /* MW 10 */ + 5383 "00000000" // /* MW 9 */ + 5384 "10000000" // /* MW 8 */ + 5385 "00000110" // /* MW 7 */ + 5386 "00110001" // /* MW 6 */ + 5387 "00100100" // /* MW 5 */ + 5388 "00000000" // /* MW 4 */ + 5389 "00110000" // /* MW 3 */ + 5390 "00000001" // /* MW 2 */ + 5391 "00011001" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5392 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5393 "10000001" // /* MW 15 */ + 5394 "10100001" // /* MW 14 */ + 5395 "01111000" // /* MW 13 */ + 5396 "10100101" // /* MW 12 */ + 5397 "00000001" // /* MW 11 */ + 5398 "00000000" // /* MW 10 */ + 5399 "00000000" // /* MW 9 */ + 5400 "00000000" // /* MW 8 */ + 5401 "01011011" // /* MW 7 */ + 5402 "00000001" // /* MW 6 */ + 5403 "00100000" // /* MW 5 */ + 5404 "00000000" // /* MW 4 */ + 5405 "11110000" // /* MW 3 */ + 5406 "00101100" // /* MW 2 */ + 5407 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5408 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5409 "01100001" // /* MW 15 */ + 5410 "10010000" // /* MW 14 */ + 5411 "01111000" // /* MW 13 */ + 5412 "10100101" // /* MW 12 */ + 5413 "00000001" // /* MW 11 */ + 5414 "00000000" // /* MW 10 */ + 5415 "00000000" // /* MW 9 */ + 5416 "00000000" // /* MW 8 */ + 5417 "01011011" // /* MW 7 */ + 5418 "00000001" // /* MW 6 */ + 5419 "00100000" // /* MW 5 */ + 5420 "00000000" // /* MW 4 */ + 5421 "00110000" // /* MW 3 */ + 5422 "00100001" // /* MW 2 */ + 5423 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 107 23 +.src_ref 2 "reduce_base_c8.h" 412 41 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5424 "10111010" // LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5425 "00010000" // /* MW 9 */ + 5426 "11000000" // /* MW 8 */ + 5427 "10101111" // /* MW 7 */ + 5428 "00001100" // /* MW 6 */ + 5429 "00000000" // /* MW 5 */ + 5430 "00000000" // /* MW 4 */ + 5431 "01010000" // /* MW 3 */ + 5432 "00000111" // /* MW 2 */ + 5433 "11101100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 6 "me_vmult_float_emulated.h" 107 23 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5434 "01001010" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5435 "00111101" // /* MW 9 */ + 5436 "10000000" // /* MW 8 */ + 5437 "00010001" // /* MW 7 */ + 5438 "11100010" // /* MW 6 */ + 5439 "01110010" // /* MW 5 */ + 5440 "00010101" // /* MW 4 */ + 5441 "00110010" // /* MW 3 */ + 5442 "00110001" // /* MW 2 */ + 5443 "00000011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 101 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5444 "11111000" // VBCST.16 x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5445 "01110010" // /* MW 3 */ + 5446 "01000001" // /* MW 2 */ + 5447 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5449 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5450 "01001000" // VADD.f dm0, dm1, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5451 "00111101" // /* MW 3 */ + 5452 "00101000" // /* MW 2 */ + 5453 "00010000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5454 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5455 "00000110" // /* MW 3 */ + 5456 "00110001" // /* MW 2 */ + 5457 "00001100" // /* MW 1 */ + 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5459 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 412 52 first + 5460 "01100010" // ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5461 "00111101" // /* MW 7 */ + 5462 "00001100" // /* MW 6 */ + 5463 "00010010" // /* MW 5 */ + 5464 "11111001" // /* MW 4 */ + 5465 "01011111" // /* MW 3 */ + 5466 "00000010" // /* MW 2 */ + 5467 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 412 31 + 5468 "10011000" // NE r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5469 "00001000" // /* MW 3 */ + 5470 "01000000" // /* MW 2 */ + 5471 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 412 16 + 5472 "10000100" // JNZ r0, #6368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6368 delay_slots=5 */ + 5473 "00000001" // /* MW 5 */ + 5474 "01000000" // /* MW 4 */ + 5475 "01110000" // /* MW 3 */ + 5476 "00001100" // /* MW 2 */ + 5477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5483 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.delay_slot + 5484 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5485 "00000110" // /* MW 3 */ + 5486 "00110001" // /* MW 2 */ + 5487 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5489 "00000000" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first +.src_ref 3 "reduce_mean_c8_impl.h" 184 15 first +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5490 "00101100" // LDA r6, [p2, #12]; MOVX r5, #3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5491 "00011010" // /* MW 5 */ + 5492 "00010100" // /* MW 4 */ + 5493 "11010000" // /* MW 3 */ + 5494 "10011010" // /* MW 2 */ + 5495 "01000110" // /* MW 1 */ + 5496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5497 "00000000" // /* MW 1 */ + 5498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5499 "00000000" // /* MW 1 */ + 5500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5501 "00000000" // /* MW 1 */ + 5502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5503 "00000000" // /* MW 1 */ + 5504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5505 "00000000" // /* MW 1 */ + 5506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5507 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5508 "10011000" // GE r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5509 "01101001" // /* MW 3 */ + 5510 "01001110" // /* MW 2 */ + 5511 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5512 "10000100" // JNZ r7, #7296 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7296 delay_slots=5 */ + 5513 "00000001" // /* MW 5 */ + 5514 "01000000" // /* MW 4 */ + 5515 "01000000" // /* MW 3 */ + 5516 "00001110" // /* MW 2 */ + 5517 "00111000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 +.delay_slot + 5518 "00011000" // MOVX r0, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5519 "00010001" // /* MW 3 */ + 5520 "00000000" // /* MW 2 */ + 5521 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5529 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5530 "10011000" // NE r5, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5531 "00001000" // /* MW 3 */ + 5532 "10001010" // /* MW 2 */ + 5533 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5534 "10000100" // JNZ r5, #6512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6512 delay_slots=5 */ + 5535 "00000001" // /* MW 5 */ + 5536 "01000000" // /* MW 4 */ + 5537 "10111000" // /* MW 3 */ + 5538 "00001100" // /* MW 2 */ + 5539 "00101000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5549 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 5550 "11100100" // MOVX r17, #257; MOV dc4, lr /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5551 "11000001" // /* MW 5 */ + 5552 "10000011" // /* MW 4 */ + 5553 "10101001" // /* MW 3 */ + 5554 "01000000" // /* MW 2 */ + 5555 "00100100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 5556 "01000100" // MOVXM r21, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5557 "11111110" // /* MW 5 */ + 5558 "10111111" // /* MW 4 */ + 5559 "11111010" // /* MW 3 */ + 5560 "00000000" // /* MW 2 */ + 5561 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 + 5562 "00101100" // NOPA; MOVX r20, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5563 "00000010" // /* MW 5 */ + 5564 "01010000" // /* MW 4 */ + 5565 "11110000" // /* MW 3 */ + 5566 "00101100" // /* MW 2 */ + 5567 "00000000" // /* MW 1 */ +.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 + 5568 "01110110" // MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5569 "01011000" // /* MW 11 */ + 5570 "00111100" // /* MW 10 */ + 5571 "01001000" // /* MW 9 */ + 5572 "00001000" // /* MW 8 */ + 5573 "01010010" // /* MW 7 */ + 5574 "00000000" // /* MW 6 */ + 5575 "00001011" // /* MW 5 */ + 5576 "10000011" // /* MW 4 */ + 5577 "10000010" // /* MW 3 */ + 5578 "00001010" // /* MW 2 */ + 5579 "00001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first + 5580 "00101100" // LDA.s16 r6, [p2, dj2]; MOVX r4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5581 "00000010" // /* MW 5 */ + 5582 "00010001" // /* MW 4 */ + 5583 "01010000" // /* MW 3 */ + 5584 "00011010" // /* MW 2 */ + 5585 "01001000" // /* MW 1 */ + 5586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5587 "00000000" // /* MW 1 */ + 5588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5589 "00000000" // /* MW 1 */ + 5590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5591 "00000000" // /* MW 1 */ + 5592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5593 "00000000" // /* MW 1 */ + 5594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5595 "00000000" // /* MW 1 */ + 5596 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5597 "01100111" // /* MW 3 */ + 5598 "00000001" // /* MW 2 */ + 5599 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 + 5600 "11100001" // NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5601 "00000000" // /* MW 15 */ + 5602 "00000000" // /* MW 14 */ + 5603 "01111000" // /* MW 13 */ + 5604 "10100101" // /* MW 12 */ + 5605 "00000001" // /* MW 11 */ + 5606 "11110100" // /* MW 10 */ + 5607 "01010010" // /* MW 9 */ + 5608 "00001100" // /* MW 8 */ + 5609 "01011011" // /* MW 7 */ + 5610 "00000001" // /* MW 6 */ + 5611 "00100000" // /* MW 5 */ + 5612 "00000000" // /* MW 4 */ + 5613 "11110000" // /* MW 3 */ + 5614 "00101100" // /* MW 2 */ + 5615 "00000000" // /* MW 1 */ +.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 + 5616 "01110110" // MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5617 "00010000" // /* MW 11 */ + 5618 "01111000" // /* MW 10 */ + 5619 "10110010" // /* MW 9 */ + 5620 "11110011" // /* MW 8 */ + 5621 "00000001" // /* MW 7 */ + 5622 "10000000" // /* MW 6 */ + 5623 "10100101" // /* MW 5 */ + 5624 "11111101" // /* MW 4 */ + 5625 "10000111" // /* MW 3 */ + 5626 "10001010" // /* MW 2 */ + 5627 "00000100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1289 16 + 5628 "01110110" // LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5629 "01111000" // /* MW 11 */ + 5630 "00111001" // /* MW 10 */ + 5631 "10001011" // /* MW 9 */ + 5632 "00001000" // /* MW 8 */ + 5633 "01010000" // /* MW 7 */ + 5634 "10000000" // /* MW 6 */ + 5635 "01100101" // /* MW 5 */ + 5636 "11111010" // /* MW 4 */ + 5637 "01010111" // /* MW 3 */ + 5638 "11011100" // /* MW 2 */ + 5639 "11100000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 1280 49 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1287 41 +.src_ref 5 "vector.hpp" 1288 16 +.src_ref 5 "vector.hpp" 1289 16 +.src_ref 5 "vector.hpp" 1292 26 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 first +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 + 5640 "01110110" // LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5641 "01111000" // /* MW 11 */ + 5642 "01001001" // /* MW 10 */ + 5643 "00000010" // /* MW 9 */ + 5644 "11101000" // /* MW 8 */ + 5645 "01100111" // /* MW 7 */ + 5646 "00111111" // /* MW 6 */ + 5647 "10001011" // /* MW 5 */ + 5648 "10000100" // /* MW 4 */ + 5649 "11010111" // /* MW 3 */ + 5650 "00011010" // /* MW 2 */ + 5651 "01001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "vector.hpp" 1280 49 + 5652 "10111010" // MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5653 "01111000" // /* MW 9 */ + 5654 "01001001" // /* MW 8 */ + 5655 "00000010" // /* MW 7 */ + 5656 "00000001" // /* MW 6 */ + 5657 "11010010" // /* MW 5 */ + 5658 "00000010" // /* MW 4 */ + 5659 "00000000" // /* MW 3 */ + 5660 "11111000" // /* MW 2 */ + 5661 "00000011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 first + 5662 "10111010" // MOVA r25, #16; MOVXM ls, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5663 "00010000" // /* MW 9 */ + 5664 "01000000" // /* MW 8 */ + 5665 "01111011" // /* MW 7 */ + 5666 "00000100" // /* MW 6 */ + 5667 "00000000" // /* MW 5 */ + 5668 "00000000" // /* MW 4 */ + 5669 "00000000" // /* MW 3 */ + 5670 "00011001" // /* MW 2 */ + 5671 "00000010" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 + 5672 "10111010" // VLDA wl2, [sp, #-32]; MOVXM le, #6336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00010000" // /* MW 9 */ + 5674 "01100000" // /* MW 8 */ + 5675 "10111100" // /* MW 7 */ + 5676 "00000101" // /* MW 6 */ + 5677 "00000000" // /* MW 5 */ + 5678 "00000000" // /* MW 4 */ + 5679 "10110000" // /* MW 3 */ + 5680 "10010100" // /* MW 2 */ + 5681 "11111111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 98 + 5682 "00011000" // MOVX r26, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5683 "00000001" // /* MW 3 */ + 5684 "01110100" // /* MW 2 */ + 5685 "00010000" // /* MW 1 */ + 5686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5687 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "vector.hpp" 1286 72 +.src_ref 7 "accum.hpp" 1108 103 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5688 "00011000" // MOVX crRnd, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5689 "10000000" // /* MW 3 */ + 5690 "11111010" // /* MW 2 */ + 5691 "00010101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 first +.src_ref 6 "me_vmult_float_emulated.h" 112 19 first +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5692 "00000010" // VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5693 "00000000" // /* MW 7 */ + 5694 "10000000" // /* MW 6 */ + 5695 "10111001" // /* MW 5 */ + 5696 "00000010" // /* MW 4 */ + 5697 "11000000" // /* MW 3 */ + 5698 "00000010" // /* MW 2 */ + 5699 "00001000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5700 "11111000" // VMOV x3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5701 "10010010" // /* MW 3 */ + 5702 "10100000" // /* MW 2 */ + 5703 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 5704 "01100010" // VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5705 "10000011" // /* MW 7 */ + 5706 "01000000" // /* MW 6 */ + 5707 "00010000" // /* MW 5 */ + 5708 "11100110" // /* MW 4 */ + 5709 "10010010" // /* MW 3 */ + 5710 "10100110" // /* MW 2 */ + 5711 "00000010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 5712 "11111000" // VMOV x6, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5713 "10010010" // /* MW 3 */ + 5714 "00101010" // /* MW 2 */ + 5715 "00011011" // /* MW 1 */ + 5716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5717 "00000000" // /* MW 1 */ + 5718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5719 "00000000" // /* MW 1 */ + 5720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5721 "00000000" // /* MW 1 */ + 5722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5723 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 first +.src_ref 6 "me_vmult_float_emulated.h" 114 19 first + 5724 "00011000" // VCONV.bf16.fp32 wl3, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5725 "00010110" // /* MW 3 */ + 5726 "11000000" // /* MW 2 */ + 5727 "00001001" // /* MW 1 */ + 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5729 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 5730 "01001000" // VMSC.f dm0, dm0, x3, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5731 "10000011" // /* MW 3 */ + 5732 "00000110" // /* MW 2 */ + 5733 "00010000" // /* MW 1 */ + 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5735 "00000000" // /* MW 1 */ + 5736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5737 "00000000" // /* MW 1 */ + 5738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5739 "00000000" // /* MW 1 */ + 5740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5741 "00000000" // /* MW 1 */ + 5742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5743 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 + 5744 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00000000" // /* MW 15 */ + 5746 "00000000" // /* MW 14 */ + 5747 "01111000" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00001000" // /* MW 10 */ + 5751 "01110001" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "00010110" // /* MW 7 */ + 5754 "11000000" // /* MW 6 */ + 5755 "00100010" // /* MW 5 */ + 5756 "00000000" // /* MW 4 */ + 5757 "11110000" // /* MW 3 */ + 5758 "00101100" // /* MW 2 */ + 5759 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 3 "reduce_mean_c8_impl.h" 226 22 first +.begin_of_loop +.loop_nesting 1 + 5760 "11110100" // VLDB x7, [p1], #64; VMOV bmhh4, x9 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5761 "00100101" // /* MW 5 */ + 5762 "10100101" // /* MW 4 */ + 5763 "10001001" // /* MW 3 */ + 5764 "10111110" // /* MW 2 */ + 5765 "00100011" // /* MW 1 */ + 5766 "11111000" // VMOV bmhh3, x11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5767 "10010010" // /* MW 3 */ + 5768 "11010110" // /* MW 2 */ + 5769 "00011011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1280 49 + 5770 "11111000" // MOV r28, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5771 "11000000" // /* MW 3 */ + 5772 "00011110" // /* MW 2 */ + 5773 "00011111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1280 49 first + 5774 "10011000" // AND r29, r28, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5775 "10000100" // /* MW 3 */ + 5776 "00111011" // /* MW 2 */ + 5777 "00010111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1285 72 first + 5778 "00100100" // LT r27, r29, r4; ADD.NC r28, r29, #-32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5779 "11100000" // /* MW 5 */ + 5780 "00111101" // /* MW 4 */ + 5781 "01011110" // /* MW 3 */ + 5782 "11001001" // /* MW 2 */ + 5783 "11101110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 + 5784 "10011000" // LSHL r30, r22, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5785 "11011101" // /* MW 3 */ + 5786 "10111101" // /* MW 2 */ + 5787 "00010101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 98 first + 5788 "10011000" // SUB r31, r26, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5789 "11010001" // /* MW 3 */ + 5790 "10111111" // /* MW 2 */ + 5791 "00010110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "add_reduce.hpp" 322 47 first + 5792 "10100100" // SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5793 "11001101" // /* MW 5 */ + 5794 "01110000" // /* MW 4 */ + 5795 "01001000" // /* MW 3 */ + 5796 "10111100" // /* MW 2 */ + 5797 "00101111" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first + 5798 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5799 "10010010" // /* MW 3 */ + 5800 "00010000" // /* MW 2 */ + 5801 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 7 "accum.hpp" 198 120 + 5802 "11111000" // VMOV wl8, wh7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5803 "00100010" // /* MW 3 */ + 5804 "01001110" // /* MW 2 */ + 5805 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 7 "accum.hpp" 198 120 first + 5806 "11111000" // VMOV wl10, wl7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5807 "00100010" // /* MW 3 */ + 5808 "01001111" // /* MW 2 */ + 5809 "00011101" // /* MW 1 */ + 5810 "11111000" // VMOV bmhl4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5811 "10010010" // /* MW 3 */ + 5812 "10010000" // /* MW 2 */ + 5813 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5814 "11111000" // VMOV bmhl3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5815 "10010010" // /* MW 3 */ + 5816 "10010100" // /* MW 2 */ + 5817 "00011011" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 5 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5818 "01100010" // VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5819 "00111101" // /* MW 7 */ + 5820 "00101000" // /* MW 6 */ + 5821 "00010011" // /* MW 5 */ + 5822 "11100110" // /* MW 4 */ + 5823 "10001010" // /* MW 3 */ + 5824 "00010010" // /* MW 2 */ + 5825 "00000010" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5826 "11111000" // VMOV cml1, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5827 "10001010" // /* MW 3 */ + 5828 "00001110" // /* MW 2 */ + 5829 "00011001" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 5 "vector.hpp" 243 115 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5830 "01100010" // VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5831 "00111101" // /* MW 7 */ + 5832 "01010000" // /* MW 6 */ + 5833 "00010010" // /* MW 5 */ + 5834 "11100110" // /* MW 4 */ + 5835 "00100010" // /* MW 3 */ + 5836 "01001110" // /* MW 2 */ + 5837 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5838 "11111000" // VMOV bmll2, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5839 "10010010" // /* MW 3 */ + 5840 "00001110" // /* MW 2 */ + 5841 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5842 "11011000" // VSHIFT x9, x8, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5843 "01100110" // /* MW 3 */ + 5844 "11000000" // /* MW 2 */ + 5845 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5846 "01100010" // VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5847 "00111101" // /* MW 7 */ + 5848 "00110000" // /* MW 6 */ + 5849 "00010100" // /* MW 5 */ + 5850 "11100110" // /* MW 4 */ + 5851 "10010010" // /* MW 3 */ + 5852 "00010000" // /* MW 2 */ + 5853 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5854 "11111000" // VMOV bmll4, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5855 "10010010" // /* MW 3 */ + 5856 "00010010" // /* MW 2 */ + 5857 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 151 136 first + 5858 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5859 "00010010" // /* MW 3 */ + 5860 "00101100" // /* MW 2 */ + 5861 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 243 115 first +.src_ref 7 "accum.hpp" 151 115 + 5862 "11111000" // VMOV wl9, wl8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5863 "00100010" // /* MW 3 */ + 5864 "11010001" // /* MW 2 */ + 5865 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5866 "11011000" // VSHIFT x8, x9, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5867 "01100110" // /* MW 3 */ + 5868 "01001000" // /* MW 2 */ + 5869 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5870 "01100010" // VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5871 "00111101" // /* MW 7 */ + 5872 "01100100" // /* MW 6 */ + 5873 "00010001" // /* MW 5 */ + 5874 "11100110" // /* MW 4 */ + 5875 "10010010" // /* MW 3 */ + 5876 "00010000" // /* MW 2 */ + 5877 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5878 "11111000" // VMOV bmll3, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5879 "10010010" // /* MW 3 */ + 5880 "00010010" // /* MW 2 */ + 5881 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 + 5882 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5883 "00010010" // /* MW 3 */ + 5884 "00101000" // /* MW 2 */ + 5885 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 5886 "11011000" // VSHIFT x10, x8, x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5887 "00011110" // /* MW 3 */ + 5888 "01000000" // /* MW 2 */ + 5889 "00011101" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5890 "01100010" // VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5891 "00111101" // /* MW 7 */ + 5892 "01001100" // /* MW 6 */ + 5893 "00010010" // /* MW 5 */ + 5894 "11100110" // /* MW 4 */ + 5895 "00010010" // /* MW 3 */ + 5896 "00110000" // /* MW 2 */ + 5897 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 5898 "11111000" // VMOV bmll3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5899 "10010010" // /* MW 3 */ + 5900 "00010100" // /* MW 2 */ + 5901 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5902 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5903 "00111101" // /* MW 7 */ + 5904 "10001100" // /* MW 6 */ + 5905 "00010011" // /* MW 5 */ + 5906 "11000110" // /* MW 4 */ + 5907 "00011110" // /* MW 3 */ + 5908 "01000000" // /* MW 2 */ + 5909 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5910 "11111000" // VMOV bmll3, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5911 "10010010" // /* MW 3 */ + 5912 "00010000" // /* MW 2 */ + 5913 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 5914 "11111000" // VMOV x8, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5915 "00010010" // /* MW 3 */ + 5916 "00100100" // /* MW 2 */ + 5917 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 5918 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5919 "00111101" // /* MW 7 */ + 5920 "00110000" // /* MW 6 */ + 5921 "00010001" // /* MW 5 */ + 5922 "11000110" // /* MW 4 */ + 5923 "00011110" // /* MW 3 */ + 5924 "01000000" // /* MW 2 */ + 5925 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5926 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5927 "10010010" // /* MW 3 */ + 5928 "00010000" // /* MW 2 */ + 5929 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 5930 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5931 "00010010" // /* MW 3 */ + 5932 "00101000" // /* MW 2 */ + 5933 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 5934 "01100010" // VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5935 "00111101" // /* MW 7 */ + 5936 "01010000" // /* MW 6 */ + 5937 "00010010" // /* MW 5 */ + 5938 "11000110" // /* MW 4 */ + 5939 "00000010" // /* MW 3 */ + 5940 "01000000" // /* MW 2 */ + 5941 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5942 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "10010010" // /* MW 3 */ + 5944 "00010000" // /* MW 2 */ + 5945 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 5946 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5947 "00010010" // /* MW 3 */ + 5948 "00101100" // /* MW 2 */ + 5949 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 5950 "11011000" // VSHIFT x8, x8, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5951 "00000010" // /* MW 3 */ + 5952 "01000000" // /* MW 2 */ + 5953 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5954 "01100010" // VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5955 "00111101" // /* MW 7 */ + 5956 "01110000" // /* MW 6 */ + 5957 "00010011" // /* MW 5 */ + 5958 "11100110" // /* MW 4 */ + 5959 "00010010" // /* MW 3 */ + 5960 "00100100" // /* MW 2 */ + 5961 "00000101" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.nohwbrkpt +.noswbrkpt + 5962 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5963 "10010010" // /* MW 3 */ + 5964 "00010000" // /* MW 2 */ + 5965 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5966 "01100010" // VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5967 "00111101" // /* MW 7 */ + 5968 "00110000" // /* MW 6 */ + 5969 "00010000" // /* MW 5 */ + 5970 "11000110" // /* MW 4 */ + 5971 "00000010" // /* MW 3 */ + 5972 "01010000" // /* MW 2 */ + 5973 "00000101" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5974 "11111000" // VMOV bmll4, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5975 "10010010" // /* MW 3 */ + 5976 "00010100" // /* MW 2 */ + 5977 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 5978 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5979 "00010010" // /* MW 3 */ + 5980 "00101000" // /* MW 2 */ + 5981 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first + 5982 "10111000" // VEXTRACT.32 r23, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5983 "00000001" // /* MW 3 */ + 5984 "11100010" // /* MW 2 */ + 5985 "00011101" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 5986 "11111000" // VMOV x10, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5987 "00010010" // /* MW 3 */ + 5988 "00101100" // /* MW 2 */ + 5989 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first +.src_ref 5 "vector.hpp" 1288 16 first +.aggressive_scheduled_block_id 11 +.noswbrkpt + 5990 "01110100" // VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5991 "00000011" // /* MW 5 */ + 5992 "01010100" // /* MW 4 */ + 5993 "10000011" // /* MW 3 */ + 5994 "11010000" // /* MW 2 */ + 5995 "11100010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 5996 "11111000" // VMOV x11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "00010010" // /* MW 3 */ + 5998 "10100000" // /* MW 2 */ + 5999 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first +.src_ref 5 "vector.hpp" 1287 41 first +.src_ref 5 "broadcast.hpp" 80 25 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6000 "10110100" // VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6001 "00000110" // /* MW 5 */ + 6002 "10110100" // /* MW 4 */ + 6003 "10001010" // /* MW 3 */ + 6004 "11010100" // /* MW 2 */ + 6005 "11100000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6006 "00111000" // VSEL.32 x9, x10, x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6007 "10100000" // /* MW 3 */ + 6008 "11010100" // /* MW 2 */ + 6009 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 853 46 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6010 "01111000" // VINSERT.32 x10, x2, #0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6011 "11010001" // /* MW 3 */ + 6012 "00010000" // /* MW 2 */ + 6013 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 853 46 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6014 "01111000" // VINSERT.32 x8, x2, #0, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6015 "11110001" // /* MW 3 */ + 6016 "00010010" // /* MW 2 */ + 6017 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 5 "vector.hpp" 1413 19 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6018 "11111000" // VMOV wl11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "00100010" // /* MW 3 */ + 6020 "11010011" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 5 "vector.hpp" 1413 19 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6022 "11111000" // VMOV wh11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "00100010" // /* MW 3 */ + 6024 "10010011" // /* MW 2 */ + 6025 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 5 "vector.hpp" 1413 19 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6026 "11111000" // VMOV wh8, wl10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6027 "00100010" // /* MW 3 */ + 6028 "00010101" // /* MW 2 */ + 6029 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 first + 6030 "00111000" // VSEL.32 x8, x11, x8, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6031 "00000000" // /* MW 3 */ + 6032 "01011100" // /* MW 2 */ + 6033 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 6034 "00111000" // VSEL.32 x8, x1, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6035 "00001000" // /* MW 3 */ + 6036 "00001100" // /* MW 2 */ + 6037 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 6038 "00111000" // VSEL.32 x7, x8, x7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6039 "10101000" // /* MW 3 */ + 6040 "11000011" // /* MW 2 */ + 6041 "00011011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 + 6042 "11111000" // VMOV bmll0, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6043 "10010010" // /* MW 3 */ + 6044 "00001110" // /* MW 2 */ + 6045 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6046 "11111000" // VMOV x9, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6047 "10010010" // /* MW 3 */ + 6048 "10101100" // /* MW 2 */ + 6049 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 first +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first + 6050 "00000010" // VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6051 "01110000" // /* MW 7 */ + 6052 "01001001" // /* MW 6 */ + 6053 "00000111" // /* MW 5 */ + 6054 "00000001" // /* MW 4 */ + 6055 "11000000" // /* MW 3 */ + 6056 "00000010" // /* MW 2 */ + 6057 "01101000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6058 "11111000" // VMOV x8, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6059 "10010010" // /* MW 3 */ + 6060 "00110010" // /* MW 2 */ + 6061 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1289 16 + 6062 "01011010" // LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6063 "10000011" // /* MW 9 */ + 6064 "01001100" // /* MW 8 */ + 6065 "00010010" // /* MW 7 */ + 6066 "00001111" // /* MW 6 */ + 6067 "11101010" // /* MW 5 */ + 6068 "11101101" // /* MW 4 */ + 6069 "11001101" // /* MW 3 */ + 6070 "10111011" // /* MW 2 */ + 6071 "00000101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 9 first +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1289 16 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 6072 "01100010" // SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "10100001" // /* MW 7 */ + 6074 "11101100" // /* MW 6 */ + 6075 "00010001" // /* MW 5 */ + 6076 "10010001" // /* MW 4 */ + 6077 "00111110" // /* MW 3 */ + 6078 "00001011" // /* MW 2 */ + 6079 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 first +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1289 16 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 6080 "01011010" // SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6081 "01100001" // /* MW 9 */ + 6082 "11101100" // /* MW 8 */ + 6083 "00010000" // /* MW 7 */ + 6084 "00101111" // /* MW 6 */ + 6085 "00001001" // /* MW 5 */ + 6086 "00110011" // /* MW 4 */ + 6087 "11100010" // /* MW 3 */ + 6088 "10100101" // /* MW 2 */ + 6089 "00000101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 9 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6090 "01001000" // VMUL.f dm3, x6, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6091 "00000001" // /* MW 3 */ + 6092 "11101100" // /* MW 2 */ + 6093 "00010011" // /* MW 1 */ + 6094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6095 "00000000" // /* MW 1 */ + 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6097 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first +.src_ref 6 "me_vmult_float_emulated.h" 110 6 first +.src_ref 6 "me_vmult_float_emulated.h" 110 19 first + 6098 "00011000" // VCONV.bf16.fp32 wl9, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6099 "00010110" // /* MW 3 */ + 6100 "11000001" // /* MW 2 */ + 6101 "00001100" // /* MW 1 */ + 6102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6103 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 6104 "01001000" // VMSC.f dm2, dm2, x9, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "10000011" // /* MW 3 */ + 6106 "01010010" // /* MW 2 */ + 6107 "00010010" // /* MW 1 */ + 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6109 "00000000" // /* MW 1 */ + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ + 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6113 "00000000" // /* MW 1 */ + 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6115 "00000000" // /* MW 1 */ + 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6118 "00011000" // VCONV.bf16.fp32 wl8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6119 "00010110" // /* MW 3 */ + 6120 "01000001" // /* MW 2 */ + 6121 "00001100" // /* MW 1 */ + 6122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6123 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first + 6124 "01001000" // VMUL.f dm4, x8, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6125 "10100001" // /* MW 3 */ + 6126 "11110000" // /* MW 2 */ + 6127 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 9 first + 6128 "01001000" // VMUL.f dm2, x8, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6129 "01100001" // /* MW 3 */ + 6130 "11110000" // /* MW 2 */ + 6131 "00010010" // /* MW 1 */ + 6132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6133 "00000000" // /* MW 1 */ + 6134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6135 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6137 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 9 first +.aggressive_scheduled_block_id 13 +.noswbrkpt + 6138 "01001000" // VMUL.f dm2, x9, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6139 "10100001" // /* MW 3 */ + 6140 "11110010" // /* MW 2 */ + 6141 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6142 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6143 "00010010" // /* MW 3 */ + 6144 "01110000" // /* MW 2 */ + 6145 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6146 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6147 "00111101" // /* MW 3 */ + 6148 "10001000" // /* MW 2 */ + 6149 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6150 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6151 "10010010" // /* MW 3 */ + 6152 "00000101" // /* MW 2 */ + 6153 "00011100" // /* MW 1 */ + 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6155 "00000000" // /* MW 1 */ + 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6157 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id first + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 9 first +.aggressive_scheduled_block_id 14 +.noswbrkpt + 6160 "01001000" // VMUL.f dm2, x0, x8, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6161 "00000001" // /* MW 3 */ + 6162 "11100001" // /* MW 2 */ + 6163 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6164 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6165 "00010010" // /* MW 3 */ + 6166 "01110000" // /* MW 2 */ + 6167 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6168 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6169 "00111101" // /* MW 3 */ + 6170 "10001000" // /* MW 2 */ + 6171 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6172 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6173 "10010010" // /* MW 3 */ + 6174 "00000001" // /* MW 2 */ + 6175 "00011100" // /* MW 1 */ + 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6177 "00000000" // /* MW 1 */ + 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6179 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id first + 6180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6181 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 9 first +.aggressive_scheduled_block_id 15 +.noswbrkpt + 6182 "01001000" // VMUL.f dm1, x9, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6183 "01100001" // /* MW 3 */ + 6184 "11110010" // /* MW 2 */ + 6185 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6186 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6187 "00010010" // /* MW 3 */ + 6188 "01110000" // /* MW 2 */ + 6189 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6190 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6191 "00111101" // /* MW 3 */ + 6192 "10000100" // /* MW 2 */ + 6193 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6194 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6195 "10010010" // /* MW 3 */ + 6196 "00000101" // /* MW 2 */ + 6197 "00011100" // /* MW 1 */ + 6198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6199 "00000000" // /* MW 1 */ + 6200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6201 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id first + 6202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6203 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 9 first +.aggressive_scheduled_block_id 16 +.noswbrkpt + 6204 "01001000" // VMUL.f dm1, x9, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6205 "00000001" // /* MW 3 */ + 6206 "11110010" // /* MW 2 */ + 6207 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6208 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6209 "00010010" // /* MW 3 */ + 6210 "01110000" // /* MW 2 */ + 6211 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6212 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6213 "00111101" // /* MW 3 */ + 6214 "10000100" // /* MW 2 */ + 6215 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6216 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6217 "10010010" // /* MW 3 */ + 6218 "00000001" // /* MW 2 */ + 6219 "00011100" // /* MW 1 */ + 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6221 "00000000" // /* MW 1 */ + 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6223 "00000000" // /* MW 1 */ + 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6225 "00000000" // /* MW 1 */ + 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6227 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id first + 6228 "11111000" // VMOV lfl1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6229 "00010010" // /* MW 3 */ + 6230 "01110000" // /* MW 2 */ + 6231 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 17 +.noswbrkpt + 6232 "01001000" // VADD.f dm2, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6233 "00111101" // /* MW 3 */ + 6234 "10001000" // /* MW 2 */ + 6235 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6236 "11111000" // VMOV bmll4, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6237 "10010010" // /* MW 3 */ + 6238 "00010101" // /* MW 2 */ + 6239 "00011100" // /* MW 1 */ + 6240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6241 "00000000" // /* MW 1 */ + 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6243 "00000000" // /* MW 1 */ + 6244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6245 "00000000" // /* MW 1 */ + 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6247 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id first + 6248 "11111000" // VMOV lfh1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6249 "00010010" // /* MW 3 */ + 6250 "01101000" // /* MW 2 */ + 6251 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 first +.aggressive_scheduled_block_id 18 +.noswbrkpt + 6252 "01001000" // VADD.f dm2, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6253 "00111101" // /* MW 3 */ + 6254 "01000100" // /* MW 2 */ + 6255 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 "11111000" // VMOV bmll2, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "10010010" // /* MW 3 */ + 6258 "00010001" // /* MW 2 */ + 6259 "00011010" // /* MW 1 */ + 6260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6261 "00000000" // /* MW 1 */ + 6262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6263 "00000000" // /* MW 1 */ + 6264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6265 "00000000" // /* MW 1 */ + 6266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6267 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id first + 6268 "11111000" // VMOV lfl1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "00010010" // /* MW 3 */ + 6270 "01101000" // /* MW 2 */ + 6271 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 19 +.noswbrkpt + 6272 "01001000" // VADD.f dm0, dm1, dm0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "00111101" // /* MW 3 */ + 6274 "00100000" // /* MW 2 */ + 6275 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6276 "11111000" // VMOV bmll1, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "10010010" // /* MW 3 */ + 6278 "00010101" // /* MW 2 */ + 6279 "00011001" // /* MW 1 */ + 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6281 "00000000" // /* MW 1 */ + 6282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6283 "00000000" // /* MW 1 */ + 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6285 "00000000" // /* MW 1 */ + 6286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6287 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id first + 6288 "11111000" // VMOV lfh1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6289 "00010010" // /* MW 3 */ + 6290 "01100000" // /* MW 2 */ + 6291 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 first +.aggressive_scheduled_block_id 20 +.noswbrkpt + 6292 "01001000" // VADD.f dm0, dm0, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6293 "00111101" // /* MW 3 */ + 6294 "00001100" // /* MW 2 */ + 6295 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6296 "11111000" // VMOV bmll0, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6297 "10010010" // /* MW 3 */ + 6298 "00010001" // /* MW 2 */ + 6299 "00011000" // /* MW 1 */ + 6300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6301 "00000000" // /* MW 1 */ + 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 72 first +.src_ref 7 "accum.hpp" 1108 103 first + 6308 "00011000" // VCONV.bf16.fp32 wl11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6309 "00010110" // /* MW 3 */ + 6310 "11000000" // /* MW 2 */ + 6311 "00001101" // /* MW 1 */ + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 41 + 6314 "11011000" // VSHIFT x11, x0, x11, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "11111110" // /* MW 3 */ + 6316 "10000101" // /* MW 2 */ + 6317 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1289 16 first + 6318 "00111000" // VSEL.8 x11, x10, x11, r19:r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6319 "11001100" // /* MW 3 */ + 6320 "11010101" // /* MW 2 */ + 6321 "00011101" // /* MW 1 */ + 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6323 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 first +.src_ref 5 "vector.hpp" 1292 26 first + 6324 "00110110" // NOPA; NOPB; VST wh11, [p7, #32]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6325 "01000001" // /* MW 11 */ + 6326 "01100101" // /* MW 10 */ + 6327 "10001011" // /* MW 9 */ + 6328 "00000011" // /* MW 8 */ + 6329 "00000000" // /* MW 7 */ + 6330 "00000000" // /* MW 6 */ + 6331 "00100000" // /* MW 5 */ + 6332 "00000000" // /* MW 4 */ + 6333 "11110000" // /* MW 3 */ + 6334 "00101100" // /* MW 2 */ + 6335 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 first +.end_of_loop + 6336 "11100001" // NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6337 "00000000" // /* MW 15 */ + 6338 "00000000" // /* MW 14 */ + 6339 "01111000" // /* MW 13 */ + 6340 "10100101" // /* MW 12 */ + 6341 "00000001" // /* MW 11 */ + 6342 "00000000" // /* MW 10 */ + 6343 "00000000" // /* MW 9 */ + 6344 "10000000" // /* MW 8 */ + 6345 "11101010" // /* MW 7 */ + 6346 "10001010" // /* MW 6 */ + 6347 "00100111" // /* MW 5 */ + 6348 "00000000" // /* MW 4 */ + 6349 "11110000" // /* MW 3 */ + 6350 "00101100" // /* MW 2 */ + 6351 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6352 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */ + 6353 "00000000" // /* MW 5 */ + 6354 "00000000" // /* MW 4 */ + 6355 "01111000" // /* MW 3 */ + 6356 "00001100" // /* MW 2 */ + 6357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6367 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520 + 6368 "01011100" // ST dn3, [sp, #-4]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "10000000" // /* MW 5 */ + 6370 "10110100" // /* MW 4 */ + 6371 "10110000" // /* MW 3 */ + 6372 "10110100" // /* MW 2 */ + 6373 "11111111" // /* MW 1 */ + 6374 "01111010" // NOPA; ST lr, [sp, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6375 "00000000" // /* MW 9 */ + 6376 "00000000" // /* MW 8 */ + 6377 "00000000" // /* MW 7 */ + 6378 "10000000" // /* MW 6 */ + 6379 "00111101" // /* MW 5 */ + 6380 "11111000" // /* MW 4 */ + 6381 "11110111" // /* MW 3 */ + 6382 "00101100" // /* MW 2 */ + 6383 "00000000" // /* MW 1 */ +.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 2 "reduce_base_c8.h" 352 30 first + 6384 "00011000" // ADD.NC p7, r3, #34 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6385 "10010001" // /* MW 3 */ + 6386 "01100001" // /* MW 2 */ + 6387 "00011111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 + 6388 "11010100" // LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6389 "11000001" // /* MW 5 */ + 6390 "01100100" // /* MW 4 */ + 6391 "01011011" // /* MW 3 */ + 6392 "10001111" // /* MW 2 */ + 6393 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id first + 6394 "11111000" // MOV crSCDEn, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6395 "01100000" // /* MW 3 */ + 6396 "01111011" // /* MW 2 */ + 6397 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 +.aggressive_scheduled_block_id 21 +.noswbrkpt + 6398 "00011000" // ST.s16 r3, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6399 "01110111" // /* MW 3 */ + 6400 "00000100" // /* MW 2 */ + 6401 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 353 57 first +.aggressive_scheduled_block_id 21 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6402 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 6403 "00000001" // /* MW 5 */ + 6404 "00000000" // /* MW 4 */ + 6405 "11111000" // /* MW 3 */ + 6406 "00010011" // /* MW 2 */ + 6407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 first +.delay_slot +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6414 "00011000" // ADD r3, r3, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6415 "00000111" // /* MW 3 */ + 6416 "11000110" // /* MW 2 */ + 6417 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 +.delay_slot + 6418 "01111110" // NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6419 "01100000" // /* MW 13 */ + 6420 "00101011" // /* MW 12 */ + 6421 "00000000" // /* MW 11 */ + 6422 "10101111" // /* MW 10 */ + 6423 "00110100" // /* MW 9 */ + 6424 "00000000" // /* MW 8 */ + 6425 "10110000" // /* MW 7 */ + 6426 "11000000" // /* MW 6 */ + 6427 "00100000" // /* MW 5 */ + 6428 "00000000" // /* MW 4 */ + 6429 "11110000" // /* MW 3 */ + 6430 "00101100" // /* MW 2 */ + 6431 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 +.return_address + 6432 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00111001" // /* MW 3 */ + 6434 "11111000" // /* MW 2 */ + 6435 "00000111" // /* MW 1 */ + 6436 "00011000" // LDA p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "10011001" // /* MW 3 */ + 6438 "11111100" // /* MW 2 */ + 6439 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 353 23 first + 6440 "00011000" // ST.s16 r3, [p7, #10] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "01110111" // /* MW 3 */ + 6442 "01010100" // /* MW 2 */ + 6443 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 first + 6444 "11000100" // PADDXM [sp], #-256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6445 "00000001" // /* MW 5 */ + 6446 "00000000" // /* MW 4 */ + 6447 "00000000" // /* MW 3 */ + 6448 "11100000" // /* MW 2 */ + 6449 "11111111" // /* MW 1 */ + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ + 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6453 "00000000" // /* MW 1 */ + 6454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6455 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 + 6456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6457 "00000000" // /* MW 3 */ + 6458 "00101000" // /* MW 2 */ + 6459 "00010000" // /* MW 1 */ +.delay_slot + 6460 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6461 "11000000" // /* MW 3 */ + 6462 "01100010" // /* MW 2 */ + 6463 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6471 "01111110" // /* MW 9 */ + 6472 "10100101" // /* MW 8 */ + 6473 "00000001" // /* MW 7 */ + 6474 "00000000" // /* MW 6 */ + 6475 "00010000" // /* MW 5 */ + 6476 "00000000" // /* MW 4 */ + 6477 "11110000" // /* MW 3 */ + 6478 "00101100" // /* MW 2 */ + 6479 "00000000" // /* MW 1 */ +.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 5 "blend.hpp" 163 48 + 6480 "10111010" // MOVA r20, #255; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 6481 "00100000" // /* MW 9 */ + 6482 "00000000" // /* MW 8 */ + 6483 "00000000" // /* MW 7 */ + 6484 "10111000" // /* MW 6 */ + 6485 "00000010" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "00000000" // /* MW 3 */ + 6488 "11110100" // /* MW 2 */ + 6489 "00011111" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 6490 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6491 "00000001" // /* MW 3 */ + 6492 "00101010" // /* MW 2 */ + 6493 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6500 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6501 "10000001" // /* MW 11 */ + 6502 "10101101" // /* MW 10 */ + 6503 "00000000" // /* MW 9 */ + 6504 "00000000" // /* MW 8 */ + 6505 "00000000" // /* MW 7 */ + 6506 "00000000" // /* MW 6 */ + 6507 "00100000" // /* MW 5 */ + 6508 "00000000" // /* MW 4 */ + 6509 "11110000" // /* MW 3 */ + 6510 "00101100" // /* MW 2 */ + 6511 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6512 "00011000" // MOVX r5, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6513 "00010101" // /* MW 3 */ + 6514 "00001010" // /* MW 2 */ + 6515 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first + 6516 "10011000" // EQ r5, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "01100111" // /* MW 3 */ + 6518 "01001010" // /* MW 2 */ + 6519 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6520 "10000100" // JNZ r5, #7264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7264 delay_slots=5 */ + 6521 "00000001" // /* MW 5 */ + 6522 "01000000" // /* MW 4 */ + 6523 "00110000" // /* MW 3 */ + 6524 "00001110" // /* MW 2 */ + 6525 "00101000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6535 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6536 "00011000" // MOVX r7, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00011001" // /* MW 3 */ + 6538 "00001110" // /* MW 2 */ + 6539 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6540 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "01100111" // /* MW 3 */ + 6542 "11001110" // /* MW 2 */ + 6543 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6544 "10000100" // JNZ r7, #7504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7504 delay_slots=5 */ + 6545 "00000001" // /* MW 5 */ + 6546 "01000000" // /* MW 4 */ + 6547 "10101000" // /* MW 3 */ + 6548 "00001110" // /* MW 2 */ + 6549 "00111000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.delay_slot + 6550 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6551 "01000001" // /* MW 3 */ + 6552 "00001010" // /* MW 2 */ + 6553 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6555 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6557 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6559 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6561 "00000000" // /* MW 15 */ + 6562 "00000000" // /* MW 14 */ + 6563 "01111000" // /* MW 13 */ + 6564 "10100101" // /* MW 12 */ + 6565 "00000001" // /* MW 11 */ + 6566 "00000000" // /* MW 10 */ + 6567 "00000000" // /* MW 9 */ + 6568 "00000000" // /* MW 8 */ + 6569 "01011011" // /* MW 7 */ + 6570 "00000001" // /* MW 6 */ + 6571 "00100000" // /* MW 5 */ + 6572 "00000000" // /* MW 4 */ + 6573 "11110000" // /* MW 3 */ + 6574 "00101100" // /* MW 2 */ + 6575 "00000000" // /* MW 1 */ +.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first +.src_ref 3 "reduce_mean_c8_impl.h" 202 30 + 6576 "10111010" // LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6577 "01111000" // /* MW 9 */ + 6578 "11110000" // /* MW 8 */ + 6579 "01100000" // /* MW 7 */ + 6580 "11101010" // /* MW 6 */ + 6581 "00010000" // /* MW 5 */ + 6582 "00000001" // /* MW 4 */ + 6583 "01010000" // /* MW 3 */ + 6584 "00011110" // /* MW 2 */ + 6585 "01001000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 202 30 first + 6586 "01100100" // NE r6, r17, r6; MOV r17, #257 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6587 "00000101" // /* MW 5 */ + 6588 "10100100" // /* MW 4 */ + 6589 "00011000" // /* MW 3 */ + 6590 "10001101" // /* MW 2 */ + 6591 "10001001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 202 12 + 6592 "10000100" // JNZ r6, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */ + 6593 "00000001" // /* MW 5 */ + 6594 "01000000" // /* MW 4 */ + 6595 "00100000" // /* MW 3 */ + 6596 "00001110" // /* MW 2 */ + 6597 "00110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 first +.delay_slot + 6606 "10011000" // ASHL r5, r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6607 "01011110" // /* MW 3 */ + 6608 "11001010" // /* MW 2 */ + 6609 "00010001" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 199 120 +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first +.src_ref 3 "reduce_mean_c8_impl.h" 206 35 +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 + 6610 "01110110" // MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6611 "00010000" // /* MW 11 */ + 6612 "00001000" // /* MW 10 */ + 6613 "01111101" // /* MW 9 */ + 6614 "00000100" // /* MW 8 */ + 6615 "00000000" // /* MW 7 */ + 6616 "00000000" // /* MW 6 */ + 6617 "10001011" // /* MW 5 */ + 6618 "10000100" // /* MW 4 */ + 6619 "10000000" // /* MW 3 */ + 6620 "10001010" // /* MW 2 */ + 6621 "00000100" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 +.src_ref 3 "reduce_mean_c8_impl.h" 206 35 + 6622 "01110110" // LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6623 "00010000" // /* MW 11 */ + 6624 "00111000" // /* MW 10 */ + 6625 "10111101" // /* MW 9 */ + 6626 "00000101" // /* MW 8 */ + 6627 "00000000" // /* MW 7 */ + 6628 "10000000" // /* MW 6 */ + 6629 "10100101" // /* MW 5 */ + 6630 "11111101" // /* MW 4 */ + 6631 "11010111" // /* MW 3 */ + 6632 "00011110" // /* MW 2 */ + 6633 "01001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first + 6634 "10011000" // VLDA bmll2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6635 "00010101" // /* MW 3 */ + 6636 "00011101" // /* MW 2 */ + 6637 "00000000" // /* MW 1 */ + 6638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6639 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 + 6640 "11111000" // VMOV bmhh4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6641 "10010010" // /* MW 3 */ + 6642 "11000010" // /* MW 2 */ + 6643 "00011100" // /* MW 1 */ + 6644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6645 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 +.src_ref 5 "add.hpp" 28 49 first + 6646 "01100010" // VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6647 "00111101" // /* MW 7 */ + 6648 "01101000" // /* MW 6 */ + 6649 "00010001" // /* MW 5 */ + 6650 "11100110" // /* MW 4 */ + 6651 "00010010" // /* MW 3 */ + 6652 "00010011" // /* MW 2 */ + 6653 "00000011" // /* MW 1 */ + 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6655 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first + 6656 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6657 "00000000" // /* MW 15 */ + 6658 "00000000" // /* MW 14 */ + 6659 "11001000" // /* MW 13 */ + 6660 "11111111" // /* MW 12 */ + 6661 "10111001" // /* MW 11 */ + 6662 "00000010" // /* MW 10 */ + 6663 "00000000" // /* MW 9 */ + 6664 "00000000" // /* MW 8 */ + 6665 "01011011" // /* MW 7 */ + 6666 "00000001" // /* MW 6 */ + 6667 "00100000" // /* MW 5 */ + 6668 "00000000" // /* MW 4 */ + 6669 "11110000" // /* MW 3 */ + 6670 "00101100" // /* MW 2 */ + 6671 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first +.begin_of_loop +.loop_nesting 1 + 6672 "11100001" // VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6673 "00000000" // /* MW 15 */ + 6674 "00000000" // /* MW 14 */ + 6675 "01111000" // /* MW 13 */ + 6676 "10100101" // /* MW 12 */ + 6677 "00000001" // /* MW 11 */ + 6678 "00000000" // /* MW 10 */ + 6679 "00000000" // /* MW 9 */ + 6680 "00000000" // /* MW 8 */ + 6681 "01011011" // /* MW 7 */ + 6682 "00000001" // /* MW 6 */ + 6683 "00100000" // /* MW 5 */ + 6684 "00000000" // /* MW 4 */ + 6685 "10110000" // /* MW 3 */ + 6686 "10100010" // /* MW 2 */ + 6687 "00000011" // /* MW 1 */ + 6688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6689 "00000000" // /* MW 15 */ + 6690 "00000000" // /* MW 14 */ + 6691 "01111000" // /* MW 13 */ + 6692 "10100101" // /* MW 12 */ + 6693 "00000001" // /* MW 11 */ + 6694 "00000000" // /* MW 10 */ + 6695 "00000000" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "01011011" // /* MW 7 */ + 6698 "00000001" // /* MW 6 */ + 6699 "00100000" // /* MW 5 */ + 6700 "00000000" // /* MW 4 */ + 6701 "11110000" // /* MW 3 */ + 6702 "00101100" // /* MW 2 */ + 6703 "00000000" // /* MW 1 */ + 6704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6705 "00000000" // /* MW 15 */ + 6706 "00000000" // /* MW 14 */ + 6707 "01111000" // /* MW 13 */ + 6708 "10100101" // /* MW 12 */ + 6709 "00000001" // /* MW 11 */ + 6710 "00000000" // /* MW 10 */ + 6711 "00000000" // /* MW 9 */ + 6712 "00000000" // /* MW 8 */ + 6713 "01011011" // /* MW 7 */ + 6714 "00000001" // /* MW 6 */ + 6715 "00100000" // /* MW 5 */ + 6716 "00000000" // /* MW 4 */ + 6717 "11110000" // /* MW 3 */ + 6718 "00101100" // /* MW 2 */ + 6719 "00000000" // /* MW 1 */ + 6720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6721 "00000000" // /* MW 15 */ + 6722 "00000000" // /* MW 14 */ + 6723 "01111000" // /* MW 13 */ + 6724 "10100101" // /* MW 12 */ + 6725 "00000001" // /* MW 11 */ + 6726 "00000000" // /* MW 10 */ + 6727 "00000000" // /* MW 9 */ + 6728 "00000000" // /* MW 8 */ + 6729 "01011011" // /* MW 7 */ + 6730 "00000001" // /* MW 6 */ + 6731 "00100000" // /* MW 5 */ + 6732 "00000000" // /* MW 4 */ + 6733 "11110000" // /* MW 3 */ + 6734 "00101100" // /* MW 2 */ + 6735 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 150 115 first +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id first + 6736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6737 "00000000" // /* MW 15 */ + 6738 "00000000" // /* MW 14 */ + 6739 "01111000" // /* MW 13 */ + 6740 "00001001" // /* MW 12 */ + 6741 "01100010" // /* MW 11 */ + 6742 "00000010" // /* MW 10 */ + 6743 "00000000" // /* MW 9 */ + 6744 "00000000" // /* MW 8 */ + 6745 "01011011" // /* MW 7 */ + 6746 "00000001" // /* MW 6 */ + 6747 "00100000" // /* MW 5 */ + 6748 "00000000" // /* MW 4 */ + 6749 "11110000" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 5 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 22 +.noswbrkpt + 6752 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "01000001" // /* MW 15 */ + 6754 "10001011" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "01011011" // /* MW 7 */ + 6762 "00000001" // /* MW 6 */ + 6763 "00100000" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920 +.src_ref 7 "accum.hpp" 199 120 first +.end_of_loop +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6768 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6769 "00000000" // /* MW 15 */ + 6770 "00000000" // /* MW 14 */ + 6771 "01111000" // /* MW 13 */ + 6772 "10001001" // /* MW 12 */ + 6773 "10001001" // /* MW 11 */ + 6774 "00000001" // /* MW 10 */ + 6775 "00000000" // /* MW 9 */ + 6776 "00000000" // /* MW 8 */ + 6777 "01011011" // /* MW 7 */ + 6778 "00000001" // /* MW 6 */ + 6779 "00100000" // /* MW 5 */ + 6780 "00000000" // /* MW 4 */ + 6781 "11110000" // /* MW 3 */ + 6782 "00101100" // /* MW 2 */ + 6783 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id first +.loop_nesting 0 + 6784 "10111010" // MOVA r16, #16; MOVXM p7, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6785 "00010000" // /* MW 9 */ + 6786 "01111000" // /* MW 8 */ + 6787 "10110010" // /* MW 7 */ + 6788 "11110011" // /* MW 6 */ + 6789 "00000001" // /* MW 5 */ + 6790 "00000000" // /* MW 4 */ + 6791 "00000000" // /* MW 3 */ + 6792 "00010000" // /* MW 2 */ + 6793 "00000010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6794 "10111010" // LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6795 "01011000" // /* MW 9 */ + 6796 "00000001" // /* MW 8 */ + 6797 "10011000" // /* MW 7 */ + 6798 "00001000" // /* MW 6 */ + 6799 "01100001" // /* MW 5 */ + 6800 "00000000" // /* MW 4 */ + 6801 "01010000" // /* MW 3 */ + 6802 "10010000" // /* MW 2 */ + 6803 "11100000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6804 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6805 "00000101" // /* MW 3 */ + 6806 "00100010" // /* MW 2 */ + 6807 "00010000" // /* MW 1 */ +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6809 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 150 115 first +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6810 "11111000" // VMOV bmhh4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6811 "00010010" // /* MW 3 */ + 6812 "11000100" // /* MW 2 */ + 6813 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6816 "11111000" // VMOV x2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6817 "00010010" // /* MW 3 */ + 6818 "00110011" // /* MW 2 */ + 6819 "00011001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6820 "11011000" // VSHIFT x2, x2, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6821 "00010010" // /* MW 3 */ + 6822 "00010000" // /* MW 2 */ + 6823 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 1108 103 +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6824 "01011010" // MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6825 "00111101" // /* MW 9 */ + 6826 "01000000" // /* MW 8 */ + 6827 "00010000" // /* MW 7 */ + 6828 "00101111" // /* MW 6 */ + 6829 "01001001" // /* MW 5 */ + 6830 "00000000" // /* MW 4 */ + 6831 "10000000" // /* MW 3 */ + 6832 "00111010" // /* MW 2 */ + 6833 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6834 "11111000" // VMOV bmll2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6835 "00010010" // /* MW 3 */ + 6836 "00010011" // /* MW 2 */ + 6837 "00011010" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first + 6838 "11111000" // VBCST.32 x2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6839 "01110010" // /* MW 3 */ + 6840 "00010110" // /* MW 2 */ + 6841 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 + 6842 "11111000" // VMOV bmll1, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6843 "10010010" // /* MW 3 */ + 6844 "00000100" // /* MW 2 */ + 6845 "00011001" // /* MW 1 */ + 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6847 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first + 6848 "11111000" // VMOV bmll2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "00010010" // /* MW 3 */ + 6850 "00000100" // /* MW 2 */ + 6851 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id first + 6852 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "00010010" // /* MW 3 */ + 6854 "00100000" // /* MW 2 */ + 6855 "00011001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 24 +.noswbrkpt + 6856 "01100010" // VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6857 "00111101" // /* MW 7 */ + 6858 "00001100" // /* MW 6 */ + 6859 "00010000" // /* MW 5 */ + 6860 "11000110" // /* MW 4 */ + 6861 "01000010" // /* MW 3 */ + 6862 "00010000" // /* MW 2 */ + 6863 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6864 "11111000" // VMOV bmll3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6865 "10010010" // /* MW 3 */ + 6866 "00000100" // /* MW 2 */ + 6867 "00011011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 6868 "11111000" // VMOV x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6869 "10010010" // /* MW 3 */ + 6870 "00100000" // /* MW 2 */ + 6871 "00011001" // /* MW 1 */ + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6873 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 first +.src_ref 6 "me_vmult_float_emulated.h" 112 19 first + 6874 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6875 "10010110" // /* MW 3 */ + 6876 "01000000" // /* MW 2 */ + 6877 "00001000" // /* MW 1 */ + 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6879 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id first + 6880 "01100010" // VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6881 "10000011" // /* MW 7 */ + 6882 "01000000" // /* MW 6 */ + 6883 "00010100" // /* MW 5 */ + 6884 "11100110" // /* MW 4 */ + 6885 "00010010" // /* MW 3 */ + 6886 "10100000" // /* MW 2 */ + 6887 "00000001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 25 +.noswbrkpt + 6888 "01100010" // VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6889 "00111101" // /* MW 7 */ + 6890 "00001000" // /* MW 6 */ + 6891 "00010000" // /* MW 5 */ + 6892 "11000110" // /* MW 4 */ + 6893 "00011010" // /* MW 3 */ + 6894 "10011000" // /* MW 2 */ + 6895 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6896 "11111000" // VMOV bmll2, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6897 "10010010" // /* MW 3 */ + 6898 "00000110" // /* MW 2 */ + 6899 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 6900 "11111000" // VMOV x3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6901 "10010010" // /* MW 3 */ + 6902 "10100100" // /* MW 2 */ + 6903 "00011001" // /* MW 1 */ + 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6905 "00000000" // /* MW 1 */ + 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6907 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 6 "me_vmult_float_emulated.h" 114 6 first +.src_ref 6 "me_vmult_float_emulated.h" 114 19 first + 6908 "00011000" // VCONV.bf16.fp32 wl2, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6909 "00010110" // /* MW 3 */ + 6910 "01000010" // /* MW 2 */ + 6911 "00001001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id first + 6912 "11111000" // VMOV x5, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6913 "00010010" // /* MW 3 */ + 6914 "10100000" // /* MW 2 */ + 6915 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 26 +.noswbrkpt + 6916 "01100010" // VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6917 "00111101" // /* MW 7 */ + 6918 "00001000" // /* MW 6 */ + 6919 "00010000" // /* MW 5 */ + 6920 "11000110" // /* MW 4 */ + 6921 "00000010" // /* MW 3 */ + 6922 "00101000" // /* MW 2 */ + 6923 "00000011" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6924 "11111000" // VMOV bmll2, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10010010" // /* MW 3 */ + 6926 "00001100" // /* MW 2 */ + 6927 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6928 "11111000" // VMOV x5, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "10010010" // /* MW 3 */ + 6930 "10100110" // /* MW 2 */ + 6931 "00011010" // /* MW 1 */ + 6932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6933 "00000000" // /* MW 1 */ + 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6935 "00000000" // /* MW 1 */ + 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6937 "00000000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 6938 "11111000" // VMOV x6, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6939 "00010010" // /* MW 3 */ + 6940 "00100000" // /* MW 2 */ + 6941 "00011011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first + 6942 "10111000" // VEXTRACT.32 r0, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6943 "00000001" // /* MW 3 */ + 6944 "00011010" // /* MW 2 */ + 6945 "00011000" // /* MW 1 */ + 6946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6947 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 856 23 first + 6948 "01111000" // VINSERT.32 x6, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6949 "00010001" // /* MW 3 */ + 6950 "00000000" // /* MW 2 */ + 6951 "00011011" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 first + 6952 "00111000" // VSEL.32 x1, x1, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6953 "00001000" // /* MW 3 */ + 6954 "10001011" // /* MW 2 */ + 6955 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 + 6956 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6957 "10010010" // /* MW 3 */ + 6958 "00000010" // /* MW 2 */ + 6959 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6960 "11111000" // VMOV x1, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6961 "10010010" // /* MW 3 */ + 6962 "10101010" // /* MW 2 */ + 6963 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 first +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6964 "00000010" // VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6965 "01110000" // /* MW 7 */ + 6966 "01001001" // /* MW 6 */ + 6967 "10010001" // /* MW 5 */ + 6968 "00000001" // /* MW 4 */ + 6969 "11000000" // /* MW 3 */ + 6970 "00100010" // /* MW 2 */ + 6971 "01011000" // /* MW 1 */ + 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6973 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first + 6974 "01001000" // VMSC.f dm1, dm2, x5, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6975 "10000011" // /* MW 3 */ + 6976 "01001010" // /* MW 2 */ + 6977 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 9 first + 6978 "01001000" // VMUL.f dm0, x5, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6979 "01000001" // /* MW 3 */ + 6980 "11101010" // /* MW 2 */ + 6981 "00010000" // /* MW 1 */ + 6982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6983 "00000000" // /* MW 1 */ + 6984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6985 "00000000" // /* MW 1 */ + 6986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6987 "00000000" // /* MW 1 */ + 6988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6989 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first +.src_ref 6 "me_vmult_float_emulated.h" 110 6 first +.src_ref 6 "me_vmult_float_emulated.h" 110 19 first + 6990 "00011000" // VCONV.bf16.fp32 wl1, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6991 "10010110" // /* MW 3 */ + 6992 "11000000" // /* MW 2 */ + 6993 "00001000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 6994 "01001000" // VMSC.f dm4, dm4, x2, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6995 "10000011" // /* MW 3 */ + 6996 "10000100" // /* MW 2 */ + 6997 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 6998 "01001000" // VMSC.f dm3, dm1, x1, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6999 "10000011" // /* MW 3 */ + 7000 "00100010" // /* MW 2 */ + 7001 "00010011" // /* MW 1 */ + 7002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7003 "00000000" // /* MW 1 */ + 7004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7005 "00000000" // /* MW 1 */ + 7006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7007 "00000000" // /* MW 1 */ + 7008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7009 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 6 first +.src_ref 6 "me_vmult_float_emulated.h" 115 19 first +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 7010 "00011000" // VCONV.bf16.fp32 wl3, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7011 "00010110" // /* MW 3 */ + 7012 "11000010" // /* MW 2 */ + 7013 "00001001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 6 first +.src_ref 6 "me_vmult_float_emulated.h" 111 19 first +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 7014 "00011000" // VCONV.bf16.fp32 wl6, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7015 "10010110" // /* MW 3 */ + 7016 "01000001" // /* MW 2 */ + 7017 "00001011" // /* MW 1 */ + 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7019 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first + 7020 "01001000" // VMUL.f dm2, x6, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7021 "01100001" // /* MW 3 */ + 7022 "11101100" // /* MW 2 */ + 7023 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 9 first + 7024 "01001000" // VMUL.f dm3, x6, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7025 "01000001" // /* MW 3 */ + 7026 "11101100" // /* MW 2 */ + 7027 "00010011" // /* MW 1 */ + 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7029 "00000000" // /* MW 1 */ + 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7031 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id first + 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7033 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 9 first +.aggressive_scheduled_block_id 27 +.noswbrkpt + 7034 "01001000" // VMUL.f dm3, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7035 "01100001" // /* MW 3 */ + 7036 "11100010" // /* MW 2 */ + 7037 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7038 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "00010010" // /* MW 3 */ + 7040 "01101000" // /* MW 2 */ + 7041 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7042 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7043 "00111101" // /* MW 3 */ + 7044 "01001100" // /* MW 2 */ + 7045 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7046 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7047 "10010010" // /* MW 3 */ + 7048 "00000101" // /* MW 2 */ + 7049 "00011010" // /* MW 1 */ + 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7051 "00000000" // /* MW 1 */ + 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7053 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id first + 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7055 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 9 first +.aggressive_scheduled_block_id 28 +.noswbrkpt + 7056 "01001000" // VMUL.f dm3, x5, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7057 "01100001" // /* MW 3 */ + 7058 "11101010" // /* MW 2 */ + 7059 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7060 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00010010" // /* MW 3 */ + 7062 "01101000" // /* MW 2 */ + 7063 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7064 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7065 "00111101" // /* MW 3 */ + 7066 "01001100" // /* MW 2 */ + 7067 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7068 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7069 "10010010" // /* MW 3 */ + 7070 "00000001" // /* MW 2 */ + 7071 "00011010" // /* MW 1 */ + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ + 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7075 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id first + 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7077 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 9 first +.aggressive_scheduled_block_id 29 +.noswbrkpt + 7078 "01001000" // VMUL.f dm3, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7079 "01000001" // /* MW 3 */ + 7080 "11100010" // /* MW 2 */ + 7081 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7082 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7083 "00010010" // /* MW 3 */ + 7084 "01101000" // /* MW 2 */ + 7085 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7086 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7087 "00111101" // /* MW 3 */ + 7088 "01001100" // /* MW 2 */ + 7089 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7090 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7091 "10010010" // /* MW 3 */ + 7092 "00000101" // /* MW 2 */ + 7093 "00011010" // /* MW 1 */ + 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7095 "00000000" // /* MW 1 */ + 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7097 "00000000" // /* MW 1 */ + 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7099 "00000000" // /* MW 1 */ + 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7101 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id first + 7102 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7103 "00010010" // /* MW 3 */ + 7104 "01101000" // /* MW 2 */ + 7105 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 30 +.noswbrkpt + 7106 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7107 "00111101" // /* MW 3 */ + 7108 "01001100" // /* MW 2 */ + 7109 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7110 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7111 "10010010" // /* MW 3 */ + 7112 "00000001" // /* MW 2 */ + 7113 "00011010" // /* MW 1 */ + 7114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7115 "00000000" // /* MW 1 */ + 7116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7117 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 9 first + 7118 "01001000" // VMUL.f dm3, x0, x6, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "11000001" // /* MW 3 */ + 7120 "11100000" // /* MW 2 */ + 7121 "00010011" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id first + 7124 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7125 "00010010" // /* MW 3 */ + 7126 "01101000" // /* MW 2 */ + 7127 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 31 +.noswbrkpt + 7128 "01001000" // VADD.f dm3, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7129 "00111101" // /* MW 3 */ + 7130 "01001100" // /* MW 2 */ + 7131 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7132 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7133 "10010010" // /* MW 3 */ + 7134 "00000101" // /* MW 2 */ + 7135 "00011010" // /* MW 1 */ + 7136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7137 "00000000" // /* MW 1 */ + 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7139 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 9 first + 7140 "01001000" // VMUL.f dm1, x1, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00000001" // /* MW 3 */ + 7142 "11100010" // /* MW 2 */ + 7143 "00010001" // /* MW 1 */ + 7144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7145 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id first + 7146 "11111000" // VMOV lfh0, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00010010" // /* MW 3 */ + 7148 "01101100" // /* MW 2 */ + 7149 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 first +.aggressive_scheduled_block_id 32 +.noswbrkpt + 7150 "01001000" // VADD.f dm1, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "00111101" // /* MW 3 */ + 7152 "01000100" // /* MW 2 */ + 7153 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7154 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "10010010" // /* MW 3 */ + 7156 "00000001" // /* MW 2 */ + 7157 "00011010" // /* MW 1 */ + 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7159 "00000000" // /* MW 1 */ + 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7161 "00000000" // /* MW 1 */ + 7162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7163 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id first + 7164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7165 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 33 +.noswbrkpt + 7166 "01100010" // VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7167 "00111101" // /* MW 7 */ + 7168 "01000000" // /* MW 6 */ + 7169 "00010000" // /* MW 5 */ + 7170 "11100110" // /* MW 4 */ + 7171 "00010010" // /* MW 3 */ + 7172 "00100100" // /* MW 2 */ + 7173 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 first +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7174 "01100010" // VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7175 "00000001" // /* MW 7 */ + 7176 "11101010" // /* MW 6 */ + 7177 "00010100" // /* MW 5 */ + 7178 "11100110" // /* MW 4 */ + 7179 "10010010" // /* MW 3 */ + 7180 "00000000" // /* MW 2 */ + 7181 "00000010" // /* MW 1 */ + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id first + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.aggressive_scheduled_block_id 34 +.noswbrkpt + 7188 "01001000" // VADD.f dm0, dm2, dm4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00111101" // /* MW 3 */ + 7190 "01010000" // /* MW 2 */ + 7191 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010010" // /* MW 3 */ + 7194 "00000000" // /* MW 2 */ + 7195 "00011010" // /* MW 1 */ + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ + 7198 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */ + 7199 "00000000" // /* MW 5 */ + 7200 "00000000" // /* MW 4 */ + 7201 "01111000" // /* MW 3 */ + 7202 "00001100" // /* MW 2 */ + 7203 "00000000" // /* MW 1 */ +.delay_slot + 7204 "10011000" // ST dc4, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7205 "01100101" // /* MW 3 */ + 7206 "11111010" // /* MW 2 */ + 7207 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 first +.delay_slot + 7210 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7211 "00010010" // /* MW 3 */ + 7212 "00000000" // /* MW 2 */ + 7213 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7215 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 1108 103 first +.delay_slot + 7216 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7217 "00000000" // /* MW 15 */ + 7218 "00000000" // /* MW 14 */ + 7219 "01111000" // /* MW 13 */ + 7220 "10100101" // /* MW 12 */ + 7221 "00000001" // /* MW 11 */ + 7222 "00000000" // /* MW 10 */ + 7223 "00000000" // /* MW 9 */ + 7224 "10000000" // /* MW 8 */ + 7225 "00010010" // /* MW 7 */ + 7226 "00000101" // /* MW 6 */ + 7227 "00100001" // /* MW 5 */ + 7228 "00000000" // /* MW 4 */ + 7229 "11110000" // /* MW 3 */ + 7230 "00101100" // /* MW 2 */ + 7231 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384 +.src_ref 5 "blend.hpp" 163 48 + 7232 "10111010" // MOVA r20, #0; J #5616 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5616 delay_slots=5 */ + 7233 "00100000" // /* MW 9 */ + 7234 "00000000" // /* MW 8 */ + 7235 "00000000" // /* MW 7 */ + 7236 "10111110" // /* MW 6 */ + 7237 "00000010" // /* MW 5 */ + 7238 "00000000" // /* MW 4 */ + 7239 "00000000" // /* MW 3 */ + 7240 "00010100" // /* MW 2 */ + 7241 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7242 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7243 "00000001" // /* MW 3 */ + 7244 "00101010" // /* MW 2 */ + 7245 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7247 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7252 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7253 "10000001" // /* MW 11 */ + 7254 "10101101" // /* MW 10 */ + 7255 "00000000" // /* MW 9 */ + 7256 "00000000" // /* MW 8 */ + 7257 "00000000" // /* MW 7 */ + 7258 "00000000" // /* MW 6 */ + 7259 "00100000" // /* MW 5 */ + 7260 "00000000" // /* MW 4 */ + 7261 "11110000" // /* MW 3 */ + 7262 "00101100" // /* MW 2 */ + 7263 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416 + 7264 "10000100" // J #7456 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7265 "00000000" // /* MW 5 */ + 7266 "00000000" // /* MW 4 */ + 7267 "10010000" // /* MW 3 */ + 7268 "00001110" // /* MW 2 */ + 7269 "00000000" // /* MW 1 */ +.delay_slot + 7270 "00000010" // ST p1, [sp, #-4]; MOV dc4, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7271 "01110000" // /* MW 7 */ + 7272 "11110000" // /* MW 6 */ + 7273 "01100000" // /* MW 5 */ + 7274 "00000010" // /* MW 4 */ + 7275 "10110000" // /* MW 3 */ + 7276 "10010011" // /* MW 2 */ + 7277 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7279 "00000000" // /* MW 1 */ +.delay_slot + 7280 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7281 "00110011" // /* MW 3 */ + 7282 "11110000" // /* MW 2 */ + 7283 "00001111" // /* MW 1 */ +.delay_slot + 7284 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7285 "00110011" // /* MW 3 */ + 7286 "11110101" // /* MW 2 */ + 7287 "00001111" // /* MW 1 */ +.delay_slot + 7288 "00000010" // VST x1, [sp, #-128]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7289 "01110000" // /* MW 7 */ + 7290 "10100101" // /* MW 6 */ + 7291 "00000001" // /* MW 5 */ + 7292 "00000000" // /* MW 4 */ + 7293 "01100000" // /* MW 3 */ + 7294 "00001110" // /* MW 2 */ + 7295 "11111111" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7296 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7297 "00000101" // /* MW 3 */ + 7298 "00100010" // /* MW 2 */ + 7299 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first + 7300 "10011000" // EQ r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7301 "01100111" // /* MW 3 */ + 7302 "01100010" // /* MW 2 */ + 7303 "00010100" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7304 "10000100" // JNZ r17, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */ + 7305 "00000001" // /* MW 5 */ + 7306 "01000000" // /* MW 4 */ + 7307 "10010000" // /* MW 3 */ + 7308 "00001110" // /* MW 2 */ + 7309 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ +.delay_slot + 7312 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7313 "00110011" // /* MW 3 */ + 7314 "11110000" // /* MW 2 */ + 7315 "00001111" // /* MW 1 */ +.delay_slot + 7316 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7317 "00110011" // /* MW 3 */ + 7318 "11110101" // /* MW 2 */ + 7319 "00001111" // /* MW 1 */ +.delay_slot + 7320 "00011000" // VST x1, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "01110011" // /* MW 3 */ + 7322 "11111000" // /* MW 2 */ + 7323 "00001111" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 +.delay_slot + 7324 "00111010" // ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7325 "01111001" // /* MW 9 */ + 7326 "11110000" // /* MW 8 */ + 7327 "01100000" // /* MW 7 */ + 7328 "01001010" // /* MW 6 */ + 7329 "01110000" // /* MW 5 */ + 7330 "00000000" // /* MW 4 */ + 7331 "10110000" // /* MW 3 */ + 7332 "10010011" // /* MW 2 */ + 7333 "11111111" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7334 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7335 "01100111" // /* MW 3 */ + 7336 "11001110" // /* MW 2 */ + 7337 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7338 "10000100" // JNZ r7, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7339 "00000001" // /* MW 5 */ + 7340 "01000000" // /* MW 4 */ + 7341 "10000000" // /* MW 3 */ + 7342 "00001110" // /* MW 2 */ + 7343 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7345 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7353 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7354 "10011000" // EQ r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7355 "01100111" // /* MW 3 */ + 7356 "01001110" // /* MW 2 */ + 7357 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7358 "10000100" // JNZ r7, #7392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7392 delay_slots=5 */ + 7359 "00000001" // /* MW 5 */ + 7360 "01000000" // /* MW 4 */ + 7361 "01110000" // /* MW 3 */ + 7362 "00001110" // /* MW 2 */ + 7363 "00111000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.delay_slot + 7364 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7365 "01000001" // /* MW 3 */ + 7366 "00001010" // /* MW 2 */ + 7367 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7375 "00000000" // /* MW 1 */ + 7376 "10000100" // J #6576 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6576 delay_slots=5 */ + 7377 "00000000" // /* MW 5 */ + 7378 "00000000" // /* MW 4 */ + 7379 "11011000" // /* MW 3 */ + 7380 "00001100" // /* MW 2 */ + 7381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7391 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544 +.src_ref 5 "blend.hpp" 170 36 + 7392 "10111010" // MOVA r17, #257; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 7393 "00100000" // /* MW 9 */ + 7394 "00000000" // /* MW 8 */ + 7395 "00000000" // /* MW 7 */ + 7396 "10111000" // /* MW 6 */ + 7397 "00000010" // /* MW 5 */ + 7398 "00000000" // /* MW 4 */ + 7399 "00000000" // /* MW 3 */ + 7400 "00110001" // /* MW 2 */ + 7401 "00100000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7402 "01100100" // MOVX r21, #0; MOV m4, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7403 "01000001" // /* MW 5 */ + 7404 "00000000" // /* MW 4 */ + 7405 "00101000" // /* MW 3 */ + 7406 "01000000" // /* MW 2 */ + 7407 "00000101" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 +.delay_slot + 7408 "00011000" // MOVX r20, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00000001" // /* MW 3 */ + 7410 "00101000" // /* MW 2 */ + 7411 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7416 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7417 "00011100" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00000000" // /* MW 5 */ + 7420 "00000100" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576 + 7424 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 7425 "00000000" // /* MW 5 */ + 7426 "00000000" // /* MW 4 */ + 7427 "10101000" // /* MW 3 */ + 7428 "00001100" // /* MW 2 */ + 7429 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7430 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7431 "11111110" // /* MW 5 */ + 7432 "10111111" // /* MW 4 */ + 7433 "11111000" // /* MW 3 */ + 7434 "00000000" // /* MW 2 */ + 7435 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7436 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7437 "00100000" // /* MW 3 */ + 7438 "00000000" // /* MW 2 */ + 7439 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7444 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7445 "10000001" // /* MW 11 */ + 7446 "10101101" // /* MW 10 */ + 7447 "00000000" // /* MW 9 */ + 7448 "00000000" // /* MW 8 */ + 7449 "00000000" // /* MW 7 */ + 7450 "00000000" // /* MW 6 */ + 7451 "00100000" // /* MW 5 */ + 7452 "00000000" // /* MW 4 */ + 7453 "11110000" // /* MW 3 */ + 7454 "00101100" // /* MW 2 */ + 7455 "00000000" // /* MW 1 */ +.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 7456 "10111010" // VLDA x0, [sp, #-256]; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 7457 "00100000" // /* MW 9 */ + 7458 "00000000" // /* MW 8 */ + 7459 "00000000" // /* MW 7 */ + 7460 "10111000" // /* MW 6 */ + 7461 "00000010" // /* MW 5 */ + 7462 "00000000" // /* MW 4 */ + 7463 "01110000" // /* MW 3 */ + 7464 "00000111" // /* MW 2 */ + 7465 "11111110" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "blend.hpp" 163 48 +.delay_slot + 7466 "10111010" // VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7467 "01011000" // /* MW 9 */ + 7468 "00000000" // /* MW 8 */ + 7469 "10001000" // /* MW 7 */ + 7470 "10001010" // /* MW 6 */ + 7471 "00000000" // /* MW 5 */ + 7472 "00000000" // /* MW 4 */ + 7473 "01110000" // /* MW 3 */ + 7474 "10100111" // /* MW 2 */ + 7475 "11111110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1280 49 +.src_ref 5 "vector.hpp" 1287 41 +.src_ref 5 "vector.hpp" 1288 16 +.src_ref 5 "vector.hpp" 1292 26 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 226 22 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7476 "10111010" // LDA p1, [sp, #-4]; MOVXM r16, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7477 "10010000" // /* MW 9 */ + 7478 "11111111" // /* MW 8 */ + 7479 "00001111" // /* MW 7 */ + 7480 "00111110" // /* MW 6 */ + 7481 "00000000" // /* MW 5 */ + 7482 "00000000" // /* MW 4 */ + 7483 "00100000" // /* MW 3 */ + 7484 "10010011" // /* MW 2 */ + 7485 "11111111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7486 "01100100" // MOVX r21, #0; MOV m4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7487 "10000001" // /* MW 5 */ + 7488 "00000000" // /* MW 4 */ + 7489 "00101000" // /* MW 3 */ + 7490 "01000000" // /* MW 2 */ + 7491 "00000101" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7492 "00011000" // MOVX r17, #257 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7493 "00000101" // /* MW 3 */ + 7494 "00100010" // /* MW 2 */ + 7495 "00010001" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7496 "00100010" // VLDA x1, [sp, #-128]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7497 "00011100" // /* MW 7 */ + 7498 "00000000" // /* MW 6 */ + 7499 "00000000" // /* MW 5 */ + 7500 "00000100" // /* MW 4 */ + 7501 "01110000" // /* MW 3 */ + 7502 "00001111" // /* MW 2 */ + 7503 "11111111" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656 + 7504 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 7505 "00000000" // /* MW 5 */ + 7506 "00000000" // /* MW 4 */ + 7507 "10101000" // /* MW 3 */ + 7508 "00001100" // /* MW 2 */ + 7509 "00000000" // /* MW 1 */ +.delay_slot + 7510 "11111000" // MOV dc4, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7511 "11100000" // /* MW 3 */ + 7512 "11000001" // /* MW 2 */ + 7513 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7514 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7515 "11111110" // /* MW 5 */ + 7516 "10111111" // /* MW 4 */ + 7517 "11111000" // /* MW 3 */ + 7518 "00000000" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7520 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7521 "00100000" // /* MW 3 */ + 7522 "00000000" // /* MW 2 */ + 7523 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0 + 7527 "00000000" // /* MW 1 */ +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_reduce_mean_c8 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 8 "superkernels.cpp" 472 +.src_ref 8 "superkernels.cpp" 472 first +.function_start + 7536 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7537 "00000001" // /* MW 5 */ + 7538 "00000000" // /* MW 4 */ + 7539 "00000000" // /* MW 3 */ + 7540 "00010000" // /* MW 2 */ + 7541 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7542 "00111010" // ST p7, [sp, #-20]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7543 "00010001" // /* MW 9 */ + 7544 "01100000" // /* MW 8 */ + 7545 "10110010" // /* MW 7 */ + 7546 "11110011" // /* MW 6 */ + 7547 "00000001" // /* MW 5 */ + 7548 "00000000" // /* MW 4 */ + 7549 "10110000" // /* MW 3 */ + 7550 "11110011" // /* MW 2 */ + 7551 "11111101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7552 "10111010" // LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7553 "01110010" // /* MW 9 */ + 7554 "01110000" // /* MW 8 */ + 7555 "00001101" // /* MW 7 */ + 7556 "10000010" // /* MW 6 */ + 7557 "00011101" // /* MW 5 */ + 7558 "11100111" // /* MW 4 */ + 7559 "11010111" // /* MW 3 */ + 7560 "11000010" // /* MW 2 */ + 7561 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 22 first +.src_ref 8 "superkernels.cpp" 569 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 "00111010" // ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7563 "01111001" // /* MW 9 */ + 7564 "11110000" // /* MW 8 */ + 7565 "01101000" // /* MW 7 */ + 7566 "10000001" // /* MW 6 */ + 7567 "00000100" // /* MW 5 */ + 7568 "00100001" // /* MW 4 */ + 7569 "10110000" // /* MW 3 */ + 7570 "00101110" // /* MW 2 */ + 7571 "11111111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 30 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7572 "01011100" // ST r15, [sp, #-16]; ADD r17, r16, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7573 "11110110" // /* MW 5 */ + 7574 "01000111" // /* MW 4 */ + 7575 "10111000" // /* MW 3 */ + 7576 "00111110" // /* MW 2 */ + 7577 "11111110" // /* MW 1 */ + 7578 "10011000" // ST r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7579 "10110101" // /* MW 3 */ + 7580 "11101001" // /* MW 2 */ + 7581 "00001111" // /* MW 1 */ + 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7583 "00000000" // /* MW 1 */ + 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7585 "00000000" // /* MW 1 */ + 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7587 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 first +.src_ref 8 "superkernels.cpp" 477 16 first + 7588 "10000100" // JNZ r16, #8160 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8160 delay_slots=5 */ + 7589 "00000001" // /* MW 5 */ + 7590 "01000000" // /* MW 4 */ + 7591 "11110000" // /* MW 3 */ + 7592 "00001111" // /* MW 2 */ + 7593 "10000000" // /* MW 1 */ +.delay_slot + 7594 "10011000" // ST r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7595 "10010101" // /* MW 3 */ + 7596 "11111101" // /* MW 2 */ + 7597 "00001111" // /* MW 1 */ +.delay_slot + 7598 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7599 "11010101" // /* MW 3 */ + 7600 "11110101" // /* MW 2 */ + 7601 "00001111" // /* MW 1 */ +.delay_slot + 7602 "10011000" // ST p0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7603 "00011101" // /* MW 3 */ + 7604 "11100000" // /* MW 2 */ + 7605 "00001111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 11 +.delay_slot + 7606 "01000100" // MOVXM p6, #509128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7607 "10010000" // /* MW 5 */ + 7608 "11001001" // /* MW 4 */ + 7609 "11001100" // /* MW 3 */ + 7610 "00000111" // /* MW 2 */ + 7611 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 11 first +.delay_slot + 7612 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7613 "00110001" // /* MW 3 */ + 7614 "00000110" // /* MW 2 */ + 7615 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 5 "tile.hpp" 74 8 +.src_ref 5 "tile.hpp" 74 8 + 7616 "01110110" // MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7617 "00010000" // /* MW 11 */ + 7618 "01110110" // /* MW 10 */ + 7619 "00110010" // /* MW 9 */ + 7620 "11110001" // /* MW 8 */ + 7621 "00000001" // /* MW 7 */ + 7622 "00000000" // /* MW 6 */ + 7623 "10001011" // /* MW 5 */ + 7624 "10001000" // /* MW 4 */ + 7625 "00000111" // /* MW 3 */ + 7626 "00110001" // /* MW 2 */ + 7627 "00000000" // /* MW 1 */ +.src_ref 5 "tile.hpp" 74 8 first +.src_ref 5 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7628 "00111010" // ST r17, [p2]; MOVXM p2, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7629 "00010001" // /* MW 9 */ + 7630 "01111000" // /* MW 8 */ + 7631 "00110010" // /* MW 7 */ + 7632 "11110001" // /* MW 6 */ + 7633 "00000001" // /* MW 5 */ + 7634 "00000000" // /* MW 4 */ + 7635 "00110000" // /* MW 3 */ + 7636 "11000110" // /* MW 2 */ + 7637 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 44 +.src_ref 5 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7638 "11010100" // ST.s8 r16, [p2]; MOV p6, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7639 "10000001" // /* MW 5 */ + 7640 "11000101" // /* MW 4 */ + 7641 "11101100" // /* MW 3 */ + 7642 "11000000" // /* MW 2 */ + 7643 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 480 4 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7644 "00000100" // JL #2576 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2576 delay_slots=5 */ + 7645 "00000001" // /* MW 5 */ + 7646 "00000000" // /* MW 4 */ + 7647 "00001000" // /* MW 3 */ + 7648 "00000101" // /* MW 2 */ + 7649 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 480 4 +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7650 "01000100" // MOVXM p0, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7651 "10000000" // /* MW 5 */ + 7652 "11001000" // /* MW 4 */ + 7653 "11000000" // /* MW 3 */ + 7654 "00000111" // /* MW 2 */ + 7655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7659 "00000000" // /* MW 1 */ +.src_ref 5 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7660 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7661 "00110001" // /* MW 3 */ + 7662 "00100000" // /* MW 2 */ + 7663 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7665 "00000000" // /* MW 15 */ + 7666 "00000000" // /* MW 14 */ + 7667 "01111000" // /* MW 13 */ + 7668 "10100101" // /* MW 12 */ + 7669 "00000001" // /* MW 11 */ + 7670 "00000000" // /* MW 10 */ + 7671 "00000000" // /* MW 9 */ + 7672 "00000000" // /* MW 8 */ + 7673 "01011011" // /* MW 7 */ + 7674 "00000001" // /* MW 6 */ + 7675 "00100000" // /* MW 5 */ + 7676 "00000000" // /* MW 4 */ + 7677 "11110000" // /* MW 3 */ + 7678 "00101100" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 51 +.src_ref 8 "superkernels.cpp" 487 47 +.return_address + 7680 "10111010" // MOVA r17, #0; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7681 "00010000" // /* MW 9 */ + 7682 "00100000" // /* MW 8 */ + 7683 "00110010" // /* MW 7 */ + 7684 "11110001" // /* MW 6 */ + 7685 "00000001" // /* MW 5 */ + 7686 "00000000" // /* MW 4 */ + 7687 "00000000" // /* MW 3 */ + 7688 "00010001" // /* MW 2 */ + 7689 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 18 +.src_ref 8 "superkernels.cpp" 481 51 first + 7690 "10111010" // LDA r14, [p2]; MOVXM p2, #509128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7691 "00010000" // /* MW 9 */ + 7692 "01100100" // /* MW 8 */ + 7693 "00110010" // /* MW 7 */ + 7694 "11110001" // /* MW 6 */ + 7695 "00000001" // /* MW 5 */ + 7696 "00000000" // /* MW 4 */ + 7697 "11010000" // /* MW 3 */ + 7698 "10111010" // /* MW 2 */ + 7699 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 18 +.src_ref 8 "superkernels.cpp" 481 85 + 7700 "10111010" // LDA r18, [p2]; MOVXM p2, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7701 "00010000" // /* MW 9 */ + 7702 "00100010" // /* MW 8 */ + 7703 "00110010" // /* MW 7 */ + 7704 "11110001" // /* MW 6 */ + 7705 "00000001" // /* MW 5 */ + 7706 "00000000" // /* MW 4 */ + 7707 "11010000" // /* MW 3 */ + 7708 "11001010" // /* MW 2 */ + 7709 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 85 +.src_ref 8 "superkernels.cpp" 482 16 + 7710 "10111010" // LDA r13, [p2], #4; MOVXM p3, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7711 "00010000" // /* MW 9 */ + 7712 "01101000" // /* MW 8 */ + 7713 "10110010" // /* MW 7 */ + 7714 "11110001" // /* MW 6 */ + 7715 "00000001" // /* MW 5 */ + 7716 "00000000" // /* MW 4 */ + 7717 "11010000" // /* MW 3 */ + 7718 "10110110" // /* MW 2 */ + 7719 "01000011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 16 +.src_ref 8 "superkernels.cpp" 482 40 first + 7720 "10111010" // LDA el0, [p2, #4]; MOVXM p1, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7721 "00010000" // /* MW 9 */ + 7722 "01100110" // /* MW 8 */ + 7723 "10110010" // /* MW 7 */ + 7724 "11110000" // /* MW 6 */ + 7725 "00000001" // /* MW 5 */ + 7726 "00000000" // /* MW 4 */ + 7727 "11010000" // /* MW 3 */ + 7728 "10000101" // /* MW 2 */ + 7729 "01000010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 120 first +.src_ref 8 "superkernels.cpp" 483 44 + 7730 "11010100" // LDA r15, [p2]; MOV r16, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7731 "10000001" // /* MW 5 */ + 7732 "00111001" // /* MW 4 */ + 7733 "11011000" // /* MW 3 */ + 7734 "10111110" // /* MW 2 */ + 7735 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 44 + 7736 "00011000" // ADD.NC p2, r16, #40 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7737 "00010100" // /* MW 3 */ + 7738 "01101000" // /* MW 2 */ + 7739 "00011010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7740 "01000100" // MOVXM p6, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7741 "00000000" // /* MW 5 */ + 7742 "11001010" // /* MW 4 */ + 7743 "11001100" // /* MW 3 */ + 7744 "00000111" // /* MW 2 */ + 7745 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 13 + 7746 "01000100" // MOVXM p0, #509160 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7747 "11010000" // /* MW 5 */ + 7748 "11001001" // /* MW 4 */ + 7749 "11000000" // /* MW 3 */ + 7750 "00000111" // /* MW 2 */ + 7751 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 27 + 7752 "10011000" // MUL r18, r14, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7753 "00101111" // /* MW 3 */ + 7754 "10100101" // /* MW 2 */ + 7755 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7756 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7757 "00000000" // /* MW 5 */ + 7758 "00100000" // /* MW 4 */ + 7759 "00001000" // /* MW 3 */ + 7760 "00000000" // /* MW 2 */ + 7761 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 61 +.src_ref 8 "superkernels.cpp" 482 16 first + 7762 "01011100" // ST el0, [p3]; MUL r18, r13, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7763 "01011111" // /* MW 5 */ + 7764 "11001010" // /* MW 4 */ + 7765 "00110110" // /* MW 3 */ + 7766 "10000101" // /* MW 2 */ + 7767 "01100000" // /* MW 1 */ + 7768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7769 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 96 first + 7770 "10011000" // MUL r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7771 "00101111" // /* MW 3 */ + 7772 "11100101" // /* MW 2 */ + 7773 "00010011" // /* MW 1 */ + 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7775 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 16 + 7776 "10011000" // ST r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7777 "01010001" // /* MW 3 */ + 7778 "00000110" // /* MW 2 */ + 7779 "00001001" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 15 first + 7780 "10011000" // LDA el0, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7781 "00101110" // /* MW 3 */ + 7782 "01001100" // /* MW 2 */ + 7783 "00000010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 first + 7784 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7785 "00110001" // /* MW 3 */ + 7786 "00011110" // /* MW 2 */ + 7787 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7788 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7789 "00110001" // /* MW 3 */ + 7790 "00011110" // /* MW 2 */ + 7791 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7792 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7793 "00110001" // /* MW 3 */ + 7794 "00011110" // /* MW 2 */ + 7795 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7796 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7797 "00110001" // /* MW 3 */ + 7798 "00011110" // /* MW 2 */ + 7799 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7800 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7801 "00110001" // /* MW 3 */ + 7802 "00011110" // /* MW 2 */ + 7803 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7804 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7805 "00110001" // /* MW 3 */ + 7806 "00011110" // /* MW 2 */ + 7807 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 13 first + 7808 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7809 "00101001" // /* MW 3 */ + 7810 "00000100" // /* MW 2 */ + 7811 "00001000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 first + 7812 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7813 "00110001" // /* MW 3 */ + 7814 "00011110" // /* MW 2 */ + 7815 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7816 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7817 "00110001" // /* MW 3 */ + 7818 "00011110" // /* MW 2 */ + 7819 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7820 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7821 "00110001" // /* MW 3 */ + 7822 "00011110" // /* MW 2 */ + 7823 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 first + 7824 "10011000" // LDA r1, [p2], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7825 "00110110" // /* MW 3 */ + 7826 "11011100" // /* MW 2 */ + 7827 "00000010" // /* MW 1 */ + 7828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7829 "00000000" // /* MW 1 */ + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ + 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7837 "00000000" // /* MW 1 */ + 7838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7839 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7840 "10011000" // GEU r17, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7841 "00001011" // /* MW 3 */ + 7842 "01100011" // /* MW 2 */ + 7843 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7844 "10000100" // JNZ r17, #7920 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7920 delay_slots=5 */ + 7845 "00000001" // /* MW 5 */ + 7846 "01000000" // /* MW 4 */ + 7847 "01111000" // /* MW 3 */ + 7848 "00001111" // /* MW 2 */ + 7849 "10001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 +.delay_slot + 7850 "11111000" // MOV r12, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7851 "11000000" // /* MW 3 */ + 7852 "00011110" // /* MW 2 */ + 7853 "00011011" // /* MW 1 */ +.delay_slot + 7854 "10011000" // ST p2, [sp, #-40] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7855 "00011101" // /* MW 3 */ + 7856 "11011001" // /* MW 2 */ + 7857 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7863 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.no_stack_arguments + 7864 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */ + 7865 "00000001" // /* MW 5 */ + 7866 "00000000" // /* MW 4 */ + 7867 "01010000" // /* MW 3 */ + 7868 "00010101" // /* MW 2 */ + 7869 "00000000" // /* MW 1 */ +.delay_slot + 7870 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7871 "10010101" // /* MW 3 */ + 7872 "11011101" // /* MW 2 */ + 7873 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7880 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7881 "00011100" // /* MW 7 */ + 7882 "00000000" // /* MW 6 */ + 7883 "00000000" // /* MW 5 */ + 7884 "00000100" // /* MW 4 */ + 7885 "11110000" // /* MW 3 */ + 7886 "00101100" // /* MW 2 */ + 7887 "00000000" // /* MW 1 */ +.return_address + 7888 "10000100" // J #7984 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7984 delay_slots=5 */ + 7889 "00000000" // /* MW 5 */ + 7890 "00000000" // /* MW 4 */ + 7891 "10011000" // /* MW 3 */ + 7892 "00001111" // /* MW 2 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7894 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7895 "11100000" // /* MW 5 */ + 7896 "11001001" // /* MW 4 */ + 7897 "11001110" // /* MW 3 */ + 7898 "00000111" // /* MW 2 */ + 7899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7903 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7905 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7906 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7907 "00011100" // /* MW 13 */ + 7908 "00000000" // /* MW 12 */ + 7909 "00000000" // /* MW 11 */ + 7910 "01010111" // /* MW 10 */ + 7911 "00011010" // /* MW 9 */ + 7912 "01000000" // /* MW 8 */ + 7913 "00000000" // /* MW 7 */ + 7914 "00000000" // /* MW 6 */ + 7915 "10110110" // /* MW 5 */ + 7916 "00000010" // /* MW 4 */ + 7917 "11110000" // /* MW 3 */ + 7918 "00101100" // /* MW 2 */ + 7919 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384 +.src_ref 8 "superkernels.cpp" 491 40 +.no_stack_arguments + 7920 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */ + 7921 "00000001" // /* MW 5 */ + 7922 "00000000" // /* MW 4 */ + 7923 "01010000" // /* MW 3 */ + 7924 "00010101" // /* MW 2 */ + 7925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7932 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7933 "01100111" // /* MW 3 */ + 7934 "00000001" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7936 "11100001" // NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7937 "00000000" // /* MW 15 */ + 7938 "00000000" // /* MW 14 */ + 7939 "01111000" // /* MW 13 */ + 7940 "10100101" // /* MW 12 */ + 7941 "00000001" // /* MW 11 */ + 7942 "00001100" // /* MW 10 */ + 7943 "00011000" // /* MW 9 */ + 7944 "00000010" // /* MW 8 */ + 7945 "01011011" // /* MW 7 */ + 7946 "00000001" // /* MW 6 */ + 7947 "00100000" // /* MW 5 */ + 7948 "00000000" // /* MW 4 */ + 7949 "11110000" // /* MW 3 */ + 7950 "00101100" // /* MW 2 */ + 7951 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.return_address +.no_stack_arguments + 7952 "00000100" // JL #12416 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12416 delay_slots=5 */ + 7953 "00000001" // /* MW 5 */ + 7954 "00000000" // /* MW 4 */ + 7955 "01000000" // /* MW 3 */ + 7956 "00011000" // /* MW 2 */ + 7957 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7958 "11111000" // MOV r1, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7959 "00100000" // /* MW 3 */ + 7960 "01010000" // /* MW 2 */ + 7961 "00011000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7962 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7963 "11100000" // /* MW 5 */ + 7964 "11001001" // /* MW 4 */ + 7965 "11001110" // /* MW 3 */ + 7966 "00000111" // /* MW 2 */ + 7967 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7968 "01000100" // MOVXM r2, #1325400064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7969 "00000000" // /* MW 5 */ + 7970 "00100000" // /* MW 4 */ + 7971 "00000001" // /* MW 3 */ + 7972 "00000000" // /* MW 2 */ + 7973 "01001111" // /* MW 1 */ +.delay_slot + 7974 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "10010101" // /* MW 3 */ + 7976 "11011101" // /* MW 2 */ + 7977 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7978 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7979 "00100000" // /* MW 5 */ + 7980 "00000000" // /* MW 4 */ + 7981 "11110000" // /* MW 3 */ + 7982 "00101100" // /* MW 2 */ + 7983 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 491 40 +.return_address + 7984 "10111010" // LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7985 "10111000" // /* MW 9 */ + 7986 "00001000" // /* MW 8 */ + 7987 "00000000" // /* MW 7 */ + 7988 "00000000" // /* MW 6 */ + 7989 "11010010" // /* MW 5 */ + 7990 "00000010" // /* MW 4 */ + 7991 "01010000" // /* MW 3 */ + 7992 "11000000" // /* MW 2 */ + 7993 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 492 38 +.src_ref 8 "superkernels.cpp" 492 38 + 7994 "10111010" // MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7995 "01111000" // /* MW 9 */ + 7996 "01001001" // /* MW 8 */ + 7997 "00000000" // /* MW 7 */ + 7998 "00001000" // /* MW 6 */ + 7999 "10000000" // /* MW 5 */ + 8000 "00000001" // /* MW 4 */ + 8001 "10000000" // /* MW 3 */ + 8002 "01000000" // /* MW 2 */ + 8003 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 13 +.src_ref 8 "superkernels.cpp" 498 15 + 8004 "10111010" // LDA p2, [sp, #-40]; MOVXM p3, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8005 "00010000" // /* MW 9 */ + 8006 "01101010" // /* MW 8 */ + 8007 "10110010" // /* MW 7 */ + 8008 "11110001" // /* MW 6 */ + 8009 "00000001" // /* MW 5 */ + 8010 "00000000" // /* MW 4 */ + 8011 "00100000" // /* MW 3 */ + 8012 "00100011" // /* MW 2 */ + 8013 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 14 + 8014 "01000100" // MOVXM p1, #509144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8015 "10110000" // /* MW 5 */ + 8016 "11001001" // /* MW 4 */ + 8017 "11000010" // /* MW 3 */ + 8018 "00000111" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 7 +.src_ref 8 "superkernels.cpp" 508 7 +.src_ref 8 "superkernels.cpp" 511 7 + 8020 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8021 "10100000" // /* MW 5 */ + 8022 "11001001" // /* MW 4 */ + 8023 "11001110" // /* MW 3 */ + 8024 "00000111" // /* MW 2 */ + 8025 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8027 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 38 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 8028 "00011000" // ST.s16 r16, [p6], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "00010111" // /* MW 3 */ + 8030 "00011110" // /* MW 2 */ + 8031 "00000110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8032 "00011000" // MOVX crRnd, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "10000000" // /* MW 3 */ + 8034 "00111010" // /* MW 2 */ + 8035 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8036 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "00010110" // /* MW 3 */ + 8038 "01000000" // /* MW 2 */ + 8039 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8041 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8042 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8043 "00000001" // /* MW 3 */ + 8044 "00000001" // /* MW 2 */ + 8045 "00011100" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ + 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8049 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 492 38 first + 8050 "00011000" // ST.s8 r24, [p6], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8051 "00000111" // /* MW 3 */ + 8052 "00001011" // /* MW 2 */ + 8053 "00000110" // /* MW 1 */ + 8054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8055 "00000000" // /* MW 1 */ + 8056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8057 "00000000" // /* MW 1 */ + 8058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8059 "00000000" // /* MW 1 */ + 8060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8061 "00000000" // /* MW 1 */ + 8062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8063 "00000000" // /* MW 1 */ + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 494 25 first + 8066 "10011000" // ST r14, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8067 "11010001" // /* MW 3 */ + 8068 "00011101" // /* MW 2 */ + 8069 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 495 24 first + 8070 "10011000" // ST r15, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8071 "11110001" // /* MW 3 */ + 8072 "00000101" // /* MW 2 */ + 8073 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 496 24 first + 8074 "10011000" // ST r13, [p6, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8075 "10110001" // /* MW 3 */ + 8076 "00010101" // /* MW 2 */ + 8077 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 15 first + 8078 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8079 "00101110" // /* MW 3 */ + 8080 "00011100" // /* MW 2 */ + 8081 "00000010" // /* MW 1 */ + 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8083 "00000000" // /* MW 1 */ + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ + 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8087 "00000000" // /* MW 1 */ + 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8089 "00000000" // /* MW 1 */ + 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8091 "00000000" // /* MW 1 */ + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 13 + 8094 "10011000" // ST el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "00101001" // /* MW 3 */ + 8096 "00000100" // /* MW 2 */ + 8097 "00001011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 16 first + 8098 "10011000" // LDA el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8099 "00101110" // /* MW 3 */ + 8100 "00000100" // /* MW 2 */ + 8101 "00000010" // /* MW 1 */ + 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8103 "00000000" // /* MW 1 */ + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8105 "00000000" // /* MW 1 */ + 8106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8107 "00000000" // /* MW 1 */ + 8108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8109 "00000000" // /* MW 1 */ + 8110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8111 "00000000" // /* MW 1 */ + 8112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8113 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 14 + 8114 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8115 "00101001" // /* MW 3 */ + 8116 "00000100" // /* MW 2 */ + 8117 "00001001" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 15 first + 8118 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8119 "00101110" // /* MW 3 */ + 8120 "00010100" // /* MW 2 */ + 8121 "00000010" // /* MW 1 */ + 8122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8123 "00000000" // /* MW 1 */ + 8124 "10000100" // J #8176 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8176 delay_slots=5 */ + 8125 "00000000" // /* MW 5 */ + 8126 "00000000" // /* MW 4 */ + 8127 "11111000" // /* MW 3 */ + 8128 "00001111" // /* MW 2 */ + 8129 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 13 +.delay_slot + 8130 "01000100" // MOVXM p0, #509148 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8131 "10111000" // /* MW 5 */ + 8132 "11001001" // /* MW 4 */ + 8133 "11000000" // /* MW 3 */ + 8134 "00000111" // /* MW 2 */ + 8135 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8140 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8141 "01100111" // /* MW 3 */ + 8142 "00000001" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 13 +.delay_slot + 8144 "11100001" // NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "10000000" // /* MW 8 */ + 8153 "00101001" // /* MW 7 */ + 8154 "00000100" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624 +.src_ref 8 "superkernels.cpp" 505 7 +.src_ref 8 "superkernels.cpp" 508 7 +.src_ref 8 "superkernels.cpp" 511 7 + 8160 "00111010" // ST p2, [sp, #-36]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8161 "00010001" // /* MW 9 */ + 8162 "01101000" // /* MW 8 */ + 8163 "10110010" // /* MW 7 */ + 8164 "11110011" // /* MW 6 */ + 8165 "00000001" // /* MW 5 */ + 8166 "00000000" // /* MW 4 */ + 8167 "10110000" // /* MW 3 */ + 8168 "10100011" // /* MW 2 */ + 8169 "11111011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 8170 "11010100" // NOPA; MOV r12, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8171 "10000001" // /* MW 5 */ + 8172 "00101001" // /* MW 4 */ + 8173 "11110110" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640 +.src_ref 8 "superkernels.cpp" 505 7 first +.src_ref 8 "superkernels.cpp" 505 19 + 8176 "00101100" // LDA r16, [p7]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8177 "00001010" // /* MW 5 */ + 8178 "01000100" // /* MW 4 */ + 8179 "11010000" // /* MW 3 */ + 8180 "11000010" // /* MW 2 */ + 8181 "11100000" // /* MW 1 */ + 8182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8183 "00000000" // /* MW 1 */ + 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8185 "00000000" // /* MW 1 */ + 8186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8187 "00000000" // /* MW 1 */ + 8188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8189 "00000000" // /* MW 1 */ + 8190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8191 "00000000" // /* MW 1 */ + 8192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8193 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 19 + 8194 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8195 "00001000" // /* MW 3 */ + 8196 "01100011" // /* MW 2 */ + 8197 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 25 + 8198 "10000100" // JNZ r17, #8368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8368 delay_slots=5 */ + 8199 "00000001" // /* MW 5 */ + 8200 "01000000" // /* MW 4 */ + 8201 "01011000" // /* MW 3 */ + 8202 "00010000" // /* MW 2 */ + 8203 "10001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first +.delay_slot + 8204 "00011000" // ADD.NC p6, r12, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8205 "00000110" // /* MW 3 */ + 8206 "01100110" // /* MW 2 */ + 8207 "00011110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8213 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8215 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 29 + 8216 "01000100" // MOVXM p2, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8217 "10001000" // /* MW 5 */ + 8218 "11001001" // /* MW 4 */ + 8219 "11000100" // /* MW 3 */ + 8220 "00000111" // /* MW 2 */ + 8221 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 29 first +.src_ref 8 "superkernels.cpp" 505 65 + 8222 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8223 "00010000" // /* MW 9 */ + 8224 "00110000" // /* MW 8 */ + 8225 "00110010" // /* MW 7 */ + 8226 "11110001" // /* MW 6 */ + 8227 "00000001" // /* MW 5 */ + 8228 "00000000" // /* MW 4 */ + 8229 "11010000" // /* MW 3 */ + 8230 "11000010" // /* MW 2 */ + 8231 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 65 + 8232 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8233 "00111010" // /* MW 3 */ + 8234 "00000100" // /* MW 2 */ + 8235 "00000010" // /* MW 1 */ + 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8237 "00000000" // /* MW 1 */ + 8238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8239 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.no_stack_arguments + 8240 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8241 "00000001" // /* MW 5 */ + 8242 "00000000" // /* MW 4 */ + 8243 "11111000" // /* MW 3 */ + 8244 "00010011" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8246 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8247 "00000001" // /* MW 3 */ + 8248 "00011010" // /* MW 2 */ + 8249 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8251 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8252 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8253 "11011010" // /* MW 3 */ + 8254 "00110110" // /* MW 2 */ + 8255 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8256 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8257 "01000001" // /* MW 5 */ + 8258 "10111011" // /* MW 4 */ + 8259 "00110111" // /* MW 3 */ + 8260 "01100000" // /* MW 2 */ + 8261 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8262 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8263 "00010010" // /* MW 9 */ + 8264 "00000001" // /* MW 8 */ + 8265 "00000100" // /* MW 7 */ + 8266 "00000000" // /* MW 6 */ + 8267 "01011011" // /* MW 5 */ + 8268 "00000001" // /* MW 4 */ + 8269 "11110000" // /* MW 3 */ + 8270 "00101100" // /* MW 2 */ + 8271 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.src_ref 8 "superkernels.cpp" 505 41 +.return_address + 8272 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8273 "01000001" // /* MW 5 */ + 8274 "10101111" // /* MW 4 */ + 8275 "00111101" // /* MW 3 */ + 8276 "00000110" // /* MW 2 */ + 8277 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 + 8278 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8279 "00000010" // /* MW 3 */ + 8280 "11100001" // /* MW 2 */ + 8281 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 6 +.src_ref 8 "superkernels.cpp" 505 76 + 8282 "10000100" // JNZ r16, #8352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8352 delay_slots=5 */ + 8283 "00000001" // /* MW 5 */ + 8284 "01000000" // /* MW 4 */ + 8285 "01010000" // /* MW 3 */ + 8286 "00010000" // /* MW 2 */ + 8287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 395 8 + 8298 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8299 "10000001" // /* MW 5 */ + 8300 "11011001" // /* MW 4 */ + 8301 "10100100" // /* MW 3 */ + 8302 "00011111" // /* MW 2 */ + 8303 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 8304 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8305 "01110110" // /* MW 3 */ + 8306 "11111111" // /* MW 2 */ + 8307 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8308 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8309 "00110110" // /* MW 3 */ + 8310 "11111110" // /* MW 2 */ + 8311 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8312 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8313 "01010110" // /* MW 3 */ + 8314 "11111110" // /* MW 2 */ + 8315 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8317 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 8318 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8319 "00110110" // /* MW 3 */ + 8320 "01000110" // /* MW 2 */ + 8321 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8325 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8327 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8329 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8330 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8331 "00010010" // /* MW 3 */ + 8332 "10100011" // /* MW 2 */ + 8333 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8334 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8335 "00110001" // /* MW 3 */ + 8336 "00000110" // /* MW 2 */ + 8337 "00001010" // /* MW 1 */ + 8338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8339 "00000000" // /* MW 1 */ + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ + 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8345 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8346 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8347 "00010000" // /* MW 5 */ + 8348 "10100110" // /* MW 4 */ + 8349 "11111000" // /* MW 3 */ + 8350 "00101100" // /* MW 2 */ + 8351 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816 + 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8353 "00000000" // /* MW 1 */ + 8354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8355 "00000000" // /* MW 1 */ + 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8357 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 7 first + 8358 "10111010" // LDA r16, [p7]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8359 "01111110" // /* MW 9 */ + 8360 "10100101" // /* MW 8 */ + 8361 "00000001" // /* MW 7 */ + 8362 "00000000" // /* MW 6 */ + 8363 "00010000" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11010000" // /* MW 3 */ + 8366 "11000010" // /* MW 2 */ + 8367 "11100000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832 +.src_ref 8 "superkernels.cpp" 508 19 +.src_ref 8 "superkernels.cpp" 522 6 +.src_ref 8 "superkernels.cpp" 558 19 + 8368 "00011000" // MOVX r14, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8369 "00001001" // /* MW 3 */ + 8370 "00011100" // /* MW 2 */ + 8371 "00010000" // /* MW 1 */ + 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8373 "00000000" // /* MW 1 */ + 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8375 "00000000" // /* MW 1 */ + 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8377 "00000000" // /* MW 1 */ + 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8379 "00000000" // /* MW 1 */ + 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8381 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 19 + 8382 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8383 "00001000" // /* MW 3 */ + 8384 "10100001" // /* MW 2 */ + 8385 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 25 + 8386 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8387 "00000001" // /* MW 5 */ + 8388 "01000000" // /* MW 4 */ + 8389 "10110000" // /* MW 3 */ + 8390 "00010000" // /* MW 2 */ + 8391 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8401 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 29 + 8402 "01000100" // MOVXM p2, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8403 "11000000" // /* MW 5 */ + 8404 "11001001" // /* MW 4 */ + 8405 "11000100" // /* MW 3 */ + 8406 "00000111" // /* MW 2 */ + 8407 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 29 +.src_ref 8 "superkernels.cpp" 508 65 + 8408 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8409 "00010000" // /* MW 9 */ + 8410 "00110000" // /* MW 8 */ + 8411 "00110010" // /* MW 7 */ + 8412 "11110001" // /* MW 6 */ + 8413 "00000001" // /* MW 5 */ + 8414 "00000000" // /* MW 4 */ + 8415 "11010000" // /* MW 3 */ + 8416 "11000010" // /* MW 2 */ + 8417 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 65 + 8418 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00111010" // /* MW 3 */ + 8420 "00000100" // /* MW 2 */ + 8421 "00000010" // /* MW 1 */ + 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8423 "00000000" // /* MW 1 */ + 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8425 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.no_stack_arguments + 8426 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8427 "00000001" // /* MW 5 */ + 8428 "00000000" // /* MW 4 */ + 8429 "11111000" // /* MW 3 */ + 8430 "00010011" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8432 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8433 "00000001" // /* MW 3 */ + 8434 "00011010" // /* MW 2 */ + 8435 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8437 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8438 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8439 "11011010" // /* MW 3 */ + 8440 "00110110" // /* MW 2 */ + 8441 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8442 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8443 "01000001" // /* MW 5 */ + 8444 "10111011" // /* MW 4 */ + 8445 "00110111" // /* MW 3 */ + 8446 "01100000" // /* MW 2 */ + 8447 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8448 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "01111000" // /* MW 13 */ + 8452 "10100101" // /* MW 12 */ + 8453 "00000001" // /* MW 11 */ + 8454 "10010000" // /* MW 10 */ + 8455 "00001000" // /* MW 9 */ + 8456 "00100000" // /* MW 8 */ + 8457 "01011011" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00100000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.src_ref 8 "superkernels.cpp" 508 41 +.return_address + 8464 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8465 "01000001" // /* MW 5 */ + 8466 "10101111" // /* MW 4 */ + 8467 "00111101" // /* MW 3 */ + 8468 "00000110" // /* MW 2 */ + 8469 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 + 8470 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8471 "00000010" // /* MW 3 */ + 8472 "11100001" // /* MW 2 */ + 8473 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 6 +.src_ref 8 "superkernels.cpp" 508 76 + 8474 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8475 "00000001" // /* MW 5 */ + 8476 "01000000" // /* MW 4 */ + 8477 "10110000" // /* MW 3 */ + 8478 "00010000" // /* MW 2 */ + 8479 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8489 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 395 8 + 8490 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8491 "10000001" // /* MW 5 */ + 8492 "11011001" // /* MW 4 */ + 8493 "10100100" // /* MW 3 */ + 8494 "00011111" // /* MW 2 */ + 8495 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 8496 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8497 "01110110" // /* MW 3 */ + 8498 "11111111" // /* MW 2 */ + 8499 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8500 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8501 "00110110" // /* MW 3 */ + 8502 "11111110" // /* MW 2 */ + 8503 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8504 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "01010110" // /* MW 3 */ + 8506 "11111110" // /* MW 2 */ + 8507 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 8508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 8510 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8511 "00110110" // /* MW 3 */ + 8512 "01000110" // /* MW 2 */ + 8513 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8522 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00010010" // /* MW 3 */ + 8524 "10100011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8526 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8527 "00110001" // /* MW 3 */ + 8528 "00000110" // /* MW 2 */ + 8529 "00001010" // /* MW 1 */ + 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8531 "00000000" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ + 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8537 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8538 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8539 "00010000" // /* MW 5 */ + 8540 "10100110" // /* MW 4 */ + 8541 "11111000" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008 + 8544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8545 "00000000" // /* MW 1 */ + 8546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8547 "00000000" // /* MW 1 */ + 8548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8549 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 7 first +.src_ref 8 "superkernels.cpp" 511 29 + 8550 "10111010" // LDA r16, [p7]; MOVXM p7, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8551 "00010000" // /* MW 9 */ + 8552 "01110010" // /* MW 8 */ + 8553 "10110010" // /* MW 7 */ + 8554 "11110011" // /* MW 6 */ + 8555 "00000001" // /* MW 5 */ + 8556 "00000000" // /* MW 4 */ + 8557 "11010000" // /* MW 3 */ + 8558 "11000010" // /* MW 2 */ + 8559 "11100000" // /* MW 1 */ + 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8561 "00000000" // /* MW 1 */ + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ + 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8567 "00000000" // /* MW 1 */ + 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8569 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 19 + 8570 "00011000" // MOVX r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8571 "00010001" // /* MW 3 */ + 8572 "00100100" // /* MW 2 */ + 8573 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 19 + 8574 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8575 "00001000" // /* MW 3 */ + 8576 "10100001" // /* MW 2 */ + 8577 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 25 + 8578 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */ + 8579 "00000001" // /* MW 5 */ + 8580 "01000000" // /* MW 4 */ + 8581 "00100000" // /* MW 3 */ + 8582 "00010001" // /* MW 2 */ + 8583 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 66 +.delay_slot + 8584 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8585 "11000000" // /* MW 5 */ + 8586 "11001000" // /* MW 4 */ + 8587 "11000100" // /* MW 3 */ + 8588 "00000111" // /* MW 2 */ + 8589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8591 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8593 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8595 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8596 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8597 "00000001" // /* MW 3 */ + 8598 "00100010" // /* MW 2 */ + 8599 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 29 +.src_ref 8 "superkernels.cpp" 511 42 + 8600 "00101100" // LDA r16, [p7]; MOVX r13, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8601 "00000010" // /* MW 5 */ + 8602 "00110100" // /* MW 4 */ + 8603 "11010000" // /* MW 3 */ + 8604 "11000010" // /* MW 2 */ + 8605 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 66 + 8606 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8607 "00111010" // /* MW 3 */ + 8608 "00000100" // /* MW 2 */ + 8609 "00000010" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.no_stack_arguments + 8614 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8615 "00000001" // /* MW 5 */ + 8616 "00000000" // /* MW 4 */ + 8617 "11111000" // /* MW 3 */ + 8618 "00010011" // /* MW 2 */ + 8619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8621 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8623 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8624 "10011000" // LT r27, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8625 "00011010" // /* MW 3 */ + 8626 "00110111" // /* MW 2 */ + 8627 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8628 "11100100" // SUB r17, r17, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8629 "01000001" // /* MW 5 */ + 8630 "10111011" // /* MW 4 */ + 8631 "00110111" // /* MW 3 */ + 8632 "01100000" // /* MW 2 */ + 8633 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8634 "00101100" // NOPA; SEL.EQZ r0, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8635 "00100100" // /* MW 5 */ + 8636 "00000010" // /* MW 4 */ + 8637 "11111000" // /* MW 3 */ + 8638 "00101100" // /* MW 2 */ + 8639 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.src_ref 8 "superkernels.cpp" 511 42 +.return_address + 8640 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8641 "01000001" // /* MW 5 */ + 8642 "10101111" // /* MW 4 */ + 8643 "00111101" // /* MW 3 */ + 8644 "00000110" // /* MW 2 */ + 8645 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 + 8646 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00000010" // /* MW 3 */ + 8648 "11100001" // /* MW 2 */ + 8649 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 6 +.src_ref 8 "superkernels.cpp" 511 77 + 8650 "10000100" // JNZ r16, #8736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8736 delay_slots=5 */ + 8651 "00000001" // /* MW 5 */ + 8652 "01000000" // /* MW 4 */ + 8653 "00010000" // /* MW 3 */ + 8654 "00010001" // /* MW 2 */ + 8655 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 +.src_ref 1 "io_buffer_main.h" 218 49 first +.src_ref 1 "io_buffer_main.h" 395 8 + 8666 "10111010" // LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8667 "01011000" // /* MW 9 */ + 8668 "00000001" // /* MW 8 */ + 8669 "00001000" // /* MW 7 */ + 8670 "11101010" // /* MW 6 */ + 8671 "00010111" // /* MW 5 */ + 8672 "00111111" // /* MW 4 */ + 8673 "11010000" // /* MW 3 */ + 8674 "11101110" // /* MW 2 */ + 8675 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8676 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8677 "01010110" // /* MW 3 */ + 8678 "11111110" // /* MW 2 */ + 8679 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8680 "10011000" // LDA r19, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8681 "01110110" // /* MW 3 */ + 8682 "11111110" // /* MW 2 */ + 8683 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 8684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8685 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 8686 "10011000" // LDA r18, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8687 "01010110" // /* MW 3 */ + 8688 "01000110" // /* MW 2 */ + 8689 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8691 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8695 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8698 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8699 "00100010" // /* MW 3 */ + 8700 "11100101" // /* MW 2 */ + 8701 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8702 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "01010001" // /* MW 3 */ + 8704 "00000110" // /* MW 2 */ + 8705 "00001110" // /* MW 1 */ + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ + 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8709 "00000000" // /* MW 1 */ + 8710 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8711 "00000000" // /* MW 5 */ + 8712 "00000000" // /* MW 4 */ + 8713 "00101000" // /* MW 3 */ + 8714 "00010001" // /* MW 2 */ + 8715 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first +.delay_slot + 8718 "00011000" // ACQ r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8719 "00011000" // /* MW 3 */ + 8720 "10010011" // /* MW 2 */ + 8721 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8727 "01111110" // /* MW 9 */ + 8728 "10100101" // /* MW 8 */ + 8729 "00000001" // /* MW 7 */ + 8730 "00000000" // /* MW 6 */ + 8731 "00010000" // /* MW 5 */ + 8732 "00000000" // /* MW 4 */ + 8733 "11110000" // /* MW 3 */ + 8734 "00101100" // /* MW 2 */ + 8735 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200 + 8736 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8737 "00000000" // /* MW 5 */ + 8738 "00000000" // /* MW 4 */ + 8739 "00101000" // /* MW 3 */ + 8740 "00010001" // /* MW 2 */ + 8741 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 +.delay_slot + 8742 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8743 "00000101" // /* MW 3 */ + 8744 "00100000" // /* MW 2 */ + 8745 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8753 "00000000" // /* MW 15 */ + 8754 "00000000" // /* MW 14 */ + 8755 "01111000" // /* MW 13 */ + 8756 "10100101" // /* MW 12 */ + 8757 "00000001" // /* MW 11 */ + 8758 "00000000" // /* MW 10 */ + 8759 "00000000" // /* MW 9 */ + 8760 "00000000" // /* MW 8 */ + 8761 "01011011" // /* MW 7 */ + 8762 "00000001" // /* MW 6 */ + 8763 "00100000" // /* MW 5 */ + 8764 "00000000" // /* MW 4 */ + 8765 "11110000" // /* MW 3 */ + 8766 "00101100" // /* MW 2 */ + 8767 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 + 8768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "00000000" // /* MW 15 */ + 8770 "00000000" // /* MW 14 */ + 8771 "01111000" // /* MW 13 */ + 8772 "10100101" // /* MW 12 */ + 8773 "00000001" // /* MW 11 */ + 8774 "00101000" // /* MW 10 */ + 8775 "00000000" // /* MW 9 */ + 8776 "00000001" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 +.src_ref 8 "superkernels.cpp" 516 47 +.src_ref 1 "io_buffer_main.h" 125 25 + 8784 "10111010" // LDA p7, [sp, #-32]; MOVXM p6, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8785 "00010000" // /* MW 9 */ + 8786 "01100110" // /* MW 8 */ + 8787 "00110010" // /* MW 7 */ + 8788 "11110011" // /* MW 6 */ + 8789 "00000001" // /* MW 5 */ + 8790 "00000000" // /* MW 4 */ + 8791 "00100000" // /* MW 3 */ + 8792 "01110011" // /* MW 2 */ + 8793 "11111100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 47 first +.src_ref 8 "superkernels.cpp" 522 6 + 8794 "10111010" // LDA r21, [p6]; MOVXM p2, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8795 "00010000" // /* MW 9 */ + 8796 "01101000" // /* MW 8 */ + 8797 "00110010" // /* MW 7 */ + 8798 "11110001" // /* MW 6 */ + 8799 "00000001" // /* MW 5 */ + 8800 "00000000" // /* MW 4 */ + 8801 "11010000" // /* MW 3 */ + 8802 "11010110" // /* MW 2 */ + 8803 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 +.src_ref 8 "superkernels.cpp" 522 6 first + 8804 "10111010" // LDA r17, [p2]; MOVXM p6, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8805 "00010000" // /* MW 9 */ + 8806 "01100000" // /* MW 8 */ + 8807 "00110010" // /* MW 7 */ + 8808 "11110011" // /* MW 6 */ + 8809 "00000001" // /* MW 5 */ + 8810 "00000000" // /* MW 4 */ + 8811 "11010000" // /* MW 3 */ + 8812 "11000110" // /* MW 2 */ + 8813 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 first + 8814 "10011000" // LDA r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "10010110" // /* MW 3 */ + 8816 "00000110" // /* MW 2 */ + 8817 "00000110" // /* MW 1 */ + 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8819 "00000000" // /* MW 1 */ + 8820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8821 "00000000" // /* MW 1 */ + 8822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8823 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8824 "10011000" // LDA r19, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8825 "01110110" // /* MW 3 */ + 8826 "00000110" // /* MW 2 */ + 8827 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 first + 8828 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8829 "00001101" // /* MW 3 */ + 8830 "01101011" // /* MW 2 */ + 8831 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 first + 8832 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8833 "00000111" // /* MW 3 */ + 8834 "01100001" // /* MW 2 */ + 8835 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8836 "10000100" // JNZ r16, #9232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9232 delay_slots=5 */ + 8837 "00000001" // /* MW 5 */ + 8838 "01000000" // /* MW 4 */ + 8839 "00001000" // /* MW 3 */ + 8840 "00010010" // /* MW 2 */ + 8841 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 first +.delay_slot + 8842 "00011000" // ADD r20, r20, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8843 "00000111" // /* MW 3 */ + 8844 "00101000" // /* MW 2 */ + 8845 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 +.delay_slot + 8846 "10011000" // ST r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8847 "10010001" // /* MW 3 */ + 8848 "00000110" // /* MW 2 */ + 8849 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8851 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 first +.delay_slot + 8852 "01011000" // ADD.NC p0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8853 "11010101" // /* MW 3 */ + 8854 "01101001" // /* MW 2 */ + 8855 "00011000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 12 +.src_ref 8 "superkernels.cpp" 522 6 +.delay_slot + 8856 "01011100" // ST p0, [sp, #-68]; MOVX r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8857 "00100010" // /* MW 5 */ + 8858 "01001000" // /* MW 4 */ + 8859 "10110000" // /* MW 3 */ + 8860 "10000011" // /* MW 2 */ + 8861 "11110111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 first + 8862 "10011000" // EQ r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8863 "00100111" // /* MW 3 */ + 8864 "01100001" // /* MW 2 */ + 8865 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8866 "10000100" // JNZ r16, #9088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9088 delay_slots=5 */ + 8867 "00000001" // /* MW 5 */ + 8868 "01000000" // /* MW 4 */ + 8869 "11000000" // /* MW 3 */ + 8870 "00010001" // /* MW 2 */ + 8871 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8873 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8881 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8882 "10011000" // NE r16, r17, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8883 "11101000" // /* MW 3 */ + 8884 "01100000" // /* MW 2 */ + 8885 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8886 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */ + 8887 "00000001" // /* MW 5 */ + 8888 "01000000" // /* MW 4 */ + 8889 "10101000" // /* MW 3 */ + 8890 "00010001" // /* MW 2 */ + 8891 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 26 +.delay_slot + 8892 "01000100" // MOVXM p6, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8893 "11000000" // /* MW 5 */ + 8894 "11001001" // /* MW 4 */ + 8895 "11001100" // /* MW 3 */ + 8896 "00000111" // /* MW 2 */ + 8897 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8903 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8905 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 26 first +.src_ref 8 "superkernels.cpp" 523 61 + 8906 "10111010" // LDA r18, [p6]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8907 "00010000" // /* MW 9 */ + 8908 "00100100" // /* MW 8 */ + 8909 "00110010" // /* MW 7 */ + 8910 "11110011" // /* MW 6 */ + 8911 "00000001" // /* MW 5 */ + 8912 "00000000" // /* MW 4 */ + 8913 "11010000" // /* MW 3 */ + 8914 "11001010" // /* MW 2 */ + 8915 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 61 +.src_ref 8 "superkernels.cpp" 524 44 + 8916 "10111010" // LDA r16, [p6]; MOVXM p6, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8917 "00010000" // /* MW 9 */ + 8918 "01101010" // /* MW 8 */ + 8919 "00110010" // /* MW 7 */ + 8920 "11110011" // /* MW 6 */ + 8921 "00000001" // /* MW 5 */ + 8922 "00000000" // /* MW 4 */ + 8923 "11010000" // /* MW 3 */ + 8924 "11000010" // /* MW 2 */ + 8925 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 +.src_ref 8 "superkernels.cpp" 524 44 first + 8926 "00101100" // LDA r17, [p6]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8927 "00000010" // /* MW 5 */ + 8928 "01100000" // /* MW 4 */ + 8929 "11010000" // /* MW 3 */ + 8930 "11000110" // /* MW 2 */ + 8931 "11000000" // /* MW 1 */ + 8932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8933 "00000000" // /* MW 1 */ + 8934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8935 "00000000" // /* MW 1 */ + 8936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8937 "00000000" // /* MW 1 */ + 8938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8939 "00000000" // /* MW 1 */ + 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8941 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 37 first + 8942 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8943 "00001111" // /* MW 3 */ + 8944 "10100101" // /* MW 2 */ + 8945 "00010100" // /* MW 1 */ + 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8947 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 first +.src_ref 8 "superkernels.cpp" 524 30 first + 8948 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8949 "10000010" // /* MW 5 */ + 8950 "00110010" // /* MW 4 */ + 8951 "00111010" // /* MW 3 */ + 8952 "11100100" // /* MW 2 */ + 8953 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8954 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8955 "00011100" // /* MW 3 */ + 8956 "00110111" // /* MW 2 */ + 8957 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8958 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8959 "00000010" // /* MW 3 */ + 8960 "11100111" // /* MW 2 */ + 8961 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 42 + 8962 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8963 "00011100" // /* MW 3 */ + 8964 "10110111" // /* MW 2 */ + 8965 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8966 "00011000" // SEL.EQZ r17, r24, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8967 "00110010" // /* MW 3 */ + 8968 "00100011" // /* MW 2 */ + 8969 "00010110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 65 first + 8970 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8971 "00010001" // /* MW 3 */ + 8972 "00100101" // /* MW 2 */ + 8973 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 526 36 first + 8974 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8975 "00001000" // /* MW 3 */ + 8976 "01100001" // /* MW 2 */ + 8977 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 8978 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */ + 8979 "00000001" // /* MW 5 */ + 8980 "01000000" // /* MW 4 */ + 8981 "01000000" // /* MW 3 */ + 8982 "00010010" // /* MW 2 */ + 8983 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 32 +.delay_slot + 8984 "01000100" // MOVXM p6, #509200 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8985 "00100000" // /* MW 5 */ + 8986 "11001010" // /* MW 4 */ + 8987 "11001100" // /* MW 3 */ + 8988 "00000111" // /* MW 2 */ + 8989 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 32 first +.delay_slot + 8990 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8991 "01010001" // /* MW 3 */ + 8992 "00000110" // /* MW 2 */ + 8993 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8999 "00000000" // /* MW 1 */ + 9000 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */ + 9001 "00000000" // /* MW 5 */ + 9002 "00000000" // /* MW 4 */ + 9003 "11111000" // /* MW 3 */ + 9004 "00010001" // /* MW 2 */ + 9005 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.src_ref 8 "superkernels.cpp" 558 19 +.delay_slot + 9006 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9007 "00010000" // /* MW 9 */ + 9008 "01101000" // /* MW 8 */ + 9009 "10110010" // /* MW 7 */ + 9010 "11110011" // /* MW 6 */ + 9011 "00000001" // /* MW 5 */ + 9012 "00000000" // /* MW 4 */ + 9013 "00000000" // /* MW 3 */ + 9014 "01001110" // /* MW 2 */ + 9015 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9016 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9017 "00010000" // /* MW 9 */ + 9018 "00100000" // /* MW 8 */ + 9019 "00110010" // /* MW 7 */ + 9020 "11110001" // /* MW 6 */ + 9021 "00000001" // /* MW 5 */ + 9022 "00000000" // /* MW 4 */ + 9023 "00000000" // /* MW 3 */ + 9024 "00101111" // /* MW 2 */ + 9025 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9026 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9027 "00000001" // /* MW 3 */ + 9028 "00011010" // /* MW 2 */ + 9029 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9032 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9033 "00011100" // /* MW 7 */ + 9034 "00000000" // /* MW 6 */ + 9035 "00000000" // /* MW 5 */ + 9036 "00000100" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504 + 9040 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */ + 9041 "00000000" // /* MW 5 */ + 9042 "00000000" // /* MW 4 */ + 9043 "11111000" // /* MW 3 */ + 9044 "00010001" // /* MW 2 */ + 9045 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.src_ref 8 "superkernels.cpp" 558 19 +.delay_slot + 9046 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9047 "00010000" // /* MW 9 */ + 9048 "01101000" // /* MW 8 */ + 9049 "10110010" // /* MW 7 */ + 9050 "11110011" // /* MW 6 */ + 9051 "00000001" // /* MW 5 */ + 9052 "00000000" // /* MW 4 */ + 9053 "00000000" // /* MW 3 */ + 9054 "01001110" // /* MW 2 */ + 9055 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9056 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9057 "00010000" // /* MW 9 */ + 9058 "00100000" // /* MW 8 */ + 9059 "00110010" // /* MW 7 */ + 9060 "11110001" // /* MW 6 */ + 9061 "00000001" // /* MW 5 */ + 9062 "00000000" // /* MW 4 */ + 9063 "00000000" // /* MW 3 */ + 9064 "00101111" // /* MW 2 */ + 9065 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9066 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9067 "00000001" // /* MW 3 */ + 9068 "00011010" // /* MW 2 */ + 9069 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9073 "00000000" // /* MW 15 */ + 9074 "00000000" // /* MW 14 */ + 9075 "01111000" // /* MW 13 */ + 9076 "10100101" // /* MW 12 */ + 9077 "00000001" // /* MW 11 */ + 9078 "00000000" // /* MW 10 */ + 9079 "00000000" // /* MW 9 */ + 9080 "00000000" // /* MW 8 */ + 9081 "01011011" // /* MW 7 */ + 9082 "00000001" // /* MW 6 */ + 9083 "00100000" // /* MW 5 */ + 9084 "00000000" // /* MW 4 */ + 9085 "11110000" // /* MW 3 */ + 9086 "00101100" // /* MW 2 */ + 9087 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552 +.src_ref 8 "superkernels.cpp" 532 27 +.src_ref 8 "superkernels.cpp" 533 31 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 + 9088 "10111010" // MOVA r13, #0; MOVXM p6, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9089 "00010000" // /* MW 9 */ + 9090 "01110010" // /* MW 8 */ + 9091 "00110010" // /* MW 7 */ + 9092 "11110011" // /* MW 6 */ + 9093 "00000001" // /* MW 5 */ + 9094 "00000000" // /* MW 4 */ + 9095 "00000000" // /* MW 3 */ + 9096 "00001101" // /* MW 2 */ + 9097 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 27 first +.src_ref 8 "superkernels.cpp" 532 63 +.src_ref 8 "superkernels.cpp" 552 2 + 9098 "10111010" // LDA r18, [p6]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9099 "00010000" // /* MW 9 */ + 9100 "00100000" // /* MW 8 */ + 9101 "00110010" // /* MW 7 */ + 9102 "11110001" // /* MW 6 */ + 9103 "00000001" // /* MW 5 */ + 9104 "00000000" // /* MW 4 */ + 9105 "11010000" // /* MW 3 */ + 9106 "11001010" // /* MW 2 */ + 9107 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 63 +.src_ref 8 "superkernels.cpp" 533 46 + 9108 "10111010" // LDA r16, [p2]; MOVXM p6, #509144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9109 "00010000" // /* MW 9 */ + 9110 "01101100" // /* MW 8 */ + 9111 "00110010" // /* MW 7 */ + 9112 "11110011" // /* MW 6 */ + 9113 "00000001" // /* MW 5 */ + 9114 "00000000" // /* MW 4 */ + 9115 "11010000" // /* MW 3 */ + 9116 "11000010" // /* MW 2 */ + 9117 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 46 first +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 + 9118 "00101100" // LDA r17, [p6]; MOVX r15, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9119 "00001010" // /* MW 5 */ + 9120 "00111100" // /* MW 4 */ + 9121 "11010000" // /* MW 3 */ + 9122 "11000110" // /* MW 2 */ + 9123 "11000000" // /* MW 1 */ + 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9125 "00000000" // /* MW 1 */ + 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9127 "00000000" // /* MW 1 */ + 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9129 "00000000" // /* MW 1 */ + 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9131 "00000000" // /* MW 1 */ + 9132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9133 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 39 first + 9134 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9135 "00001111" // /* MW 3 */ + 9136 "10100101" // /* MW 2 */ + 9137 "00010100" // /* MW 1 */ + 9138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9139 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 first +.src_ref 8 "superkernels.cpp" 533 31 first + 9140 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9141 "10000010" // /* MW 5 */ + 9142 "00110010" // /* MW 4 */ + 9143 "00111010" // /* MW 3 */ + 9144 "11100100" // /* MW 2 */ + 9145 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9146 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9147 "00011100" // /* MW 3 */ + 9148 "00110111" // /* MW 2 */ + 9149 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9150 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9151 "00000010" // /* MW 3 */ + 9152 "11100111" // /* MW 2 */ + 9153 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 44 + 9154 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "00011100" // /* MW 3 */ + 9156 "10110111" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9158 "00011000" // SEL.EQZ r17, r13, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "00110010" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 67 first + 9162 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00010001" // /* MW 3 */ + 9164 "00100101" // /* MW 2 */ + 9165 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 535 37 first + 9166 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9167 "00001000" // /* MW 3 */ + 9168 "01100001" // /* MW 2 */ + 9169 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 9170 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */ + 9171 "00000001" // /* MW 5 */ + 9172 "01000000" // /* MW 4 */ + 9173 "01000000" // /* MW 3 */ + 9174 "00010010" // /* MW 2 */ + 9175 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 33 +.delay_slot + 9176 "01000100" // MOVXM p6, #509208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9177 "00110000" // /* MW 5 */ + 9178 "11001010" // /* MW 4 */ + 9179 "11001100" // /* MW 3 */ + 9180 "00000111" // /* MW 2 */ + 9181 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 33 first +.delay_slot + 9182 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "01010001" // /* MW 3 */ + 9184 "00000110" // /* MW 2 */ + 9185 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9189 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.delay_slot + 9190 "10111010" // NOPA; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9191 "00010000" // /* MW 9 */ + 9192 "01101000" // /* MW 8 */ + 9193 "10110010" // /* MW 7 */ + 9194 "11110011" // /* MW 6 */ + 9195 "00000001" // /* MW 5 */ + 9196 "00000000" // /* MW 4 */ + 9197 "11110000" // /* MW 3 */ + 9198 "00101100" // /* MW 2 */ + 9199 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 9200 "00111010" // MOVS p6, r12; J #9408 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */ + 9201 "00100001" // /* MW 9 */ + 9202 "00000000" // /* MW 8 */ + 9203 "00000000" // /* MW 7 */ + 9204 "10011000" // /* MW 6 */ + 9205 "00000100" // /* MW 5 */ + 9206 "00000000" // /* MW 4 */ + 9207 "01100000" // /* MW 3 */ + 9208 "10000001" // /* MW 2 */ + 9209 "11010001" // /* MW 1 */ +.delay_slot + 9210 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9211 "10010001" // /* MW 3 */ + 9212 "11100101" // /* MW 2 */ + 9213 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9217 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9220 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9221 "10000001" // /* MW 11 */ + 9222 "10101101" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "00000000" // /* MW 8 */ + 9225 "00000000" // /* MW 7 */ + 9226 "00000000" // /* MW 6 */ + 9227 "00100000" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696 +.src_ref 8 "superkernels.cpp" 541 26 + 9232 "01000100" // MOVXM p6, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10001000" // /* MW 5 */ + 9234 "11001001" // /* MW 4 */ + 9235 "11001100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 26 first +.src_ref 8 "superkernels.cpp" 541 61 + 9238 "10111010" // LDA r19, [p6]; MOVXM p6, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "00100010" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110011" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11001110" // /* MW 2 */ + 9247 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 61 +.src_ref 8 "superkernels.cpp" 542 44 + 9248 "10111010" // LDA r16, [p6]; MOVXM p6, #509148 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "01101110" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110011" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000010" // /* MW 2 */ + 9257 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 44 first + 9258 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9259 "01010110" // /* MW 3 */ + 9260 "00000110" // /* MW 2 */ + 9261 "00000110" // /* MW 1 */ + 9262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9263 "00000000" // /* MW 1 */ + 9264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9265 "00000000" // /* MW 1 */ + 9266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9267 "00000000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 37 first + 9272 "10011000" // MUL r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9273 "00001111" // /* MW 3 */ + 9274 "11100111" // /* MW 2 */ + 9275 "00010100" // /* MW 1 */ + 9276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 first +.src_ref 8 "superkernels.cpp" 542 30 first + 9278 "10100100" // SUB r20, r18, r19; ADD.NC r21, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "10000010" // /* MW 5 */ + 9280 "10110011" // /* MW 4 */ + 9281 "00111010" // /* MW 3 */ + 9282 "00100110" // /* MW 2 */ + 9283 "10010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9284 "10011000" // LTU r27, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9285 "00101100" // /* MW 3 */ + 9286 "01110111" // /* MW 2 */ + 9287 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9288 "00011000" // SEL.EQZ r20, r20, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9289 "00000010" // /* MW 3 */ + 9290 "00101001" // /* MW 2 */ + 9291 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 +.src_ref 8 "superkernels.cpp" 542 42 + 9292 "01100100" // LTU r27, r19, r18; MOV r17, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9293 "00000001" // /* MW 5 */ + 9294 "10100000" // /* MW 4 */ + 9295 "10011000" // /* MW 3 */ + 9296 "11100101" // /* MW 2 */ + 9297 "10011110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9298 "00011000" // SEL.EQZ r17, r17, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "01000010" // /* MW 3 */ + 9300 "01100011" // /* MW 2 */ + 9301 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 69 first + 9302 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9303 "00010001" // /* MW 3 */ + 9304 "00100101" // /* MW 2 */ + 9305 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 544 38 first + 9306 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9307 "00000111" // /* MW 3 */ + 9308 "01100001" // /* MW 2 */ + 9309 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 9310 "10000100" // JNZ r16, #10176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10176 delay_slots=5 */ + 9311 "00000001" // /* MW 5 */ + 9312 "01000000" // /* MW 4 */ + 9313 "11100000" // /* MW 3 */ + 9314 "00010011" // /* MW 2 */ + 9315 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 34 +.delay_slot + 9316 "01000100" // MOVXM p6, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9317 "01000000" // /* MW 5 */ + 9318 "11001010" // /* MW 4 */ + 9319 "11001100" // /* MW 3 */ + 9320 "00000111" // /* MW 2 */ + 9321 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 34 first +.delay_slot + 9322 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9323 "01010001" // /* MW 3 */ + 9324 "00000110" // /* MW 2 */ + 9325 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9327 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 9330 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9331 "00011100" // /* MW 13 */ + 9332 "00000000" // /* MW 12 */ + 9333 "00000000" // /* MW 11 */ + 9334 "01010111" // /* MW 10 */ + 9335 "00011010" // /* MW 9 */ + 9336 "01000000" // /* MW 8 */ + 9337 "00000000" // /* MW 7 */ + 9338 "00000000" // /* MW 6 */ + 9339 "10110110" // /* MW 5 */ + 9340 "00000010" // /* MW 4 */ + 9341 "11110000" // /* MW 3 */ + 9342 "00101100" // /* MW 2 */ + 9343 "00000000" // /* MW 1 */ +.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 8 "superkernels.cpp" 558 19 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 9344 "01110110" // LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9345 "01011000" // /* MW 11 */ + 9346 "00000001" // /* MW 10 */ + 9347 "11101000" // /* MW 9 */ + 9348 "01001001" // /* MW 8 */ + 9349 "11100000" // /* MW 7 */ + 9350 "00000000" // /* MW 6 */ + 9351 "00001011" // /* MW 5 */ + 9352 "10001100" // /* MW 4 */ + 9353 "00100110" // /* MW 3 */ + 9354 "10000011" // /* MW 2 */ + 9355 "11110111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9356 "00011000" // LDA p1, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9357 "10011001" // /* MW 3 */ + 9358 "10111100" // /* MW 2 */ + 9359 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9360 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9361 "10010001" // /* MW 3 */ + 9362 "11100101" // /* MW 2 */ + 9363 "00000111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 287 11 first +.aggressive_scheduled_block_id 7 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9364 "00000100" // JL #4176 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4176 delay_slots=5 */ + 9365 "00000001" // /* MW 5 */ + 9366 "00000000" // /* MW 4 */ + 9367 "00101000" // /* MW 3 */ + 9368 "00001000" // /* MW 2 */ + 9369 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9370 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9371 "11000000" // /* MW 3 */ + 9372 "01100000" // /* MW 2 */ + 9373 "00011111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9374 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9375 "00000001" // /* MW 3 */ + 9376 "00011010" // /* MW 2 */ + 9377 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 287 11 +.delay_slot + 9382 "10111010" // NOPA; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9383 "00010000" // /* MW 9 */ + 9384 "10000000" // /* MW 8 */ + 9385 "00110010" // /* MW 7 */ + 9386 "11110001" // /* MW 6 */ + 9387 "00000001" // /* MW 5 */ + 9388 "00000000" // /* MW 4 */ + 9389 "11110000" // /* MW 3 */ + 9390 "00101100" // /* MW 2 */ + 9391 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 552 2 +.return_address + 9392 "00111010" // MOVS p0, p7; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9393 "00010001" // /* MW 9 */ + 9394 "00100000" // /* MW 8 */ + 9395 "00110010" // /* MW 7 */ + 9396 "11110001" // /* MW 6 */ + 9397 "00000001" // /* MW 5 */ + 9398 "00000000" // /* MW 4 */ + 9399 "01100000" // /* MW 3 */ + 9400 "10010001" // /* MW 2 */ + 9401 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 + 9402 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9403 "10100000" // /* MW 5 */ + 9404 "11001001" // /* MW 4 */ + 9405 "11001110" // /* MW 3 */ + 9406 "00000111" // /* MW 2 */ + 9407 "00000000" // /* MW 1 */ +.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9408 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9409 "10011110" // /* MW 3 */ + 9410 "01011100" // /* MW 2 */ + 9411 "00000110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 first +.no_stack_arguments + 9412 "00000100" // JL #4848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4848 delay_slots=5 */ + 9413 "00000001" // /* MW 5 */ + 9414 "00000000" // /* MW 4 */ + 9415 "01111000" // /* MW 3 */ + 9416 "00001001" // /* MW 2 */ + 9417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9427 "00011100" // /* MW 13 */ + 9428 "00000000" // /* MW 12 */ + 9429 "00000000" // /* MW 11 */ + 9430 "01010111" // /* MW 10 */ + 9431 "00011010" // /* MW 9 */ + 9432 "01000000" // /* MW 8 */ + 9433 "00000000" // /* MW 7 */ + 9434 "00000000" // /* MW 6 */ + 9435 "10110110" // /* MW 5 */ + 9436 "00000010" // /* MW 4 */ + 9437 "11110000" // /* MW 3 */ + 9438 "00101100" // /* MW 2 */ + 9439 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 first +.return_address + 9440 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9441 "00010110" // /* MW 3 */ + 9442 "00000110" // /* MW 2 */ + 9443 "00000111" // /* MW 1 */ + 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9445 "00000000" // /* MW 1 */ + 9446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9447 "00000000" // /* MW 1 */ + 9448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9449 "00000000" // /* MW 1 */ + 9450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9451 "00000000" // /* MW 1 */ + 9452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9453 "00000000" // /* MW 1 */ + 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 19 + 9456 "10011000" // NE r17, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9457 "00001000" // /* MW 3 */ + 9458 "11100011" // /* MW 2 */ + 9459 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 25 + 9460 "10000100" // JNZ r17, #9664 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9664 delay_slots=5 */ + 9461 "00000001" // /* MW 5 */ + 9462 "01000000" // /* MW 4 */ + 9463 "11100000" // /* MW 3 */ + 9464 "00010010" // /* MW 2 */ + 9465 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 555 15 + 9476 "01000100" // MOVXM p7, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9477 "10001000" // /* MW 5 */ + 9478 "11001001" // /* MW 4 */ + 9479 "11001110" // /* MW 3 */ + 9480 "00000111" // /* MW 2 */ + 9481 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 554 67 + 9482 "10111010" // LDA r16, [p7]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9483 "00010000" // /* MW 9 */ + 9484 "00110000" // /* MW 8 */ + 9485 "00110010" // /* MW 7 */ + 9486 "11110001" // /* MW 6 */ + 9487 "00000001" // /* MW 5 */ + 9488 "00000000" // /* MW 4 */ + 9489 "11010000" // /* MW 3 */ + 9490 "11000010" // /* MW 2 */ + 9491 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 67 + 9492 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9493 "00111010" // /* MW 3 */ + 9494 "00000100" // /* MW 2 */ + 9495 "00000010" // /* MW 1 */ + 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9497 "00000000" // /* MW 1 */ + 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9499 "00000000" // /* MW 1 */ + 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9501 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.no_stack_arguments + 9502 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9503 "00000001" // /* MW 5 */ + 9504 "00000000" // /* MW 4 */ + 9505 "11111000" // /* MW 3 */ + 9506 "00010011" // /* MW 2 */ + 9507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.delay_slot + 9510 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00000111" // /* MW 3 */ + 9512 "00100000" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9514 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9515 "10110101" // /* MW 5 */ + 9516 "01101101" // /* MW 4 */ + 9517 "00111000" // /* MW 3 */ + 9518 "11000010" // /* MW 2 */ + 9519 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9520 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9521 "01000001" // /* MW 5 */ + 9522 "10111011" // /* MW 4 */ + 9523 "00110111" // /* MW 3 */ + 9524 "01100000" // /* MW 2 */ + 9525 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9526 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9527 "00010010" // /* MW 9 */ + 9528 "00000001" // /* MW 8 */ + 9529 "00000100" // /* MW 7 */ + 9530 "00000000" // /* MW 6 */ + 9531 "01011011" // /* MW 5 */ + 9532 "00000001" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 9536 "10111010" // LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9537 "01111000" // /* MW 9 */ + 9538 "11010000" // /* MW 8 */ + 9539 "01101011" // /* MW 7 */ + 9540 "10001111" // /* MW 6 */ + 9541 "00000001" // /* MW 5 */ + 9542 "00011011" // /* MW 4 */ + 9543 "00100000" // /* MW 3 */ + 9544 "10100011" // /* MW 2 */ + 9545 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 + 9546 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9547 "00000010" // /* MW 3 */ + 9548 "11100001" // /* MW 2 */ + 9549 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 6 +.src_ref 8 "superkernels.cpp" 554 78 + 9550 "10000100" // JNZ r16, #9632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9632 delay_slots=5 */ + 9551 "00000001" // /* MW 5 */ + 9552 "01000000" // /* MW 4 */ + 9553 "11010000" // /* MW 3 */ + 9554 "00010010" // /* MW 2 */ + 9555 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9556 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00000101" // /* MW 3 */ + 9558 "00011110" // /* MW 2 */ + 9559 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9567 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 555 15 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9568 "00001100" // LDA r16, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9569 "01100011" // /* MW 5 */ + 9570 "00001011" // /* MW 4 */ + 9571 "11011110" // /* MW 3 */ + 9572 "11000010" // /* MW 2 */ + 9573 "01001010" // /* MW 1 */ + 9574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9575 "00000000" // /* MW 1 */ + 9576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9577 "00000000" // /* MW 1 */ + 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9579 "00000000" // /* MW 1 */ + 9580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9581 "00000000" // /* MW 1 */ + 9582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9583 "00000000" // /* MW 1 */ + 9584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9585 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9586 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9587 "11111000" // /* MW 3 */ + 9588 "00010000" // /* MW 2 */ + 9589 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 7 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9590 "10111010" // LDA r16, [p6, #-8]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9591 "00010000" // /* MW 9 */ + 9592 "01101000" // /* MW 8 */ + 9593 "10110010" // /* MW 7 */ + 9594 "11110011" // /* MW 6 */ + 9595 "00000001" // /* MW 5 */ + 9596 "00000000" // /* MW 4 */ + 9597 "11010000" // /* MW 3 */ + 9598 "11000010" // /* MW 2 */ + 9599 "11011100" // /* MW 1 */ + 9600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9601 "00000000" // /* MW 1 */ + 9602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9603 "00000000" // /* MW 1 */ + 9604 "10000100" // J #9648 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9648 delay_slots=5 */ + 9605 "00000000" // /* MW 5 */ + 9606 "00000000" // /* MW 4 */ + 9607 "11011000" // /* MW 3 */ + 9608 "00010010" // /* MW 2 */ + 9609 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.delay_slot + 9616 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9617 "00000001" // /* MW 3 */ + 9618 "11100001" // /* MW 2 */ + 9619 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.delay_slot + 9620 "00110110" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9621 "11000001" // /* MW 11 */ + 9622 "00001000" // /* MW 10 */ + 9623 "01110011" // /* MW 9 */ + 9624 "00000011" // /* MW 8 */ + 9625 "00000000" // /* MW 7 */ + 9626 "00000000" // /* MW 6 */ + 9627 "00100000" // /* MW 5 */ + 9628 "00000000" // /* MW 4 */ + 9629 "11110000" // /* MW 3 */ + 9630 "00101100" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096 +.src_ref 8 "superkernels.cpp" 558 7 + 9632 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "00010000" // /* MW 13 */ + 9636 "01101000" // /* MW 12 */ + 9637 "10110010" // /* MW 11 */ + 9638 "11110011" // /* MW 10 */ + 9639 "00000001" // /* MW 9 */ + 9640 "00000000" // /* MW 8 */ + 9641 "01011011" // /* MW 7 */ + 9642 "00000001" // /* MW 6 */ + 9643 "00100000" // /* MW 5 */ + 9644 "00000000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112 +.src_ref 8 "superkernels.cpp" 558 7 first + 9648 "11100001" // LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "10100101" // /* MW 12 */ + 9653 "00000001" // /* MW 11 */ + 9654 "00000000" // /* MW 10 */ + 9655 "00000000" // /* MW 9 */ + 9656 "00000000" // /* MW 8 */ + 9657 "01011011" // /* MW 7 */ + 9658 "00000001" // /* MW 6 */ + 9659 "00100000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11010000" // /* MW 3 */ + 9662 "11000010" // /* MW 2 */ + 9663 "11100000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128 +.src_ref 8 "superkernels.cpp" 558 43 + 9664 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00000001" // /* MW 3 */ + 9666 "00100010" // /* MW 2 */ + 9667 "00010000" // /* MW 1 */ + 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9669 "00000000" // /* MW 1 */ + 9670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9671 "00000000" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 19 + 9678 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9679 "00001000" // /* MW 3 */ + 9680 "10100001" // /* MW 2 */ + 9681 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 25 + 9682 "10000100" // JNZ r16, #9872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9872 delay_slots=5 */ + 9683 "00000001" // /* MW 5 */ + 9684 "01000000" // /* MW 4 */ + 9685 "01001000" // /* MW 3 */ + 9686 "00010011" // /* MW 2 */ + 9687 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.src_ref 8 "superkernels.cpp" 559 15 +.delay_slot + 9688 "01000100" // MOVXM p7, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9689 "11000000" // /* MW 5 */ + 9690 "11001001" // /* MW 4 */ + 9691 "11001110" // /* MW 3 */ + 9692 "00000111" // /* MW 2 */ + 9693 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 67 +.delay_slot + 9694 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9695 "11000000" // /* MW 5 */ + 9696 "11001000" // /* MW 4 */ + 9697 "11000100" // /* MW 3 */ + 9698 "00000111" // /* MW 2 */ + 9699 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9703 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9705 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 + 9706 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00010110" // /* MW 3 */ + 9708 "00000110" // /* MW 2 */ + 9709 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 67 + 9710 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "00111010" // /* MW 3 */ + 9712 "00000100" // /* MW 2 */ + 9713 "00000010" // /* MW 1 */ + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9715 "00000000" // /* MW 1 */ + 9716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9717 "00000000" // /* MW 1 */ + 9718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9719 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.no_stack_arguments + 9720 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9721 "00000001" // /* MW 5 */ + 9722 "00000000" // /* MW 4 */ + 9723 "11111000" // /* MW 3 */ + 9724 "00010011" // /* MW 2 */ + 9725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9727 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.delay_slot + 9728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9729 "00000111" // /* MW 3 */ + 9730 "00100000" // /* MW 2 */ + 9731 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9732 "01011100" // ST r16, [p7]; LT r27, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9733 "00110101" // /* MW 5 */ + 9734 "01101110" // /* MW 4 */ + 9735 "00111000" // /* MW 3 */ + 9736 "11000010" // /* MW 2 */ + 9737 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9738 "11100100" // SUB r17, r17, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9739 "01000001" // /* MW 5 */ + 9740 "00111011" // /* MW 4 */ + 9741 "00110111" // /* MW 3 */ + 9742 "01100000" // /* MW 2 */ + 9743 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9744 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9745 "00000000" // /* MW 15 */ + 9746 "00000000" // /* MW 14 */ + 9747 "01111000" // /* MW 13 */ + 9748 "10100101" // /* MW 12 */ + 9749 "00000001" // /* MW 11 */ + 9750 "10010000" // /* MW 10 */ + 9751 "00001000" // /* MW 9 */ + 9752 "00100000" // /* MW 8 */ + 9753 "01011011" // /* MW 7 */ + 9754 "00000001" // /* MW 6 */ + 9755 "00100000" // /* MW 5 */ + 9756 "00000000" // /* MW 4 */ + 9757 "11110000" // /* MW 3 */ + 9758 "00101100" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 9760 "10111010" // LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9761 "01111000" // /* MW 9 */ + 9762 "10010000" // /* MW 8 */ + 9763 "01101011" // /* MW 7 */ + 9764 "10001111" // /* MW 6 */ + 9765 "00000001" // /* MW 5 */ + 9766 "00011011" // /* MW 4 */ + 9767 "00100000" // /* MW 3 */ + 9768 "10010011" // /* MW 2 */ + 9769 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 + 9770 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9771 "00000010" // /* MW 3 */ + 9772 "11100001" // /* MW 2 */ + 9773 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 6 +.src_ref 8 "superkernels.cpp" 558 78 + 9774 "10000100" // JNZ r16, #9840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9840 delay_slots=5 */ + 9775 "00000001" // /* MW 5 */ + 9776 "01000000" // /* MW 4 */ + 9777 "00111000" // /* MW 3 */ + 9778 "00010011" // /* MW 2 */ + 9779 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 7 +.delay_slot + 9780 "01000100" // MOVXM p2, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9781 "10100000" // /* MW 5 */ + 9782 "11001001" // /* MW 4 */ + 9783 "11000100" // /* MW 3 */ + 9784 "00000111" // /* MW 2 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9789 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9791 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9793 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 559 15 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9794 "00001100" // LDA r16, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9795 "01100011" // /* MW 5 */ + 9796 "00001011" // /* MW 4 */ + 9797 "11011110" // /* MW 3 */ + 9798 "11000010" // /* MW 2 */ + 9799 "00101010" // /* MW 1 */ + 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9801 "00000000" // /* MW 1 */ + 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9803 "00000000" // /* MW 1 */ + 9804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9805 "00000000" // /* MW 1 */ + 9806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9807 "00000000" // /* MW 1 */ + 9808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9809 "00000000" // /* MW 1 */ + 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9812 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "11111000" // /* MW 3 */ + 9814 "00010000" // /* MW 2 */ + 9815 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 9816 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "00010110" // /* MW 3 */ + 9818 "11100110" // /* MW 2 */ + 9819 "00000110" // /* MW 1 */ + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ + 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9823 "00000000" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 9832 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00000001" // /* MW 3 */ + 9834 "11100001" // /* MW 2 */ + 9835 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9836 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9837 "00010001" // /* MW 3 */ + 9838 "11100110" // /* MW 2 */ + 9839 "00001110" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304 + 9840 "10000100" // J #9888 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */ + 9841 "00000000" // /* MW 5 */ + 9842 "00000000" // /* MW 4 */ + 9843 "01010000" // /* MW 3 */ + 9844 "00010011" // /* MW 2 */ + 9845 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.delay_slot + 9846 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9847 "11000000" // /* MW 3 */ + 9848 "01100010" // /* MW 2 */ + 9849 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9856 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9857 "00000000" // /* MW 15 */ + 9858 "00000000" // /* MW 14 */ + 9859 "01111000" // /* MW 13 */ + 9860 "10100101" // /* MW 12 */ + 9861 "00000001" // /* MW 11 */ + 9862 "00000000" // /* MW 10 */ + 9863 "00000000" // /* MW 9 */ + 9864 "00000000" // /* MW 8 */ + 9865 "01011011" // /* MW 7 */ + 9866 "00000001" // /* MW 6 */ + 9867 "00100000" // /* MW 5 */ + 9868 "00000000" // /* MW 4 */ + 9869 "11110000" // /* MW 3 */ + 9870 "00101100" // /* MW 2 */ + 9871 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336 +.src_ref 8 "superkernels.cpp" 562 7 +.src_ref 1 "io_buffer_main.h" 324 51 + 9872 "11100001" // LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9873 "00000000" // /* MW 15 */ + 9874 "00000000" // /* MW 14 */ + 9875 "00010000" // /* MW 13 */ + 9876 "01101000" // /* MW 12 */ + 9877 "00110010" // /* MW 11 */ + 9878 "11110001" // /* MW 10 */ + 9879 "00000001" // /* MW 9 */ + 9880 "00000000" // /* MW 8 */ + 9881 "01011011" // /* MW 7 */ + 9882 "00000001" // /* MW 6 */ + 9883 "00100000" // /* MW 5 */ + 9884 "00000000" // /* MW 4 */ + 9885 "00100000" // /* MW 3 */ + 9886 "11110011" // /* MW 2 */ + 9887 "11111011" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352 +.src_ref 8 "superkernels.cpp" 562 7 first +.src_ref 8 "superkernels.cpp" 562 19 + 9888 "00101100" // LDA r16, [p2]; MOVX r17, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00100010" // /* MW 5 */ + 9890 "01000100" // /* MW 4 */ + 9891 "11010000" // /* MW 3 */ + 9892 "11000010" // /* MW 2 */ + 9893 "01000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 19 + 9906 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9907 "00001000" // /* MW 3 */ + 9908 "01100001" // /* MW 2 */ + 9909 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 25 + 9910 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */ + 9911 "00000001" // /* MW 5 */ + 9912 "01000000" // /* MW 4 */ + 9913 "10101000" // /* MW 3 */ + 9914 "00010011" // /* MW 2 */ + 9915 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.delay_slot + 9916 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11001000" // /* MW 5 */ + 9918 "11001001" // /* MW 4 */ + 9919 "11000100" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9929 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.src_ref 8 "superkernels.cpp" 562 68 + 9930 "10111010" // LDA r16, [p2]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9931 "00010000" // /* MW 9 */ + 9932 "00110000" // /* MW 8 */ + 9933 "10110010" // /* MW 7 */ + 9934 "11110000" // /* MW 6 */ + 9935 "00000001" // /* MW 5 */ + 9936 "00000000" // /* MW 4 */ + 9937 "11010000" // /* MW 3 */ + 9938 "11000010" // /* MW 2 */ + 9939 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 68 + 9940 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9941 "00111010" // /* MW 3 */ + 9942 "00000100" // /* MW 2 */ + 9943 "00000001" // /* MW 1 */ + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ + 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9947 "00000000" // /* MW 1 */ + 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9949 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.no_stack_arguments + 9950 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9951 "00000001" // /* MW 5 */ + 9952 "00000000" // /* MW 4 */ + 9953 "11111000" // /* MW 3 */ + 9954 "00010011" // /* MW 2 */ + 9955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9957 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.delay_slot + 9958 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9959 "00000111" // /* MW 3 */ + 9960 "00100000" // /* MW 2 */ + 9961 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9962 "01011100" // ST r16, [p2]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9963 "10110101" // /* MW 5 */ + 9964 "01101101" // /* MW 4 */ + 9965 "00111000" // /* MW 3 */ + 9966 "11000010" // /* MW 2 */ + 9967 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9968 "11100100" // SUB r17, r13, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9969 "01000001" // /* MW 5 */ + 9970 "00111011" // /* MW 4 */ + 9971 "00110111" // /* MW 3 */ + 9972 "01100000" // /* MW 2 */ + 9973 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9974 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9975 "00010010" // /* MW 9 */ + 9976 "00000001" // /* MW 8 */ + 9977 "00000100" // /* MW 7 */ + 9978 "00000000" // /* MW 6 */ + 9979 "01011011" // /* MW 5 */ + 9980 "00000001" // /* MW 4 */ + 9981 "11110000" // /* MW 3 */ + 9982 "00101100" // /* MW 2 */ + 9983 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 562 44 +.return_address + 9984 "11100100" // SUB r16, r13, r3; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9985 "01000001" // /* MW 5 */ + 9986 "10101110" // /* MW 4 */ + 9987 "00111101" // /* MW 3 */ + 9988 "00000110" // /* MW 2 */ + 9989 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 + 9990 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9991 "00000010" // /* MW 3 */ + 9992 "11100001" // /* MW 2 */ + 9993 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 6 +.src_ref 8 "superkernels.cpp" 562 79 + 9994 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */ + 9995 "00000001" // /* MW 5 */ + 9996 "01000000" // /* MW 4 */ + 9997 "10101000" // /* MW 3 */ + 9998 "00010011" // /* MW 2 */ + 9999 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 563 16 +.delay_slot + 10000 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10001 "11001000" // /* MW 5 */ + 10002 "11001001" // /* MW 4 */ + 10003 "11000100" // /* MW 3 */ + 10004 "00000111" // /* MW 2 */ + 10005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10013 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 563 16 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 10014 "00001100" // LDA r16, [p7, #20]; ST r13, [p2] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "01100011" // /* MW 5 */ + 10016 "00001011" // /* MW 4 */ + 10017 "11010100" // /* MW 3 */ + 10018 "11000010" // /* MW 2 */ + 10019 "11101010" // /* MW 1 */ + 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10021 "00000000" // /* MW 1 */ + 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10023 "00000000" // /* MW 1 */ + 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10025 "00000000" // /* MW 1 */ + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10032 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10033 "11111000" // /* MW 3 */ + 10034 "00010000" // /* MW 2 */ + 10035 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 10036 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10037 "00010110" // /* MW 3 */ + 10038 "11100110" // /* MW 2 */ + 10039 "00000110" // /* MW 1 */ + 10040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10041 "00000000" // /* MW 1 */ + 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10043 "00000000" // /* MW 1 */ + 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10045 "00000000" // /* MW 1 */ + 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10047 "00000000" // /* MW 1 */ + 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10049 "00000000" // /* MW 1 */ + 10050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10051 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 10052 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10053 "00000001" // /* MW 3 */ + 10054 "11100001" // /* MW 2 */ + 10055 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10056 "00000010" // ST r16, [p6, #-8]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10057 "01110000" // /* MW 7 */ + 10058 "10100101" // /* MW 6 */ + 10059 "00000001" // /* MW 5 */ + 10060 "00000000" // /* MW 4 */ + 10061 "00110000" // /* MW 3 */ + 10062 "11000010" // /* MW 2 */ + 10063 "11011100" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528 +.src_ref 8 "superkernels.cpp" 566 6 +.src_ref 8 "superkernels.cpp" 567 14 + 10064 "01000100" // MOVXM p6, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10065 "10000000" // /* MW 5 */ + 10066 "11001001" // /* MW 4 */ + 10067 "11001100" // /* MW 3 */ + 10068 "00000111" // /* MW 2 */ + 10069 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 6 first +.src_ref 8 "superkernels.cpp" 566 19 + 10070 "10111010" // LDA r16, [p6]; MOVXM p2, #509160 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10071 "00010000" // /* MW 9 */ + 10072 "01110100" // /* MW 8 */ + 10073 "00110010" // /* MW 7 */ + 10074 "11110001" // /* MW 6 */ + 10075 "00000001" // /* MW 5 */ + 10076 "00000000" // /* MW 4 */ + 10077 "11010000" // /* MW 3 */ + 10078 "11000010" // /* MW 2 */ + 10079 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 19 + 10080 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10081 "00110110" // /* MW 3 */ + 10082 "00000110" // /* MW 2 */ + 10083 "00000010" // /* MW 1 */ + 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10085 "00000000" // /* MW 1 */ + 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10087 "00000000" // /* MW 1 */ + 10088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10089 "00000000" // /* MW 1 */ + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ + 10092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10093 "00000000" // /* MW 1 */ + 10094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10095 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 16 + 10096 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10097 "00001000" // /* MW 3 */ + 10098 "01100001" // /* MW 2 */ + 10099 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 6 + 10100 "10000100" // JNZ r16, #10128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10128 delay_slots=5 */ + 10101 "00000001" // /* MW 5 */ + 10102 "01000000" // /* MW 4 */ + 10103 "11001000" // /* MW 3 */ + 10104 "00010011" // /* MW 2 */ + 10105 "10000000" // /* MW 1 */ +.delay_slot + 10106 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10107 "10011001" // /* MW 3 */ + 10108 "11101111" // /* MW 2 */ + 10109 "00000111" // /* MW 1 */ +.delay_slot + 10110 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10111 "11110001" // /* MW 3 */ + 10112 "11110001" // /* MW 2 */ + 10113 "00000111" // /* MW 1 */ +.delay_slot + 10114 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10115 "11010001" // /* MW 3 */ + 10116 "11110101" // /* MW 2 */ + 10117 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 567 14 first + 10122 "00001100" // NOPA; ST r13, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100011" // /* MW 5 */ + 10124 "00001011" // /* MW 4 */ + 10125 "11111100" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592 +.src_ref 8 "superkernels.cpp" 569 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 10128 "11010100" // LDA r11, [sp, #-8]; MOV lr, r11 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10129 "01000001" // /* MW 5 */ + 10130 "11101011" // /* MW 4 */ + 10131 "00101110" // /* MW 3 */ + 10132 "00101110" // /* MW 2 */ + 10133 "11111111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.noswbrkpt + 10134 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10135 "10010001" // /* MW 3 */ + 10136 "11111101" // /* MW 2 */ + 10137 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10138 "00011000" // LDA r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10139 "10110001" // /* MW 3 */ + 10140 "11101001" // /* MW 2 */ + 10141 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 569 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10143 "00000000" // /* MW 3 */ + 10144 "00101000" // /* MW 2 */ + 10145 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10146 "11111000" // MOV p6, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10147 "00100000" // /* MW 3 */ + 10148 "01100110" // /* MW 2 */ + 10149 "00011110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 569 +.delay_slot + 10150 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10151 "00000001" // /* MW 5 */ + 10152 "00000000" // /* MW 4 */ + 10153 "00000000" // /* MW 3 */ + 10154 "11110000" // /* MW 2 */ + 10155 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "00100000" // /* MW 5 */ + 10172 "00000000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 10176 "01110110" // MOVA r13, #0; MOVS p6, r12; J #9408 /* MW 12 */ /* control_operation: words=12 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */ + 10177 "00100000" // /* MW 11 */ + 10178 "00000000" // /* MW 10 */ + 10179 "00000000" // /* MW 9 */ + 10180 "10011000" // /* MW 8 */ + 10181 "00000100" // /* MW 7 */ + 10182 "00000000" // /* MW 6 */ + 10183 "00001011" // /* MW 5 */ + 10184 "10001100" // /* MW 4 */ + 10185 "00000110" // /* MW 3 */ + 10186 "00001101" // /* MW 2 */ + 10187 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 8 "superkernels.cpp" 558 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 10188 "01100100" // MOVX r15, #1; MOV r14, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10189 "00001001" // /* MW 5 */ + 10190 "00100000" // /* MW 4 */ + 10191 "10100111" // /* MW 3 */ + 10192 "11000000" // /* MW 2 */ + 10193 "00000011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.delay_slot + 10194 "01000100" // MOVXM p2, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10195 "10000000" // /* MW 5 */ + 10196 "11001000" // /* MW 4 */ + 10197 "11000100" // /* MW 3 */ + 10198 "00000111" // /* MW 2 */ + 10199 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.delay_slot + 10200 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10201 "10100000" // /* MW 5 */ + 10202 "11001001" // /* MW 4 */ + 10203 "11001110" // /* MW 3 */ + 10204 "00000111" // /* MW 2 */ + 10205 "00000000" // /* MW 1 */ +.delay_slot + 10206 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10207 "10010001" // /* MW 3 */ + 10208 "11100101" // /* MW 2 */ + 10209 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10211 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 9 "me_div.c" 108 19 +.src_ref 9 "me_div.c" 108 19 +.src_ref 9 "me_div.c" 115 4 first +.function_start + 10224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10225 "01000001" // /* MW 5 */ + 10226 "10100000" // /* MW 4 */ + 10227 "00101111" // /* MW 3 */ + 10228 "11000000" // /* MW 2 */ + 10229 "00000000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10231 "00011100" // /* MW 3 */ + 10232 "11000110" // /* MW 2 */ + 10233 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10235 "00011100" // /* MW 3 */ + 10236 "11000110" // /* MW 2 */ + 10237 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10239 "00011100" // /* MW 3 */ + 10240 "11000110" // /* MW 2 */ + 10241 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10243 "00011100" // /* MW 3 */ + 10244 "11000110" // /* MW 2 */ + 10245 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10247 "00011100" // /* MW 3 */ + 10248 "11000110" // /* MW 2 */ + 10249 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10251 "00011100" // /* MW 3 */ + 10252 "11000110" // /* MW 2 */ + 10253 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10255 "00011100" // /* MW 3 */ + 10256 "11000110" // /* MW 2 */ + 10257 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10259 "00011100" // /* MW 3 */ + 10260 "11000110" // /* MW 2 */ + 10261 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10263 "00011100" // /* MW 3 */ + 10264 "11000110" // /* MW 2 */ + 10265 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10267 "00011100" // /* MW 3 */ + 10268 "11000110" // /* MW 2 */ + 10269 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10271 "00011100" // /* MW 3 */ + 10272 "11000110" // /* MW 2 */ + 10273 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10275 "00011100" // /* MW 3 */ + 10276 "11000110" // /* MW 2 */ + 10277 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10279 "00011100" // /* MW 3 */ + 10280 "11000110" // /* MW 2 */ + 10281 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10283 "00011100" // /* MW 3 */ + 10284 "11000110" // /* MW 2 */ + 10285 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10287 "00011100" // /* MW 3 */ + 10288 "11000110" // /* MW 2 */ + 10289 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10291 "00011100" // /* MW 3 */ + 10292 "11000110" // /* MW 2 */ + 10293 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10295 "00011100" // /* MW 3 */ + 10296 "11000110" // /* MW 2 */ + 10297 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10299 "00011100" // /* MW 3 */ + 10300 "11000110" // /* MW 2 */ + 10301 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10303 "00011100" // /* MW 3 */ + 10304 "11000110" // /* MW 2 */ + 10305 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10307 "00011100" // /* MW 3 */ + 10308 "11000110" // /* MW 2 */ + 10309 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "00011100" // /* MW 3 */ + 10312 "11000110" // /* MW 2 */ + 10313 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10315 "00011100" // /* MW 3 */ + 10316 "11000110" // /* MW 2 */ + 10317 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10319 "00011100" // /* MW 3 */ + 10320 "11000110" // /* MW 2 */ + 10321 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10323 "00011100" // /* MW 3 */ + 10324 "11000110" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "00011100" // /* MW 3 */ + 10328 "11000110" // /* MW 2 */ + 10329 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "00011100" // /* MW 3 */ + 10332 "11000110" // /* MW 2 */ + 10333 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10335 "00011100" // /* MW 3 */ + 10336 "11000110" // /* MW 2 */ + 10337 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10339 "00011100" // /* MW 3 */ + 10340 "11000110" // /* MW 2 */ + 10341 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 119 first + 10342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10343 "00000000" // /* MW 3 */ + 10344 "00101000" // /* MW 2 */ + 10345 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 first +.delay_slot + 10346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10347 "00011100" // /* MW 3 */ + 10348 "11000110" // /* MW 2 */ + 10349 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10351 "00011100" // /* MW 3 */ + 10352 "11000110" // /* MW 2 */ + 10353 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10355 "00011100" // /* MW 3 */ + 10356 "11000110" // /* MW 2 */ + 10357 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10359 "00011100" // /* MW 3 */ + 10360 "11000110" // /* MW 2 */ + 10361 "00010000" // /* MW 1 */ +.delay_slot + 10362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10363 "10100000" // /* MW 3 */ + 10364 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 10365 "00011000" // /* MW 1 */ +.label _ZL19propagateFloat32NaNjj +.function propagateFloat32NaN _ZL19propagateFloat32NaNjj +.src_ref 10 "softfloat-specialize" 78 24 +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 143 4 first +.function_start + 10368 "10111010" // MOVA r3, #-22; MOVXM r18, #-16777216 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10369 "00010000" // /* MW 9 */ + 10370 "00000000" // /* MW 8 */ + 10371 "01001000" // /* MW 7 */ + 10372 "00000010" // /* MW 6 */ + 10373 "11000000" // /* MW 5 */ + 10374 "00111111" // /* MW 4 */ + 10375 "00000000" // /* MW 3 */ + 10376 "01000011" // /* MW 2 */ + 10377 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 140 6 +.src_ref 10 "softfloat-specialize" 141 6 + 10378 "10111010" // MOVA r7, #511; MOVXM r0, #4194304 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10379 "00010000" // /* MW 9 */ + 10380 "00000000" // /* MW 8 */ + 10381 "00001000" // /* MW 7 */ + 10382 "00000000" // /* MW 6 */ + 10383 "00010000" // /* MW 5 */ + 10384 "00000000" // /* MW 4 */ + 10385 "00000000" // /* MW 3 */ + 10386 "11100111" // /* MW 2 */ + 10387 "00111111" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 38 +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 140 6 first + 10388 "10111010" // MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10389 "01011000" // /* MW 9 */ + 10390 "11111110" // /* MW 8 */ + 10391 "10101001" // /* MW 7 */ + 10392 "00101100" // /* MW 6 */ + 10393 "01000000" // /* MW 5 */ + 10394 "00000010" // /* MW 4 */ + 10395 "00000000" // /* MW 3 */ + 10396 "00110000" // /* MW 2 */ + 10397 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 141 6 first + 10398 "10011000" // OR r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10399 "00000101" // /* MW 3 */ + 10400 "10000000" // /* MW 2 */ + 10401 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10402 "10011000" // LSHL r6, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10403 "00111101" // /* MW 3 */ + 10404 "01001100" // /* MW 2 */ + 10405 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first + 10406 "10011000" // LSHL r3, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10407 "00111101" // /* MW 3 */ + 10408 "10000110" // /* MW 2 */ + 10409 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 + 10410 "10011000" // AND r3, r7, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10411 "00110100" // /* MW 3 */ + 10412 "11000110" // /* MW 2 */ + 10413 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10414 "10011000" // AND r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10415 "01100100" // /* MW 3 */ + 10416 "11001100" // /* MW 2 */ + 10417 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 + 10418 "10011000" // EQ r6, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10419 "01100111" // /* MW 3 */ + 10420 "01001100" // /* MW 2 */ + 10421 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 38 first + 10422 "10011000" // LSHL r17, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10423 "00001101" // /* MW 3 */ + 10424 "10100011" // /* MW 2 */ + 10425 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 24 + 10426 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10427 "00011100" // /* MW 3 */ + 10428 "10110111" // /* MW 2 */ + 10429 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 62 first + 10430 "00011000" // SEL.EQZ r17, r4, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10431 "00000010" // /* MW 3 */ + 10432 "00100010" // /* MW 2 */ + 10433 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 + 10434 "01000100" // MOVXM r16, #4194303 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10435 "11111110" // /* MW 5 */ + 10436 "00111111" // /* MW 4 */ + 10437 "11111000" // /* MW 3 */ + 10438 "00111111" // /* MW 2 */ + 10439 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first + 10440 "10011000" // AND r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10441 "00000100" // /* MW 3 */ + 10442 "10000101" // /* MW 2 */ + 10443 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 + 10444 "00011000" // NEZ r2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10445 "11110000" // /* MW 3 */ + 10446 "10000100" // /* MW 2 */ + 10447 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10448 "10011000" // AND r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10449 "00000100" // /* MW 3 */ + 10450 "01000011" // /* MW 2 */ + 10451 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 + 10452 "00011000" // NEZ r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10453 "11110000" // /* MW 3 */ + 10454 "01000010" // /* MW 2 */ + 10455 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 4 first + 10456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10457 "00000000" // /* MW 3 */ + 10458 "00101000" // /* MW 2 */ + 10459 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first +.delay_slot + 10460 "10011000" // AND r27, r1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10461 "01100100" // /* MW 3 */ + 10462 "01110110" // /* MW 2 */ + 10463 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first +.delay_slot + 10464 "10011000" // EQ r1, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10465 "01010111" // /* MW 3 */ + 10466 "11000010" // /* MW 2 */ + 10467 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 49 first +.delay_slot + 10468 "00011000" // SEL.EQZ r3, r17, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10469 "01000010" // /* MW 3 */ + 10470 "01000110" // /* MW 2 */ + 10471 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first +.delay_slot + 10472 "10011000" // AND r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10473 "00100100" // /* MW 3 */ + 10474 "01110110" // /* MW 2 */ + 10475 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 27 first +.delay_slot + 10476 "00011000" // SEL.EQZ r0, r3, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10477 "00000010" // /* MW 3 */ + 10478 "11000000" // /* MW 2 */ +.label _ZL19propagateFloat32NaNjj__end + 10479 "00010000" // /* MW 1 */ +.label _ZL19roundAndPackFloat32iij +.function roundAndPackFloat32 _ZL19roundAndPackFloat32iij +.src_ref 10 "softfloat.c" 154 first +.src_ref 10 "softfloat.c" 161 19 +.src_ref 10 "softfloat.c" 203 30 +.function_start + 10480 "10111010" // MOVA r0, #64; MOVXM p0, #509172 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10481 "00010000" // /* MW 9 */ + 10482 "01111010" // /* MW 8 */ + 10483 "00110010" // /* MW 7 */ + 10484 "11110000" // /* MW 6 */ + 10485 "00000001" // /* MW 5 */ + 10486 "00000000" // /* MW 4 */ + 10487 "00000000" // /* MW 3 */ + 10488 "00000000" // /* MW 2 */ + 10489 "00001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 161 19 first +.src_ref 10 "softfloat.c" 171 16 +.src_ref 10 "softfloat.c" 174 16 +.src_ref 10 "softfloat.c" 178 21 +.src_ref 10 "softfloat.c" 194 29 + 10490 "00101100" // LDA r4, [p0]; MOVX r6, #127 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10491 "11111010" // /* MW 5 */ + 10492 "10011001" // /* MW 4 */ + 10493 "11010000" // /* MW 3 */ + 10494 "10010010" // /* MW 2 */ + 10495 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10501 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10503 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10505 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10507 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 162 36 first +.src_ref 10 "softfloat.c" 164 4 first + 10508 "10000100" // JZ r4, #10576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10576 delay_slots=5 */ + 10509 "00000001" // /* MW 5 */ + 10510 "00000000" // /* MW 4 */ + 10511 "10101000" // /* MW 3 */ + 10512 "00010100" // /* MW 2 */ + 10513 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 40 +.src_ref 10 "softfloat.c" 185 68 +.src_ref 10 "softfloat.c" 202 18 +.delay_slot + 10514 "00011000" // MOVX r5, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10515 "00000001" // /* MW 3 */ + 10516 "01001010" // /* MW 2 */ + 10517 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10525 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 8 +.src_ref 10 "softfloat.c" 171 16 +.src_ref 10 "softfloat.c" 171 34 +.src_ref 10 "softfloat.c" 174 16 +.src_ref 10 "softfloat.c" 174 34 + 10526 "10111010" // MOVA r16, #3; MOVX r7, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10527 "01011000" // /* MW 9 */ + 10528 "00000000" // /* MW 8 */ + 10529 "00001000" // /* MW 7 */ + 10530 "01001011" // /* MW 6 */ + 10531 "01110000" // /* MW 5 */ + 10532 "00000000" // /* MW 4 */ + 10533 "00000000" // /* MW 3 */ + 10534 "01110000" // /* MW 2 */ + 10535 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 26 +.src_ref 10 "softfloat.c" 171 34 first + 10536 "01100100" // EQ r27, r7, r4; MOV r5, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10537 "00000101" // /* MW 5 */ + 10538 "10100000" // /* MW 4 */ + 10539 "11110010" // /* MW 3 */ + 10540 "11001000" // /* MW 2 */ + 10541 "00111110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 171 16 + 10542 "00011000" // SEL.EQZ r7, r6, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10543 "10000010" // /* MW 3 */ + 10544 "10001111" // /* MW 2 */ + 10545 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 174 34 first + 10546 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10547 "00000111" // /* MW 3 */ + 10548 "00110111" // /* MW 2 */ + 10549 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 170 12 +.src_ref 10 "softfloat.c" 174 16 + 10550 "11100100" // SEL.EQZ r16, r6, r24, r27; MOV r27, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10551 "01000001" // /* MW 5 */ + 10552 "10100001" // /* MW 4 */ + 10553 "01001101" // /* MW 3 */ + 10554 "00110000" // /* MW 2 */ + 10555 "00110100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 170 12 first +.src_ref 10 "softfloat.c" 170 12 first + 10556 "00011000" // SEL.EQZ r7, r16, r7, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10557 "01110010" // /* MW 3 */ + 10558 "00001110" // /* MW 2 */ + 10559 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 26 first + 10560 "10011000" // EQ r27, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10561 "01000111" // /* MW 3 */ + 10562 "01110110" // /* MW 2 */ + 10563 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 8 + 10564 "00110110" // NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10565 "10000001" // /* MW 11 */ + 10566 "10101101" // /* MW 10 */ + 10567 "00000000" // /* MW 9 */ + 10568 "00010000" // /* MW 8 */ + 10569 "01011100" // /* MW 7 */ + 10570 "00001110" // /* MW 6 */ + 10571 "00100000" // /* MW 5 */ + 10572 "00000000" // /* MW 4 */ + 10573 "11110000" // /* MW 3 */ + 10574 "00101100" // /* MW 2 */ + 10575 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_96 +.src_ref 10 "softfloat.c" 179 14 +.src_ref 10 "softfloat.c" 179 17 first +.src_ref 10 "softfloat.c" 180 23 +.src_ref 10 "softfloat.c" 181 28 + 10576 "01100100" // EXTEND.u16 r18, r2; MOV r16, #253 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10577 "11110101" // /* MW 5 */ + 10578 "00100011" // /* MW 4 */ + 10579 "00001000" // /* MW 3 */ + 10580 "10010110" // /* MW 2 */ + 10581 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 179 14 + 10582 "10011000" // LT r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10583 "00001010" // /* MW 3 */ + 10584 "10100101" // /* MW 2 */ + 10585 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 179 4 + 10586 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */ + 10587 "00000001" // /* MW 5 */ + 10588 "01000000" // /* MW 4 */ + 10589 "00001000" // /* MW 3 */ + 10590 "00010101" // /* MW 2 */ + 10591 "10010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 178 21 first +.delay_slot + 10592 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10593 "01100100" // /* MW 3 */ + 10594 "11100010" // /* MW 2 */ + 10595 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 +.src_ref 10 "softfloat.c" 128 31 +.delay_slot + 10596 "00011000" // MOVX r7, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10597 "01111101" // /* MW 3 */ + 10598 "00001110" // /* MW 2 */ + 10599 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 10600 "10011000" // LSHL r1, r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10601 "01111101" // /* MW 3 */ + 10602 "01000010" // /* MW 2 */ + 10603 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10607 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 181 28 first +.src_ref 10 "softfloat.c" 182 40 first +.src_ref 10 "softfloat.c" 182 59 + 10608 "10111010" // MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10609 "10101000" // /* MW 9 */ + 10610 "11001010" // /* MW 8 */ + 10611 "10001000" // /* MW 7 */ + 10612 "00111110" // /* MW 6 */ + 10613 "00111000" // /* MW 5 */ + 10614 "00000101" // /* MW 4 */ + 10615 "00000000" // /* MW 3 */ + 10616 "00010010" // /* MW 2 */ + 10617 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 59 + 10618 "10011000" // LT r20, r20, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10619 "00101010" // /* MW 3 */ + 10620 "00101001" // /* MW 2 */ + 10621 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 180 23 first + 10622 "10011000" // LT r16, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10623 "00101010" // /* MW 3 */ + 10624 "00100000" // /* MW 2 */ + 10625 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 18 first + 10626 "10011000" // AND r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10627 "01000100" // /* MW 3 */ + 10628 "11100111" // /* MW 2 */ + 10629 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 181 13 first + 10630 "10011000" // OR r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10631 "00000101" // /* MW 3 */ + 10632 "11100111" // /* MW 2 */ + 10633 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 180 8 first + 10634 "10000100" // JNZ r19, #10848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10848 delay_slots=5 */ + 10635 "00000001" // /* MW 5 */ + 10636 "01000000" // /* MW 4 */ + 10637 "00110000" // /* MW 3 */ + 10638 "00010101" // /* MW 2 */ + 10639 "10011000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 187 18 +.src_ref 10 "softfloat.c" 192 39 +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 +.delay_slot + 10640 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10641 "00000001" // /* MW 3 */ + 10642 "00100000" // /* MW 2 */ + 10643 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10651 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 187 18 first + 10652 "10011000" // GE r19, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10653 "00001001" // /* MW 3 */ + 10654 "10100111" // /* MW 2 */ + 10655 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 187 8 + 10656 "10000100" // JNZ r19, #10784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10784 delay_slots=5 */ + 10657 "00000001" // /* MW 5 */ + 10658 "01000000" // /* MW 4 */ + 10659 "00010000" // /* MW 3 */ + 10660 "00010101" // /* MW 2 */ + 10661 "10011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10671 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 192 39 first + 10672 "10011000" // SUB r2, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10673 "00100001" // /* MW 3 */ + 10674 "00000100" // /* MW 2 */ + 10675 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 10676 "10000100" // JZ r2, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */ + 10677 "00000001" // /* MW 5 */ + 10678 "00000000" // /* MW 4 */ + 10679 "11111000" // /* MW 3 */ + 10680 "00010100" // /* MW 2 */ + 10681 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10683 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10691 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 10692 "01100100" // SUB r17, r16, r2; MOV r19, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10693 "10000001" // /* MW 5 */ + 10694 "10100000" // /* MW 4 */ + 10695 "00111001" // /* MW 3 */ + 10696 "01000100" // /* MW 2 */ + 10697 "10000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 10698 "10011000" // AND r7, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10699 "00010100" // /* MW 3 */ + 10700 "11001111" // /* MW 2 */ + 10701 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 10702 "10011000" // LSHL r7, r3, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10703 "01111101" // /* MW 3 */ + 10704 "11001110" // /* MW 2 */ + 10705 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 10706 "10011000" // LSHL r17, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10707 "00011101" // /* MW 3 */ + 10708 "11100011" // /* MW 2 */ + 10709 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 10710 "10011000" // LT r27, r2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10711 "00111010" // /* MW 3 */ + 10712 "10110111" // /* MW 2 */ + 10713 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 10714 "00011000" // NEZ r7, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10715 "11110000" // /* MW 3 */ + 10716 "11001110" // /* MW 2 */ + 10717 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 10718 "00011000" // NEZ r3, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10719 "11110000" // /* MW 3 */ + 10720 "11000110" // /* MW 2 */ + 10721 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 10722 "10011000" // OR r2, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10723 "00010101" // /* MW 3 */ + 10724 "11000101" // /* MW 2 */ + 10725 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 10726 "01111010" // NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10727 "00100010" // /* MW 9 */ + 10728 "11000110" // /* MW 8 */ + 10729 "00000000" // /* MW 7 */ + 10730 "00000000" // /* MW 6 */ + 10731 "01011011" // /* MW 5 */ + 10732 "00000001" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_256 + 10736 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */ + 10737 "00000000" // /* MW 5 */ + 10738 "00000000" // /* MW 4 */ + 10739 "00010000" // /* MW 3 */ + 10740 "00010101" // /* MW 2 */ + 10741 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 194 29 first +.delay_slot + 10742 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10743 "01100100" // /* MW 3 */ + 10744 "11100010" // /* MW 2 */ + 10745 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 +.delay_slot + 10746 "00011000" // MOVX r2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10747 "00000001" // /* MW 3 */ + 10748 "00000100" // /* MW 2 */ + 10749 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10754 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10755 "00011100" // /* MW 13 */ + 10756 "00000000" // /* MW 12 */ + 10757 "00000000" // /* MW 11 */ + 10758 "01010111" // /* MW 10 */ + 10759 "00011010" // /* MW 9 */ + 10760 "01000000" // /* MW 8 */ + 10761 "00000000" // /* MW 7 */ + 10762 "00000000" // /* MW 6 */ + 10763 "10110110" // /* MW 5 */ + 10764 "00000010" // /* MW 4 */ + 10765 "11110000" // /* MW 3 */ + 10766 "00101100" // /* MW 2 */ + 10767 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_288 +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 + 10768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10769 "00000000" // /* MW 15 */ + 10770 "00000000" // /* MW 14 */ + 10771 "01111000" // /* MW 13 */ + 10772 "10100101" // /* MW 12 */ + 10773 "00000001" // /* MW 11 */ + 10774 "00001000" // /* MW 10 */ + 10775 "00000000" // /* MW 9 */ + 10776 "00000001" // /* MW 8 */ + 10777 "01011011" // /* MW 7 */ + 10778 "00000001" // /* MW 6 */ + 10779 "00100000" // /* MW 5 */ + 10780 "00000000" // /* MW 4 */ + 10781 "11110000" // /* MW 3 */ + 10782 "00101100" // /* MW 2 */ + 10783 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_304 +.src_ref 10 "softfloat.c" 202 18 first +.src_ref 10 "softfloat.c" 202 36 +.src_ref 10 "softfloat.c" 203 30 first + 10784 "10111010" // MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10785 "10101000" // /* MW 9 */ + 10786 "11001010" // /* MW 8 */ + 10787 "10101000" // /* MW 7 */ + 10788 "00110100" // /* MW 6 */ + 10789 "00110000" // /* MW 5 */ + 10790 "00100010" // /* MW 4 */ + 10791 "00000000" // /* MW 3 */ + 10792 "00100000" // /* MW 2 */ + 10793 "11111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 59 +.src_ref 10 "softfloat.c" 203 12 +.src_ref 10 "softfloat.c" 203 46 + 10794 "10111010" // MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10795 "01011000" // /* MW 9 */ + 10796 "11111111" // /* MW 8 */ + 10797 "10001111" // /* MW 7 */ + 10798 "00101100" // /* MW 6 */ + 10799 "01100010" // /* MW 5 */ + 10800 "00000110" // /* MW 4 */ + 10801 "00000000" // /* MW 3 */ + 10802 "11100011" // /* MW 2 */ + 10803 "00000010" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 46 + 10804 "00011000" // EQZ r6, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10805 "11010000" // /* MW 3 */ + 10806 "10001100" // /* MW 2 */ + 10807 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 202 36 + 10808 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00001101" // /* MW 3 */ + 10810 "01000000" // /* MW 2 */ + 10811 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 12 + 10812 "10011000" // XOR r4, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10813 "01000110" // /* MW 3 */ + 10814 "10001000" // /* MW 2 */ + 10815 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 205 4 first + 10816 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10817 "00000000" // /* MW 3 */ + 10818 "00101000" // /* MW 2 */ + 10819 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 9 first +.delay_slot + 10820 "10011000" // AND r27, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10821 "00000100" // /* MW 3 */ + 10822 "00110110" // /* MW 2 */ + 10823 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 204 4 first +.src_ref 10 "softfloat.c" 204 14 first +.delay_slot + 10824 "00011000" // SEL.EQZ r2, r16, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10825 "00100010" // /* MW 3 */ + 10826 "00000100" // /* MW 2 */ + 10827 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 59 first +.delay_slot + 10828 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10829 "00111101" // /* MW 3 */ + 10830 "10000100" // /* MW 2 */ + 10831 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 10832 "10011000" // ADD r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10833 "00100000" // /* MW 3 */ + 10834 "01000100" // /* MW 2 */ + 10835 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 66 +.delay_slot + 10836 "00110110" // NOPA; NOPB; NOPS; ADD r0, r27, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10837 "10000001" // /* MW 11 */ + 10838 "10101101" // /* MW 10 */ + 10839 "00000000" // /* MW 9 */ + 10840 "00000100" // /* MW 8 */ + 10841 "00000001" // /* MW 7 */ + 10842 "00110110" // /* MW 6 */ + 10843 "00100000" // /* MW 5 */ + 10844 "00000000" // /* MW 4 */ + 10845 "11110000" // /* MW 3 */ + 10846 "00101100" // /* MW 2 */ + 10847 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_368 +.src_ref 10 "softfloat.c" 185 12 first + 10848 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10849 "00000000" // /* MW 3 */ + 10850 "00101000" // /* MW 2 */ + 10851 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 10852 "01000100" // MOVXM r2, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10853 "00000000" // /* MW 5 */ + 10854 "00100000" // /* MW 4 */ + 10855 "00000001" // /* MW 3 */ + 10856 "10000000" // /* MW 2 */ + 10857 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 first +.delay_slot + 10858 "10011000" // ADD r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10859 "00100000" // /* MW 3 */ + 10860 "01000110" // /* MW 2 */ + 10861 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 185 68 first +.delay_slot + 10862 "00011000" // EQZ r2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10863 "11010000" // /* MW 3 */ + 10864 "01000100" // /* MW 2 */ + 10865 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 185 49 +.delay_slot + 10866 "10011000" // SUB r0, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10867 "00100001" // /* MW 3 */ + 10868 "11000000" // /* MW 2 */ + 10869 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19roundAndPackFloat32iij__end + 10871 "00000000" // /* MW 1 */ +.label _ZL28normalizeRoundAndPackFloat32iij +.function normalizeRoundAndPackFloat32 _ZL28normalizeRoundAndPackFloat32iij +.src_ref 10 "softfloat.c" 218 first +.src_ref 10 "softfloat.c" 224 11 first +.tail_call +.function_start + 10880 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10881 "00000000" // /* MW 5 */ + 10882 "00000000" // /* MW 4 */ + 10883 "01111000" // /* MW 3 */ + 10884 "00010100" // /* MW 2 */ + 10885 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 552 53 first +.delay_slot + 10886 "00011000" // CLZ r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10887 "00110000" // /* MW 3 */ + 10888 "11100000" // /* MW 2 */ + 10889 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 223 45 first +.delay_slot + 10890 "00011000" // ADD r16, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "11111111" // /* MW 3 */ + 10892 "00100001" // /* MW 2 */ + 10893 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 224 44 first +.delay_slot + 10894 "10011000" // SUB r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10895 "00000001" // /* MW 3 */ + 10896 "10000101" // /* MW 2 */ + 10897 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 224 62 +.delay_slot + 10898 "10011000" // LSHL r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10899 "00001101" // /* MW 3 */ + 10900 "11000111" // /* MW 2 */ + 10901 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL28normalizeRoundAndPackFloat32iij__end + 10903 "00000000" // /* MW 1 */ +.label int32_to_float32 +.function int32_to_float32 int32_to_float32 +.src_ref 10 "softfloat.c" 477 first +.src_ref 10 "softfloat.c" 481 4 +.src_ref 10 "softfloat.c" 481 11 first +.function_start + 10912 "10000100" // JZ r1, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */ + 10913 "00000001" // /* MW 5 */ + 10914 "00000000" // /* MW 4 */ + 10915 "01111000" // /* MW 3 */ + 10916 "00010101" // /* MW 2 */ + 10917 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 11 + 10928 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10929 "00000000" // /* MW 5 */ + 10930 "00100000" // /* MW 4 */ + 10931 "00001000" // /* MW 3 */ + 10932 "00000000" // /* MW 2 */ + 10933 "10000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 11 first + 10934 "10011000" // EQ r16, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10935 "00000111" // /* MW 3 */ + 10936 "01100001" // /* MW 2 */ + 10937 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 4 + 10938 "10000100" // JNZ r16, #11008 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11008 delay_slots=5 */ + 10939 "00000001" // /* MW 5 */ + 10940 "01000000" // /* MW 4 */ + 10941 "10000000" // /* MW 3 */ + 10942 "00010101" // /* MW 2 */ + 10943 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10949 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 484 11 +.src_ref 10 "softfloat.c" 484 11 first +.tail_call + 10954 "10111010" // MOVA r2, #156; J #10880 /* MW 10 */ /* control_operation: words=10 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */ + 10955 "00100000" // /* MW 9 */ + 10956 "00000000" // /* MW 8 */ + 10957 "00000000" // /* MW 7 */ + 10958 "01010000" // /* MW 6 */ + 10959 "00000101" // /* MW 5 */ + 10960 "00000000" // /* MW 4 */ + 10961 "00000000" // /* MW 3 */ + 10962 "10000010" // /* MW 2 */ + 10963 "00010011" // /* MW 1 */ +.src_ref 10 "softfloat.c" 484 60 +.src_ref 10 "softfloat.c" 484 62 +.delay_slot + 10964 "00011000" // ABS r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00010000" // /* MW 3 */ + 10966 "01000111" // /* MW 2 */ + 10967 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 483 16 +.delay_slot + 10968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10969 "00000001" // /* MW 3 */ + 10970 "00100000" // /* MW 2 */ + 10971 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 483 16 first +.delay_slot + 10972 "10011000" // LT r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10973 "00001010" // /* MW 3 */ + 10974 "01000011" // /* MW 2 */ + 10975 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10978 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10979 "00011100" // /* MW 13 */ + 10980 "00000000" // /* MW 12 */ + 10981 "00000000" // /* MW 11 */ + 10982 "01010111" // /* MW 10 */ + 10983 "00011010" // /* MW 9 */ + 10984 "01000000" // /* MW 8 */ + 10985 "00000000" // /* MW 7 */ + 10986 "00000000" // /* MW 6 */ + 10987 "10110110" // /* MW 5 */ + 10988 "00000010" // /* MW 4 */ + 10989 "11110000" // /* MW 3 */ + 10990 "00101100" // /* MW 2 */ + 10991 "00000000" // /* MW 1 */ +.label TGT_Fint32_to_float32_80 +.src_ref 10 "softfloat.c" 481 18 first +.return_address + 10992 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10993 "00000000" // /* MW 3 */ + 10994 "00101000" // /* MW 2 */ + 10995 "00010000" // /* MW 1 */ +.delay_slot + 10996 "00011000" // MOVX r0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10997 "00000001" // /* MW 3 */ + 10998 "00000000" // /* MW 2 */ + 10999 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11001 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11007 "00000000" // /* MW 1 */ +.label TGT_Fint32_to_float32_96 +.src_ref 10 "softfloat.c" 482 37 first + 11008 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11009 "00000000" // /* MW 3 */ + 11010 "00101000" // /* MW 2 */ + 11011 "00010000" // /* MW 1 */ +.delay_slot + 11012 "01000100" // MOVXM r0, #-822083584 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11013 "00000000" // /* MW 5 */ + 11014 "00100000" // /* MW 4 */ + 11015 "00000000" // /* MW 3 */ + 11016 "00000000" // /* MW 2 */ + 11017 "11001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label int32_to_float32__end + 11025 "00000000" // /* MW 1 */ +.label _ZL14addFloat32Sigsjji +.function addFloat32Sigs _ZL14addFloat32Sigsjji +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 81 14 +.src_ref 10 "softfloat.c" 734 first +.function_start + 11040 "10111010" // MOVA r18, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11041 "10010000" // /* MW 9 */ + 11042 "11111111" // /* MW 8 */ + 11043 "00001111" // /* MW 7 */ + 11044 "11111110" // /* MW 6 */ + 11045 "00011111" // /* MW 5 */ + 11046 "00000000" // /* MW 4 */ + 11047 "00000000" // /* MW 3 */ + 11048 "00110010" // /* MW 2 */ + 11049 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 first + 11050 "10011000" // LSHL r17, r1, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11051 "00101101" // /* MW 3 */ + 11052 "01100011" // /* MW 2 */ + 11053 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 + 11054 "10011000" // LSHL r4, r2, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11055 "00101101" // /* MW 3 */ + 11056 "10001001" // /* MW 2 */ + 11057 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11058 "00011000" // EXTEND.u8 r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11059 "10010000" // /* MW 3 */ + 11060 "01110110" // /* MW 2 */ + 11061 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11062 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11063 "10010000" // /* MW 3 */ + 11064 "00110010" // /* MW 2 */ + 11065 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 744 19 first +.src_ref 10 "softfloat.c" 747 11 +.src_ref 10 "softfloat.c" 761 22 +.src_ref 10 "softfloat.c" 772 35 +.src_ref 10 "softfloat.c" 788 24 + 11066 "01100100" // SUB r17, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11067 "00000001" // /* MW 5 */ + 11068 "00100000" // /* MW 4 */ + 11069 "00111100" // /* MW 3 */ + 11070 "01110010" // /* MW 2 */ + 11071 "11011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 747 11 first + 11072 "10011000" // LT r4, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11073 "00011010" // /* MW 3 */ + 11074 "00001001" // /* MW 2 */ + 11075 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 747 4 + 11076 "10000100" // JNZ r4, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */ + 11077 "00000001" // /* MW 5 */ + 11078 "01000000" // /* MW 4 */ + 11079 "11111000" // /* MW 3 */ + 11080 "00010101" // /* MW 2 */ + 11081 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first +.delay_slot + 11082 "10011000" // AND r19, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11083 "00000100" // /* MW 3 */ + 11084 "01100111" // /* MW 2 */ + 11085 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 745 9 +.src_ref 10 "softfloat.c" 746 9 +.delay_slot + 11086 "01100100" // AND r16, r2, r16; MOV r0, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11087 "00011001" // /* MW 5 */ + 11088 "00100000" // /* MW 4 */ + 11089 "10010000" // /* MW 3 */ + 11090 "00100000" // /* MW 2 */ + 11091 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 745 9 first +.delay_slot + 11092 "10011000" // LSHL r19, r19, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11093 "00001101" // /* MW 3 */ + 11094 "11100110" // /* MW 2 */ + 11095 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 746 9 first +.src_ref 10 "softfloat.c" 748 18 +.src_ref 10 "softfloat.c" 762 18 +.delay_slot + 11096 "01100100" // LSHL r16, r16, r0; MOV r20, #255 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11097 "11111101" // /* MW 5 */ + 11098 "00100011" // /* MW 4 */ + 11099 "10111010" // /* MW 3 */ + 11100 "00000001" // /* MW 2 */ + 11101 "10000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 +.src_ref 10 "softfloat.c" 128 31 +.src_ref 10 "softfloat.c" 748 18 first +.delay_slot + 11102 "01100100" // EQ r0, r27, r20; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11103 "01111101" // /* MW 5 */ + 11104 "00100000" // /* MW 4 */ + 11105 "11111001" // /* MW 3 */ + 11106 "00101000" // /* MW 2 */ + 11107 "11011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 761 22 first + 11108 "10011000" // GE r5, r17, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11109 "10001001" // /* MW 3 */ + 11110 "01001011" // /* MW 2 */ + 11111 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 761 9 + 11112 "10000100" // JNZ r5, #11440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11440 delay_slots=5 */ + 11113 "00000001" // /* MW 5 */ + 11114 "01000000" // /* MW 4 */ + 11115 "01011000" // /* MW 3 */ + 11116 "00010110" // /* MW 2 */ + 11117 "00101000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 11118 "10011000" // LSHL r4, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11119 "00101101" // /* MW 3 */ + 11120 "11001001" // /* MW 2 */ + 11121 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11129 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 762 18 first + 11130 "10011000" // EQ r20, r25, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11131 "01000111" // /* MW 3 */ + 11132 "01101001" // /* MW 2 */ + 11133 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 762 8 + 11134 "10000100" // JNZ r20, #11392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11392 delay_slots=5 */ + 11135 "00000001" // /* MW 5 */ + 11136 "01000000" // /* MW 4 */ + 11137 "01000000" // /* MW 3 */ + 11138 "00010110" // /* MW 2 */ + 11139 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11143 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11149 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11150 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11151 "10100000" // /* MW 3 */ + 11152 "01010001" // /* MW 2 */ + 11153 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 787 4 + 11154 "11111000" // MOV r2, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11155 "10100000" // /* MW 3 */ + 11156 "10011100" // /* MW 2 */ + 11157 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 767 12 first + 11158 "00011000" // ADD r0, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11159 "00000111" // /* MW 3 */ + 11160 "01000000" // /* MW 2 */ + 11161 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 766 8 first + 11162 "00011000" // SEL.EQZ r17, r0, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11163 "00010010" // /* MW 3 */ + 11164 "00100011" // /* MW 2 */ + 11165 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 772 35 first + 11166 "10011000" // SUB r17, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11167 "00010001" // /* MW 3 */ + 11168 "00100011" // /* MW 2 */ + 11169 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11170 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */ + 11171 "00000001" // /* MW 5 */ + 11172 "00000000" // /* MW 4 */ + 11173 "00101000" // /* MW 3 */ + 11174 "00010110" // /* MW 2 */ + 11175 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 770 17 +.src_ref 10 "softfloat.c" 785 9 +.delay_slot + 11176 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11177 "00000000" // /* MW 5 */ + 11178 "00100000" // /* MW 4 */ + 11179 "00001010" // /* MW 3 */ + 11180 "00000000" // /* MW 2 */ + 11181 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 770 17 first +.delay_slot + 11182 "10011000" // OR r3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "01000101" // /* MW 3 */ + 11184 "11000111" // /* MW 2 */ + 11185 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 766 8 first +.delay_slot + 11186 "00011000" // SEL.EQZ r19, r19, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11187 "00110010" // /* MW 3 */ + 11188 "11100110" // /* MW 2 */ + 11189 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11193 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 11194 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11195 "10000001" // /* MW 5 */ + 11196 "00100000" // /* MW 4 */ + 11197 "00110000" // /* MW 3 */ + 11198 "11100010" // /* MW 2 */ + 11199 "11000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11200 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11201 "00100100" // /* MW 3 */ + 11202 "11100101" // /* MW 2 */ + 11203 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11204 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11205 "00101101" // /* MW 3 */ + 11206 "11100101" // /* MW 2 */ + 11207 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11208 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11209 "00001010" // /* MW 3 */ + 11210 "01110110" // /* MW 2 */ + 11211 "00010100" // /* MW 1 */ + 11212 "10000100" // J #11344 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11344 delay_slots=5 */ + 11213 "00000000" // /* MW 5 */ + 11214 "00000000" // /* MW 4 */ + 11215 "00101000" // /* MW 3 */ + 11216 "00010110" // /* MW 2 */ + 11217 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 first +.delay_slot + 11218 "10011000" // LSHL r3, r19, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11219 "00111101" // /* MW 3 */ + 11220 "11000110" // /* MW 2 */ + 11221 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 +.delay_slot + 11222 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11223 "11110000" // /* MW 3 */ + 11224 "10100100" // /* MW 2 */ + 11225 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first +.delay_slot + 11226 "00011000" // NEZ r17, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11227 "11110000" // /* MW 3 */ + 11228 "11100010" // /* MW 2 */ + 11229 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first +.delay_slot + 11230 "10011000" // OR r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11231 "00100101" // /* MW 3 */ + 11232 "11100101" // /* MW 2 */ + 11233 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first +.delay_slot + 11234 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11235 "01100000" // /* MW 13 */ + 11236 "00101011" // /* MW 12 */ + 11237 "00000000" // /* MW 11 */ + 11238 "10101111" // /* MW 10 */ + 11239 "00110100" // /* MW 9 */ + 11240 "00000000" // /* MW 8 */ + 11241 "00100010" // /* MW 7 */ + 11242 "01100111" // /* MW 6 */ + 11243 "00100100" // /* MW 5 */ + 11244 "00000000" // /* MW 4 */ + 11245 "11110000" // /* MW 3 */ + 11246 "00101100" // /* MW 2 */ + 11247 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_208 +.src_ref 10 "softfloat.c" 748 8 first + 11248 "10000100" // JNZ r0, #11504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11504 delay_slots=5 */ + 11249 "00000001" // /* MW 5 */ + 11250 "01000000" // /* MW 4 */ + 11251 "01111000" // /* MW 3 */ + 11252 "00010110" // /* MW 2 */ + 11253 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 756 17 +.src_ref 10 "softfloat.c" 785 9 +.delay_slot + 11254 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11255 "00000000" // /* MW 5 */ + 11256 "00100000" // /* MW 4 */ + 11257 "00001010" // /* MW 3 */ + 11258 "00000000" // /* MW 2 */ + 11259 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11265 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11267 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11268 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11269 "10100000" // /* MW 3 */ + 11270 "01010001" // /* MW 2 */ + 11271 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 753 12 first +.src_ref 10 "softfloat.c" 787 4 + 11272 "11100100" // ADD r3, r17, #-1; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11273 "01000001" // /* MW 5 */ + 11274 "00111011" // /* MW 4 */ + 11275 "11100001" // /* MW 3 */ + 11276 "11111111" // /* MW 2 */ + 11277 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 +.src_ref 10 "softfloat.c" 752 18 + 11278 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "10100000" // /* MW 3 */ + 11280 "11011100" // /* MW 2 */ + 11281 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 first +.src_ref 10 "softfloat.c" 752 18 first + 11282 "00011000" // SEL.EQZ r17, r3, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11283 "00010010" // /* MW 3 */ + 11284 "11100011" // /* MW 2 */ + 11285 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11286 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */ + 11287 "00000001" // /* MW 5 */ + 11288 "00000000" // /* MW 4 */ + 11289 "00101000" // /* MW 3 */ + 11290 "00010110" // /* MW 2 */ + 11291 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 756 17 first +.delay_slot + 11292 "10011000" // OR r0, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "00000101" // /* MW 3 */ + 11294 "00000001" // /* MW 2 */ + 11295 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 first +.src_ref 10 "softfloat.c" 752 18 first +.delay_slot + 11296 "00011000" // SEL.EQZ r16, r16, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00000010" // /* MW 3 */ + 11298 "00100000" // /* MW 2 */ + 11299 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11305 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 11306 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11307 "10000001" // /* MW 5 */ + 11308 "00100000" // /* MW 4 */ + 11309 "00110000" // /* MW 3 */ + 11310 "11100010" // /* MW 2 */ + 11311 "11000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11312 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "00100100" // /* MW 3 */ + 11314 "11100101" // /* MW 2 */ + 11315 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11316 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00101101" // /* MW 3 */ + 11318 "00100101" // /* MW 2 */ + 11319 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 11320 "10011000" // LSHL r3, r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "00111101" // /* MW 3 */ + 11322 "00000110" // /* MW 2 */ + 11323 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11324 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11325 "00001010" // /* MW 3 */ + 11326 "01110110" // /* MW 2 */ + 11327 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 11328 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11329 "11110000" // /* MW 3 */ + 11330 "10100100" // /* MW 2 */ + 11331 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11332 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11333 "11110000" // /* MW 3 */ + 11334 "00100000" // /* MW 2 */ + 11335 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 11336 "10011000" // OR r17, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11337 "00100101" // /* MW 3 */ + 11338 "11100011" // /* MW 2 */ + 11339 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 11340 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11341 "00010010" // /* MW 3 */ + 11342 "00100001" // /* MW 2 */ + 11343 "00010100" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_304 +.src_ref 10 "softfloat.c" 785 9 first +.src_ref 10 "softfloat.c" 786 26 +.src_ref 10 "softfloat.c" 787 4 first + 11344 "10111010" // MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11345 "11001000" // /* MW 9 */ + 11346 "10111111" // /* MW 8 */ + 11347 "00101000" // /* MW 7 */ + 11348 "00101110" // /* MW 6 */ + 11349 "00111010" // /* MW 5 */ + 11350 "00100111" // /* MW 4 */ + 11351 "00000000" // /* MW 3 */ + 11352 "00110010" // /* MW 2 */ + 11353 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 786 18 first +.src_ref 10 "softfloat.c" 790 8 first + 11354 "00100100" // ADD r19, r19, r16; ADD.NC r16, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11355 "00000001" // /* MW 5 */ + 11356 "00110001" // /* MW 4 */ + 11357 "00011000" // /* MW 3 */ + 11358 "11100000" // /* MW 2 */ + 11359 "10011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 786 26 + 11360 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11361 "00101101" // /* MW 3 */ + 11362 "11100101" // /* MW 2 */ + 11363 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 24 first + 11364 "10011000" // LT r27, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11365 "10001010" // /* MW 3 */ + 11366 "10110111" // /* MW 2 */ + 11367 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 4 + 11368 "00011000" // SEL.EQZ r2, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11369 "00000010" // /* MW 3 */ + 11370 "01000101" // /* MW 2 */ + 11371 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 4 + 11372 "00011000" // SEL.EQZ r3, r18, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11373 "00110010" // /* MW 3 */ + 11374 "10000111" // /* MW 2 */ + 11375 "00010100" // /* MW 1 */ +.label __ll1__ZL14addFloat32Sigsjji +.src_ref 10 "softfloat.c" 793 11 first +.tail_call + 11376 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 11377 "00000000" // /* MW 5 */ + 11378 "00000000" // /* MW 4 */ + 11379 "01111000" // /* MW 3 */ + 11380 "00010100" // /* MW 2 */ + 11381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11391 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_352 +.src_ref 10 "softfloat.c" 763 12 first +.return_address + 11392 "10000100" // JNZ r16, #11536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11536 delay_slots=5 */ + 11393 "00000001" // /* MW 5 */ + 11394 "01000000" // /* MW 4 */ + 11395 "10001000" // /* MW 3 */ + 11396 "00010110" // /* MW 2 */ + 11397 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 764 12 first + 11408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11409 "00000000" // /* MW 3 */ + 11410 "00101000" // /* MW 2 */ + 11411 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 11412 "01000100" // MOVXM r16, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11413 "00000000" // /* MW 5 */ + 11414 "00100000" // /* MW 4 */ + 11415 "00001000" // /* MW 3 */ + 11416 "10000000" // /* MW 2 */ + 11417 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 first +.delay_slot + 11418 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11419 "00000000" // /* MW 3 */ + 11420 "00000001" // /* MW 2 */ + 11421 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11427 "00011100" // /* MW 13 */ + 11428 "00000000" // /* MW 12 */ + 11429 "00000000" // /* MW 11 */ + 11430 "01010111" // /* MW 10 */ + 11431 "00011010" // /* MW 9 */ + 11432 "01000000" // /* MW 8 */ + 11433 "00000000" // /* MW 7 */ + 11434 "00000000" // /* MW 6 */ + 11435 "10110110" // /* MW 5 */ + 11436 "00000010" // /* MW 4 */ + 11437 "11110000" // /* MW 3 */ + 11438 "00101100" // /* MW 2 */ + 11439 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_400 +.src_ref 10 "softfloat.c" 776 8 first + 11440 "10000100" // JNZ r0, #11552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11552 delay_slots=5 */ + 11441 "00000001" // /* MW 5 */ + 11442 "01000000" // /* MW 4 */ + 11443 "10010000" // /* MW 3 */ + 11444 "00010110" // /* MW 2 */ + 11445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 8 first + 11456 "10000100" // JZ r27, #11600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11600 delay_slots=5 */ + 11457 "00000001" // /* MW 5 */ + 11458 "00000000" // /* MW 4 */ + 11459 "10101000" // /* MW 3 */ + 11460 "00010110" // /* MW 2 */ + 11461 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11472 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11473 "10100000" // /* MW 3 */ + 11474 "01010001" // /* MW 2 */ + 11475 "00011000" // /* MW 1 */ + 11476 "10000100" // J #11376 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11376 delay_slots=5 */ + 11477 "00000000" // /* MW 5 */ + 11478 "00000000" // /* MW 4 */ + 11479 "00111000" // /* MW 3 */ + 11480 "00010110" // /* MW 2 */ + 11481 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 26 +.delay_slot + 11482 "01000100" // MOVXM r17, #1073741824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11483 "00000000" // /* MW 5 */ + 11484 "10100000" // /* MW 4 */ + 11485 "00001000" // /* MW 3 */ + 11486 "00000000" // /* MW 2 */ + 11487 "01000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 26 first +.src_ref 10 "softfloat.c" 793 11 +.delay_slot + 11488 "11100100" // ADD r17, r19, r17; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11489 "01000001" // /* MW 5 */ + 11490 "00111011" // /* MW 4 */ + 11491 "00010001" // /* MW 3 */ + 11492 "01100010" // /* MW 2 */ + 11493 "10011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 33 +.delay_slot + 11494 "10011000" // ADD r3, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11495 "00000000" // /* MW 3 */ + 11496 "01000111" // /* MW 2 */ + 11497 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11500 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11501 "01100111" // /* MW 3 */ + 11502 "00000001" // /* MW 2 */ + 11503 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_464 +.src_ref 10 "softfloat.c" 749 12 first + 11504 "10000100" // JNZ r19, #11632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11632 delay_slots=5 */ + 11505 "00000001" // /* MW 5 */ + 11506 "01000000" // /* MW 4 */ + 11507 "10111000" // /* MW 3 */ + 11508 "00010110" // /* MW 2 */ + 11509 "10011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 750 12 first + 11520 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11521 "00000000" // /* MW 3 */ + 11522 "00101000" // /* MW 2 */ + 11523 "00010000" // /* MW 1 */ +.delay_slot + 11524 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "10100000" // /* MW 3 */ + 11526 "00010000" // /* MW 2 */ + 11527 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11535 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_496 +.src_ref 10 "softfloat.c" 763 31 first +.tail_call + 11536 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11537 "00000000" // /* MW 5 */ + 11538 "00000000" // /* MW 4 */ + 11539 "01000000" // /* MW 3 */ + 11540 "00010100" // /* MW 2 */ + 11541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11551 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_512 +.src_ref 10 "softfloat.c" 777 22 first +.return_address + 11552 "10011000" // OR r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11553 "00000101" // /* MW 3 */ + 11554 "11100001" // /* MW 2 */ + 11555 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 777 12 + 11556 "10000100" // JNZ r16, #11648 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11648 delay_slots=5 */ + 11557 "00000001" // /* MW 5 */ + 11558 "01000000" // /* MW 4 */ + 11559 "11000000" // /* MW 3 */ + 11560 "00010110" // /* MW 2 */ + 11561 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11567 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11571 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 778 12 first + 11572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11573 "00000000" // /* MW 3 */ + 11574 "00101000" // /* MW 2 */ + 11575 "00010000" // /* MW 1 */ +.delay_slot + 11576 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11577 "10100000" // /* MW 3 */ + 11578 "00010000" // /* MW 2 */ + 11579 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11583 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11585 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11586 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11587 "00011100" // /* MW 13 */ + 11588 "00000000" // /* MW 12 */ + 11589 "00000000" // /* MW 11 */ + 11590 "01010111" // /* MW 10 */ + 11591 "00011010" // /* MW 9 */ + 11592 "01000000" // /* MW 8 */ + 11593 "00000000" // /* MW 7 */ + 11594 "00000000" // /* MW 6 */ + 11595 "10110110" // /* MW 5 */ + 11596 "00000010" // /* MW 4 */ + 11597 "11110000" // /* MW 3 */ + 11598 "00101100" // /* MW 2 */ + 11599 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_560 +.src_ref 10 "softfloat.c" 780 25 first +.src_ref 10 "softfloat.c" 780 62 first + 11600 "10100100" // RET lr; ADD.NC r16, r19, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11601 "10000010" // /* MW 5 */ + 11602 "00110011" // /* MW 4 */ + 11603 "00001000" // /* MW 3 */ + 11604 "00000000" // /* MW 2 */ + 11605 "00000101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 70 +.delay_slot + 11606 "00011000" // MOVX r17, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11607 "11101001" // /* MW 3 */ + 11608 "11100010" // /* MW 2 */ + 11609 "00010111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 70 +.delay_slot + 11610 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11611 "00011101" // /* MW 3 */ + 11612 "00100001" // /* MW 2 */ + 11613 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 66 first +.delay_slot + 11614 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11615 "00000000" // /* MW 3 */ + 11616 "00000001" // /* MW 2 */ + 11617 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11620 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11621 "10000001" // /* MW 11 */ + 11622 "10101101" // /* MW 10 */ + 11623 "00000000" // /* MW 9 */ + 11624 "00000000" // /* MW 8 */ + 11625 "00000000" // /* MW 7 */ + 11626 "00000000" // /* MW 6 */ + 11627 "00100000" // /* MW 5 */ + 11628 "00000000" // /* MW 4 */ + 11629 "11110000" // /* MW 3 */ + 11630 "00101100" // /* MW 2 */ + 11631 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_592 +.src_ref 10 "softfloat.c" 749 31 first +.tail_call + 11632 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11633 "00000000" // /* MW 5 */ + 11634 "00000000" // /* MW 4 */ + 11635 "01000000" // /* MW 3 */ + 11636 "00010100" // /* MW 2 */ + 11637 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11639 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11647 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_608 +.src_ref 10 "softfloat.c" 777 38 first +.tail_call +.return_address + 11648 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11649 "00000000" // /* MW 5 */ + 11650 "00000000" // /* MW 4 */ + 11651 "01000000" // /* MW 3 */ + 11652 "00010100" // /* MW 2 */ + 11653 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL14addFloat32Sigsjji__end + 11663 "00000000" // /* MW 1 */ +.label _ZL14subFloat32Sigsjji +.function subFloat32Sigs _ZL14subFloat32Sigsjji +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 81 14 +.src_ref 10 "softfloat.c" 805 first +.function_start + 11664 "10111010" // MOVA r17, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11665 "10010000" // /* MW 9 */ + 11666 "11111111" // /* MW 8 */ + 11667 "00001111" // /* MW 7 */ + 11668 "11111110" // /* MW 6 */ + 11669 "00011111" // /* MW 5 */ + 11670 "00000000" // /* MW 4 */ + 11671 "00000000" // /* MW 3 */ + 11672 "00110001" // /* MW 2 */ + 11673 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 first + 11674 "10011000" // LSHL r4, r2, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11675 "00011101" // /* MW 3 */ + 11676 "10001001" // /* MW 2 */ + 11677 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 + 11678 "10011000" // LSHL r18, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11679 "00011101" // /* MW 3 */ + 11680 "01100101" // /* MW 2 */ + 11681 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first + 11682 "10011000" // AND r20, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11683 "00000100" // /* MW 3 */ + 11684 "01101001" // /* MW 2 */ + 11685 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 first + 11686 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11687 "10010000" // /* MW 3 */ + 11688 "00110010" // /* MW 2 */ + 11689 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11690 "00011000" // EXTEND.u8 r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11691 "10010000" // /* MW 3 */ + 11692 "10110110" // /* MW 2 */ + 11693 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first +.src_ref 10 "softfloat.c" 816 9 +.src_ref 10 "softfloat.c" 817 9 + 11694 "01100100" // AND r16, r2, r16; MOV r19, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11695 "00011101" // /* MW 5 */ + 11696 "10100000" // /* MW 4 */ + 11697 "10011001" // /* MW 3 */ + 11698 "00100000" // /* MW 2 */ + 11699 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 816 9 first + 11700 "10011000" // LSHL r17, r20, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11701 "00111101" // /* MW 3 */ + 11702 "00100011" // /* MW 2 */ + 11703 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 815 19 first +.src_ref 10 "softfloat.c" 818 11 +.src_ref 10 "softfloat.c" 819 17 +.src_ref 10 "softfloat.c" 843 31 + 11704 "01100100" // SUB r18, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11705 "00000001" // /* MW 5 */ + 11706 "00100000" // /* MW 4 */ + 11707 "00111100" // /* MW 3 */ + 11708 "10110010" // /* MW 2 */ + 11709 "11011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 818 11 first + 11710 "10011000" // LT r5, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11711 "00101010" // /* MW 3 */ + 11712 "00001011" // /* MW 2 */ + 11713 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 818 4 + 11714 "10000100" // JNZ r5, #11904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11904 delay_slots=5 */ + 11715 "00000001" // /* MW 5 */ + 11716 "01000000" // /* MW 4 */ + 11717 "01000000" // /* MW 3 */ + 11718 "00010111" // /* MW 2 */ + 11719 "00101000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 817 9 first +.delay_slot + 11720 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11721 "00111101" // /* MW 3 */ + 11722 "00100001" // /* MW 2 */ + 11723 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 14 +.src_ref 10 "softfloat.c" 851 14 +.src_ref 10 "softfloat.c" 859 13 +.src_ref 10 "softfloat.c" 862 9 +.delay_slot + 11724 "10111010" // MOVA r0, #255; MOVXM r4, #1073741824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11725 "00010000" // /* MW 9 */ + 11726 "00000000" // /* MW 8 */ + 11727 "10001000" // /* MW 7 */ + 11728 "00000000" // /* MW 6 */ + 11729 "00000000" // /* MW 5 */ + 11730 "00010000" // /* MW 4 */ + 11731 "00000000" // /* MW 3 */ + 11732 "11100000" // /* MW 2 */ + 11733 "00011111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 851 14 first +.delay_slot + 11734 "10011000" // EQ r20, r27, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "00000111" // /* MW 3 */ + 11736 "11101000" // /* MW 2 */ + 11737 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 862 9 first +.delay_slot + 11738 "10011000" // OR r19, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11739 "01000101" // /* MW 3 */ + 11740 "01100110" // /* MW 2 */ + 11741 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 859 13 first +.delay_slot + 11742 "10011000" // OR r4, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11743 "00000101" // /* MW 3 */ + 11744 "00001001" // /* MW 2 */ + 11745 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 819 17 first + 11746 "10011000" // GE r6, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11747 "10001001" // /* MW 3 */ + 11748 "10001101" // /* MW 2 */ + 11749 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 819 4 + 11750 "10000100" // JNZ r6, #12064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12064 delay_slots=5 */ + 11751 "00000001" // /* MW 5 */ + 11752 "01000000" // /* MW 4 */ + 11753 "10010000" // /* MW 3 */ + 11754 "00010111" // /* MW 2 */ + 11755 "00110000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.src_ref 10 "softfloat.c" 835 34 +.delay_slot + 11756 "00011000" // MOVX r5, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11757 "00000101" // /* MW 3 */ + 11758 "00001010" // /* MW 2 */ + 11759 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 835 34 first +.delay_slot + 11760 "10011000" // XOR r7, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11761 "01010110" // /* MW 3 */ + 11762 "11001110" // /* MW 2 */ + 11763 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11767 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11769 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 14 first + 11770 "10011000" // EQ r20, r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11771 "00000111" // /* MW 3 */ + 11772 "01101000" // /* MW 2 */ + 11773 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 4 + 11774 "10000100" // JNZ r20, #12176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12176 delay_slots=5 */ + 11775 "00000001" // /* MW 5 */ + 11776 "01000000" // /* MW 4 */ + 11777 "11001000" // /* MW 3 */ + 11778 "00010111" // /* MW 2 */ + 11779 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 + 11790 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11791 "10100000" // /* MW 3 */ + 11792 "01010011" // /* MW 2 */ + 11793 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 838 8 first + 11794 "00011000" // ADD r16, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11795 "00000111" // /* MW 3 */ + 11796 "10100000" // /* MW 2 */ + 11797 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 837 4 first + 11798 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11799 "00100010" // /* MW 3 */ + 11800 "00100001" // /* MW 2 */ + 11801 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 843 31 first + 11802 "10011000" // SUB r16, r24, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11803 "00000001" // /* MW 3 */ + 11804 "00100001" // /* MW 2 */ + 11805 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11806 "10000100" // JZ r16, #11872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11872 delay_slots=5 */ + 11807 "00000001" // /* MW 5 */ + 11808 "00000000" // /* MW 4 */ + 11809 "00110000" // /* MW 3 */ + 11810 "00010111" // /* MW 2 */ + 11811 "10000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 837 4 first +.delay_slot + 11812 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00110010" // /* MW 3 */ + 11814 "01100011" // /* MW 2 */ + 11815 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11817 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11819 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11821 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11823 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first +.src_ref 10 "softfloat-macros" 50 48 + 11824 "10111010" // MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11825 "01011000" // /* MW 9 */ + 11826 "00011111" // /* MW 8 */ + 11827 "01001000" // /* MW 7 */ + 11828 "00001110" // /* MW 6 */ + 11829 "00111000" // /* MW 5 */ + 11830 "00110000" // /* MW 4 */ + 11831 "00000000" // /* MW 3 */ + 11832 "00010100" // /* MW 2 */ + 11833 "00000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11834 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11835 "00100100" // /* MW 3 */ + 11836 "11100101" // /* MW 2 */ + 11837 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11838 "10011000" // LSHL r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11839 "00101101" // /* MW 3 */ + 11840 "01100101" // /* MW 2 */ + 11841 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11842 "00011000" // NEZ r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11843 "11110000" // /* MW 3 */ + 11844 "01100110" // /* MW 2 */ + 11845 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11846 "10011000" // LT r27, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11847 "01001010" // /* MW 3 */ + 11848 "00110111" // /* MW 2 */ + 11849 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 first + 11850 "10011000" // LSHL r17, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11851 "00111101" // /* MW 3 */ + 11852 "01100010" // /* MW 2 */ + 11853 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 + 11854 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11855 "11110000" // /* MW 3 */ + 11856 "10100100" // /* MW 2 */ + 11857 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 + 11858 "10011000" // OR r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11859 "00100101" // /* MW 3 */ + 11860 "01100001" // /* MW 2 */ + 11861 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 11862 "01111010" // NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11863 "00000010" // /* MW 9 */ + 11864 "11100011" // /* MW 8 */ + 11865 "00000100" // /* MW 7 */ + 11866 "00000000" // /* MW 6 */ + 11867 "01011011" // /* MW 5 */ + 11868 "00000001" // /* MW 4 */ + 11869 "11110000" // /* MW 3 */ + 11870 "00101100" // /* MW 2 */ + 11871 "00000000" // /* MW 1 */ +.label __ll2__ZL14subFloat32Sigsjji + 11872 "10000100" // J #12032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */ + 11873 "00000000" // /* MW 5 */ + 11874 "00000000" // /* MW 4 */ + 11875 "10000000" // /* MW 3 */ + 11876 "00010111" // /* MW 2 */ + 11877 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 846 16 first +.delay_slot + 11878 "10011000" // SUB r3, r4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11879 "00010001" // /* MW 3 */ + 11880 "00000111" // /* MW 2 */ + 11881 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11889 "00000000" // /* MW 15 */ + 11890 "00000000" // /* MW 14 */ + 11891 "01111000" // /* MW 13 */ + 11892 "10100101" // /* MW 12 */ + 11893 "00000001" // /* MW 11 */ + 11894 "00000000" // /* MW 10 */ + 11895 "00000000" // /* MW 9 */ + 11896 "00000000" // /* MW 8 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00000001" // /* MW 6 */ + 11899 "00100000" // /* MW 5 */ + 11900 "00000000" // /* MW 4 */ + 11901 "11110000" // /* MW 3 */ + 11902 "00101100" // /* MW 2 */ + 11903 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_240 +.src_ref 10 "softfloat.c" 851 4 first + 11904 "10000100" // JNZ r20, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */ + 11905 "00000001" // /* MW 5 */ + 11906 "01000000" // /* MW 4 */ + 11907 "11100000" // /* MW 3 */ + 11908 "00010111" // /* MW 2 */ + 11909 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11911 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11913 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11919 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 + 11920 "11111000" // MOV r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11921 "10100000" // /* MW 3 */ + 11922 "00011101" // /* MW 2 */ + 11923 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 + 11924 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "10100000" // /* MW 3 */ + 11926 "01010001" // /* MW 2 */ + 11927 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 +.src_ref 10 "softfloat.c" 855 14 + 11928 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "10100000" // /* MW 3 */ + 11930 "11011100" // /* MW 2 */ + 11931 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 + 11932 "11111000" // MOV r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11933 "00100000" // /* MW 3 */ + 11934 "01010000" // /* MW 2 */ + 11935 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 856 8 first + 11936 "00011000" // ADD r17, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11937 "11111111" // /* MW 3 */ + 11938 "10100011" // /* MW 2 */ + 11939 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 first +.src_ref 10 "softfloat.c" 855 14 first + 11940 "00011000" // SEL.EQZ r17, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11941 "00100010" // /* MW 3 */ + 11942 "01100011" // /* MW 2 */ + 11943 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11944 "10000100" // JZ r17, #12016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12016 delay_slots=5 */ + 11945 "00000001" // /* MW 5 */ + 11946 "00000000" // /* MW 4 */ + 11947 "01111000" // /* MW 3 */ + 11948 "00010111" // /* MW 2 */ + 11949 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 first +.src_ref 10 "softfloat.c" 855 14 first +.delay_slot + 11950 "00011000" // SEL.EQZ r16, r16, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11951 "01000010" // /* MW 3 */ + 11952 "00100000" // /* MW 2 */ + 11953 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11959 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11961 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first +.src_ref 10 "softfloat-macros" 50 48 + 11962 "10111010" // MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11963 "01011000" // /* MW 9 */ + 11964 "00011111" // /* MW 8 */ + 11965 "10001000" // /* MW 7 */ + 11966 "10001110" // /* MW 6 */ + 11967 "00101000" // /* MW 5 */ + 11968 "00110001" // /* MW 4 */ + 11969 "00000000" // /* MW 3 */ + 11970 "00000011" // /* MW 2 */ + 11971 "00000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11972 "10011000" // AND r20, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11973 "01000100" // /* MW 3 */ + 11974 "10101001" // /* MW 2 */ + 11975 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11976 "10011000" // LSHL r20, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11977 "01001101" // /* MW 3 */ + 11978 "00101001" // /* MW 2 */ + 11979 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 11980 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11981 "00101101" // /* MW 3 */ + 11982 "00100101" // /* MW 2 */ + 11983 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11984 "10011000" // LT r27, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11985 "00111010" // /* MW 3 */ + 11986 "01110110" // /* MW 2 */ + 11987 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 11988 "00011000" // NEZ r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11989 "11110000" // /* MW 3 */ + 11990 "00101000" // /* MW 2 */ + 11991 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11992 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11993 "11110000" // /* MW 3 */ + 11994 "00100000" // /* MW 2 */ + 11995 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 11996 "10011000" // OR r17, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11997 "01000101" // /* MW 3 */ + 11998 "10100011" // /* MW 2 */ + 11999 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 12000 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "10010000" // /* MW 10 */ + 12007 "00001000" // /* MW 9 */ + 12008 "00100001" // /* MW 8 */ + 12009 "01011011" // /* MW 7 */ + 12010 "00000001" // /* MW 6 */ + 12011 "00100000" // /* MW 5 */ + 12012 "00000000" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00101100" // /* MW 2 */ + 12015 "00000000" // /* MW 1 */ +.label __ll1__ZL14subFloat32Sigsjji +.src_ref 10 "softfloat.c" 864 16 first + 12016 "11100001" // NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12017 "00000000" // /* MW 15 */ + 12018 "00000000" // /* MW 14 */ + 12019 "01111000" // /* MW 13 */ + 12020 "10100101" // /* MW 12 */ + 12021 "00000001" // /* MW 11 */ + 12022 "00001100" // /* MW 10 */ + 12023 "00111000" // /* MW 9 */ + 12024 "00100110" // /* MW 8 */ + 12025 "01011011" // /* MW 7 */ + 12026 "00000001" // /* MW 6 */ + 12027 "00100000" // /* MW 5 */ + 12028 "00000000" // /* MW 4 */ + 12029 "11110000" // /* MW 3 */ + 12030 "00101100" // /* MW 2 */ + 12031 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_368 +.src_ref 10 "softfloat.c" 868 11 first +.tail_call + 12032 "10000100" // J #10880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */ + 12033 "00000000" // /* MW 5 */ + 12034 "00000000" // /* MW 4 */ + 12035 "01000000" // /* MW 3 */ + 12036 "00010101" // /* MW 2 */ + 12037 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 first +.delay_slot + 12038 "00011000" // ADD r2, r25, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "11111111" // /* MW 3 */ + 12040 "01000101" // /* MW 2 */ + 12041 "00010110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12045 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12049 "00000000" // /* MW 15 */ + 12050 "00000000" // /* MW 14 */ + 12051 "01111000" // /* MW 13 */ + 12052 "10100101" // /* MW 12 */ + 12053 "00000001" // /* MW 11 */ + 12054 "00000000" // /* MW 10 */ + 12055 "00000000" // /* MW 9 */ + 12056 "00000000" // /* MW 8 */ + 12057 "01011011" // /* MW 7 */ + 12058 "00000001" // /* MW 6 */ + 12059 "00100000" // /* MW 5 */ + 12060 "00000000" // /* MW 4 */ + 12061 "11110000" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_400 +.src_ref 10 "softfloat.c" 820 4 first +.return_address + 12064 "10000100" // JNZ r20, #12256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12256 delay_slots=5 */ + 12065 "00000001" // /* MW 5 */ + 12066 "01000000" // /* MW 4 */ + 12067 "11110000" // /* MW 3 */ + 12068 "00010111" // /* MW 2 */ + 12069 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12079 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 829 14 first + 12080 "10011000" // LTU r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12081 "00011100" // /* MW 3 */ + 12082 "00100111" // /* MW 2 */ + 12083 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 829 4 + 12084 "10000100" // JNZ r19, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */ + 12085 "00000001" // /* MW 5 */ + 12086 "01000000" // /* MW 4 */ + 12087 "00001000" // /* MW 3 */ + 12088 "00011000" // /* MW 2 */ + 12089 "10011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 first +.delay_slot + 12090 "00011000" // SEL.EQZ r24, r5, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12091 "10010010" // /* MW 3 */ + 12092 "01110001" // /* MW 2 */ + 12093 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.delay_slot + 12094 "11111000" // MOV r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12095 "10100000" // /* MW 3 */ + 12096 "10011101" // /* MW 2 */ + 12097 "00011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.delay_slot + 12098 "00011000" // SEL.EQZ r25, r5, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12099 "00100010" // /* MW 3 */ + 12100 "01110011" // /* MW 2 */ + 12101 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12105 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 830 14 first + 12106 "10011000" // LTU r18, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00001100" // /* MW 3 */ + 12108 "01100101" // /* MW 2 */ + 12109 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 830 4 + 12110 "10000100" // JNZ r18, #12336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12336 delay_slots=5 */ + 12111 "00000001" // /* MW 5 */ + 12112 "01000000" // /* MW 4 */ + 12113 "00011000" // /* MW 3 */ + 12114 "00011000" // /* MW 2 */ + 12115 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12125 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 + 12126 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12127 "01111101" // /* MW 3 */ + 12128 "00100000" // /* MW 2 */ + 12129 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 24 + 12130 "01000100" // MOVXM p0, #509172 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12131 "11101000" // /* MW 5 */ + 12132 "11001001" // /* MW 4 */ + 12133 "11000000" // /* MW 3 */ + 12134 "00000111" // /* MW 2 */ + 12135 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 24 first + 12136 "10011000" // LDA r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "01010110" // /* MW 3 */ + 12138 "00000110" // /* MW 2 */ + 12139 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12141 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 12142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12143 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 4 + 12144 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12145 "00000000" // /* MW 3 */ + 12146 "00101000" // /* MW 2 */ + 12147 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 44 +.delay_slot + 12148 "00011000" // MOVX r17, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12149 "00001101" // /* MW 3 */ + 12150 "00100010" // /* MW 2 */ + 12151 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12153 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12155 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 44 +.delay_slot + 12156 "10011000" // EQ r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12157 "00100111" // /* MW 3 */ + 12158 "01100011" // /* MW 2 */ + 12159 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 12160 "11100001" // NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12161 "00000000" // /* MW 15 */ + 12162 "00000000" // /* MW 14 */ + 12163 "01111000" // /* MW 13 */ + 12164 "10100101" // /* MW 12 */ + 12165 "00000001" // /* MW 11 */ + 12166 "01101100" // /* MW 10 */ + 12167 "00001000" // /* MW 9 */ + 12168 "00100010" // /* MW 8 */ + 12169 "01011011" // /* MW 7 */ + 12170 "00000001" // /* MW 6 */ + 12171 "00100000" // /* MW 5 */ + 12172 "00000000" // /* MW 4 */ + 12173 "11110000" // /* MW 3 */ + 12174 "00101100" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_512 +.src_ref 10 "softfloat.c" 834 8 first + 12176 "10000100" // JNZ r16, #12368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12368 delay_slots=5 */ + 12177 "00000001" // /* MW 5 */ + 12178 "01000000" // /* MW 4 */ + 12179 "00101000" // /* MW 3 */ + 12180 "00011000" // /* MW 2 */ + 12181 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12191 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 + 12192 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12193 "01111101" // /* MW 3 */ + 12194 "00100000" // /* MW 2 */ + 12195 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 835 8 first + 12196 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12197 "00000000" // /* MW 3 */ + 12198 "00101000" // /* MW 2 */ + 12199 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 12200 "10011000" // LSHL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12201 "00001101" // /* MW 3 */ + 12202 "11100001" // /* MW 2 */ + 12203 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 12204 "01000100" // MOVXM r17, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12205 "00000000" // /* MW 5 */ + 12206 "10100000" // /* MW 4 */ + 12207 "00001000" // /* MW 3 */ + 12208 "10000000" // /* MW 2 */ + 12209 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 12210 "10011000" // ADD r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12211 "00000000" // /* MW 3 */ + 12212 "01000001" // /* MW 2 */ + 12213 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12216 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12217 "00011100" // /* MW 7 */ + 12218 "00000000" // /* MW 6 */ + 12219 "00000000" // /* MW 5 */ + 12220 "00000100" // /* MW 4 */ + 12221 "11110000" // /* MW 3 */ + 12222 "00101100" // /* MW 2 */ + 12223 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_560 +.src_ref 10 "softfloat.c" 852 8 first + 12224 "10000100" // JNZ r17, #12384 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12384 delay_slots=5 */ + 12225 "00000001" // /* MW 5 */ + 12226 "01000000" // /* MW 4 */ + 12227 "00110000" // /* MW 3 */ + 12228 "00011000" // /* MW 2 */ + 12229 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12239 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 853 8 first + 12240 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12241 "00000000" // /* MW 3 */ + 12242 "00101000" // /* MW 2 */ + 12243 "00010000" // /* MW 1 */ +.delay_slot + 12244 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12245 "10100000" // /* MW 3 */ + 12246 "00010000" // /* MW 2 */ + 12247 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_592 +.src_ref 10 "softfloat.c" 821 18 first + 12256 "10011000" // OR r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12257 "00000101" // /* MW 3 */ + 12258 "01100001" // /* MW 2 */ + 12259 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 821 8 + 12260 "10000100" // JNZ r16, #12400 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12400 delay_slots=5 */ + 12261 "00000001" // /* MW 5 */ + 12262 "01000000" // /* MW 4 */ + 12263 "00111000" // /* MW 3 */ + 12264 "00011000" // /* MW 2 */ + 12265 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12267 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12271 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12275 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 823 8 first + 12276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12277 "00000000" // /* MW 3 */ + 12278 "00101000" // /* MW 2 */ + 12279 "00010000" // /* MW 1 */ +.delay_slot + 12280 "01000100" // MOVXM r0, #2147483647 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12281 "11111110" // /* MW 5 */ + 12282 "00111111" // /* MW 4 */ + 12283 "11110000" // /* MW 3 */ + 12284 "11111111" // /* MW 2 */ + 12285 "01111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12292 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12293 "10000001" // /* MW 11 */ + 12294 "10101101" // /* MW 10 */ + 12295 "00000000" // /* MW 9 */ + 12296 "00000000" // /* MW 8 */ + 12297 "00000000" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00100000" // /* MW 5 */ + 12300 "00000000" // /* MW 4 */ + 12301 "11110000" // /* MW 3 */ + 12302 "00101100" // /* MW 2 */ + 12303 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_640 + 12304 "10000100" // J #12016 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12016 delay_slots=5 */ + 12305 "00000000" // /* MW 5 */ + 12306 "00000000" // /* MW 4 */ + 12307 "01111000" // /* MW 3 */ + 12308 "00010111" // /* MW 2 */ + 12309 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 +.delay_slot + 12310 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12311 "10100000" // /* MW 3 */ + 12312 "01010001" // /* MW 2 */ + 12313 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 864 16 +.delay_slot + 12314 "11111000" // MOV r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12315 "10100000" // /* MW 3 */ + 12316 "11011000" // /* MW 2 */ + 12317 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12322 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12323 "00011100" // /* MW 13 */ + 12324 "00000000" // /* MW 12 */ + 12325 "00000000" // /* MW 11 */ + 12326 "01010111" // /* MW 10 */ + 12327 "00011010" // /* MW 9 */ + 12328 "01000000" // /* MW 8 */ + 12329 "00000000" // /* MW 7 */ + 12330 "00000000" // /* MW 6 */ + 12331 "10110110" // /* MW 5 */ + 12332 "00000010" // /* MW 4 */ + 12333 "11110000" // /* MW 3 */ + 12334 "00101100" // /* MW 2 */ + 12335 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_672 + 12336 "10000100" // J #11872 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11872 delay_slots=5 */ + 12337 "00000000" // /* MW 5 */ + 12338 "00000000" // /* MW 4 */ + 12339 "00110000" // /* MW 3 */ + 12340 "00010111" // /* MW 2 */ + 12341 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 846 16 +.delay_slot + 12342 "11111000" // MOV r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12343 "00100000" // /* MW 3 */ + 12344 "00011000" // /* MW 2 */ + 12345 "00011001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 +.delay_slot + 12346 "11111000" // MOV r25, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12347 "00100000" // /* MW 3 */ + 12348 "01011100" // /* MW 2 */ + 12349 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 +.delay_slot + 12350 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12351 "10100000" // /* MW 3 */ + 12352 "01010011" // /* MW 2 */ + 12353 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12356 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12357 "10000001" // /* MW 11 */ + 12358 "10101101" // /* MW 10 */ + 12359 "00000000" // /* MW 9 */ + 12360 "00000000" // /* MW 8 */ + 12361 "00000000" // /* MW 7 */ + 12362 "00000000" // /* MW 6 */ + 12363 "00100000" // /* MW 5 */ + 12364 "00000000" // /* MW 4 */ + 12365 "11110000" // /* MW 3 */ + 12366 "00101100" // /* MW 2 */ + 12367 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_704 +.src_ref 10 "softfloat.c" 834 27 first +.tail_call + 12368 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12369 "00000000" // /* MW 5 */ + 12370 "00000000" // /* MW 4 */ + 12371 "01000000" // /* MW 3 */ + 12372 "00010100" // /* MW 2 */ + 12373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12383 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_720 +.src_ref 10 "softfloat.c" 852 27 first +.tail_call +.return_address + 12384 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12385 "00000000" // /* MW 5 */ + 12386 "00000000" // /* MW 4 */ + 12387 "01000000" // /* MW 3 */ + 12388 "00010100" // /* MW 2 */ + 12389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12399 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_736 +.src_ref 10 "softfloat.c" 821 34 first +.tail_call +.return_address + 12400 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12401 "00000000" // /* MW 5 */ + 12402 "00000000" // /* MW 4 */ + 12403 "01000000" // /* MW 3 */ + 12404 "00010100" // /* MW 2 */ + 12405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL14subFloat32Sigsjji__end + 12415 "00000000" // /* MW 1 */ +.label float32_add +.function float32_add float32_add +.src_ref 10 "softfloat.c" 92 12 +.src_ref 10 "softfloat.c" 878 first +.function_start + 12416 "00011000" // MOVX r16, #-31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12417 "10000101" // /* MW 3 */ + 12418 "11100000" // /* MW 2 */ + 12419 "00010111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 92 12 first + 12420 "10011000" // LSHL r3, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12421 "00001101" // /* MW 3 */ + 12422 "01000111" // /* MW 2 */ + 12423 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 92 12 + 12424 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12425 "00001101" // /* MW 3 */ + 12426 "10100001" // /* MW 2 */ + 12427 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 884 15 first + 12428 "10011000" // EQ r16, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12429 "00000111" // /* MW 3 */ + 12430 "11100001" // /* MW 2 */ + 12431 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 884 4 + 12432 "10000100" // JNZ r16, #12464 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12464 delay_slots=5 */ + 12433 "00000001" // /* MW 5 */ + 12434 "01000000" // /* MW 4 */ + 12435 "01011000" // /* MW 3 */ + 12436 "00011000" // /* MW 2 */ + 12437 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12447 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 888 15 first +.tail_call + 12448 "10000100" // J #11664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11664 delay_slots=5 */ + 12449 "00000000" // /* MW 5 */ + 12450 "00000000" // /* MW 4 */ + 12451 "11001000" // /* MW 3 */ + 12452 "00010110" // /* MW 2 */ + 12453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12463 "00000000" // /* MW 1 */ +.label TGT_Ffloat32_add_48 +.src_ref 10 "softfloat.c" 885 15 first +.tail_call +.return_address + 12464 "10000100" // J #11040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */ + 12465 "00000000" // /* MW 5 */ + 12466 "00000000" // /* MW 4 */ + 12467 "10010000" // /* MW 3 */ + 12468 "00010101" // /* MW 2 */ + 12469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label float32_add__end + 12479 "00000000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 8 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 9 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/softfloat" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.lst new file mode 100644 index 0000000000000000000000000000000000000000..da538ba51f010cb935d6faf7c98cc539440d5b5d --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.lst @@ -0,0 +1,4815 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2352 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2352 0x00 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p0]; MOV r0, r15 + 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2364 0xff 0x73 0xb0 0x01 0xe8 0x50 0x70 0x02 ST p7, [sp, #-8]; MOV r15, r1 + 2372 0xff 0x82 0xb0 0x1f 0xa7 0x83 0xb0 0x60 0x79 0x3a ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 + 2382 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] + 2386 0x00 0x00 NOPX + 2388 0x00 0x00 NOPX + 2390 0x18 0x68 0x02 0x18 ADD.NC p0, r16, #4 + 2394 0x00 0x1e 0x16 0x98 LDA r16, [p0], #4 + 2398 0x00 0x3e 0x56 0x98 LDA r18, [p0], #12 + 2402 0x00 0xee 0x36 0x98 LDA r17, [p0], #-8 + 2406 0x00 0x07 0x76 0x98 LDA r27, [p0] + 2410 0x00 0x00 NOPX + 2412 0x00 0x00 NOPX + 2414 0x00 0x00 NOPX + 2416 0x00 0x00 NOPX + 2418 0x00 0x00 NOPX + 2420 0x00 0x00 NOPX + 2422 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 2426 0x08 0xd6 0x11 0x98 ST r16, [p0, #-12] + 2430 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 2434 0x00 0x00 NOPX + 2436 0x00 0x00 NOPX + 2438 0x00 0x00 NOPX + 2440 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 2444 0x00 0x00 NOPX + 2446 0x00 0x00 NOPX + 2448 0x00 0x00 NOPX + 2450 0x07 0x2c 0x1e 0x98 LDA p0, [p7], #8 + 2454 0x07 0xfc 0x9e 0x98 LDA p1, [p7], #-4 + 2458 0x07 0x05 0x1e 0x98 LDA p2, [p7] +.no_stack_arguments + 2462 0x00 0x0e 0xb8 0x00 0x01 0x04 JL #7536 +.delay_slot + 2468 0x0f 0xf3 0x55 0x98 ST r26, [sp, #-16] +.delay_slot +.swstall delay_slot + 2472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2474 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2476 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2478 0x00 0x00 NOPX +.return_address + 2480 0x07 0xf6 0x16 0x98 LDA r16, [p7, #-4] + 2484 0x07 0xf3 0x51 0x18 LDA r26, [sp, #-16] + 2488 0x00 0x00 NOPX + 2490 0x00 0x00 NOPX + 2492 0x00 0x00 NOPX + 2494 0x00 0x00 NOPX + 2496 0x00 0x00 NOPX + 2498 0x18 0x68 0x08 0x18 ADD.NC p0, r16, #16 + 2502 0x00 0x06 0x16 0x98 LDA r16, [p0] + 2506 0x10 0x22 0x05 0x18 MOVX r17, #1 + 2510 0x00 0x00 NOPX + 2512 0x00 0x00 NOPX + 2514 0x00 0x00 NOPX + 2516 0x00 0x00 NOPX + 2518 0x00 0x00 NOPX + 2520 0x14 0x15 0x18 0x18 REL.COND r16, r17, r26 + 2524 0xfe 0x87 0x2d 0xaf 0x41 0xd4 LDA lr, [sp, #-12]; MOV r27, r15 + 2530 0x00 0xf6 0x16 0x98 LDA r16, [p0, #-4] + 2534 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 2538 0x00 0x00 NOPX + 2540 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 2544 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 2550 0x00 0x00 NOPX + 2552 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 2556 0x14 0x63 0x01 0x98 SUB r17, r17, r16 +.delay_slot + 2560 0x14 0x21 0x12 0x18 SEL.EQZ r16, r16, r17, r27 +.delay_slot + 2564 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4] +.delay_slot +.swstall delay_slot + 2568 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2570 0x00 0x00 NOPX +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 2576 +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.function_start + 2576 0x23 0x8e 0xd3 0x80 0x8b 0x3e 0x67 0x68 0x09 0x60 0x78 0x76 LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 + 2588 0x02 0x07 0x00 0x3e 0x25 0x09 0x30 0x07 0x08 0xba MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 + 2598 0x00 0x7e 0x00 0x3e 0x17 0xa8 0x08 0x60 0x78 0xba MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 + 2608 0xff 0xe5 0x00 0x00 0x00 0x3c 0x8f 0xfc 0x10 0xba MOVA r5, #-1; MOVXM r4, #65528 + 2618 0xff 0x90 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r16, #-4; PADDXM [sp], #64 + 2628 0x1c 0x60 0x17 0x18 ADD.NC p4, r0, #46 + 2632 0x00 0x00 NOPX + 2634 0x08 0x1c 0x71 0x98 ST r3, [p0], #4 + 2638 0x01 0x1f 0x56 0x98 LDA r26, [p1], #4 + 2642 0x00 0x00 NOPX + 2644 0x00 0x00 NOPX + 2646 0x00 0x00 NOPX + 2648 0x00 0x00 NOPX + 2650 0x00 0x00 NOPX + 2652 0x00 0x00 NOPX + 2654 0x03 0xea 0x3d 0x44 0x89 0x5c ST r26, [p0], #4; AND r17, r26, r4 + 2660 0x23 0xf6 0xd0 0x06 0x4d 0x7e 0xcc 0x48 0xa8 0xba LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 + 2670 0x16 0xa4 0x6d 0x98 LSHL r18, r26, r6 + 2674 0x11 0x0c 0x1d 0x98 LSHL r6, r4, r1 + 2678 0xd4 0x43 0xb0 0xb2 0xff 0x24 LSHL r17, r26, r1; ADD.NC r1, r18, #-1 + 2684 0x00 0x00 NOPX + 2686 0x00 0x00 NOPX + 2688 0x00 0x00 NOPX + 2690 0x03 0xf6 0x3e 0x9c 0x4c 0x5c ST r29, [p0], #4; MAC r7, r7, r29, r2 + 2696 0x23 0x8a 0xd7 0xff 0xb5 0x80 0x07 0x49 0xaf 0xfa LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 + 2706 0x10 0xe9 0xdf 0x98 MUL r20, r3, r29 + 2710 0x10 0xf8 0x4f 0x98 MUL r28, r3, r4 + 2714 0x17 0x6b 0xed 0x98 LSHL r21, r29, r30 + 2718 0xec 0x8b 0xbd 0xb5 0xd0 0x24 LSHL r18, r29, r5; ADD.NC r27, r21, #-48 + 2724 0x14 0xaf 0xff 0x18 ADD r23, r18, #-1 + 2728 0x17 0x7b 0x6f 0x98 MUL r29, r29, r22 + 2732 0x03 0x8a 0x3f 0x60 0x55 0x5c ST r2, [p0], #4; LT r24, r30, r2 + 2738 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 2742 0x00 0x00 NOPX + 2744 0x00 0x00 NOPX + 2746 0x00 0x00 NOPX + 2748 0x00 0x00 NOPX + 2750 0x00 0x00 NOPX + 2752 0x00 0x00 NOPX + 2754 0x03 0x85 0x30 0x03 0xf0 0x0e 0x70 0x02 ST el0, [p0], #4; MOV r31, el0 + 2762 0x01 0x04 0x0e 0x98 LDA eh0, [p1] + 2766 0x00 0x00 NOPX + 2768 0x00 0x00 NOPX + 2770 0x00 0x00 NOPX + 2772 0x00 0x00 NOPX + 2774 0x00 0x00 NOPX + 2776 0x00 0x00 NOPX + 2778 0x00 0x81 0x30 0x03 0x30 0x8e 0x70 0x02 ST eh0, [p0]; MOV r25, eh0 + 2786 0x01 0x17 0xd6 0x98 LDA r30, [p1, #4] + 2790 0x00 0x00 NOPX + 2792 0xc0 0x05 0xb0 0x40 0x01 0x84 JNZ r24, #2912 +.delay_slot + 2798 0x17 0x27 0x0d 0x98 LSHL r19, r28, r16 +.delay_slot + 2802 0x17 0xf3 0x9f 0x98 MUL r25, r31, r25 +.delay_slot + 2806 0xa5 0x0b 0xb2 0xb1 0xff 0x24 LSHL r20, r20, r5; ADD.NC r5, r17, #-1 +.delay_slot + 2812 0x11 0x21 0x0d 0x98 LSHL r16, r4, r16 +.delay_slot + 2816 0x02 0xfa 0x3c 0xff 0xdf 0x5c ST r30, [p0, #4]; MUL r31, r25, r30 + 2822 0x10 0x38 0x05 0x18 MOVX r28, #1 + 2826 0x10 0xb9 0xc7 0x98 EQ r28, r2, r28 + 2830 0xe0 0x07 0xe0 0x40 0x01 0x84 JNZ r28, #4032 +.delay_slot +.swstall delay_slot + 2836 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2838 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2840 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2842 0x00 0x00 NOPX +.delay_slot + 2844 0x10 0xed 0xff 0x18 ADD r22, r3, #-1 + 2848 0x10 0x22 0x09 0x18 MOVX r17, #2 + 2852 0x14 0x62 0x27 0x98 EQ r17, r17, r2 + 2856 0x88 0x07 0xa0 0x40 0x01 0x84 JNZ r17, #3904 +.delay_slot +.swstall delay_slot + 2862 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2864 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2866 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2868 0x00 0x00 NOPX +.delay_slot + 2870 0x10 0x0e 0x0d 0x18 MOVX r7, #3 + 2874 0x11 0xc4 0x27 0x98 EQ r2, r7, r2 + 2878 0x10 0x07 0x50 0x40 0x01 0x84 JNZ r2, #3744 +.delay_slot +.swstall delay_slot + 2884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2886 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2888 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2890 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2892 0x00 0x00 NOPX + 2894 0x00 0x06 0xf0 0x00 0x00 0x84 J #3552 +.delay_slot + 2900 0x10 0x34 0x11 0x18 MOVX r26, #4 +.delay_slot +.swstall delay_slot + 2904 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2906 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2908 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2910 0x00 0x00 NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336 + 2912 0x10 0x3a 0x15 0x18 MOVX r29, #5 + 2916 0x17 0x70 0x2a 0x98 LT r24, r29, r2 + 2920 0xc0 0x06 0x50 0x40 0x01 0x84 JNZ r24, #3232 +.delay_slot +.swstall delay_slot + 2926 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2928 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2930 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2932 0x00 0x00 NOPX +.delay_slot + 2934 0x10 0x34 0x11 0x18 MOVX r26, #4 + 2938 0x16 0xa2 0x27 0x98 EQ r17, r26, r2 + 2942 0x88 0x06 0x10 0x40 0x01 0x84 JNZ r17, #3104 +.delay_slot +.swstall delay_slot + 2948 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2950 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2952 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2954 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2956 0x00 0x00 NOPX + 2958 0x17 0x44 0x28 0x98 NE r2, r29, r2 + 2962 0x10 0x06 0xf0 0x40 0x01 0x84 JNZ r2, #3552 +.delay_slot +.swstall delay_slot + 2968 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2970 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2972 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2974 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2976 0x00 0x00 NOPX + 2978 0x83 0xd6 0xe0 0x00 0x22 0x08 0x07 0xec 0x58 0xba ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 + 2988 0x1f 0x9c 0xa0 0xf8 MOV r30, r25 + 2992 0x00 0x00 NOPX + 2994 0x00 0x00 NOPX + 2996 0x00 0x00 NOPX + 2998 0x00 0x00 NOPX + 3000 0x00 0x00 NOPX + 3002 0x04 0x1c 0xf7 0x18 ST.s16 r7, [p4], #2 + 3006 0x00 0x00 NOPX + 3008 0x00 0x00 NOPX + 3010 0x00 0x00 NOPX + 3012 0x00 0x00 NOPX + 3014 0x00 0x00 NOPX + 3016 0x00 0x00 NOPX + 3018 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2 + 3022 0x00 0x00 NOPX + 3024 0x00 0x00 NOPX + 3026 0x00 0x00 NOPX + 3028 0x00 0x00 NOPX + 3030 0x00 0x00 NOPX + 3032 0x00 0x00 NOPX + 3034 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3038 0x00 0x00 NOPX + 3040 0x00 0x00 NOPX + 3042 0x00 0x00 NOPX + 3044 0x00 0x00 NOPX + 3046 0x00 0x00 NOPX + 3048 0x00 0x00 NOPX + 3050 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2 + 3054 0x00 0x00 NOPX + 3056 0x00 0x00 NOPX + 3058 0x00 0x00 NOPX + 3060 0x00 0x00 NOPX + 3062 0x00 0x00 NOPX + 3064 0x00 0x00 NOPX + 3066 0x04 0x08 0x57 0x18 ST.s16 r2, [p4], m0 + 3070 0x00 0x00 NOPX + 3072 0x00 0x00 NOPX + 3074 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.swstall delay_slot + 3080 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3082 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3084 0x00 0x00 NOPX +.delay_slot + 3086 0x0c 0x06 0x51 0x98 ST r18, [p4] +.delay_slot + 3090 0x00 0x2c 0xf8 0x29 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r6, [p4, #4]; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528 + 3104 0x83 0x92 0xe0 0x3e 0x67 0xa8 0x48 0x10 0x58 0xba ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 + 3114 0xfd 0x80 0x80 0x0c 0x22 0x33 0xd0 0x0e 0x78 0xba MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 + 3124 0x00 0x00 NOPX + 3126 0x00 0x00 NOPX + 3128 0x00 0x00 NOPX + 3130 0x00 0x00 NOPX + 3132 0x00 0x00 NOPX + 3134 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3138 0x00 0x00 NOPX + 3140 0x00 0x00 NOPX + 3142 0x00 0x00 NOPX + 3144 0x00 0x00 NOPX + 3146 0x00 0x00 NOPX + 3148 0x00 0x00 NOPX + 3150 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2 + 3154 0x00 0x00 NOPX + 3156 0x00 0x00 NOPX + 3158 0x00 0x00 NOPX + 3160 0x00 0x00 NOPX + 3162 0x00 0x00 NOPX + 3164 0x00 0x00 NOPX + 3166 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3170 0x00 0x00 NOPX + 3172 0x00 0x00 NOPX + 3174 0x00 0x00 NOPX + 3176 0x00 0x00 NOPX + 3178 0x00 0x00 NOPX + 3180 0x00 0x00 NOPX + 3182 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2 + 3186 0x00 0x00 NOPX + 3188 0x00 0x00 NOPX + 3190 0x00 0x00 NOPX + 3192 0x00 0x00 NOPX + 3194 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3196 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3198 0x04 0x08 0x37 0x18 ST.s16 r1, [p4], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3202 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3204 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3206 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3212 0x10 0x02 0x41 0x18 MOVX r1, #16 +.delay_slot +.swstall delay_slot + 3216 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3218 0x00 0x00 NOPX +.delay_slot + 3220 0x0c 0x14 0x71 0x98 ST r3, [p4, #4] +.delay_slot + 3224 0x80 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p4]; NOPM +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656 + 3232 0xff 0x8e 0x20 0x10 0x32 0x2c LDA r3, [sp, #-4]; MOVX r4, #6 + 3238 0x10 0x88 0x47 0x98 EQ r4, r2, r4 + 3242 0x20 0x06 0xa8 0x40 0x01 0x84 JNZ r4, #3408 +.delay_slot + 3248 0x10 0x02 0x41 0x18 MOVX r1, #16 +.delay_slot +.swstall delay_slot + 3252 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3254 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3256 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3258 0x00 0x00 NOPX + 3260 0x10 0x06 0x1d 0x18 MOVX r3, #7 + 3264 0x10 0xc4 0x28 0x98 NE r2, r3, r2 + 3268 0x10 0x06 0xf0 0x40 0x01 0x84 JNZ r2, #3552 +.delay_slot +.swstall delay_slot + 3274 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3276 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3278 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3280 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3282 0x00 0x00 NOPX + 3284 0x83 0x86 0xe0 0x26 0x2f 0xf8 0x07 0xec 0x58 0xba ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 + 3294 0xff 0x43 0x00 0x00 0x00 0x40 0x40 0x00 0x10 0xba MOVA r3, #-6; MOVXM dj0, #65536 + 3304 0xe0 0xc7 0xbc 0x20 0x01 0x64 LSHL r3, r28, r3; MOV r24, #0 + 3310 0x00 0x00 NOPX + 3312 0x00 0x00 NOPX + 3314 0x00 0x00 NOPX + 3316 0x00 0x00 NOPX + 3318 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2 + 3322 0x00 0x00 NOPX + 3324 0x00 0x00 NOPX + 3326 0x00 0x00 NOPX + 3328 0x00 0x00 NOPX + 3330 0x00 0x00 NOPX + 3332 0x00 0x00 NOPX + 3334 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3338 0x00 0x00 NOPX + 3340 0x00 0x00 NOPX + 3342 0x00 0x00 NOPX + 3344 0x00 0x00 NOPX + 3346 0x00 0x00 NOPX + 3348 0x00 0x00 NOPX + 3350 0x0c 0x1c 0x41 0x98 ST dj0, [p4], #4 + 3354 0x04 0x0b 0x17 0x18 ST.s16 r24, [p4], m0 + 3358 0x00 0x00 NOPX + 3360 0x00 0x00 NOPX + 3362 0x00 0x00 NOPX + 3364 0x00 0x00 NOPX + 3366 0x00 0x00 NOPX + 3368 0x00 0x00 NOPX + 3370 0x0c 0x07 0x51 0x98 ST r26, [p4] + 3374 0x0c 0x14 0x71 0x98 ST r3, [p4, #4] + 3378 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot + 3384 0x1f 0x9f 0xa0 0xf8 MOV r30, r31 +.delay_slot +.swstall delay_slot + 3388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3390 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3392 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3394 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832 + 3408 0x83 0x86 0xe0 0x06 0x2b 0x70 0x48 0x10 0x58 0xba ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 + 3418 0xfd 0x80 0x80 0x3e 0x47 0xa8 0xd0 0x0e 0x78 0xba MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 + 3428 0x10 0xc8 0x4d 0x98 LSHL r4, r3, r4 + 3432 0xf7 0x8d 0xf1 0xa4 0xff 0x24 MUL r30, r30, r6; ADD.NC r3, r4, #-1 + 3438 0x00 0x00 NOPX + 3440 0x00 0x00 NOPX + 3442 0x00 0x00 NOPX + 3444 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2 + 3448 0x00 0x00 NOPX + 3450 0x00 0x00 NOPX + 3452 0x00 0x00 NOPX + 3454 0x00 0x00 NOPX + 3456 0x00 0x00 NOPX + 3458 0x00 0x00 NOPX + 3460 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2 + 3464 0x00 0x00 NOPX + 3466 0x00 0x00 NOPX + 3468 0x00 0x00 NOPX + 3470 0x00 0x00 NOPX + 3472 0x00 0x00 NOPX + 3474 0x00 0x00 NOPX + 3476 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 + 3480 0x00 0x00 NOPX + 3482 0x00 0x00 NOPX + 3484 0x00 0x00 NOPX + 3486 0x00 0x00 NOPX + 3488 0x00 0x00 NOPX + 3490 0x00 0x00 NOPX + 3492 0x04 0x1c 0x77 0x18 ST.s16 r3, [p4], #2 + 3496 0x00 0x00 NOPX + 3498 0x00 0x00 NOPX + 3500 0x00 0x00 NOPX + 3502 0x00 0x00 NOPX + 3504 0x00 0x00 NOPX + 3506 0x00 0x00 NOPX + 3508 0x04 0x08 0x37 0x18 ST.s16 r1, [p4], m0 + 3512 0x00 0x00 NOPX + 3514 0x00 0x00 NOPX + 3516 0x00 0x00 NOPX + 3518 0x00 0x00 NOPX + 3520 0x00 0x00 NOPX + 3522 0x00 0x00 NOPX + 3524 0x0c 0x06 0x31 0x98 ST r17, [p4] + 3528 0x82 0xd2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r20, [p4, #4]; NOPM +.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 3536 0x18 0x80 0x40 0xb8 MOV dj0, #32 + 3540 0x60 0x7a 0xe0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX +.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 3552 0x03 0x08 0x80 0xc0 0x1e 0x14 MOVA m2, #24; ADD.NC p0, r0, #30 + 3558 0x43 0x8a 0xd0 0x00 0x02 0x08 0x07 0xe2 0x58 0xba LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 + 3568 0x40 0x8e 0x52 0x00 0x99 0x54 LDA.s16 r3, [p2]; MOV m1, #38 + 3574 0x02 0x14 0x36 0x98 LDA r1, [p2, #4] + 3578 0x00 0x00 NOPX + 3580 0x00 0x00 NOPX + 3582 0x00 0x2f 0xf7 0x18 ST.s16 r31, [p0], #4 + 3586 0x00 0x00 NOPX + 3588 0x00 0x00 NOPX + 3590 0x00 0x00 NOPX + 3592 0x00 0x00 NOPX + 3594 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3596 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3598 0x00 0x5f 0x17 0x18 ST.s16 r24, [p0], #10 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3602 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3604 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3606 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3608 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3610 0x10 0x30 0x01 0x18 MOVX r24, #0 + 3614 0x00 0x00 NOPX + 3616 0x00 0xcf 0x17 0x18 ST.s16 r24, [p0], #-8 + 3620 0x00 0x48 0x9a 0x98 LDA.u16 r4, [p0], m2 + 3624 0x00 0x00 NOPX + 3626 0x00 0x00 NOPX + 3628 0x00 0x00 NOPX + 3630 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3632 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3634 0x00 0xfc 0x17 0x18 ST.s16 r0, [p0], #-2 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3638 0x21 0x35 0xb2 0xa4 0xff 0x24 LSHL r4, r4, r26; ADD.NC r5, r4, #-1 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3644 0x10 0x00 0x41 0x98 SUB r0, r0, r4 + 3648 0x00 0x00 NOPX + 3650 0x00 0x00 NOPX + 3652 0x00 0x00 NOPX + 3654 0x00 0x00 NOPX + 3656 0x00 0x08 0xb7 0x18 ST.s16 r5, [p0], m0 + 3660 0x00 0x00 NOPX + 3662 0x00 0x00 NOPX + 3664 0x00 0x00 NOPX + 3666 0x00 0x00 NOPX + 3668 0x00 0x00 NOPX + 3670 0x00 0x00 NOPX + 3672 0x00 0x2a 0x77 0x18 ST.s16 r19, [p0], m1 + 3676 0x00 0x00 NOPX + 3678 0x00 0x00 NOPX + 3680 0x00 0x00 NOPX + 3682 0x00 0x00 NOPX + 3684 0x00 0x00 NOPX + 3686 0x00 0x00 NOPX + 3688 0x00 0xec 0x47 0x18 ST.s8 r2, [p0], #-2 + 3692 0x00 0x00 NOPX + 3694 0x00 0x00 NOPX + 3696 0x00 0x00 NOPX + 3698 0x00 0x00 NOPX + 3700 0x00 0x00 NOPX + 3702 0x00 0x00 NOPX + 3704 0x00 0x04 0x77 0x18 ST.s16 r3, [p0] + 3708 0x00 0x00 NOPX + 3710 0x00 0x00 NOPX + 3712 0x00 0x00 NOPX + 3714 0x00 0x00 NOPX + 3716 0x00 0x00 NOPX + 3718 0x00 0x00 NOPX + 3720 0x00 0xe4 0x27 0x18 ST.s8 r1, [p0, #-2] + 3724 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 3728 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 3734 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3736 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3738 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3740 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168 + 3744 0x83 0xd6 0xe0 0x00 0x00 0x3c 0xaf 0xf4 0x10 0xba ST.s16 r21, [p4], #2; MOVXM r5, #65512 + 3754 0xff 0x8a 0x20 0x0a 0x7d 0x04 0x07 0xec 0x58 0xba LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 + 3764 0x00 0x9a 0x00 0x00 0x00 0x3c 0xcf 0xff 0x90 0xba MOVA r26, #4; MOVXM r6, #65535 + 3774 0x10 0xe2 0x60 0x98 ADD r17, r3, r6 + 3778 0x14 0x7a 0x46 0x18 MAC r29, r29, r17, r4 + 3782 0x14 0x6a 0x4e 0x18 MSC r21, r21, r17, r4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3786 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3788 0x04 0x1c 0x57 0x18 ST.s16 r2, [p4], #2 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3792 0x11 0xc4 0x2f 0x98 MUL r2, r7, r2 + 3796 0x00 0x00 NOPX + 3798 0x00 0x00 NOPX + 3800 0x00 0x00 NOPX + 3802 0x00 0x00 NOPX + 3804 0x00 0x00 NOPX + 3806 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2 + 3810 0x00 0x00 NOPX + 3812 0x00 0x00 NOPX + 3814 0x00 0x00 NOPX + 3816 0x00 0x00 NOPX + 3818 0x00 0x00 NOPX + 3820 0x00 0x00 NOPX + 3822 0x04 0x1e 0xb7 0x18 ST.s16 r21, [p4], #2 + 3826 0x00 0x00 NOPX + 3828 0x00 0x00 NOPX + 3830 0x00 0x00 NOPX + 3832 0x00 0x00 NOPX + 3834 0x00 0x00 NOPX + 3836 0x00 0x00 NOPX + 3838 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2 + 3842 0x00 0x00 NOPX + 3844 0x00 0x00 NOPX + 3846 0x00 0x00 NOPX + 3848 0x00 0x00 NOPX + 3850 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3852 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3854 0x04 0x08 0x57 0x18 ST.s16 r2, [p4], m0 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3858 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3860 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3862 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3868 0x10 0x0a 0x41 0x18 MOVX r5, #16 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3872 0x11 0x45 0xd1 0x98 SUB r2, r5, r29 +.delay_slot + 3876 0x19 0xa1 0x1c 0xf8 MOV r6, eh0 +.delay_slot + 3880 0x80 0x8e 0x30 0x00 0x01 0xa5 0x70 0x02 ST r3, [p4]; NOPM +.delay_slot + 3888 0x00 0x2c 0xf0 0x00 0x24 0x16 0x11 0xbd 0xe3 0x7c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328 + 3904 0x83 0x92 0xe0 0x00 0x42 0x08 0x07 0xec 0x58 0xba ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 + 3914 0xff 0x86 0x20 0x06 0x2d 0x70 0x48 0x08 0x58 0xba LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 + 3924 0x00 0xc8 0x2d 0x20 0x11 0x64 MOVX r3, #16; MOV r26, #4 + 3930 0x00 0x00 NOPX + 3932 0x00 0x00 NOPX + 3934 0x00 0x00 NOPX + 3936 0x00 0x00 NOPX + 3938 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2 + 3942 0x00 0x00 NOPX + 3944 0x00 0x00 NOPX + 3946 0x00 0x00 NOPX + 3948 0x00 0x00 NOPX + 3950 0x00 0x00 NOPX + 3952 0x00 0x00 NOPX + 3954 0x04 0x1c 0xb7 0x18 ST.s16 r5, [p4], #2 + 3958 0x00 0x00 NOPX + 3960 0x00 0x00 NOPX + 3962 0x00 0x00 NOPX + 3964 0x00 0x00 NOPX + 3966 0x00 0x00 NOPX + 3968 0x00 0x00 NOPX + 3970 0x04 0x1f 0x77 0x18 ST.s16 r27, [p4], #2 + 3974 0x00 0x00 NOPX + 3976 0x00 0x00 NOPX + 3978 0x00 0x00 NOPX + 3980 0x00 0x00 NOPX + 3982 0x00 0x00 NOPX + 3984 0x00 0x00 NOPX + 3986 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2 + 3990 0x00 0x00 NOPX + 3992 0x00 0x00 NOPX + 3994 0x00 0x00 NOPX + 3996 0x00 0x00 NOPX + 3998 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 4000 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.noswbrkpt + 4002 0x04 0x08 0x77 0x18 ST.s16 r3, [p4], m0 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4006 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4008 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4010 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4016 0x10 0x46 0x26 0x18 MAC r3, r3, r1, r2 +.delay_slot +.swstall delay_slot + 4020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4022 0x00 0x00 NOPX +.delay_slot + 4024 0x0c 0x04 0xd1 0x98 ST r6, [p4] +.delay_slot + 4028 0x0c 0x16 0x51 0x98 ST r18, [p4, #4] +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456 + 4032 0x04 0x1e 0xb7 0x18 ST.s16 r21, [p4], #2 + 4036 0x00 0x00 NOPX + 4038 0x00 0x00 NOPX + 4040 0x00 0x00 NOPX + 4042 0x00 0x00 NOPX + 4044 0x00 0x00 NOPX + 4046 0x00 0x00 NOPX + 4048 0x04 0x1c 0xf7 0x18 ST.s16 r7, [p4], #2 + 4052 0x00 0x00 NOPX + 4054 0x00 0x00 NOPX + 4056 0x00 0x00 NOPX + 4058 0x00 0x00 NOPX + 4060 0x00 0x00 NOPX + 4062 0x00 0x00 NOPX + 4064 0x04 0x1e 0xf7 0x18 ST.s16 r23, [p4], #2 + 4068 0x00 0x00 NOPX + 4070 0x00 0x00 NOPX + 4072 0x00 0x00 NOPX + 4074 0x07 0xfc 0x71 0x18 LDA r3, [sp, #-4] + 4078 0x00 0x00 NOPX +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 4080 0x00 0x00 NOPX +.aggressive_scheduled_block_id 7 +.noswbrkpt + 4082 0x04 0x1c 0x37 0x18 ST.s16 r1, [p4], #2 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4086 0x00 0x00 NOPX +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4088 0x00 0x00 0xf0 0xbf 0xc0 0x44 MOVXM r1, #65504 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4094 0x10 0x45 0xa0 0x98 ADD r2, r1, r26 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4098 0x18 0x44 0xc0 0xa0 0x41 0x64 MAC r1, r1, r3, r2; MOV r1, #16 + 4104 0x00 0x00 NOPX + 4106 0x00 0x00 NOPX + 4108 0x04 0x1e 0xd7 0x18 ST.s16 r22, [p4], #2 + 4112 0x00 0x00 NOPX + 4114 0x00 0x00 NOPX + 4116 0x00 0x00 NOPX + 4118 0x00 0x00 NOPX + 4120 0x00 0x00 NOPX +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 4122 0x18 0x0f 0xd8 0xb8 MOV m0, #-20 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 4126 0x04 0x08 0xb7 0x18 ST.s16 r5, [p4], m0 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4130 0x00 0x00 NOPX +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4132 0x00 0x00 NOPX +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4134 0x00 0x06 0xe8 0x00 0x00 0x84 J #3536 +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4140 0xb1 0x49 0xc2 0xa0 0x41 0x64 MSC r5, r5, r22, r4; MOV r5, #16 +.delay_slot +.swstall delay_slot + 4146 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4148 0x00 0x00 NOPX +.delay_slot + 4150 0x0c 0x06 0x91 0x98 ST r20, [p4] +.delay_slot + 4154 0x82 0xc6 0x30 0x01 0xa0 0x8b 0xd0 0x8e 0x79 0x3a ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0 + +.text_segment PM 4176 +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0 +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.function_start + 4176 0x42 0x82 0xd0 0x3e 0x47 0xc8 0x87 0xe8 0x58 0xba LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 + 4186 0x45 0x86 0xd0 0x3e 0x27 0xaa 0x08 0x06 0x58 0xba LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 + 4196 0x4f 0x96 0xd0 0x01 0x80 0x08 0x68 0x60 0x78 0xba LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 + 4206 0x02 0x2a 0x52 0x98 LDA.s16 r18, [p2], m1 + 4210 0x02 0x1c 0xd6 0x98 LDA r6, [p2], #4 + 4214 0x02 0x2c 0xf6 0x98 LDA r7, [p2], #8 + 4218 0x02 0x06 0x36 0x98 LDA r17, [p2] + 4222 0x10 0x26 0x4e 0x98 ASHL r19, r0, r4 + 4226 0x02 0x24 0x96 0x98 LDA r4, [p2, #8] + 4230 0x11 0x68 0x2e 0x98 ASHL r20, r5, r2 + 4234 0x18 0x49 0x72 0xf8 VBCST.16 x0, r18 + 4238 0x00 0x00 NOPX + 4240 0x14 0xe5 0x4f 0x98 MUL r18, r19, r20 + 4244 0x10 0x67 0x11 0x98 SUB r19, r1, r17 + 4248 0x14 0xe7 0x2f 0x98 MUL r19, r19, r18 + 4252 0x14 0x63 0x2f 0x98 MUL r17, r17, r18 + 4256 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 + 4260 0xc4 0x23 0x34 0xc3 0x82 0xa4 GE r16, r24, r17; ADD.NC p2, r3, r16 + 4266 0x80 0x08 0xa0 0x40 0x01 0x84 JNZ r16, #4416 +.delay_slot + 4272 0x18 0x00 0x92 0xf8 VMOV bmll0, x0 +.delay_slot +.swstall delay_slot + 4276 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4278 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4280 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4282 0x00 0x00 NOPX + 4284 0x00 0x00 0x11 0xe2 0x60 0x44 MOVXM ls, #4400 + 4290 0x00 0x00 0x16 0xe2 0x60 0x44 MOVXM le, #4400 + 4296 0x00 0x2b 0x60 0x02 0xbc 0x50 0x70 0x02 NOPS; MOV lc, r17 + 4304 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4336 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4352 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4384 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 4400 0x00 0x2c 0xf0 0x00 0x22 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240 +.loop_nesting 0 + 4416 0x00 0x86 0x00 0x0b 0x00 0xfe 0x29 0xcc 0xa8 0xba MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 + 4426 0x04 0x62 0x32 0x87 0xff 0x24 SUB r17, r0, r17; ADD.NC dn1, r7, #-1 + 4432 0x14 0x62 0x6d 0x98 LSHL r17, r17, r6 + 4436 0x04 0x4e 0x32 0x11 0x10 0x24 SUB r17, r0, r7; ADD.NC m1, r17, #16 + 4442 0x11 0xe1 0x0f 0x98 MUL r16, r7, r16 + 4446 0x14 0x4c 0x6d 0x98 LSHL r6, r17, r6 + 4450 0x81 0x85 0xd4 0xc3 0x32 0xa4 ASHL r6, r16, r2; ADD.NC p2, r3, r6 + 4456 0x16 0x0e 0x69 0x98 GE r7, r24, r6 + 4460 0x38 0x09 0x08 0x40 0x01 0x84 JNZ r7, #4624 +.delay_slot +.swstall delay_slot + 4466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4474 0x00 0x00 NOPX + 4476 0x00 0x07 0x80 0x00 0x00 0x04 0x79 0x00 0x10 0xba MOVA dc1, #0; MOVXM ls, #4608 + 4486 0x02 0x06 0x80 0x00 0x00 0x05 0xb9 0x00 0x10 0xba MOVA dj1, #16; MOVXM le, #4608 + 4496 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb9 0x90 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV + 4512 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4528 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4544 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4592 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 4608 0x00 0x2c 0xf0 0x00 0x22 0x30 0x2e 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448 +.loop_nesting 0 + 4624 0x7f 0xff 0xf3 0xbf 0xf0 0x44 MOVXM r7, #2147483640 + 4630 0x11 0xce 0x44 0x98 AND r7, r7, r4 + 4634 0x11 0x4e 0x71 0x98 SUB r7, r5, r7 + 4638 0x11 0xce 0x0f 0x98 MUL r7, r7, r0 + 4642 0x11 0x04 0x2e 0x98 ASHL r2, r4, r2 + 4646 0x11 0x48 0x41 0x98 SUB r4, r5, r4 + 4650 0x10 0x84 0x0f 0x98 MUL r2, r2, r0 + 4654 0x11 0x00 0x0f 0x98 MUL r0, r4, r0 + 4658 0x08 0x45 0xf3 0x20 0x05 0x64 MUL r1, r1, r2; MOV r6, #1 + 4664 0x10 0x00 0x6d 0x98 LSHL r0, r0, r6 + 4668 0xc0 0x03 0x34 0xc3 0x02 0xa4 GE r0, r24, r1; ADD.NC p2, r3, r0 + 4674 0x00 0x09 0x70 0x40 0x01 0x84 JNZ r0, #4832 +.delay_slot +.swstall delay_slot + 4680 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4682 0x00 0x00 NOPX +.delay_slot + 4684 0x11 0xc8 0x6d 0x98 LSHL r4, r7, r6 +.delay_slot + 4688 0x18 0x02 0x08 0x18 ADD.NC m0, r4, #16 +.delay_slot + 4692 0x18 0x41 0x7f 0x98 ADD.NC dn0, r2, #-1 + 4696 0x00 0x03 0x80 0x00 0x00 0x04 0x79 0x68 0x10 0xba MOVA dc0, #0; MOVXM ls, #4816 + 4706 0x02 0x02 0x80 0x00 0x00 0x05 0xb9 0x68 0x10 0xba MOVA dj0, #16; MOVXM le, #4816 + 4716 0x1d 0x70 0xa0 0xf8 MOV lc, r1 + 4720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4784 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4800 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 4816 0x00 0x2c 0xf0 0x00 0x22 0x10 0x2e 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656 +.loop_nesting 0 + 4832 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 4836 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4838 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4840 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4842 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4844 0x00 0x00 NOPX +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0 + +.text_segment PM 4848 +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.function_start + 4848 0x18 0xd4 0xc0 0xf8 MOV r3, p2 + 4852 0x6b 0x91 0x60 0x03 0xb0 0xcb 0x00 0x02 MOVS dn3, p7; ADD.NC p7, r3, #44 + 4860 0x07 0x8c 0x1a 0x98 LDA.u16 r0, [p7], #-16 + 4864 0x00 0x00 NOPX + 4866 0x00 0x00 NOPX + 4868 0x00 0x00 NOPX + 4870 0x00 0x00 NOPX + 4872 0x00 0x00 NOPX + 4874 0x00 0x00 NOPX + 4876 0x00 0x09 0xf0 0x40 0x01 0x84 JNZ r0, #5088 +.delay_slot + 4882 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot + 4886 0x18 0xc2 0x72 0xf8 VBCST.32 x1, r16 +.delay_slot +.swstall delay_slot + 4890 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4892 0x00 0x00 NOPX +.delay_slot + 4894 0x00 0x20 0x00 0x00 0x01 0xc4 PADDXM [sp], #256 + 4900 0x1a 0x80 0x48 0xb8 MOV dj2, #36 + 4904 0x02 0x40 0x36 0x98 LDA r1, [p2, dj2] + 4908 0x00 0x00 NOPX + 4910 0x00 0x00 NOPX + 4912 0x00 0x00 NOPX + 4914 0x00 0x00 NOPX + 4916 0x00 0x00 NOPX + 4918 0x00 0x00 NOPX + 4920 0x14 0x04 0x19 0x98 GE r2, r16, r1 + 4924 0x10 0x09 0xf0 0x40 0x01 0x84 JNZ r2, #5088 +.delay_slot + 4930 0x1a 0x02 0x92 0xf8 VMOV bmll2, x1 +.delay_slot +.swstall delay_slot + 4934 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4940 0x00 0x00 NOPX + 4942 0x00 0x2c 0xf3 0x84 0x8b 0x00 0x00 0x04 0x79 0xe8 0x10 0x76 NOPA; MOVS p3, p1; MOVXM ls, #5072 + 4954 0x00 0x00 0x16 0xe7 0xa0 0x44 MOVXM le, #5072 + 4960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb8 0x50 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV + 4976 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 4992 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 5008 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 5024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 5040 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 5056 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224 +.loop_nesting 1 +.begin_of_loop +.end_of_loop + 5072 0x00 0x2c 0xf0 0x00 0x23 0x1d 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240 +.loop_nesting 0 + 5088 0x1c 0x00 0x40 0xb8 MOV m4, #32 + 5092 0x07 0x8a 0x3a 0x98 LDA.u16 r17, [p7], m4 + 5096 0xff 0xda 0x5a 0x1f 0x19 0x54 LDA.s16 r22, [p7], #-2; MOV m5, #-58 + 5102 0xf5 0x6b 0x51 0x00 0xb9 0x54 LDA.u16 r26, [p7], m5; MOV dj0, #46 + 5108 0xe0 0x52 0x59 0xbd 0x81 0xd4 LDA.s16 r20, [p7, dj0]; MOV r19, p7 + 5114 0xe0 0x4e 0x56 0xd3 0x38 0x14 LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 + 5120 0x03 0xde 0xb2 0x98 LDA.s16 r21, [p3], #-6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5124 0x03 0xff 0x9a 0x98 LDA.u16 r28, [p3], #-2 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5128 0x67 0xc6 0x50 0x1c 0x12 0x2c LDA.s16 r17, [p3], #6; MOVX r7, #2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5134 0x7e 0xca 0x50 0x3f 0x27 0xca 0x60 0x00 0x58 0xba LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5144 0xe0 0x1e 0x52 0x10 0x4b 0x23 0x29 0x6c 0xc8 0x01 0x58 0x76 LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5156 0x60 0xef 0x52 0x5a 0x0b 0x2c 0x73 0xec 0x48 0x3c 0x58 0x76 LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5168 0x08 0x0a 0x83 0x84 0x8b 0x29 0x43 0x6d 0x01 0xd0 0x78 0x76 MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5180 0x6a 0x12 0xb0 0x27 0x33 0x6e 0x85 0x10 0x78 0xba VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5190 0x15 0x41 0x30 0x2b 0x33 0x6f 0x04 0xd0 0x78 0xba VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5200 0x8c 0x4d 0xba 0xf2 0xfe 0x24 LSHL r17, r17, r6; ADD.NC lc, r18, #-2 + 5206 0x94 0x4d 0xb1 0x11 0x41 0xe4 LSHL r17, r18, r6; MOV dj0, r17 + 5212 0x19 0x01 0x30 0x10 0x4b 0x0e 0x63 0x6c 0x04 0xd0 0x78 0x76 VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 + 5224 0x0b 0x81 0x67 0x03 0x20 0xe4 0x14 0x30 0x3d 0x4a MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 + 5234 0x1d 0x21 0x34 0x5b 0x0b 0x02 0x44 0x50 0x72 0xba VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 + 5244 0x03 0x31 0x33 0x93 0x01 0xd4 VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5250 0x15 0x41 0x30 0x04 0x11 0x80 0x3d 0x62 VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5258 0x03 0x50 0x95 0x98 VLDA.2D bmll1, [p3], d2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5262 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5264 0x00 0x00 0x00 0x8f 0x4c 0x02 0x10 0x28 0x3d 0x5a MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5274 0x19 0x01 0x37 0x10 0x01 0xd4 VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5280 0x08 0x06 0x80 0x00 0x24 0x84 0x8b 0x00 0x44 0x08 0x82 0x00 0x78 0xa1 0x81 0xeb MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5296 0x1d 0x21 0x30 0x00 0x21 0x5a 0x0b 0x00 0x00 0x05 0xba 0x90 0x10 0x90 0x61 0xeb VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5312 0x00 0x19 0x89 0x98 VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5316 0x6a 0x12 0xb0 0x00 0x20 0x00 0xad 0x8e 0x11 0x80 0x3d 0x66 VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5328 0x15 0x41 0x30 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5344 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5360 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x81 0x41 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5376 0x19 0x01 0x30 0x00 0x24 0x31 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5392 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0xa1 0x81 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5408 0x1d 0x21 0x30 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x90 0x61 0xeb VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5424 0xec 0x07 0x50 0x00 0x00 0x0c 0xaf 0xc0 0x10 0xba LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5434 0x03 0x31 0x32 0x15 0x72 0xe2 0x11 0x80 0x3d 0x4a VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5444 0x18 0x41 0x72 0xf8 VBCST.16 x0, r16 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5448 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5450 0x10 0x28 0x3d 0x48 VADD.f dm0, dm1, dm2, r2 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5454 0x0c 0x31 0x06 0x98 VST.2D bmll2, [p4], d1 + 5458 0x00 0x00 NOPX + 5460 0x00 0x02 0x5f 0xf9 0x12 0x0c 0x3d 0x62 ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 + 5468 0x11 0x40 0x08 0x98 NE r0, r5, r0 + 5472 0x00 0x0c 0x70 0x40 0x01 0x84 JNZ r0, #6368 +.delay_slot +.swstall delay_slot + 5478 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5480 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5482 0x00 0x00 NOPX +.delay_slot + 5484 0x0c 0x31 0x06 0x98 VST.2D bmll2, [p4], d1 +.delay_slot +.swstall delay_slot + 5488 0x00 0x00 NOPX + 5490 0x46 0x9a 0xd0 0x14 0x1a 0x2c LDA r6, [p2, #12]; MOVX r5, #3 + 5496 0x00 0x00 NOPX + 5498 0x00 0x00 NOPX + 5500 0x00 0x00 NOPX + 5502 0x00 0x00 NOPX + 5504 0x00 0x00 NOPX + 5506 0x00 0x00 NOPX + 5508 0x11 0x4e 0x69 0x98 GE r7, r5, r6 + 5512 0x38 0x0e 0x40 0x40 0x01 0x84 JNZ r7, #7296 +.delay_slot + 5518 0x10 0x00 0x11 0x18 MOVX r0, #4 +.delay_slot +.swstall delay_slot + 5522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5524 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5526 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5528 0x00 0x00 NOPX + 5530 0x11 0x8a 0x08 0x98 NE r5, r6, r0 + 5534 0x28 0x0c 0xb8 0x40 0x01 0x84 JNZ r5, #6512 +.delay_slot +.swstall delay_slot + 5540 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5542 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5544 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5546 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5548 0x00 0x00 NOPX + 5550 0x24 0x40 0xa9 0x83 0xc1 0xe4 MOVX r17, #257; MOV dc4, lr + 5556 0x00 0x00 0xfa 0xbf 0xfe 0x44 MOVXM r21, #65535 + 5562 0x00 0x2c 0xf0 0x50 0x02 0x2c NOPA; MOVX r20, #0 +.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 5568 0x08 0x0a 0x82 0x83 0x0b 0x00 0x52 0x08 0x48 0x3c 0x58 0x76 MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 + 5580 0x48 0x1a 0x50 0x11 0x02 0x2c LDA.s16 r6, [p2, dj2]; MOVX r4, #32 + 5586 0x00 0x00 NOPX + 5588 0x00 0x00 NOPX + 5590 0x00 0x00 NOPX + 5592 0x00 0x00 NOPX + 5594 0x00 0x00 NOPX + 5596 0x00 0x01 0x67 0x98 NOPA + 5600 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x0c 0x52 0xf4 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV +.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 5616 0x04 0x8a 0x87 0xfd 0xa5 0x80 0x01 0xf3 0xb2 0x78 0x10 0x76 MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 + 5628 0xe0 0xdc 0x57 0xfa 0x65 0x80 0x50 0x08 0x8b 0x39 0x78 0x76 LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 + 5640 0x48 0x1a 0xd7 0x84 0x8b 0x3f 0x67 0xe8 0x02 0x49 0x78 0x76 LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 + 5652 0x03 0xf8 0x00 0x02 0xd2 0x01 0x02 0x49 0x78 0xba MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 + 5662 0x02 0x19 0x00 0x00 0x00 0x04 0x7b 0x40 0x10 0xba MOVA r25, #16; MOVXM ls, #5760 + 5672 0xff 0x94 0xb0 0x00 0x00 0x05 0xbc 0x60 0x10 0xba VLDA wl2, [sp, #-32]; MOVXM le, #6336 + 5682 0x10 0x74 0x01 0x18 MOVX r26, #64 + 5686 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5688 0x15 0xfa 0x80 0x18 MOVX crRnd, r23 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5692 0x08 0x02 0xc0 0x02 0xb9 0x80 0x00 0x02 VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5700 0x19 0xa0 0x92 0xf8 VMOV x3, x0 + 5704 0x02 0xa6 0x92 0xe6 0x10 0x40 0x83 0x62 VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 + 5712 0x1b 0x2a 0x92 0xf8 VMOV x6, x5 + 5716 0x00 0x00 NOPX + 5718 0x00 0x00 NOPX + 5720 0x00 0x00 NOPX + 5722 0x00 0x00 NOPX + 5724 0x09 0xc0 0x16 0x18 VCONV.bf16.fp32 wl3, bmll0 + 5728 0x00 0x00 NOPX + 5730 0x10 0x06 0x83 0x48 VMSC.f dm0, dm0, x3, x4, r2 + 5734 0x00 0x00 NOPX + 5736 0x00 0x00 NOPX + 5738 0x00 0x00 NOPX + 5740 0x00 0x00 NOPX + 5742 0x00 0x00 NOPX + 5744 0x00 0x2c 0xf0 0x00 0x22 0xc0 0x16 0x00 0x71 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912 +.loop_nesting 1 +.begin_of_loop + 5760 0x23 0xbe 0x89 0xa5 0x25 0xf4 VLDB x7, [p1], #64; VMOV bmhh4, x9 + 5766 0x1b 0xd6 0x92 0xf8 VMOV bmhh3, x11 + 5770 0x1f 0x1e 0xc0 0xf8 MOV r28, p7 + 5774 0x17 0x3b 0x84 0x98 AND r29, r28, r24 + 5778 0xee 0xc9 0x5e 0x3d 0xe0 0x24 LT r27, r29, r4; ADD.NC r28, r29, #-32 + 5784 0x15 0xbd 0xdd 0x98 LSHL r30, r22, r29 + 5788 0x16 0xbf 0xd1 0x98 SUB r31, r26, r29 + 5792 0x2f 0xbc 0x48 0x70 0xcd 0xa4 SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 + 5798 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8 + 5802 0x1c 0x4e 0x22 0xf8 VMOV wl8, wh7 + 5806 0x1d 0x4f 0x22 0xf8 VMOV wl10, wl7 + 5810 0x1c 0x90 0x92 0xf8 VMOV bmhl4, x8 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5814 0x1b 0x94 0x92 0xf8 VMOV bmhl3, x10 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5818 0x02 0x12 0x8a 0xe6 0x13 0x28 0x3d 0x62 VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5826 0x19 0x0e 0x8a 0xf8 VMOV cml1, cmh3 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5830 0x04 0x4e 0x22 0xe6 0x12 0x50 0x3d 0x62 VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5838 0x1a 0x0e 0x92 0xf8 VMOV bmll2, x7 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5842 0x1c 0xc0 0x66 0xd8 VSHIFT x9, x8, x0, r25 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5846 0x01 0x10 0x92 0xe6 0x14 0x30 0x3d 0x62 VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5854 0x1c 0x12 0x92 0xf8 VMOV bmll4, x9 + 5858 0x1c 0x2c 0x12 0xf8 VMOV x8, bmll3 + 5862 0x1c 0xd1 0x22 0xf8 VMOV wl9, wl8 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5866 0x1c 0x48 0x66 0xd8 VSHIFT x8, x9, x0, r25 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5870 0x01 0x10 0x92 0xe6 0x11 0x64 0x3d 0x62 VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5878 0x1b 0x12 0x92 0xf8 VMOV bmll3, x9 + 5882 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 5886 0x1d 0x40 0x1e 0xd8 VSHIFT x10, x8, x0, r7 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5890 0x04 0x30 0x12 0xe6 0x12 0x4c 0x3d 0x62 VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 5898 0x1b 0x14 0x92 0xf8 VMOV bmll3, x10 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5902 0x04 0x40 0x1e 0xc6 0x13 0x8c 0x3d 0x62 VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5910 0x1b 0x10 0x92 0xf8 VMOV bmll3, x8 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 5914 0x1c 0x24 0x12 0xf8 VMOV x8, bmll1 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 5918 0x04 0x40 0x1e 0xc6 0x11 0x30 0x3d 0x62 VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5926 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 5930 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 5934 0x04 0x40 0x02 0xc6 0x12 0x50 0x3d 0x62 VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5942 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8 + 5946 0x1c 0x2c 0x12 0xf8 VMOV x8, bmll3 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 5950 0x1c 0x40 0x02 0xd8 VSHIFT x8, x8, x0, r0 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5954 0x05 0x24 0x12 0xe6 0x13 0x70 0x3d 0x62 VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 +.aggressive_scheduled_block_id 10 +.nohwbrkpt +.noswbrkpt + 5962 0x1c 0x10 0x92 0xf8 VMOV bmll4, x8 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5966 0x05 0x50 0x02 0xc6 0x10 0x30 0x3d 0x62 VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5974 0x1c 0x14 0x92 0xf8 VMOV bmll4, x10 + 5978 0x1c 0x28 0x12 0xf8 VMOV x8, bmll2 + 5982 0x1d 0xe2 0x01 0xb8 VEXTRACT.32 r23, x8, #0, vaddSign0 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 5986 0x1d 0x2c 0x12 0xf8 VMOV x10, bmll3 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 5990 0xe2 0xd0 0x83 0x54 0x03 0x74 VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 5996 0x1d 0xa0 0x12 0xf8 VMOV x11, bmll0 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6000 0xe0 0xd4 0x8a 0xb4 0x06 0xb4 VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6006 0x1c 0xd4 0xa0 0x38 VSEL.32 x9, x10, x9, r20 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6010 0x1d 0x10 0xd1 0x78 VINSERT.32 x10, x2, #0, r6 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6014 0x1c 0x12 0xf1 0x78 VINSERT.32 x8, x2, #0, r23 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6018 0x1d 0xd3 0x22 0xf8 VMOV wl11, wl9 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6022 0x1d 0x93 0x22 0xf8 VMOV wh11, wl9 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6026 0x1c 0x15 0x22 0xf8 VMOV wh8, wl10 + 6030 0x1c 0x5c 0x00 0x38 VSEL.32 x8, x11, x8, r16 + 6034 0x1c 0x0c 0x08 0x38 VSEL.32 x8, x1, x8, r17 + 6038 0x1b 0xc3 0xa8 0x38 VSEL.32 x7, x8, x7, r21 + 6042 0x18 0x0e 0x92 0xf8 VMOV bmll0, x7 + 6046 0x1c 0xac 0x92 0xf8 VMOV x9, x6 + 6050 0x68 0x02 0xc0 0x01 0x07 0x49 0x70 0x02 VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 + 6058 0x1c 0x32 0x92 0xf8 VMOV x8, x9 + 6062 0x05 0xbb 0xcd 0xed 0xea 0x0f 0x12 0x4c 0x83 0x5a LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 6072 0x00 0x0b 0x3e 0x91 0x11 0xec 0xa1 0x62 SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 6080 0x05 0xa5 0xe2 0x33 0x09 0x2f 0x10 0xec 0x61 0x5a SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6090 0x13 0xec 0x01 0x48 VMUL.f dm3, x6, x0, r2 + 6094 0x00 0x00 NOPX + 6096 0x00 0x00 NOPX + 6098 0x0c 0xc1 0x16 0x18 VCONV.bf16.fp32 wl9, bmll2 + 6102 0x00 0x00 NOPX + 6104 0x12 0x52 0x83 0x48 VMSC.f dm2, dm2, x9, x4, r2 + 6108 0x00 0x00 NOPX + 6110 0x00 0x00 NOPX + 6112 0x00 0x00 NOPX + 6114 0x00 0x00 NOPX + 6116 0x00 0x00 NOPX + 6118 0x0c 0x41 0x16 0x18 VCONV.bf16.fp32 wl8, bmll2 + 6122 0x00 0x00 NOPX + 6124 0x14 0xf0 0xa1 0x48 VMUL.f dm4, x8, x5, r2 + 6128 0x12 0xf0 0x61 0x48 VMUL.f dm2, x8, x3, r2 + 6132 0x00 0x00 NOPX + 6134 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 6136 0x00 0x00 NOPX +.aggressive_scheduled_block_id 13 +.noswbrkpt + 6138 0x12 0xf2 0xa1 0x48 VMUL.f dm2, x9, x5, r2 +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6142 0x19 0x70 0x12 0xf8 VMOV lfl0, bmll4 +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6146 0x14 0x88 0x3d 0x48 VADD.f dm4, dm4, dm2, r2 +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6150 0x1c 0x05 0x92 0xf8 VMOV bmll4, lfl0 + 6154 0x00 0x00 NOPX + 6156 0x00 0x00 NOPX +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id first + 6158 0x00 0x00 NOPX +.aggressive_scheduled_block_id 14 +.noswbrkpt + 6160 0x12 0xe1 0x01 0x48 VMUL.f dm2, x0, x8, r2 +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6164 0x18 0x70 0x12 0xf8 VMOV lfh0, bmll4 +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6168 0x14 0x88 0x3d 0x48 VADD.f dm4, dm4, dm2, r2 +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6172 0x1c 0x01 0x92 0xf8 VMOV bmll4, lfh0 + 6176 0x00 0x00 NOPX + 6178 0x00 0x00 NOPX +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id first + 6180 0x00 0x00 NOPX +.aggressive_scheduled_block_id 15 +.noswbrkpt + 6182 0x11 0xf2 0x61 0x48 VMUL.f dm1, x9, x3, r2 +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6186 0x19 0x70 0x12 0xf8 VMOV lfl0, bmll4 +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6190 0x14 0x84 0x3d 0x48 VADD.f dm4, dm4, dm1, r2 +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6194 0x1c 0x05 0x92 0xf8 VMOV bmll4, lfl0 + 6198 0x00 0x00 NOPX + 6200 0x00 0x00 NOPX +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id first + 6202 0x00 0x00 NOPX +.aggressive_scheduled_block_id 16 +.noswbrkpt + 6204 0x11 0xf2 0x01 0x48 VMUL.f dm1, x9, x0, r2 +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6208 0x18 0x70 0x12 0xf8 VMOV lfh0, bmll4 +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6212 0x14 0x84 0x3d 0x48 VADD.f dm4, dm4, dm1, r2 +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6216 0x1c 0x01 0x92 0xf8 VMOV bmll4, lfh0 + 6220 0x00 0x00 NOPX + 6222 0x00 0x00 NOPX + 6224 0x00 0x00 NOPX + 6226 0x00 0x00 NOPX +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id first + 6228 0x1d 0x70 0x12 0xf8 VMOV lfl1, bmll4 +.aggressive_scheduled_block_id 17 +.noswbrkpt + 6232 0x12 0x88 0x3d 0x48 VADD.f dm2, dm4, dm2, r2 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6236 0x1c 0x15 0x92 0xf8 VMOV bmll4, lfl1 + 6240 0x00 0x00 NOPX + 6242 0x00 0x00 NOPX + 6244 0x00 0x00 NOPX + 6246 0x00 0x00 NOPX +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id first + 6248 0x1c 0x68 0x12 0xf8 VMOV lfh1, bmll2 +.aggressive_scheduled_block_id 18 +.noswbrkpt + 6252 0x12 0x44 0x3d 0x48 VADD.f dm2, dm2, dm1, r2 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 0x1a 0x11 0x92 0xf8 VMOV bmll2, lfh1 + 6260 0x00 0x00 NOPX + 6262 0x00 0x00 NOPX + 6264 0x00 0x00 NOPX + 6266 0x00 0x00 NOPX +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id first + 6268 0x1d 0x68 0x12 0xf8 VMOV lfl1, bmll2 +.aggressive_scheduled_block_id 19 +.noswbrkpt + 6272 0x10 0x20 0x3d 0x48 VADD.f dm0, dm1, dm0, r2 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6276 0x19 0x15 0x92 0xf8 VMOV bmll1, lfl1 + 6280 0x00 0x00 NOPX + 6282 0x00 0x00 NOPX + 6284 0x00 0x00 NOPX + 6286 0x00 0x00 NOPX +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id first + 6288 0x1c 0x60 0x12 0xf8 VMOV lfh1, bmll0 +.aggressive_scheduled_block_id 20 +.noswbrkpt + 6292 0x10 0x0c 0x3d 0x48 VADD.f dm0, dm0, dm3, r2 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6296 0x18 0x11 0x92 0xf8 VMOV bmll0, lfh1 + 6300 0x00 0x00 NOPX + 6302 0x00 0x00 NOPX + 6304 0x00 0x00 NOPX + 6306 0x00 0x00 NOPX + 6308 0x0d 0xc0 0x16 0x18 VCONV.bf16.fp32 wl11, bmll0 + 6312 0x00 0x00 NOPX + 6314 0x1d 0x85 0xfe 0xd8 VSHIFT x11, x0, x11, r31 + 6318 0x1d 0xd5 0xcc 0x38 VSEL.8 x11, x10, x11, r19:r18 + 6322 0x00 0x00 NOPX + 6324 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x8b 0x65 0x41 0x36 NOPA; NOPB; VST wh11, [p7, #32]; NOPX +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488 +.end_of_loop + 6336 0x00 0x2c 0xf0 0x00 0x27 0x8a 0xea 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV +.loop_nesting 0 + 6352 0x00 0x0c 0x78 0x00 0x00 0x84 J #6384 +.delay_slot +.swstall delay_slot + 6358 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6360 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6362 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6364 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6366 0x00 0x00 NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520 + 6368 0xff 0xb4 0xb0 0xb4 0x80 0x5c ST dn3, [sp, #-4]; MOVX vaddSign0, #1 + 6374 0x00 0x2c 0xf7 0xf8 0x3d 0x80 0x00 0x00 0x00 0x7a NOPA; ST lr, [sp, #-8]; NOPX +.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 6384 0x1f 0x61 0x91 0x18 ADD.NC p7, r3, #34 + 6388 0xe0 0x8f 0x5b 0x64 0xc1 0xd4 LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id first + 6394 0x18 0x7b 0x60 0xf8 MOV crSCDEn, crMCDEn +.aggressive_scheduled_block_id 21 +.noswbrkpt + 6398 0x07 0x04 0x77 0x18 ST.s16 r3, [p7] +.aggressive_scheduled_block_id 21 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6402 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6412 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6414 0x10 0xc6 0x07 0x18 ADD r3, r3, #1 +.delay_slot + 6418 0x00 0x2c 0xf0 0x00 0x20 0xc0 0xb0 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM +.return_address + 6432 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 6436 0x07 0xfc 0x99 0x18 LDA p1, [sp, #-4] + 6440 0x07 0x54 0x77 0x18 ST.s16 r3, [p7, #10] + 6444 0xff 0xe0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-256 + 6450 0x00 0x00 NOPX + 6452 0x00 0x00 NOPX + 6454 0x00 0x00 NOPX + 6456 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 6460 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.delay_slot +.swstall delay_slot + 6464 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6470 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 6480 0x1f 0xf4 0x00 0x00 0x02 0xb8 0x00 0x00 0x20 0xba MOVA r20, #255; J #5568 +.delay_slot + 6490 0x10 0x2a 0x01 0x18 MOVX r21, #0 +.delay_slot +.swstall delay_slot + 6494 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6500 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664 + 6512 0x10 0x0a 0x15 0x18 MOVX r5, #5 + 6516 0x11 0x4a 0x67 0x98 EQ r5, r5, r6 + 6520 0x28 0x0e 0x30 0x40 0x01 0x84 JNZ r5, #7264 +.delay_slot +.swstall delay_slot + 6526 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6530 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6534 0x00 0x00 NOPX + 6536 0x10 0x0e 0x19 0x18 MOVX r7, #6 + 6540 0x11 0xce 0x67 0x98 EQ r7, r7, r6 + 6544 0x38 0x0e 0xa8 0x40 0x01 0x84 JNZ r7, #7504 +.delay_slot + 6550 0x10 0x0a 0x41 0x18 MOVX r5, #16 +.delay_slot +.swstall delay_slot + 6554 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6556 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6558 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6560 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 6576 0x48 0x1e 0x50 0x01 0x10 0xea 0x60 0xf0 0x78 0xba LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr + 6586 0x89 0x8d 0x18 0xa4 0x05 0x64 NE r6, r17, r6; MOV r17, #257 + 6592 0x30 0x0e 0x20 0x40 0x01 0x84 JNZ r6, #7232 +.delay_slot +.swstall delay_slot + 6598 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6600 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6602 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6604 0x00 0x00 NOPX +.delay_slot + 6606 0x11 0xca 0x5e 0x98 ASHL r5, r7, r5 + 6610 0x04 0x8a 0x80 0x84 0x8b 0x00 0x00 0x04 0x7d 0x08 0x10 0x76 MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 + 6622 0x48 0x1e 0xd7 0xfd 0xa5 0x80 0x00 0x05 0xbd 0x38 0x10 0x76 LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 + 6634 0x00 0x1d 0x15 0x98 VLDA bmll2, [p0], #64 + 6638 0x00 0x00 NOPX + 6640 0x1c 0xc2 0x92 0xf8 VMOV bmhh4, x1 + 6644 0x00 0x00 NOPX + 6646 0x03 0x13 0x12 0xe6 0x11 0x68 0x3d 0x62 VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 + 6654 0x00 0x00 NOPX + 6656 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0xb9 0xff 0xc8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824 +.loop_nesting 1 +.begin_of_loop + 6672 0x03 0xa2 0xb0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV + 6688 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 6704 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 6720 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id first + 6736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x02 0x62 0x09 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV +.aggressive_scheduled_block_id 22 +.noswbrkpt + 6752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x8b 0x41 0xeb NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920 +.end_of_loop +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x89 0x89 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id first + 6784 0x02 0x10 0x00 0x00 0x01 0xf3 0xb2 0x78 0x10 0xba MOVA r16, #16; MOVXM p7, #509168 +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6794 0xe0 0x90 0x50 0x00 0x61 0x08 0x98 0x01 0x58 0xba LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6804 0x10 0x22 0x05 0x18 MOVX r17, #1 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6808 0x00 0x00 NOPX +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6810 0x1c 0xc4 0x12 0xf8 VMOV bmhh4, bmll1 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6814 0x00 0x00 NOPX +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6816 0x19 0x33 0x12 0xf8 VMOV x2, bmhh4 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6820 0x19 0x10 0x12 0xd8 VSHIFT x2, x2, x0, r4 +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6824 0x01 0x3a 0x80 0x00 0x49 0x2f 0x10 0x40 0x3d 0x5a MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6834 0x1a 0x13 0x12 0xf8 VMOV bmll2, bmhh4 + 6838 0x19 0x16 0x72 0xf8 VBCST.32 x2, r5 + 6842 0x19 0x04 0x92 0xf8 VMOV bmll1, x2 + 6846 0x00 0x00 NOPX + 6848 0x1a 0x04 0x12 0xf8 VMOV bmll2, bmll1 +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id first + 6852 0x19 0x20 0x12 0xf8 VMOV x2, bmll0 +.aggressive_scheduled_block_id 24 +.noswbrkpt + 6856 0x01 0x10 0x42 0xc6 0x10 0x0c 0x3d 0x62 VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6864 0x1b 0x04 0x92 0xf8 VMOV bmll3, x2 + 6868 0x19 0x20 0x92 0xf8 VMOV x2, x0 + 6872 0x00 0x00 NOPX + 6874 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1 + 6878 0x00 0x00 NOPX +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id first + 6880 0x01 0xa0 0x12 0xe6 0x14 0x40 0x83 0x62 VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 +.aggressive_scheduled_block_id 25 +.noswbrkpt + 6888 0x01 0x98 0x1a 0xc6 0x10 0x08 0x3d 0x62 VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6896 0x1a 0x06 0x92 0xf8 VMOV bmll2, x3 + 6900 0x19 0xa4 0x92 0xf8 VMOV x3, x2 + 6904 0x00 0x00 NOPX + 6906 0x00 0x00 NOPX + 6908 0x09 0x42 0x16 0x18 VCONV.bf16.fp32 wl2, bmll4 +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id first + 6912 0x1a 0xa0 0x12 0xf8 VMOV x5, bmll0 +.aggressive_scheduled_block_id 26 +.noswbrkpt + 6916 0x03 0x28 0x02 0xc6 0x10 0x08 0x3d 0x62 VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6924 0x1a 0x0c 0x92 0xf8 VMOV bmll2, x6 + 6928 0x1a 0xa6 0x92 0xf8 VMOV x5, x3 + 6932 0x00 0x00 NOPX + 6934 0x00 0x00 NOPX + 6936 0x00 0x00 NOPX + 6938 0x1b 0x20 0x12 0xf8 VMOV x6, bmll0 + 6942 0x18 0x1a 0x01 0xb8 VEXTRACT.32 r0, x6, #0, vaddSign0 + 6946 0x00 0x00 NOPX + 6948 0x1b 0x00 0x11 0x78 VINSERT.32 x6, x0, #0, r0 + 6952 0x18 0x8b 0x08 0x38 VSEL.32 x1, x1, x6, r17 + 6956 0x1a 0x02 0x92 0xf8 VMOV bmll2, x1 + 6960 0x18 0xaa 0x92 0xf8 VMOV x1, x5 + 6964 0x58 0x22 0xc0 0x01 0x91 0x49 0x70 0x02 VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 + 6972 0x00 0x00 NOPX + 6974 0x11 0x4a 0x83 0x48 VMSC.f dm1, dm2, x5, x4, r2 + 6978 0x10 0xea 0x41 0x48 VMUL.f dm0, x5, x2, r2 + 6982 0x00 0x00 NOPX + 6984 0x00 0x00 NOPX + 6986 0x00 0x00 NOPX + 6988 0x00 0x00 NOPX + 6990 0x08 0xc0 0x96 0x18 VCONV.bf16.fp32 wl1, bmll1 + 6994 0x14 0x84 0x83 0x48 VMSC.f dm4, dm4, x2, x4, r2 + 6998 0x13 0x22 0x83 0x48 VMSC.f dm3, dm1, x1, x4, r2 + 7002 0x00 0x00 NOPX + 7004 0x00 0x00 NOPX + 7006 0x00 0x00 NOPX + 7008 0x00 0x00 NOPX + 7010 0x09 0xc2 0x16 0x18 VCONV.bf16.fp32 wl3, bmll4 + 7014 0x0b 0x41 0x96 0x18 VCONV.bf16.fp32 wl6, bmll3 + 7018 0x00 0x00 NOPX + 7020 0x12 0xec 0x61 0x48 VMUL.f dm2, x6, x3, r2 + 7024 0x13 0xec 0x41 0x48 VMUL.f dm3, x6, x2, r2 + 7028 0x00 0x00 NOPX + 7030 0x00 0x00 NOPX +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id first + 7032 0x00 0x00 NOPX +.aggressive_scheduled_block_id 27 +.noswbrkpt + 7034 0x13 0xe2 0x61 0x48 VMUL.f dm3, x1, x3, r2 +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7038 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2 +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7042 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7046 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0 + 7050 0x00 0x00 NOPX + 7052 0x00 0x00 NOPX +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id first + 7054 0x00 0x00 NOPX +.aggressive_scheduled_block_id 28 +.noswbrkpt + 7056 0x13 0xea 0x61 0x48 VMUL.f dm3, x5, x3, r2 +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7060 0x18 0x68 0x12 0xf8 VMOV lfh0, bmll2 +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7064 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7068 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0 + 7072 0x00 0x00 NOPX + 7074 0x00 0x00 NOPX +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id first + 7076 0x00 0x00 NOPX +.aggressive_scheduled_block_id 29 +.noswbrkpt + 7078 0x13 0xe2 0x41 0x48 VMUL.f dm3, x1, x2, r2 +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7082 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2 +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7086 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7090 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0 + 7094 0x00 0x00 NOPX + 7096 0x00 0x00 NOPX + 7098 0x00 0x00 NOPX + 7100 0x00 0x00 NOPX +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id first + 7102 0x18 0x68 0x12 0xf8 VMOV lfh0, bmll2 +.aggressive_scheduled_block_id 30 +.noswbrkpt + 7106 0x12 0x4c 0x3d 0x48 VADD.f dm2, dm2, dm3, r2 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7110 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0 + 7114 0x00 0x00 NOPX + 7116 0x00 0x00 NOPX + 7118 0x13 0xe0 0xc1 0x48 VMUL.f dm3, x0, x6, r2 + 7122 0x00 0x00 NOPX +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id first + 7124 0x19 0x68 0x12 0xf8 VMOV lfl0, bmll2 +.aggressive_scheduled_block_id 31 +.noswbrkpt + 7128 0x13 0x4c 0x3d 0x48 VADD.f dm3, dm2, dm3, r2 +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7132 0x1a 0x05 0x92 0xf8 VMOV bmll2, lfl0 + 7136 0x00 0x00 NOPX + 7138 0x00 0x00 NOPX + 7140 0x11 0xe2 0x01 0x48 VMUL.f dm1, x1, x0, r2 + 7144 0x00 0x00 NOPX +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id first + 7146 0x18 0x6c 0x12 0xf8 VMOV lfh0, bmll3 +.aggressive_scheduled_block_id 32 +.noswbrkpt + 7150 0x11 0x44 0x3d 0x48 VADD.f dm1, dm2, dm1, r2 +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7154 0x1a 0x01 0x92 0xf8 VMOV bmll2, lfh0 + 7158 0x00 0x00 NOPX + 7160 0x00 0x00 NOPX + 7162 0x00 0x00 NOPX +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id first + 7164 0x00 0x00 NOPX +.aggressive_scheduled_block_id 33 +.noswbrkpt + 7166 0x00 0x24 0x12 0xe6 0x10 0x40 0x3d 0x62 VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7174 0x02 0x00 0x92 0xe6 0x14 0xea 0x01 0x62 VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 + 7182 0x00 0x00 NOPX + 7184 0x00 0x00 NOPX +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id first + 7186 0x00 0x00 NOPX +.aggressive_scheduled_block_id 34 +.noswbrkpt + 7188 0x10 0x50 0x3d 0x48 VADD.f dm0, dm2, dm4, r2 +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 0x1a 0x00 0x12 0xf8 VMOV bmll2, bmll0 + 7196 0x00 0x00 NOPX + 7198 0x00 0x0c 0x78 0x00 0x00 0x84 J #6384 +.delay_slot + 7204 0x0f 0xfa 0x65 0x98 ST dc4, [sp, #-8] +.delay_slot +.swstall delay_slot + 7208 0x00 0x00 NOPX +.delay_slot + 7210 0x1a 0x00 0x12 0xf8 VMOV bmll2, bmll0 +.delay_slot +.swstall delay_slot + 7214 0x00 0x00 NOPX +.delay_slot + 7216 0x00 0x2c 0xf0 0x00 0x21 0x05 0x12 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384 + 7232 0x00 0x14 0x00 0x00 0x02 0xbe 0x00 0x00 0x20 0xba MOVA r20, #0; J #5616 +.delay_slot + 7242 0x10 0x2a 0x01 0x18 MOVX r21, #0 +.delay_slot +.swstall delay_slot + 7246 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7248 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7250 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7252 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416 + 7264 0x00 0x0e 0x90 0x00 0x00 0x84 J #7456 +.delay_slot + 7270 0xff 0x93 0xb0 0x02 0x60 0xf0 0x70 0x02 ST p1, [sp, #-4]; MOV dc4, lr +.delay_slot +.swstall delay_slot + 7278 0x00 0x00 NOPX +.delay_slot + 7280 0x0f 0xf0 0x33 0x18 VST x0, [sp, #-256] +.delay_slot + 7284 0x0f 0xf5 0x33 0x18 VST x4, [sp, #-192] +.delay_slot + 7288 0xff 0x0e 0x60 0x00 0x01 0xa5 0x70 0x02 VST x1, [sp, #-128]; NOPM +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448 + 7296 0x10 0x22 0x05 0x18 MOVX r17, #1 + 7300 0x14 0x62 0x67 0x98 EQ r17, r17, r6 + 7304 0x88 0x0e 0x90 0x40 0x01 0x84 JNZ r17, #7456 +.delay_slot +.swstall delay_slot + 7310 0x00 0x00 NOPX +.delay_slot + 7312 0x0f 0xf0 0x33 0x18 VST x0, [sp, #-256] +.delay_slot + 7316 0x0f 0xf5 0x33 0x18 VST x4, [sp, #-192] +.delay_slot + 7320 0x0f 0xf8 0x73 0x18 VST x1, [sp, #-128] +.delay_slot + 7324 0xff 0x93 0xb0 0x00 0x70 0x4a 0x60 0xf0 0x79 0x3a ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr + 7334 0x11 0xce 0x67 0x98 EQ r7, r7, r6 + 7338 0x38 0x0e 0x80 0x40 0x01 0x84 JNZ r7, #7424 +.delay_slot +.swstall delay_slot + 7344 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7346 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7348 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7350 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7352 0x00 0x00 NOPX + 7354 0x11 0x4e 0x67 0x98 EQ r7, r5, r6 + 7358 0x38 0x0e 0x70 0x40 0x01 0x84 JNZ r7, #7392 +.delay_slot + 7364 0x10 0x0a 0x41 0x18 MOVX r5, #16 +.delay_slot +.swstall delay_slot + 7368 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7370 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7372 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7374 0x00 0x00 NOPX + 7376 0x00 0x0c 0xd8 0x00 0x00 0x84 J #6576 +.delay_slot +.swstall delay_slot + 7382 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7384 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7386 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7390 0x00 0x00 NOPX +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544 + 7392 0x20 0x31 0x00 0x00 0x02 0xb8 0x00 0x00 0x20 0xba MOVA r17, #257; J #5568 +.delay_slot + 7402 0x05 0x40 0x28 0x00 0x41 0x64 MOVX r21, #0; MOV m4, #16 +.delay_slot + 7408 0x10 0x28 0x01 0x18 MOVX r20, #0 +.delay_slot +.swstall delay_slot + 7412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7416 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576 + 7424 0x00 0x0c 0xa8 0x00 0x00 0x84 J #6480 +.delay_slot + 7430 0x00 0x00 0xf8 0xbf 0xfe 0x44 MOVXM r17, #65535 +.delay_slot + 7436 0x1c 0x00 0x20 0xb8 MOV m4, #16 +.delay_slot +.swstall delay_slot + 7440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7442 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7444 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + 7456 0xfe 0x07 0x70 0x00 0x02 0xb8 0x00 0x00 0x20 0xba VLDA x0, [sp, #-256]; J #5568 +.delay_slot + 7466 0xfe 0xa7 0x70 0x00 0x00 0x8a 0x88 0x00 0x58 0xba VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 +.delay_slot + 7476 0xff 0x93 0x20 0x00 0x00 0x3e 0x0f 0xff 0x90 0xba LDA p1, [sp, #-4]; MOVXM r16, #65535 +.delay_slot + 7486 0x05 0x40 0x28 0x00 0x81 0x64 MOVX r21, #0; MOV m4, #32 +.delay_slot + 7492 0x11 0x22 0x05 0x18 MOVX r17, #257 +.delay_slot + 7496 0xff 0x0f 0x70 0x04 0x00 0x00 0x1c 0x22 VLDA x1, [sp, #-128]; NOPV +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656 + 7504 0x00 0x0c 0xa8 0x00 0x00 0x84 J #6480 +.delay_slot + 7510 0x1c 0xc1 0xe0 0xf8 MOV dc4, lr +.delay_slot + 7514 0x00 0x00 0xf8 0xbf 0xfe 0x44 MOVXM r17, #65535 +.delay_slot + 7520 0x1c 0x00 0x20 0xb8 MOV m4, #16 +.delay_slot +.swstall delay_slot + 7524 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7526 0x00 0x00 NOPX +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0 + +.text_segment PM 7536 +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 7536 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7542 0xfd 0xf3 0xb0 0x00 0x01 0xf3 0xb2 0x60 0x11 0x3a ST p7, [sp, #-20]; MOVXM p7, #509120 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7552 0xe0 0xc2 0xd7 0xe7 0x1d 0x82 0x0d 0x70 0x72 0xba LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 0xff 0x2e 0xb0 0x21 0x04 0x81 0x68 0xf0 0x79 0x3a ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7572 0xfe 0x3e 0xb8 0x47 0xf6 0x5c ST r15, [sp, #-16]; ADD r17, r16, #-2 + 7578 0x0f 0xe9 0xb5 0x98 ST r13, [sp, #-24] + 7582 0x00 0x00 NOPX + 7584 0x00 0x00 NOPX + 7586 0x00 0x00 NOPX + 7588 0x80 0x0f 0xf0 0x40 0x01 0x84 JNZ r16, #8160 +.delay_slot + 7594 0x0f 0xfd 0x95 0x98 ST r12, [sp, #-4] +.delay_slot + 7598 0x0f 0xf5 0xd5 0x98 ST r14, [sp, #-12] +.delay_slot + 7602 0x0f 0xe0 0x1d 0x98 ST p0, [sp, #-32] +.delay_slot + 7606 0x00 0x07 0xcc 0xc9 0x90 0x44 MOVXM p6, #509128 +.delay_slot + 7612 0x0e 0x06 0x31 0x98 ST r17, [p6] + 7616 0x00 0x31 0x07 0x88 0x8b 0x00 0x01 0xf1 0x32 0x76 0x10 0x76 MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7628 0x40 0xc6 0x30 0x00 0x01 0xf1 0x32 0x78 0x11 0x3a ST r17, [p2]; MOVXM p2, #509168 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7638 0x40 0xc0 0xec 0xc5 0x81 0xd4 ST.s8 r16, [p2]; MOV p6, p1 +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7644 0x00 0x05 0x08 0x00 0x01 0x04 JL #2576 +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7650 0x00 0x07 0xc0 0xc8 0x80 0x44 MOVXM p0, #508992 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7658 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7660 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot +.swstall delay_slot + 7664 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.return_address + 7680 0x00 0x11 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r17, #0; MOVXM p2, #508992 + 7690 0x40 0xba 0xd0 0x00 0x01 0xf1 0x32 0x64 0x10 0xba LDA r14, [p2]; MOVXM p2, #509128 + 7700 0x40 0xca 0xd0 0x00 0x01 0xf1 0x32 0x22 0x10 0xba LDA r18, [p2]; MOVXM p2, #508996 + 7710 0x43 0xb6 0xd0 0x00 0x01 0xf1 0xb2 0x68 0x10 0xba LDA r13, [p2], #4; MOVXM p3, #509136 + 7720 0x42 0x85 0xd0 0x00 0x01 0xf0 0xb2 0x66 0x10 0xba LDA el0, [p2, #4]; MOVXM p1, #509132 + 7730 0x40 0xbe 0xd8 0x39 0x81 0xd4 LDA r15, [p2]; MOV r16, p6 + 7736 0x1a 0x68 0x14 0x18 ADD.NC p2, r16, #40 + 7740 0x00 0x07 0xcc 0xca 0x00 0x44 MOVXM p6, #509184 + 7746 0x00 0x07 0xc0 0xc9 0xd0 0x44 MOVXM p0, #509160 + 7752 0x13 0xa5 0x2f 0x98 MUL r18, r14, r18 + 7756 0x80 0x00 0x08 0x20 0x00 0x44 MOVXM r16, #-2147483648 + 7762 0x60 0x85 0x36 0xca 0x5f 0x5c ST el0, [p3]; MUL r18, r13, r18 + 7768 0x00 0x00 NOPX + 7770 0x13 0xe5 0x2f 0x98 MUL r18, r15, r18 + 7774 0x00 0x00 NOPX + 7776 0x09 0x06 0x51 0x98 ST r18, [p1] + 7780 0x02 0x4c 0x2e 0x98 LDA el0, [p2], #16 + 7784 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7788 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7792 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7796 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7800 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7804 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7808 0x08 0x04 0x29 0x98 ST el0, [p0] + 7812 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7816 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7820 0x0e 0x1e 0x31 0x98 ST r17, [p6], #4 + 7824 0x02 0xdc 0x36 0x98 LDA r1, [p2], #-12 + 7828 0x00 0x00 NOPX + 7830 0x00 0x00 NOPX + 7832 0x00 0x00 NOPX + 7834 0x00 0x00 NOPX + 7836 0x00 0x00 NOPX + 7838 0x00 0x00 NOPX + 7840 0x10 0x63 0x0b 0x98 GEU r17, r1, r16 + 7844 0x88 0x0f 0x78 0x40 0x01 0x84 JNZ r17, #7920 +.delay_slot + 7850 0x1b 0x1e 0xc0 0xf8 MOV r12, p7 +.delay_slot + 7854 0x0f 0xd9 0x1d 0x98 ST p2, [sp, #-40] +.delay_slot +.swstall delay_slot + 7858 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7860 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7862 0x00 0x00 NOPX +.no_stack_arguments + 7864 0x00 0x15 0x50 0x00 0x01 0x04 JL #10912 +.delay_slot + 7870 0x0f 0xdd 0x95 0x98 ST r12, [sp, #-36] +.delay_slot +.swstall delay_slot + 7874 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7876 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7878 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7880 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.return_address + 7888 0x00 0x0f 0x98 0x00 0x00 0x84 J #7984 +.delay_slot + 7894 0x00 0x07 0xce 0xc9 0xe0 0x44 MOVXM p7, #509168 +.delay_slot +.swstall delay_slot + 7900 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7902 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7904 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7906 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384 +.no_stack_arguments + 7920 0x00 0x15 0x50 0x00 0x01 0x04 JL #10912 +.delay_slot +.swstall delay_slot + 7926 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7928 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7930 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7932 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 7936 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x02 0x18 0x0c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV +.return_address +.no_stack_arguments + 7952 0x00 0x18 0x40 0x00 0x01 0x04 JL #12416 +.delay_slot + 7958 0x18 0x50 0x20 0xf8 MOV r1, r0 +.delay_slot + 7962 0x00 0x07 0xce 0xc9 0xe0 0x44 MOVXM p7, #509168 +.delay_slot + 7968 0x4f 0x00 0x01 0x20 0x00 0x44 MOVXM r2, #1325400064 +.delay_slot + 7974 0x0f 0xdd 0x95 0x98 ST r12, [sp, #-36] +.delay_slot +.swstall delay_slot + 7978 0x00 0x2c 0xf0 0x00 0x20 0x3c NOPA; NOPB +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.return_address + 7984 0xe0 0xc0 0x50 0x02 0xd2 0x00 0x00 0x08 0xb8 0xba LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 + 7994 0xfb 0x40 0x80 0x01 0x80 0x08 0x00 0x49 0x78 0xba MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 + 8004 0xfb 0x23 0x20 0x00 0x01 0xf1 0xb2 0x6a 0x10 0xba LDA p2, [sp, #-40]; MOVXM p3, #509140 + 8014 0x00 0x07 0xc2 0xc9 0xb0 0x44 MOVXM p1, #509144 + 8020 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 8026 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 8028 0x06 0x1e 0x17 0x18 ST.s16 r16, [p6], #2 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8032 0x14 0x3a 0x80 0x18 MOVX crRnd, r16 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8036 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8040 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8042 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0 + 8046 0x00 0x00 NOPX + 8048 0x00 0x00 NOPX + 8050 0x06 0x0b 0x07 0x18 ST.s8 r24, [p6], m0 + 8054 0x00 0x00 NOPX + 8056 0x00 0x00 NOPX + 8058 0x00 0x00 NOPX + 8060 0x00 0x00 NOPX + 8062 0x00 0x00 NOPX + 8064 0x00 0x00 NOPX + 8066 0x0e 0x1d 0xd1 0x98 ST r14, [p6], #4 + 8070 0x0e 0x05 0xf1 0x98 ST r15, [p6] + 8074 0x0e 0x15 0xb1 0x98 ST r13, [p6, #4] + 8078 0x02 0x1c 0x2e 0x98 LDA el0, [p2], #4 + 8082 0x00 0x00 NOPX + 8084 0x00 0x00 NOPX + 8086 0x00 0x00 NOPX + 8088 0x00 0x00 NOPX + 8090 0x00 0x00 NOPX + 8092 0x00 0x00 NOPX + 8094 0x0b 0x04 0x29 0x98 ST el0, [p3] + 8098 0x02 0x04 0x2e 0x98 LDA el0, [p2] + 8102 0x00 0x00 NOPX + 8104 0x00 0x00 NOPX + 8106 0x00 0x00 NOPX + 8108 0x00 0x00 NOPX + 8110 0x00 0x00 NOPX + 8112 0x00 0x00 NOPX + 8114 0x09 0x04 0x29 0x98 ST el0, [p1] + 8118 0x02 0x14 0x2e 0x98 LDA el0, [p2, #4] + 8122 0x00 0x00 NOPX + 8124 0x00 0x0f 0xf8 0x00 0x00 0x84 J #8176 +.delay_slot + 8130 0x00 0x07 0xc0 0xc9 0xb8 0x44 MOVXM p0, #509148 +.delay_slot +.swstall delay_slot + 8136 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8138 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8140 0x00 0x01 0x67 0x98 NOPA +.delay_slot + 8144 0x00 0x2c 0xf0 0x00 0x20 0x04 0x29 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624 + 8160 0xfb 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0x68 0x11 0x3a ST p2, [sp, #-36]; MOVXM p7, #509136 + 8170 0x00 0x2c 0xf6 0x29 0x81 0xd4 NOPA; MOV r12, p2 +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640 + 8176 0xe0 0xc2 0xd0 0x44 0x0a 0x2c LDA r16, [p7]; MOVX r17, #1 + 8182 0x00 0x00 NOPX + 8184 0x00 0x00 NOPX + 8186 0x00 0x00 NOPX + 8188 0x00 0x00 NOPX + 8190 0x00 0x00 NOPX + 8192 0x00 0x00 NOPX + 8194 0x14 0x63 0x08 0x98 NE r17, r17, r16 + 8198 0x88 0x10 0x58 0x40 0x01 0x84 JNZ r17, #8368 +.delay_slot + 8204 0x1e 0x66 0x06 0x18 ADD.NC p6, r12, #12 +.delay_slot +.swstall delay_slot + 8208 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8210 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8212 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8214 0x00 0x00 NOPX + 8216 0x00 0x07 0xc4 0xc9 0x88 0x44 MOVXM p2, #509124 + 8222 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p2]; MOVXM p2, #509024 + 8232 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 8236 0x00 0x00 NOPX + 8238 0x00 0x00 NOPX +.no_stack_arguments + 8240 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot + 8246 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 8250 0x00 0x00 NOPX +.delay_slot + 8252 0x14 0x36 0xda 0x98 LT r27, r16, r13 +.delay_slot + 8256 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 8262 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 8272 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 8278 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 8282 0x80 0x10 0x50 0x40 0x01 0x84 JNZ r16, #8352 +.delay_slot +.swstall delay_slot + 8288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8294 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8296 0x00 0x00 NOPX + 8298 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6 + 8304 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 8308 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 8312 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 8316 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.noswbrkpt + 8318 0x02 0x46 0x36 0x98 LDA r17, [p2, #16] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8322 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8324 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8326 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8328 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8330 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8334 0x0a 0x06 0x31 0x98 ST r17, [p2] + 8338 0x00 0x00 NOPX + 8340 0x00 0x00 NOPX + 8342 0x00 0x00 NOPX + 8344 0x00 0x00 NOPX + 8346 0x00 0x2c 0xf8 0xa6 0x10 0x2c NOPA; ACQ r17, r16 +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816 + 8352 0x00 0x00 NOPX + 8354 0x00 0x00 NOPX + 8356 0x00 0x00 NOPX + 8358 0xe0 0xc2 0xd0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba LDA r16, [p7]; NOPB; NOPM +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832 + 8368 0x10 0x1c 0x09 0x18 MOVX r14, #2 + 8372 0x00 0x00 NOPX + 8374 0x00 0x00 NOPX + 8376 0x00 0x00 NOPX + 8378 0x00 0x00 NOPX + 8380 0x00 0x00 NOPX + 8382 0x13 0xa1 0x08 0x98 NE r16, r14, r16 + 8386 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544 +.delay_slot +.swstall delay_slot + 8392 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8394 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8396 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8398 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8400 0x00 0x00 NOPX + 8402 0x00 0x07 0xc4 0xc9 0xc0 0x44 MOVXM p2, #509152 + 8408 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p2]; MOVXM p2, #509024 + 8418 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 8422 0x00 0x00 NOPX + 8424 0x00 0x00 NOPX +.no_stack_arguments + 8426 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot + 8432 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 8436 0x00 0x00 NOPX +.delay_slot + 8438 0x14 0x36 0xda 0x98 LT r27, r16, r13 +.delay_slot + 8442 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 8448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV +.return_address + 8464 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 8470 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 8474 0x80 0x10 0xb0 0x40 0x01 0x84 JNZ r16, #8544 +.delay_slot +.swstall delay_slot + 8480 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8482 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8484 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8486 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8488 0x00 0x00 NOPX + 8490 0xfc 0x1f 0xa4 0xd9 0x81 0xe4 MOVX r16, #-1; MOV p2, p6 + 8496 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 8500 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 8504 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 8508 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.noswbrkpt + 8510 0x02 0x46 0x36 0x98 LDA r17, [p2, #16] +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8514 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8516 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8518 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8520 0x00 0x00 NOPX +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8522 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8526 0x0a 0x06 0x31 0x98 ST r17, [p2] + 8530 0x00 0x00 NOPX + 8532 0x00 0x00 NOPX + 8534 0x00 0x00 NOPX + 8536 0x00 0x00 NOPX + 8538 0x00 0x2c 0xf8 0xa6 0x10 0x2c NOPA; ACQ r17, r16 +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008 + 8544 0x00 0x00 NOPX + 8546 0x00 0x00 NOPX + 8548 0x00 0x00 NOPX + 8550 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x72 0x10 0xba LDA r16, [p7]; MOVXM p7, #509156 + 8560 0x00 0x00 NOPX + 8562 0x00 0x00 NOPX + 8564 0x00 0x00 NOPX + 8566 0x00 0x00 NOPX + 8568 0x00 0x00 NOPX + 8570 0x10 0x24 0x11 0x18 MOVX r18, #4 + 8574 0x14 0xa1 0x08 0x98 NE r16, r18, r16 + 8578 0x80 0x11 0x20 0x40 0x01 0x84 JNZ r16, #8768 +.delay_slot + 8584 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024 +.delay_slot +.swstall delay_slot + 8590 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8592 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8594 0x00 0x00 NOPX +.delay_slot + 8596 0x10 0x22 0x01 0x18 MOVX r17, #0 + 8600 0xe0 0xc2 0xd0 0x34 0x02 0x2c LDA r16, [p7]; MOVX r13, #0 + 8606 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 8610 0x00 0x00 NOPX + 8612 0x00 0x00 NOPX +.no_stack_arguments + 8614 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot + 8620 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8622 0x00 0x00 NOPX +.delay_slot + 8624 0x14 0x37 0x1a 0x98 LT r27, r16, r17 +.delay_slot + 8628 0x8c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r17, r16; MOV r15, r27 +.delay_slot + 8634 0x00 0x2c 0xf8 0x02 0x24 0x2c NOPA; SEL.EQZ r0, r16, r17, r27 +.return_address + 8640 0x6c 0x06 0x3d 0xaf 0x41 0xe4 SUB r16, r13, r3; MOV r27, r15 + 8646 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 8650 0x80 0x11 0x10 0x40 0x01 0x84 JNZ r16, #8736 +.delay_slot +.swstall delay_slot + 8656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8658 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8660 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8662 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8664 0x00 0x00 NOPX + 8666 0xdf 0xee 0xd0 0x3f 0x17 0xea 0x08 0x01 0x58 0xba LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 + 8676 0x06 0xfe 0x56 0x98 LDA r18, [p6], #-4 + 8680 0x06 0xfe 0x76 0x98 LDA r19, [p6], #-4 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 8684 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.noswbrkpt + 8686 0x06 0x46 0x56 0x98 LDA r18, [p6, #16] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8690 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8692 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8694 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8696 0x00 0x00 NOPX +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8698 0x14 0xe5 0x22 0x18 SEL.EQZ r18, r19, r18, r27 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8702 0x0e 0x06 0x51 0x98 ST r18, [p6] + 8706 0x00 0x00 NOPX + 8708 0x00 0x00 NOPX + 8710 0x00 0x11 0x28 0x00 0x00 0x84 J #8784 +.delay_slot +.swstall delay_slot + 8716 0x00 0x00 NOPX +.delay_slot + 8718 0x14 0x93 0x18 0x18 ACQ r18, r17 +.delay_slot +.swstall delay_slot + 8722 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8724 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8726 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200 + 8736 0x00 0x11 0x28 0x00 0x00 0x84 J #8784 +.delay_slot + 8742 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot +.swstall delay_slot + 8746 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8748 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8750 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8752 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 + 8768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x28 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 + 8784 0xfc 0x73 0x20 0x00 0x01 0xf3 0x32 0x66 0x10 0xba LDA p7, [sp, #-32]; MOVXM p6, #509132 + 8794 0xc0 0xd6 0xd0 0x00 0x01 0xf1 0x32 0x68 0x10 0xba LDA r21, [p6]; MOVXM p2, #509136 + 8804 0x40 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p6, #509120 + 8814 0x06 0x06 0x96 0x98 LDA r20, [p6] + 8818 0x00 0x00 NOPX + 8820 0x00 0x00 NOPX + 8822 0x00 0x00 NOPX + 8824 0x07 0x06 0x76 0x98 LDA r19, [p7] + 8828 0x15 0x6b 0x0d 0x98 LSHL r21, r21, r16 + 8832 0x14 0x61 0x07 0x98 EQ r16, r17, r16 + 8836 0x80 0x12 0x08 0x40 0x01 0x84 JNZ r16, #9232 +.delay_slot + 8842 0x15 0x28 0x07 0x18 ADD r20, r20, #1 +.delay_slot + 8846 0x0e 0x06 0x91 0x98 ST r20, [p6] +.delay_slot +.swstall delay_slot + 8850 0x00 0x00 NOPX +.delay_slot + 8852 0x18 0x69 0xd5 0x58 ADD.NC p0, r19, r21 +.delay_slot + 8856 0xf7 0x83 0xb0 0x48 0x22 0x5c ST p0, [sp, #-68]; MOVX r18, #4 + 8862 0x14 0x61 0x27 0x98 EQ r16, r17, r18 + 8866 0x80 0x11 0xc0 0x40 0x01 0x84 JNZ r16, #9088 +.delay_slot +.swstall delay_slot + 8872 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8874 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8876 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8878 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8880 0x00 0x00 NOPX + 8882 0x14 0x60 0xe8 0x98 NE r16, r17, r14 + 8886 0x80 0x11 0xa8 0x40 0x01 0x84 JNZ r16, #9040 +.delay_slot + 8892 0x00 0x07 0xcc 0xc9 0xc0 0x44 MOVXM p6, #509152 +.delay_slot +.swstall delay_slot + 8898 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8900 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8902 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8904 0x00 0x00 NOPX + 8906 0xc0 0xca 0xd0 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r18, [p6]; MOVXM p6, #509000 + 8916 0xc0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6a 0x10 0xba LDA r16, [p6]; MOVXM p6, #509140 + 8926 0xc0 0xc6 0xd0 0x60 0x02 0x2c LDA r17, [p6]; MOVX r24, #0 + 8932 0x00 0x00 NOPX + 8934 0x00 0x00 NOPX + 8936 0x00 0x00 NOPX + 8938 0x00 0x00 NOPX + 8940 0x00 0x00 NOPX + 8942 0x14 0xa5 0x0f 0x98 MUL r18, r18, r16 + 8946 0x00 0x00 NOPX + 8948 0x8c 0xe4 0x3a 0x32 0x82 0xa4 SUB r19, r17, r18; ADD.NC r20, r18, r16 + 8954 0x15 0x37 0x1c 0x98 LTU r27, r20, r17 + 8958 0x14 0xe7 0x02 0x18 SEL.EQZ r19, r19, r16, r27 + 8962 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17 + 8966 0x16 0x23 0x32 0x18 SEL.EQZ r17, r24, r19, r27 + 8970 0x14 0x25 0x11 0x98 SUB r18, r16, r17 + 8974 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 8978 0x80 0x12 0x40 0x40 0x01 0x84 JNZ r16, #9344 +.delay_slot + 8984 0x00 0x07 0xcc 0xca 0x20 0x44 MOVXM p6, #509200 +.delay_slot + 8990 0x0e 0x06 0x51 0x98 ST r18, [p6] +.delay_slot +.swstall delay_slot + 8994 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8996 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8998 0x00 0x00 NOPX + 9000 0x00 0x11 0xf8 0x00 0x00 0x84 J #9200 +.delay_slot + 9006 0x00 0x4e 0x00 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba MOVA r14, #2; MOVXM p7, #509136 +.delay_slot + 9016 0x00 0x2f 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r15, #1; MOVXM p2, #508992 +.delay_slot + 9026 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 9030 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9032 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504 + 9040 0x00 0x11 0xf8 0x00 0x00 0x84 J #9200 +.delay_slot + 9046 0x00 0x4e 0x00 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba MOVA r14, #2; MOVXM p7, #509136 +.delay_slot + 9056 0x00 0x2f 0x00 0x00 0x01 0xf1 0x32 0x20 0x10 0xba MOVA r15, #1; MOVXM p2, #508992 +.delay_slot + 9066 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 9070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9072 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552 + 9088 0x00 0x0d 0x00 0x00 0x01 0xf3 0x32 0x72 0x10 0xba MOVA r13, #0; MOVXM p6, #509156 + 9098 0xc0 0xca 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r18, [p6]; MOVXM p2, #508992 + 9108 0x40 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6c 0x10 0xba LDA r16, [p2]; MOVXM p6, #509144 + 9118 0xc0 0xc6 0xd0 0x3c 0x0a 0x2c LDA r17, [p6]; MOVX r15, #1 + 9124 0x00 0x00 NOPX + 9126 0x00 0x00 NOPX + 9128 0x00 0x00 NOPX + 9130 0x00 0x00 NOPX + 9132 0x00 0x00 NOPX + 9134 0x14 0xa5 0x0f 0x98 MUL r18, r18, r16 + 9138 0x00 0x00 NOPX + 9140 0x8c 0xe4 0x3a 0x32 0x82 0xa4 SUB r19, r17, r18; ADD.NC r20, r18, r16 + 9146 0x15 0x37 0x1c 0x98 LTU r27, r20, r17 + 9150 0x14 0xe7 0x02 0x18 SEL.EQZ r19, r19, r16, r27 + 9154 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17 + 9158 0x13 0x63 0x32 0x18 SEL.EQZ r17, r13, r19, r27 + 9162 0x14 0x25 0x11 0x98 SUB r18, r16, r17 + 9166 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 9170 0x80 0x12 0x40 0x40 0x01 0x84 JNZ r16, #9344 +.delay_slot + 9176 0x00 0x07 0xcc 0xca 0x30 0x44 MOVXM p6, #509208 +.delay_slot + 9182 0x0e 0x06 0x51 0x98 ST r18, [p6] +.delay_slot +.swstall delay_slot + 9186 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9188 0x00 0x00 NOPX +.delay_slot + 9190 0x00 0x2c 0xf0 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba NOPA; MOVXM p7, #509136 +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 + 9200 0xd1 0x81 0x60 0x00 0x04 0x98 0x00 0x00 0x21 0x3a MOVS p6, r12; J #9408 +.delay_slot + 9210 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28] +.delay_slot +.swstall delay_slot + 9214 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9216 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9218 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9220 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696 + 9232 0x00 0x07 0xcc 0xc9 0x88 0x44 MOVXM p6, #509124 + 9238 0xc0 0xce 0xd0 0x00 0x01 0xf3 0x32 0x22 0x10 0xba LDA r19, [p6]; MOVXM p6, #508996 + 9248 0xc0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x6e 0x10 0xba LDA r16, [p6]; MOVXM p6, #509148 + 9258 0x06 0x06 0x56 0x98 LDA r18, [p6] + 9262 0x00 0x00 NOPX + 9264 0x00 0x00 NOPX + 9266 0x00 0x00 NOPX + 9268 0x00 0x00 NOPX + 9270 0x00 0x00 NOPX + 9272 0x14 0xe7 0x0f 0x98 MUL r19, r19, r16 + 9276 0x00 0x00 NOPX + 9278 0x95 0x26 0x3a 0xb3 0x82 0xa4 SUB r20, r18, r19; ADD.NC r21, r19, r16 + 9284 0x15 0x77 0x2c 0x98 LTU r27, r21, r18 + 9288 0x15 0x29 0x02 0x18 SEL.EQZ r20, r20, r16, r27 + 9292 0x9e 0xe5 0x98 0xa0 0x01 0x64 LTU r27, r19, r18; MOV r17, #0 + 9298 0x14 0x63 0x42 0x18 SEL.EQZ r17, r17, r20, r27 + 9302 0x14 0x25 0x11 0x98 SUB r18, r16, r17 + 9306 0x14 0x61 0x07 0x98 EQ r16, r17, r16 + 9310 0x80 0x13 0xe0 0x40 0x01 0x84 JNZ r16, #10176 +.delay_slot + 9316 0x00 0x07 0xcc 0xca 0x40 0x44 MOVXM p6, #509216 +.delay_slot + 9322 0x0e 0x06 0x51 0x98 ST r18, [p6] +.delay_slot +.swstall delay_slot + 9326 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9328 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 9330 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.aggressive_scheduled_block_id 7 +.noswbrkpt + 9344 0xf7 0x83 0x26 0x8c 0x0b 0x00 0xe0 0x49 0xe8 0x01 0x58 0x76 LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9356 0x07 0xbc 0x99 0x18 LDA p1, [sp, #-68] +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9360 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28] +.aggressive_scheduled_block_id 7 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9364 0x00 0x08 0x28 0x00 0x01 0x04 JL #4176 +.delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9370 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot + 9374 0x10 0x1a 0x01 0x18 MOVX r13, #0 +.delay_slot +.swstall delay_slot + 9378 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9380 0x00 0x00 NOPX +.delay_slot + 9382 0x00 0x2c 0xf0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba NOPA; MOVXM p2, #509184 +.return_address + 9392 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x20 0x11 0x3a MOVS p0, p7; MOVXM p2, #508992 + 9402 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136 +.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 9408 0x06 0x5c 0x9e 0x98 LDA p1, [p6], #20 +.no_stack_arguments + 9412 0x00 0x09 0x78 0x00 0x01 0x04 JL #4848 +.delay_slot +.swstall delay_slot + 9418 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9420 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9422 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9426 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.return_address + 9440 0x07 0x06 0x16 0x98 LDA r16, [p7] + 9444 0x00 0x00 NOPX + 9446 0x00 0x00 NOPX + 9448 0x00 0x00 NOPX + 9450 0x00 0x00 NOPX + 9452 0x00 0x00 NOPX + 9454 0x00 0x00 NOPX + 9456 0x13 0xe3 0x08 0x98 NE r17, r15, r16 + 9460 0x88 0x12 0xe0 0x40 0x01 0x84 JNZ r17, #9664 +.delay_slot +.swstall delay_slot + 9466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9474 0x00 0x00 NOPX + 9476 0x00 0x07 0xce 0xc9 0x88 0x44 MOVXM p7, #509124 + 9482 0xe0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p2, #509024 + 9492 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 9496 0x00 0x00 NOPX + 9498 0x00 0x00 NOPX + 9500 0x00 0x00 NOPX +.no_stack_arguments + 9502 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot + 9508 0x00 0x00 NOPX +.delay_slot + 9510 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 9514 0xe0 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p7]; LT r27, r16, r13 +.delay_slot + 9520 0x6c 0x60 0x37 0xbb 0x41 0xe4 SUB r17, r13, r16; MOV r15, r27 +.delay_slot + 9526 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 9536 0xfb 0xa3 0x20 0x1b 0x01 0x8f 0x6b 0xd0 0x78 0xba LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 + 9546 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 9550 0x80 0x12 0xd0 0x40 0x01 0x84 JNZ r16, #9632 +.delay_slot + 9556 0x10 0x1e 0x05 0x18 MOVX r15, #1 +.delay_slot +.swstall delay_slot + 9560 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9562 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9564 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9566 0x00 0x00 NOPX + 9568 0x4a 0xc2 0xde 0x0b 0x63 0x0c LDA r16, [p2, #20]; ST r13, [p7] + 9574 0x00 0x00 NOPX + 9576 0x00 0x00 NOPX + 9578 0x00 0x00 NOPX + 9580 0x00 0x00 NOPX + 9582 0x00 0x00 NOPX + 9584 0x00 0x00 NOPX + 9586 0x14 0x10 0xf8 0x18 REL r16, r15 + 9590 0xdc 0xc2 0xd0 0x00 0x01 0xf3 0xb2 0x68 0x10 0xba LDA r16, [p6, #-8]; MOVXM p7, #509136 + 9600 0x00 0x00 NOPX + 9602 0x00 0x00 NOPX + 9604 0x00 0x12 0xd8 0x00 0x00 0x84 J #9648 +.delay_slot +.swstall delay_slot + 9610 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9612 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9614 0x00 0x00 NOPX +.delay_slot + 9616 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 +.delay_slot + 9620 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x73 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p6, #-8]; NOPX +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096 + 9632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf3 0xb2 0x68 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112 + 9648 0xe0 0xc2 0xd0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128 + 9664 0x10 0x22 0x01 0x18 MOVX r17, #0 + 9668 0x00 0x00 NOPX + 9670 0x00 0x00 NOPX + 9672 0x00 0x00 NOPX + 9674 0x00 0x00 NOPX + 9676 0x00 0x00 NOPX + 9678 0x13 0xa1 0x08 0x98 NE r16, r14, r16 + 9682 0x80 0x13 0x48 0x40 0x01 0x84 JNZ r16, #9872 +.delay_slot + 9688 0x00 0x07 0xce 0xc9 0xc0 0x44 MOVXM p7, #509152 +.delay_slot + 9694 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024 +.delay_slot +.swstall delay_slot + 9700 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9702 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9704 0x00 0x00 NOPX + 9706 0x07 0x06 0x16 0x98 LDA r16, [p7] + 9710 0x02 0x04 0x3a 0x98 LDA.u16 r1, [p2] + 9714 0x00 0x00 NOPX + 9716 0x00 0x00 NOPX + 9718 0x00 0x00 NOPX +.no_stack_arguments + 9720 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot + 9726 0x00 0x00 NOPX +.delay_slot + 9728 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 9732 0xe0 0xc2 0x38 0x6e 0x35 0x5c ST r16, [p7]; LT r27, r16, r17 +.delay_slot + 9738 0x8c 0x60 0x37 0x3b 0x41 0xe4 SUB r17, r17, r16; MOV r14, r27 +.delay_slot + 9744 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x20 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV +.return_address + 9760 0xfb 0x93 0x20 0x1b 0x01 0x8f 0x6b 0x90 0x78 0xba LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 + 9770 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 9774 0x80 0x13 0x38 0x40 0x01 0x84 JNZ r16, #9840 +.delay_slot + 9780 0x00 0x07 0xc4 0xc9 0xa0 0x44 MOVXM p2, #509136 +.delay_slot +.swstall delay_slot + 9786 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9788 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9790 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9792 0x00 0x00 NOPX + 9794 0x2a 0xc2 0xde 0x0b 0x63 0x0c LDA r16, [p1, #20]; ST r13, [p7] + 9800 0x00 0x00 NOPX + 9802 0x00 0x00 NOPX + 9804 0x00 0x00 NOPX + 9806 0x00 0x00 NOPX + 9808 0x00 0x00 NOPX + 9810 0x00 0x00 NOPX + 9812 0x14 0x10 0xf8 0x18 REL r16, r15 + 9816 0x06 0xe6 0x16 0x98 LDA r16, [p6, #-8] + 9820 0x00 0x00 NOPX + 9822 0x00 0x00 NOPX + 9824 0x00 0x00 NOPX + 9826 0x00 0x00 NOPX + 9828 0x00 0x00 NOPX + 9830 0x00 0x00 NOPX + 9832 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 9836 0x0e 0xe6 0x11 0x98 ST r16, [p6, #-8] +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304 + 9840 0x00 0x13 0x50 0x00 0x00 0x84 J #9888 +.delay_slot + 9846 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.delay_slot +.swstall delay_slot + 9850 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9854 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9856 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336 + 9872 0xfb 0xf3 0x20 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0x32 0x68 0x10 0x00 0x00 0xe1 LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352 + 9888 0x40 0xc2 0xd0 0x44 0x22 0x2c LDA r16, [p2]; MOVX r17, #4 + 9894 0x00 0x00 NOPX + 9896 0x00 0x00 NOPX + 9898 0x00 0x00 NOPX + 9900 0x00 0x00 NOPX + 9902 0x00 0x00 NOPX + 9904 0x00 0x00 NOPX + 9906 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 9910 0x80 0x13 0xa8 0x40 0x01 0x84 JNZ r16, #10064 +.delay_slot + 9916 0x00 0x07 0xc4 0xc9 0xc8 0x44 MOVXM p2, #509156 +.delay_slot +.swstall delay_slot + 9922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9926 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9928 0x00 0x00 NOPX + 9930 0x40 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2]; MOVXM p1, #509024 + 9940 0x01 0x04 0x3a 0x98 LDA.u16 r1, [p1] + 9944 0x00 0x00 NOPX + 9946 0x00 0x00 NOPX + 9948 0x00 0x00 NOPX +.no_stack_arguments + 9950 0x00 0x13 0xf8 0x00 0x01 0x04 JL #10224 +.delay_slot +.swstall delay_slot + 9956 0x00 0x00 NOPX +.delay_slot + 9958 0x14 0x20 0x07 0x18 ADD r16, r16, #1 +.delay_slot + 9962 0x40 0xc2 0x38 0x6d 0xb5 0x5c ST r16, [p2]; LT r27, r16, r13 +.delay_slot + 9968 0x6c 0x60 0x37 0x3b 0x41 0xe4 SUB r17, r13, r16; MOV r14, r27 +.delay_slot + 9974 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x01 0x12 0x7a NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 +.return_address + 9984 0x6c 0x06 0x3d 0xae 0x41 0xe4 SUB r16, r13, r3; MOV r27, r14 + 9990 0x10 0xe1 0x02 0x18 SEL.EQZ r16, r3, r16, r27 + 9994 0x80 0x13 0xa8 0x40 0x01 0x84 JNZ r16, #10064 +.delay_slot + 10000 0x00 0x07 0xc4 0xc9 0xc8 0x44 MOVXM p2, #509156 +.delay_slot +.swstall delay_slot + 10006 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10008 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10010 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10012 0x00 0x00 NOPX + 10014 0xea 0xc2 0xd4 0x0b 0x63 0x0c LDA r16, [p7, #20]; ST r13, [p2] + 10020 0x00 0x00 NOPX + 10022 0x00 0x00 NOPX + 10024 0x00 0x00 NOPX + 10026 0x00 0x00 NOPX + 10028 0x00 0x00 NOPX + 10030 0x00 0x00 NOPX + 10032 0x14 0x10 0xf8 0x18 REL r16, r15 + 10036 0x06 0xe6 0x16 0x98 LDA r16, [p6, #-8] + 10040 0x00 0x00 NOPX + 10042 0x00 0x00 NOPX + 10044 0x00 0x00 NOPX + 10046 0x00 0x00 NOPX + 10048 0x00 0x00 NOPX + 10050 0x00 0x00 NOPX + 10052 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 10056 0xdc 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p6, #-8]; NOPM +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528 + 10064 0x00 0x07 0xcc 0xc9 0x80 0x44 MOVXM p6, #509120 + 10070 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x74 0x10 0xba LDA r16, [p6]; MOVXM p2, #509160 + 10080 0x02 0x06 0x36 0x98 LDA r17, [p2] + 10084 0x00 0x00 NOPX + 10086 0x00 0x00 NOPX + 10088 0x00 0x00 NOPX + 10090 0x00 0x00 NOPX + 10092 0x00 0x00 NOPX + 10094 0x00 0x00 NOPX + 10096 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 10100 0x80 0x13 0xc8 0x40 0x01 0x84 JNZ r16, #10128 +.delay_slot + 10106 0x07 0xef 0x99 0x18 LDA p7, [sp, #-20] +.delay_slot + 10110 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] +.delay_slot + 10114 0x07 0xf5 0xd1 0x18 LDA r14, [sp, #-12] +.delay_slot +.swstall delay_slot + 10118 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10120 0x00 0x00 NOPX + 10122 0x00 0x2c 0xfc 0x0b 0x63 0x0c NOPA; ST r13, [p6] +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 10128 0xff 0x2e 0x2e 0xeb 0x41 0xd4 LDA r11, [sp, #-8]; MOV lr, r11 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 10134 0x07 0xfd 0x91 0x18 LDA r12, [sp, #-4] +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10138 0x07 0xe9 0xb1 0x18 LDA r13, [sp, #-24] +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10142 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10146 0x1e 0x66 0x20 0xf8 MOV p6, r12 +.delay_slot + 10150 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 10156 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10158 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640 + 10176 0x00 0x0d 0x06 0x8c 0x0b 0x00 0x04 0x98 0x00 0x00 0x20 0x76 MOVA r13, #0; MOVS p6, r12; J #9408 +.delay_slot + 10188 0x03 0xc0 0xa7 0x20 0x09 0x64 MOVX r15, #1; MOV r14, #2 +.delay_slot + 10194 0x00 0x07 0xc4 0xc8 0x80 0x44 MOVXM p2, #508992 +.delay_slot + 10200 0x00 0x07 0xce 0xc9 0xa0 0x44 MOVXM p7, #509136 +.delay_slot + 10206 0x07 0xe5 0x91 0x18 LDA r12, [sp, #-28] +.delay_slot +.swstall delay_slot + 10210 0x00 0x00 NOPX +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 10224 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 10224 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 10230 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10234 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10238 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10242 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10246 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10250 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10254 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10258 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10262 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10266 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10270 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10274 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10278 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10282 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10286 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10290 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10294 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10298 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10302 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10306 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10310 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10314 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10318 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10322 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10326 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10330 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10334 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10338 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 10342 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10346 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 10350 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 10354 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 10358 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 10362 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.text_segment PM 10368 +.label _ZL19propagateFloat32NaNjj +.function_start + 10368 0xfd 0x43 0x00 0x3f 0xc0 0x02 0x48 0x00 0x10 0xba MOVA r3, #-22; MOVXM r18, #-16777216 + 10378 0x3f 0xe7 0x00 0x00 0x10 0x00 0x08 0x00 0x10 0xba MOVA r7, #511; MOVXM r0, #4194304 + 10388 0x00 0x30 0x00 0x02 0x40 0x2c 0xa9 0xfe 0x58 0xba MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 + 10398 0x10 0x80 0x05 0x98 OR r0, r2, r0 + 10402 0x10 0x4c 0x3d 0x98 LSHL r6, r1, r3 + 10406 0x10 0x86 0x3d 0x98 LSHL r3, r2, r3 + 10410 0x11 0xc6 0x34 0x98 AND r3, r7, r3 + 10414 0x11 0xcc 0x64 0x98 AND r6, r7, r6 + 10418 0x11 0x4c 0x67 0x98 EQ r6, r5, r6 + 10422 0x10 0xa3 0x0d 0x98 LSHL r17, r2, r16 + 10426 0x14 0xb7 0x1c 0x98 LTU r27, r18, r17 + 10430 0x11 0x22 0x02 0x18 SEL.EQZ r17, r4, r0, r27 + 10434 0x00 0x3f 0xf8 0x3f 0xfe 0x44 MOVXM r16, #4194303 + 10440 0x10 0x85 0x04 0x98 AND r2, r2, r16 + 10444 0x10 0x84 0xf0 0x18 NEZ r2, r2 + 10448 0x10 0x43 0x04 0x98 AND r1, r1, r16 + 10452 0x10 0x42 0xf0 0x18 NEZ r1, r1 + 10456 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10460 0x10 0x76 0x64 0x98 AND r27, r1, r6 +.delay_slot + 10464 0x10 0xc2 0x57 0x98 EQ r1, r3, r5 +.delay_slot + 10468 0x14 0x46 0x42 0x18 SEL.EQZ r3, r17, r4, r27 +.delay_slot + 10472 0x10 0x76 0x24 0x98 AND r27, r1, r2 +.delay_slot + 10476 0x10 0xc0 0x02 0x18 SEL.EQZ r0, r3, r0, r27 +.label _ZL19propagateFloat32NaNjj__end +.label _ZL19roundAndPackFloat32iij +.function_start + 10480 0x08 0x00 0x00 0x00 0x01 0xf0 0x32 0x7a 0x10 0xba MOVA r0, #64; MOVXM p0, #509172 + 10490 0x00 0x92 0xd0 0x99 0xfa 0x2c LDA r4, [p0]; MOVX r6, #127 +.swstall __RAW__R_1948 + 10496 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10498 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10500 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10502 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10504 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 10506 0x00 0x00 NOPX + 10508 0x20 0x14 0xa8 0x00 0x01 0x84 JZ r4, #10576 +.delay_slot + 10514 0x10 0x4a 0x01 0x18 MOVX r5, #64 +.delay_slot +.swstall delay_slot + 10518 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10520 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10522 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10524 0x00 0x00 NOPX + 10526 0x00 0x70 0x00 0x00 0x70 0x4b 0x08 0x00 0x58 0xba MOVA r16, #3; MOVX r7, #2; MOV r24, #0 + 10536 0x3e 0xc8 0xf2 0xa0 0x05 0x64 EQ r27, r7, r4; MOV r5, #1 + 10542 0x11 0x8f 0x82 0x18 SEL.EQZ r7, r6, r24, r27 + 10546 0x11 0x37 0x07 0x98 EQ r27, r4, r16 + 10550 0x34 0x30 0x4d 0xa1 0x41 0xe4 SEL.EQZ r16, r6, r24, r27; MOV r27, r1 + 10556 0x14 0x0e 0x72 0x18 SEL.EQZ r7, r16, r7, r27 + 10560 0x11 0x76 0x47 0x98 EQ r27, r5, r4 + 10564 0x00 0x2c 0xf0 0x00 0x20 0x0e 0x5c 0x10 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 +.label TGT_F_ZL19roundAndPackFloat32iij_96 + 10576 0x14 0x96 0x08 0x23 0xf5 0x64 EXTEND.u16 r18, r2; MOV r16, #253 + 10582 0x14 0xa5 0x0a 0x98 LT r18, r18, r16 + 10586 0x90 0x15 0x08 0x40 0x01 0x84 JNZ r18, #10768 +.delay_slot + 10592 0x10 0xe2 0x64 0x98 AND r17, r3, r6 +.delay_slot + 10596 0x10 0x0e 0x7d 0x18 MOVX r7, #31 +.delay_slot + 10600 0x10 0x42 0x7d 0x98 LSHL r1, r1, r7 +.delay_slot +.swstall delay_slot + 10604 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10606 0x00 0x00 NOPX + 10608 0x00 0x12 0x00 0x05 0x38 0x3e 0x88 0xca 0xa8 0xba MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 + 10618 0x15 0x29 0x2a 0x98 LT r20, r20, r18 + 10622 0x14 0x20 0x2a 0x98 LT r16, r16, r2 + 10626 0x14 0xe7 0x44 0x98 AND r19, r19, r20 + 10630 0x14 0xe7 0x05 0x98 OR r19, r19, r16 + 10634 0x98 0x15 0x30 0x40 0x01 0x84 JNZ r19, #10848 +.delay_slot + 10640 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.swstall delay_slot + 10644 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10646 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10648 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10650 0x00 0x00 NOPX + 10652 0x10 0xa7 0x09 0x98 GE r19, r2, r16 + 10656 0x98 0x15 0x10 0x40 0x01 0x84 JNZ r19, #10784 +.delay_slot +.swstall delay_slot + 10662 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10664 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10666 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10668 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10670 0x00 0x00 NOPX + 10672 0x14 0x04 0x21 0x98 SUB r2, r16, r2 + 10676 0x10 0x14 0xf8 0x00 0x01 0x84 JZ r2, #10736 +.delay_slot +.swstall delay_slot + 10682 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10684 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10686 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10688 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10690 0x00 0x00 NOPX + 10692 0x84 0x44 0x39 0xa0 0x81 0x64 SUB r17, r16, r2; MOV r19, #32 + 10698 0x11 0xcf 0x14 0x98 AND r7, r7, r17 + 10702 0x10 0xce 0x7d 0x98 LSHL r7, r3, r7 + 10706 0x10 0xe3 0x1d 0x98 LSHL r17, r3, r17 + 10710 0x10 0xb7 0x3a 0x98 LT r27, r2, r19 + 10714 0x11 0xce 0xf0 0x18 NEZ r7, r7 + 10718 0x10 0xc6 0xf0 0x18 NEZ r3, r3 + 10722 0x11 0xc5 0x15 0x98 OR r2, r7, r17 + 10726 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0xc6 0x22 0x7a NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 +.label TGT_F_ZL19roundAndPackFloat32iij_256 + 10736 0x00 0x15 0x10 0x00 0x00 0x84 J #10784 +.delay_slot + 10742 0x10 0xe2 0x64 0x98 AND r17, r3, r6 +.delay_slot + 10746 0x10 0x04 0x01 0x18 MOVX r2, #0 +.delay_slot +.swstall delay_slot + 10750 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10754 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZL19roundAndPackFloat32iij_288 + 10768 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x01 0x00 0x08 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV +.label TGT_F_ZL19roundAndPackFloat32iij_304 + 10784 0xff 0x20 0x00 0x22 0x30 0x34 0xa8 0xca 0xa8 0xba MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 + 10794 0x02 0xe3 0x00 0x06 0x62 0x2c 0x8f 0xff 0x58 0xba MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 + 10804 0x11 0x8c 0xd0 0x18 EQZ r6, r6 + 10808 0x11 0x40 0x0d 0x98 LSHL r0, r5, r0 + 10812 0x11 0x88 0x46 0x98 XOR r4, r6, r4 + 10816 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10820 0x11 0x36 0x04 0x98 AND r27, r4, r0 +.delay_slot + 10824 0x14 0x04 0x22 0x18 SEL.EQZ r2, r16, r2, r27 +.delay_slot + 10828 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3 +.delay_slot + 10832 0x10 0x44 0x20 0x98 ADD r2, r1, r2 +.delay_slot + 10836 0x00 0x2c 0xf0 0x00 0x20 0x36 0x01 0x04 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; ADD r0, r27, r2 +.label TGT_F_ZL19roundAndPackFloat32iij_368 + 10848 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10852 0x7f 0x80 0x01 0x20 0x00 0x44 MOVXM r2, #2139095040 +.delay_slot + 10858 0x10 0x46 0x20 0x98 ADD r3, r1, r2 +.delay_slot + 10862 0x11 0x44 0xd0 0x18 EQZ r2, r5 +.delay_slot + 10866 0x10 0xc0 0x21 0x98 SUB r0, r3, r2 +.delay_slot +.swstall delay_slot + 10870 0x00 0x00 NOPX +.label _ZL19roundAndPackFloat32iij__end + +.text_segment PM 10880 +.label _ZL28normalizeRoundAndPackFloat32iij +.tail_call +.function_start + 10880 0x00 0x14 0x78 0x00 0x00 0x84 J #10480 +.delay_slot + 10886 0x10 0xe0 0x30 0x18 CLZ r16, r3 +.delay_slot + 10890 0x14 0x21 0xff 0x18 ADD r16, r16, #-1 +.delay_slot + 10894 0x10 0x85 0x01 0x98 SUB r2, r2, r16 +.delay_slot + 10898 0x10 0xc7 0x0d 0x98 LSHL r3, r3, r16 +.delay_slot +.swstall delay_slot + 10902 0x00 0x00 NOPX +.label _ZL28normalizeRoundAndPackFloat32iij__end + +.text_segment PM 10912 +.label int32_to_float32 +.function_start + 10912 0x08 0x15 0x78 0x00 0x01 0x84 JZ r1, #10992 +.delay_slot +.swstall delay_slot + 10918 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10920 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10926 0x00 0x00 NOPX + 10928 0x80 0x00 0x08 0x20 0x00 0x44 MOVXM r16, #-2147483648 + 10934 0x10 0x61 0x07 0x98 EQ r16, r1, r16 + 10938 0x80 0x15 0x80 0x40 0x01 0x84 JNZ r16, #11008 +.delay_slot +.swstall delay_slot + 10944 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10946 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10948 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10950 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10952 0x00 0x00 NOPX +.tail_call + 10954 0x13 0x82 0x00 0x00 0x05 0x50 0x00 0x00 0x20 0xba MOVA r2, #156; J #10880 +.delay_slot + 10964 0x10 0x47 0x10 0x18 ABS r3, r1 +.delay_slot + 10968 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot + 10972 0x10 0x43 0x0a 0x98 LT r1, r1, r16 +.delay_slot +.swstall delay_slot + 10976 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10978 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_Fint32_to_float32_80 +.return_address + 10992 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10996 0x10 0x00 0x01 0x18 MOVX r0, #0 +.delay_slot +.swstall delay_slot + 11000 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11002 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11004 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11006 0x00 0x00 NOPX +.label TGT_Fint32_to_float32_96 + 11008 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11012 0xcf 0x00 0x00 0x20 0x00 0x44 MOVXM r0, #-822083584 +.delay_slot +.swstall delay_slot + 11018 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11024 0x00 0x00 NOPX +.label int32_to_float32__end + +.text_segment PM 11040 +.label _ZL14addFloat32Sigsjji +.function_start + 11040 0xfd 0x32 0x00 0x00 0x1f 0xfe 0x0f 0xff 0x90 0xba MOVA r18, #-23; MOVXM r16, #8388607 + 11050 0x10 0x63 0x2d 0x98 LSHL r17, r1, r18 + 11054 0x10 0x89 0x2d 0x98 LSHL r4, r2, r18 + 11058 0x14 0x76 0x90 0x18 EXTEND.u8 r27, r17 + 11062 0x11 0x32 0x90 0x18 EXTEND.u8 r25, r4 + 11066 0xdc 0x72 0x3c 0x20 0x01 0x64 SUB r17, r27, r25; MOV r24, #0 + 11072 0x16 0x09 0x1a 0x98 LT r4, r24, r17 + 11076 0x20 0x15 0xf8 0x40 0x01 0x84 JNZ r4, #11248 +.delay_slot + 11082 0x10 0x67 0x04 0x98 AND r19, r1, r16 +.delay_slot + 11086 0x14 0x20 0x90 0x20 0x19 0x64 AND r16, r2, r16; MOV r0, #6 +.delay_slot + 11092 0x14 0xe6 0x0d 0x98 LSHL r19, r19, r0 +.delay_slot + 11096 0x84 0x01 0xba 0x23 0xfd 0x64 LSHL r16, r16, r0; MOV r20, #255 +.delay_slot + 11102 0xd8 0x28 0xf9 0x20 0x7d 0x64 EQ r0, r27, r20; MOV r18, #31 + 11108 0x14 0x4b 0x89 0x98 GE r5, r17, r24 + 11112 0x28 0x16 0x58 0x40 0x01 0x84 JNZ r5, #11440 +.delay_slot + 11118 0x10 0xc9 0x2d 0x98 LSHL r4, r3, r18 +.delay_slot +.swstall delay_slot + 11122 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11126 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11128 0x00 0x00 NOPX + 11130 0x16 0x69 0x47 0x98 EQ r20, r25, r20 + 11134 0xa0 0x16 0x40 0x40 0x01 0x84 JNZ r20, #11392 +.delay_slot +.swstall delay_slot + 11140 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11142 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11144 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11146 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11148 0x00 0x00 NOPX + 11150 0x18 0x51 0xa0 0xf8 MOV r1, r3 + 11154 0x18 0x9c 0xa0 0xf8 MOV r2, r25 + 11158 0x14 0x40 0x07 0x18 ADD r0, r17, #1 + 11162 0x10 0x23 0x12 0x18 SEL.EQZ r17, r0, r17, r27 + 11166 0x16 0x23 0x11 0x98 SUB r17, r24, r17 + 11170 0x88 0x16 0x28 0x00 0x01 0x84 JZ r17, #11344 +.delay_slot + 11176 0x20 0x00 0x0a 0x20 0x00 0x44 MOVXM r20, #536870912 +.delay_slot + 11182 0x14 0xc7 0x45 0x98 OR r3, r19, r20 +.delay_slot + 11186 0x14 0xe6 0x32 0x18 SEL.EQZ r19, r19, r3, r27 +.delay_slot +.swstall delay_slot + 11190 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11192 0x00 0x00 NOPX + 11194 0xc0 0xe2 0x30 0x20 0x81 0x64 SUB r3, r24, r17; MOV r0, #32 + 11200 0x10 0xe5 0x24 0x98 AND r18, r3, r18 + 11204 0x14 0xe5 0x2d 0x98 LSHL r18, r19, r18 + 11208 0x14 0x76 0x0a 0x98 LT r27, r17, r0 + 11212 0x00 0x16 0x28 0x00 0x00 0x84 J #11344 +.delay_slot + 11218 0x14 0xc6 0x3d 0x98 LSHL r3, r19, r3 +.delay_slot + 11222 0x14 0xa4 0xf0 0x18 NEZ r18, r18 +.delay_slot + 11226 0x14 0xe2 0xf0 0x18 NEZ r17, r19 +.delay_slot + 11230 0x10 0xe5 0x25 0x98 OR r18, r3, r18 +.delay_slot + 11234 0x00 0x2c 0xf0 0x00 0x24 0x67 0x22 0x00 0x34 0xaf 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM +.label TGT_F_ZL14addFloat32Sigsjji_208 + 11248 0x00 0x16 0x78 0x40 0x01 0x84 JNZ r0, #11504 +.delay_slot + 11254 0x20 0x00 0x0a 0x20 0x00 0x44 MOVXM r20, #536870912 +.delay_slot +.swstall delay_slot + 11260 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11262 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11264 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11266 0x00 0x00 NOPX + 11268 0x18 0x51 0xa0 0xf8 MOV r1, r3 + 11272 0x88 0xff 0xe1 0x3b 0x41 0xe4 ADD r3, r17, #-1; MOV r2, r27 + 11278 0x1e 0xdc 0xa0 0xf8 MOV r27, r25 + 11282 0x10 0xe3 0x12 0x18 SEL.EQZ r17, r3, r17, r27 + 11286 0x88 0x16 0x28 0x00 0x01 0x84 JZ r17, #11344 +.delay_slot + 11292 0x15 0x01 0x05 0x98 OR r0, r20, r16 +.delay_slot + 11296 0x14 0x20 0x02 0x18 SEL.EQZ r16, r16, r0, r27 +.delay_slot +.swstall delay_slot + 11300 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11302 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11304 0x00 0x00 NOPX + 11306 0xc0 0xe2 0x30 0x20 0x81 0x64 SUB r3, r24, r17; MOV r0, #32 + 11312 0x10 0xe5 0x24 0x98 AND r18, r3, r18 + 11316 0x14 0x25 0x2d 0x98 LSHL r18, r16, r18 + 11320 0x14 0x06 0x3d 0x98 LSHL r3, r16, r3 + 11324 0x14 0x76 0x0a 0x98 LT r27, r17, r0 + 11328 0x14 0xa4 0xf0 0x18 NEZ r18, r18 + 11332 0x14 0x20 0xf0 0x18 NEZ r16, r16 + 11336 0x10 0xe3 0x25 0x98 OR r17, r3, r18 + 11340 0x14 0x21 0x12 0x18 SEL.EQZ r16, r16, r17, r27 +.label TGT_F_ZL14addFloat32Sigsjji_304 + 11344 0x00 0x32 0x00 0x27 0x3a 0x2e 0x28 0xbf 0xc8 0xba MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 + 11354 0x9c 0xe0 0x18 0x31 0x01 0x24 ADD r19, r19, r16; ADD.NC r16, r17, #1 + 11360 0x14 0xe5 0x2d 0x98 LSHL r18, r19, r18 + 11364 0x14 0xb7 0x8a 0x98 LT r27, r18, r24 + 11368 0x14 0x45 0x02 0x18 SEL.EQZ r2, r17, r16, r27 + 11372 0x14 0x87 0x32 0x18 SEL.EQZ r3, r18, r19, r27 +.label __ll1__ZL14addFloat32Sigsjji +.tail_call + 11376 0x00 0x14 0x78 0x00 0x00 0x84 J #10480 +.delay_slot +.swstall delay_slot + 11382 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11384 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11386 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11390 0x00 0x00 NOPX +.label TGT_F_ZL14addFloat32Sigsjji_352 +.return_address + 11392 0x80 0x16 0x88 0x40 0x01 0x84 JNZ r16, #11536 +.delay_slot +.swstall delay_slot + 11398 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11404 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11406 0x00 0x00 NOPX + 11408 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11412 0x7f 0x80 0x08 0x20 0x00 0x44 MOVXM r16, #2139095040 +.delay_slot + 11418 0x11 0x01 0x00 0x98 ADD r0, r4, r16 +.delay_slot +.swstall delay_slot + 11422 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11424 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11426 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZL14addFloat32Sigsjji_400 + 11440 0x00 0x16 0x90 0x40 0x01 0x84 JNZ r0, #11552 +.delay_slot +.swstall delay_slot + 11446 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11448 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11450 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11452 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11454 0x00 0x00 NOPX + 11456 0xd8 0x16 0xa8 0x00 0x01 0x84 JZ r27, #11600 +.delay_slot +.swstall delay_slot + 11462 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11464 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11468 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11470 0x00 0x00 NOPX + 11472 0x18 0x51 0xa0 0xf8 MOV r1, r3 + 11476 0x00 0x16 0x38 0x00 0x00 0x84 J #11376 +.delay_slot + 11482 0x40 0x00 0x08 0xa0 0x00 0x44 MOVXM r17, #1073741824 +.delay_slot + 11488 0x9c 0x62 0x11 0x3b 0x41 0xe4 ADD r17, r19, r17; MOV r2, r27 +.delay_slot + 11494 0x14 0x47 0x00 0x98 ADD r3, r17, r16 +.delay_slot +.swstall delay_slot + 11498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11500 0x00 0x01 0x67 0x98 NOPA +.label TGT_F_ZL14addFloat32Sigsjji_464 + 11504 0x98 0x16 0xb8 0x40 0x01 0x84 JNZ r19, #11632 +.delay_slot +.swstall delay_slot + 11510 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11512 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11514 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11516 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11518 0x00 0x00 NOPX + 11520 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11524 0x18 0x10 0xa0 0xf8 MOV r0, r1 +.delay_slot +.swstall delay_slot + 11528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11530 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11534 0x00 0x00 NOPX +.label TGT_F_ZL14addFloat32Sigsjji_496 +.tail_call + 11536 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 11542 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11544 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11546 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11548 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11550 0x00 0x00 NOPX +.label TGT_F_ZL14addFloat32Sigsjji_512 +.return_address + 11552 0x14 0xe1 0x05 0x98 OR r16, r19, r16 + 11556 0x80 0x16 0xc0 0x40 0x01 0x84 JNZ r16, #11648 +.delay_slot +.swstall delay_slot + 11562 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11564 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11566 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11568 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11570 0x00 0x00 NOPX + 11572 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 11576 0x18 0x10 0xa0 0xf8 MOV r0, r1 +.delay_slot +.swstall delay_slot + 11580 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11582 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11584 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11586 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZL14addFloat32Sigsjji_560 + 11600 0x05 0x00 0x08 0x33 0x82 0xa4 RET lr; ADD.NC r16, r19, r16 +.delay_slot + 11606 0x17 0xe2 0xe9 0x18 MOVX r17, #-6 +.delay_slot + 11610 0x14 0x21 0x1d 0x98 LSHL r16, r16, r17 +.delay_slot + 11614 0x11 0x01 0x00 0x98 ADD r0, r4, r16 +.delay_slot +.swstall delay_slot + 11618 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11620 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL14addFloat32Sigsjji_592 +.tail_call + 11632 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 11638 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11640 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11642 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11644 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11646 0x00 0x00 NOPX +.label TGT_F_ZL14addFloat32Sigsjji_608 +.tail_call +.return_address + 11648 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 11654 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11658 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11660 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11662 0x00 0x00 NOPX +.label _ZL14addFloat32Sigsjji__end +.label _ZL14subFloat32Sigsjji +.function_start + 11664 0xfd 0x31 0x00 0x00 0x1f 0xfe 0x0f 0xff 0x90 0xba MOVA r17, #-23; MOVXM r16, #8388607 + 11674 0x10 0x89 0x1d 0x98 LSHL r4, r2, r17 + 11678 0x10 0x65 0x1d 0x98 LSHL r18, r1, r17 + 11682 0x10 0x69 0x04 0x98 AND r20, r1, r16 + 11686 0x11 0x32 0x90 0x18 EXTEND.u8 r25, r4 + 11690 0x14 0xb6 0x90 0x18 EXTEND.u8 r27, r18 + 11694 0x14 0x20 0x99 0xa0 0x1d 0x64 AND r16, r2, r16; MOV r19, #7 + 11700 0x15 0x23 0x3d 0x98 LSHL r17, r20, r19 + 11704 0xdc 0xb2 0x3c 0x20 0x01 0x64 SUB r18, r27, r25; MOV r24, #0 + 11710 0x16 0x0b 0x2a 0x98 LT r5, r24, r18 + 11714 0x28 0x17 0x40 0x40 0x01 0x84 JNZ r5, #11904 +.delay_slot + 11720 0x14 0x21 0x3d 0x98 LSHL r16, r16, r19 +.delay_slot + 11724 0x1f 0xe0 0x00 0x10 0x00 0x00 0x88 0x00 0x10 0xba MOVA r0, #255; MOVXM r4, #1073741824 +.delay_slot + 11734 0x16 0xe8 0x07 0x98 EQ r20, r27, r0 +.delay_slot + 11738 0x14 0x66 0x45 0x98 OR r19, r17, r4 +.delay_slot + 11742 0x11 0x09 0x05 0x98 OR r4, r4, r16 + 11746 0x14 0x8d 0x89 0x98 GE r6, r18, r24 + 11750 0x30 0x17 0x90 0x40 0x01 0x84 JNZ r6, #12064 +.delay_slot + 11756 0x10 0x0a 0x05 0x18 MOVX r5, #1 +.delay_slot + 11760 0x10 0xce 0x56 0x98 XOR r7, r3, r5 +.delay_slot +.swstall delay_slot + 11764 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11766 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11768 0x00 0x00 NOPX + 11770 0x16 0x68 0x07 0x98 EQ r20, r25, r0 + 11774 0xa0 0x17 0xc8 0x40 0x01 0x84 JNZ r20, #12176 +.delay_slot +.swstall delay_slot + 11780 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11782 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11786 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11788 0x00 0x00 NOPX + 11790 0x18 0x53 0xa0 0xf8 MOV r1, r7 + 11794 0x14 0xa0 0x07 0x18 ADD r16, r18, #1 + 11798 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 11802 0x16 0x21 0x01 0x98 SUB r16, r24, r16 + 11806 0x80 0x17 0x30 0x00 0x01 0x84 JZ r16, #11872 +.delay_slot + 11812 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27 +.delay_slot +.swstall delay_slot + 11816 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11818 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11820 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11822 0x00 0x00 NOPX + 11824 0x04 0x14 0x00 0x30 0x38 0x0e 0x48 0x1f 0x58 0xba MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 + 11834 0x10 0xe5 0x24 0x98 AND r18, r3, r18 + 11838 0x14 0x65 0x2d 0x98 LSHL r18, r17, r18 + 11842 0x14 0x66 0xf0 0x18 NEZ r19, r17 + 11846 0x14 0x37 0x4a 0x98 LT r27, r16, r20 + 11850 0x14 0x62 0x3d 0x98 LSHL r17, r17, r3 + 11854 0x14 0xa4 0xf0 0x18 NEZ r18, r18 + 11858 0x14 0x61 0x25 0x98 OR r16, r17, r18 + 11862 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0xe3 0x02 0x7a NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 +.label __ll2__ZL14subFloat32Sigsjji + 11872 0x00 0x17 0x80 0x00 0x00 0x84 J #12032 +.delay_slot + 11878 0x11 0x07 0x11 0x98 SUB r3, r4, r17 +.delay_slot +.swstall delay_slot + 11882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11886 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11888 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_240 + 11904 0xa0 0x17 0xe0 0x40 0x01 0x84 JNZ r20, #12224 +.delay_slot +.swstall delay_slot + 11910 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11912 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11914 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11916 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11918 0x00 0x00 NOPX + 11920 0x18 0x1d 0xa0 0xf8 MOV r0, r27 + 11924 0x18 0x51 0xa0 0xf8 MOV r1, r3 + 11928 0x1e 0xdc 0xa0 0xf8 MOV r27, r25 + 11932 0x1e 0x50 0x20 0xf8 MOV r25, r0 + 11936 0x14 0xa3 0xff 0x18 ADD r17, r18, #-1 + 11940 0x14 0x63 0x22 0x18 SEL.EQZ r17, r17, r18, r27 + 11944 0x88 0x17 0x78 0x00 0x01 0x84 JZ r17, #12016 +.delay_slot + 11950 0x14 0x20 0x42 0x18 SEL.EQZ r16, r16, r4, r27 +.delay_slot +.swstall delay_slot + 11954 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11956 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11958 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11960 0x00 0x00 NOPX + 11962 0x04 0x03 0x00 0x31 0x28 0x8e 0x88 0x1f 0x58 0xba MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 + 11972 0x14 0xa9 0x44 0x98 AND r20, r18, r20 + 11976 0x14 0x29 0x4d 0x98 LSHL r20, r16, r20 + 11980 0x14 0x25 0x2d 0x98 LSHL r18, r16, r18 + 11984 0x14 0x76 0x3a 0x98 LT r27, r17, r3 + 11988 0x15 0x28 0xf0 0x18 NEZ r20, r20 + 11992 0x14 0x20 0xf0 0x18 NEZ r16, r16 + 11996 0x14 0xa3 0x45 0x98 OR r17, r18, r20 + 12000 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x21 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV +.label __ll1__ZL14subFloat32Sigsjji + 12016 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x26 0x38 0x0c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_368 +.tail_call + 12032 0x00 0x15 0x40 0x00 0x00 0x84 J #10880 +.delay_slot + 12038 0x16 0x45 0xff 0x18 ADD r2, r25, #-1 +.delay_slot +.swstall delay_slot + 12042 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12044 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12046 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12048 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_400 +.return_address + 12064 0xa0 0x17 0xf0 0x40 0x01 0x84 JNZ r20, #12256 +.delay_slot +.swstall delay_slot + 12070 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12072 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12074 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12076 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12078 0x00 0x00 NOPX + 12080 0x14 0x27 0x1c 0x98 LTU r19, r16, r17 + 12084 0x98 0x18 0x08 0x40 0x01 0x84 JNZ r19, #12304 +.delay_slot + 12090 0x11 0x71 0x92 0x18 SEL.EQZ r24, r5, r25, r27 +.delay_slot + 12094 0x1c 0x9d 0xa0 0xf8 MOV r18, r27 +.delay_slot + 12098 0x11 0x73 0x22 0x18 SEL.EQZ r25, r5, r18, r27 +.delay_slot +.swstall delay_slot + 12102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12104 0x00 0x00 NOPX + 12106 0x14 0x65 0x0c 0x98 LTU r18, r17, r16 + 12110 0x90 0x18 0x18 0x40 0x01 0x84 JNZ r18, #12336 +.delay_slot +.swstall delay_slot + 12116 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12118 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12120 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12122 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12124 0x00 0x00 NOPX + 12126 0x10 0x20 0x7d 0x18 MOVX r16, #31 + 12130 0x00 0x07 0xc0 0xc9 0xe8 0x44 MOVXM p0, #509172 + 12136 0x00 0x06 0x56 0x98 LDA r18, [p0] +.swstall __RAW__R_1948 + 12140 0x00 0x00 NOPX +.swstall __RAW__R_1948 + 12142 0x00 0x00 NOPX + 12144 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12148 0x10 0x22 0x0d 0x18 MOVX r17, #3 +.delay_slot +.swstall delay_slot + 12152 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12154 0x00 0x00 NOPX +.delay_slot + 12156 0x14 0x63 0x27 0x98 EQ r17, r17, r18 +.delay_slot + 12160 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x22 0x08 0x6c 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_512 + 12176 0x80 0x18 0x28 0x40 0x01 0x84 JNZ r16, #12368 +.delay_slot +.swstall delay_slot + 12182 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12184 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12186 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12188 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12190 0x00 0x00 NOPX + 12192 0x10 0x20 0x7d 0x18 MOVX r16, #31 + 12196 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12200 0x11 0xe1 0x0d 0x98 LSHL r16, r7, r16 +.delay_slot + 12204 0x7f 0x80 0x08 0xa0 0x00 0x44 MOVXM r17, #2139095040 +.delay_slot + 12210 0x14 0x41 0x00 0x98 ADD r0, r17, r16 +.delay_slot +.swstall delay_slot + 12214 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12216 0x00 0x2c 0xf0 0x04 0x00 0x00 0x1c 0x22 NOPA; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_560 + 12224 0x88 0x18 0x30 0x40 0x01 0x84 JNZ r17, #12384 +.delay_slot +.swstall delay_slot + 12230 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12232 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12234 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12236 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12238 0x00 0x00 NOPX + 12240 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12244 0x18 0x10 0xa0 0xf8 MOV r0, r1 +.delay_slot +.swstall delay_slot + 12248 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12250 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12252 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12254 0x00 0x00 NOPX +.label TGT_F_ZL14subFloat32Sigsjji_592 + 12256 0x14 0x61 0x05 0x98 OR r16, r17, r16 + 12260 0x80 0x18 0x38 0x40 0x01 0x84 JNZ r16, #12400 +.delay_slot +.swstall delay_slot + 12266 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12268 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12270 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12272 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12274 0x00 0x00 NOPX + 12276 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 12280 0x7f 0xff 0xf0 0x3f 0xfe 0x44 MOVXM r0, #2147483647 +.delay_slot +.swstall delay_slot + 12286 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12288 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12292 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL14subFloat32Sigsjji_640 + 12304 0x00 0x17 0x78 0x00 0x00 0x84 J #12016 +.delay_slot + 12310 0x18 0x51 0xa0 0xf8 MOV r1, r3 +.delay_slot + 12314 0x1c 0xd8 0xa0 0xf8 MOV r19, r17 +.delay_slot +.swstall delay_slot + 12318 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12320 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12322 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; NOPS; NOPM; NOPV +.label TGT_F_ZL14subFloat32Sigsjji_672 + 12336 0x00 0x17 0x30 0x00 0x00 0x84 J #11872 +.delay_slot + 12342 0x19 0x18 0x20 0xf8 MOV r4, r16 +.delay_slot + 12346 0x1e 0x5c 0x20 0xf8 MOV r25, r24 +.delay_slot + 12350 0x18 0x53 0xa0 0xf8 MOV r1, r7 +.delay_slot +.swstall delay_slot + 12354 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12356 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL14subFloat32Sigsjji_704 +.tail_call + 12368 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 12374 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12376 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12378 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12380 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12382 0x00 0x00 NOPX +.label TGT_F_ZL14subFloat32Sigsjji_720 +.tail_call +.return_address + 12384 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 12390 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12392 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12394 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12396 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12398 0x00 0x00 NOPX +.label TGT_F_ZL14subFloat32Sigsjji_736 +.tail_call +.return_address + 12400 0x00 0x14 0x40 0x00 0x00 0x84 J #10368 +.delay_slot +.swstall delay_slot + 12406 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12408 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12410 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12414 0x00 0x00 NOPX +.label _ZL14subFloat32Sigsjji__end +.label float32_add +.function_start + 12416 0x17 0xe0 0x85 0x18 MOVX r16, #-31 + 12420 0x10 0x47 0x0d 0x98 LSHL r3, r1, r16 + 12424 0x10 0xa1 0x0d 0x98 LSHL r16, r2, r16 + 12428 0x10 0xe1 0x07 0x98 EQ r16, r3, r16 + 12432 0x80 0x18 0x58 0x40 0x01 0x84 JNZ r16, #12464 +.delay_slot +.swstall delay_slot + 12438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12442 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12444 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12446 0x00 0x00 NOPX +.tail_call + 12448 0x00 0x16 0xc8 0x00 0x00 0x84 J #11664 +.delay_slot +.swstall delay_slot + 12454 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12460 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12462 0x00 0x00 NOPX +.label TGT_Ffloat32_add_48 +.tail_call +.return_address + 12464 0x00 0x15 0x90 0x00 0x00 0x84 J #11040 +.delay_slot +.swstall delay_slot + 12470 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12472 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12474 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12476 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12478 0x00 0x00 NOPX +.label float32_add__end + +.data_segment DMb 508992 +.label reduce_mean_c8_params + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x7 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 509120 40 + +.data_segment DMb 509160 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 509164 4 + +.bss_segment DMb 509168 1 + +.bss_segment DMb 509172 4 + +.bss_segment DMb 509184 64 + +.stack DM_stack 506560 508928 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.map new file mode 100644 index 0000000000000000000000000000000000000000..a0123fcd2abb0ee7d6fe767c4cfeb9204c35f584 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.map @@ -0,0 +1,177 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 2368 + + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 2613 + + 0x00000000..0x0007babf ( 506560 items) : Reserved + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + 0x0007c400..0x0007c43f ( 64 items) : Reserved + 0x0007c440..0x0007c4bf ( 128 items) : ../Release/0_0_reloadable2.o::reduce_mean_c8_params (Data, Global, .data.DMb.64) + 0x0007c4c0..0x0007c4c3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c4c4..0x0007c4c7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL10depth_iter (Data, Local, .bss.DMb.4) + 0x0007c4c8..0x0007c4cb ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c4cc..0x0007c4cf ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c4d0..0x0007c4d3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11reduce_axis (Data, Local, .bss.DMb.4) + 0x0007c4d4..0x0007c4d7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8l3_width (Data, Local, .bss.DMb.4) + 0x0007c4d8..0x0007c4db ( 4 items) : ../Release/0_0_reloadable2.o::_ZL9l3_height (Data, Local, .bss.DMb.4) + 0x0007c4dc..0x0007c4df ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8l3_depth (Data, Local, .bss.DMb.4) + 0x0007c4e0..0x0007c4e3 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL10width_iter (Data, Local, .bss.DMb.4) + 0x0007c4e4..0x0007c4e7 ( 4 items) : ../Release/0_0_reloadable2.o::_ZL11height_iter (Data, Local, .bss.DMb.4) + 0x0007c4e8..0x0007c4eb ( 4 items) : ../Release/0_0_reloadable2.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c4ec..0x0007c4ef ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c4f0..0x0007c4f0 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c4f4..0x0007c4f7 ( 4 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::float_rounding_mode (Data, Global, .bss.DMb.4) + 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable2.o::pad_3d_params (Data, Global, .bss.DMb.64) + 0x0007ccc0..0x000fffff ( 537408 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 10058 + + 0x00000000..0x0000092f ( 2352 items) : Reserved + 0x00000930..0x00000a0b ( 220 items) : ../Release/0_0_reloadable2.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Called functions : _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00000a10..0x00001043 ( 1588 items) : ../Release/0_0_reloadable2.o::_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + 0x00001050..0x000012ed ( 670 items) : ../Release/0_0_reloadable2.o::_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t (Function, Weak, .text) (stack frame size = 0) + 0x000012f0..0x00001d67 ( 2680 items) : ../Release/0_0_reloadable2.o::_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E (Function, Weak, .text) (stack frame size = 256) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001d70..0x000027e3 ( 2676 items) : ../Release/0_0_reloadable2.o::_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128) + + Called functions : _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + int32_to_float32 + float32_add + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t + _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_satE + _ZN12me_primitive11control_rndE + reduce_mean_c8_params + _ZL11reduce_axis + _ZL11ifm1_offset + pad_3d_params + _ZL8num_iter + _ZL8l3_width + _ZL9l3_height + _ZL8l3_depth + _ZL10depth_iter + _ZL10width_iter + _ZL11height_iter + + 0x000027f0..0x0000287d ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + 0x00002880..0x000028ef ( 112 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL19propagateFloat32NaNjj (Function, Local, .text) (stack frame size = 0) + 0x000028f0..0x00002a77 ( 392 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL19roundAndPackFloat32iij (Function, Local, .text) (stack frame size = 0) + + Referenced symbols: float_rounding_mode + + 0x00002a80..0x00002a97 ( 24 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL28normalizeRoundAndPackFloat32iij (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL19roundAndPackFloat32iij + + 0x00002aa0..0x00002b11 ( 114 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::int32_to_float32 (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZL28normalizeRoundAndPackFloat32iij + + 0x00002b20..0x00002d8f ( 624 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL14addFloat32Sigsjji (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL19roundAndPackFloat32iij + _ZL19propagateFloat32NaNjj + + 0x00002d90..0x0000307f ( 752 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::_ZL14subFloat32Sigsjji (Function, Local, .text) (stack frame size = 0) + + Called functions : _ZL28normalizeRoundAndPackFloat32iij + _ZL19propagateFloat32NaNjj + + Referenced symbols: float_rounding_mode + + 0x00003080..0x000030bf ( 64 items) : softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a)::float32_add (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZL14subFloat32Sigsjji + _ZL14addFloat32Sigsjji + + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x30c0 + _pc_start = 0x930 + _sp_end_DM_stack = 0x7c400 + _sp_start_DM_stack = 0x7bac0 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 2368 + ---------- ---------- + 2368 Total + +Section summary for memory 'DMb': + + .bss .data File + ---------- ---------- ---------- + 104 132 ../Release/0_0_reloadable2.o + 4 0 softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a) + 5 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- + 113 132 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 7834 ../Release/0_0_reloadable2.o + 2082 softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a) + 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- + 10058 Total + +File summary: + +../Release/0_0_reloadable2.o + DMb 236 + PM 7834 + +me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +softfloat.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release/libsoftfloat.a) + DMb 4 + PM 2082 + +me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + PM 142 + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.sdr new file mode 100644 index 0000000000000000000000000000000000000000..efa1bd1f1f0feebb4e1aac96628ff9f168810f9e --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.sdr @@ -0,0 +1,96 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:40 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable2 ../Release/0_0_reloadable2.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable2.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1731 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol reduce_mean_c8_params 0x0007c440 +_symbol _ZN12me_primitive11control_satE 0x0007c4ec +_symbol _ZN12me_primitive11control_rndE 0x0007c4f0 +_symbol float_rounding_mode 0x0007c4f4 +_symbol pad_3d_params 0x0007c500 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x00000930 +_symbol _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv 0x00000a10 +_symbol _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t 0x00001050 +_symbol _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E 0x000012f0 +_symbol _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001d70 +_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x000027f0 +_symbol int32_to_float32 0x00002aa0 +_symbol float32_add 0x00003080 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.srv new file mode 100644 index 0000000000000000000000000000000000000000..d037f49ea23915d17f1d140dbcf225735acc1af1 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.srv @@ -0,0 +1,14427 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:46:41 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable2 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable2.cc" 29 first +.src_ref 0 "0_0_reloadable2.cc" 31 60 first +.function_start + 2352 "11010100" // LDA r16, [p0]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00101111" // /* MW 4 */ + 2355 "11010000" // /* MW 3 */ + 2356 "11000010" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 29 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 31 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2364 "00000010" // ST p7, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "01010000" // /* MW 6 */ + 2367 "11101000" // /* MW 5 */ + 2368 "00000001" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "01110011" // /* MW 2 */ + 2371 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 17 79 +.src_ref 0 "0_0_reloadable2.cc" 31 110 first + 2372 "00111010" // ST r0, [sp, #-4]; NEZ r26, r15; MOV p7, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2373 "01111001" // /* MW 9 */ + 2374 "01100000" // /* MW 8 */ + 2375 "10110000" // /* MW 7 */ + 2376 "10000011" // /* MW 6 */ + 2377 "10100111" // /* MW 5 */ + 2378 "00011111" // /* MW 4 */ + 2379 "10110000" // /* MW 3 */ + 2380 "10000010" // /* MW 2 */ + 2381 "11111111" // /* MW 1 */ + 2382 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2383 "00111101" // /* MW 3 */ + 2384 "11110100" // /* MW 2 */ + 2385 "00001111" // /* MW 1 */ + 2386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2387 "00000000" // /* MW 1 */ + 2388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2389 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2390 "00011000" // ADD.NC p0, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2391 "00000010" // /* MW 3 */ + 2392 "01101000" // /* MW 2 */ + 2393 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2394 "10011000" // LDA r16, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2395 "00010110" // /* MW 3 */ + 2396 "00011110" // /* MW 2 */ + 2397 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2398 "10011000" // LDA r18, [p0], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2399 "01010110" // /* MW 3 */ + 2400 "00111110" // /* MW 2 */ + 2401 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2402 "10011000" // LDA r17, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2403 "00110110" // /* MW 3 */ + 2404 "11101110" // /* MW 2 */ + 2405 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2406 "10011000" // LDA r27, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2407 "01110110" // /* MW 3 */ + 2408 "00000111" // /* MW 2 */ + 2409 "00000000" // /* MW 1 */ + 2410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2411 "00000000" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2422 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2423 "00100010" // /* MW 3 */ + 2424 "00100001" // /* MW 2 */ + 2425 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2426 "10011000" // ST r16, [p0, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2427 "00010001" // /* MW 3 */ + 2428 "11010110" // /* MW 2 */ + 2429 "00001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2430 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2431 "11111101" // /* MW 3 */ + 2432 "11100000" // /* MW 2 */ + 2433 "00010111" // /* MW 1 */ + 2434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2435 "00000000" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2440 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2441 "00001000" // /* MW 3 */ + 2442 "01010111" // /* MW 2 */ + 2443 "00010100" // /* MW 1 */ + 2444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2445 "00000000" // /* MW 1 */ + 2446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2447 "00000000" // /* MW 1 */ + 2448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2449 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 17 79 first + 2450 "10011000" // LDA p0, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00011110" // /* MW 3 */ + 2452 "00101100" // /* MW 2 */ + 2453 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 18 47 first + 2454 "10011000" // LDA p1, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "10011110" // /* MW 3 */ + 2456 "11111100" // /* MW 2 */ + 2457 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 19 81 first + 2458 "10011000" // LDA p2, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2459 "00011110" // /* MW 3 */ + 2460 "00000101" // /* MW 2 */ + 2461 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 16 4 first +.no_stack_arguments + 2462 "00000100" // JL #7536 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7536 delay_slots=5 */ + 2463 "00000001" // /* MW 5 */ + 2464 "00000000" // /* MW 4 */ + 2465 "10111000" // /* MW 3 */ + 2466 "00001110" // /* MW 2 */ + 2467 "00000000" // /* MW 1 */ +.delay_slot + 2468 "10011000" // ST r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2469 "01010101" // /* MW 3 */ + 2470 "11110011" // /* MW 2 */ + 2471 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2479 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 38 60 first +.return_address + 2480 "10011000" // LDA r16, [p7, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2481 "00010110" // /* MW 3 */ + 2482 "11110110" // /* MW 2 */ + 2483 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2484 "00011000" // LDA r26, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2485 "01010001" // /* MW 3 */ + 2486 "11110011" // /* MW 2 */ + 2487 "00000111" // /* MW 1 */ + 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2489 "00000000" // /* MW 1 */ + 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2491 "00000000" // /* MW 1 */ + 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2493 "00000000" // /* MW 1 */ + 2494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2495 "00000000" // /* MW 1 */ + 2496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2497 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2498 "00011000" // ADD.NC p0, r16, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2499 "00001000" // /* MW 3 */ + 2500 "01101000" // /* MW 2 */ + 2501 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2502 "10011000" // LDA r16, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2503 "00010110" // /* MW 3 */ + 2504 "00000110" // /* MW 2 */ + 2505 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2506 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2507 "00000101" // /* MW 3 */ + 2508 "00100010" // /* MW 2 */ + 2509 "00010000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ + 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2517 "00000000" // /* MW 1 */ + 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2519 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2520 "00011000" // REL.COND r16, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "00011000" // /* MW 3 */ + 2522 "00010101" // /* MW 2 */ + 2523 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2524 "11010100" // LDA lr, [sp, #-12]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2525 "01000001" // /* MW 5 */ + 2526 "10101111" // /* MW 4 */ + 2527 "00101101" // /* MW 3 */ + 2528 "10000111" // /* MW 2 */ + 2529 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2530 "10011000" // LDA r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2531 "00010110" // /* MW 3 */ + 2532 "11110110" // /* MW 2 */ + 2533 "00000000" // /* MW 1 */ + 2534 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2535 "10011001" // /* MW 3 */ + 2536 "11111011" // /* MW 2 */ + 2537 "00000111" // /* MW 1 */ + 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2539 "00000000" // /* MW 1 */ + 2540 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "11110001" // /* MW 3 */ + 2542 "11111101" // /* MW 2 */ + 2543 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 first + 2544 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000001" // /* MW 5 */ + 2546 "00000000" // /* MW 4 */ + 2547 "00000000" // /* MW 3 */ + 2548 "11111000" // /* MW 2 */ + 2549 "11111111" // /* MW 1 */ + 2550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2551 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable2.cc" 41 + 2552 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2553 "00000000" // /* MW 3 */ + 2554 "00101000" // /* MW 2 */ + 2555 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2556 "10011000" // SUB r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2557 "00000001" // /* MW 3 */ + 2558 "01100011" // /* MW 2 */ + 2559 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2560 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2561 "00010010" // /* MW 3 */ + 2562 "00100001" // /* MW 2 */ + 2563 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2564 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2565 "00010001" // /* MW 3 */ + 2566 "11110110" // /* MW 2 */ + 2567 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2571 "00000000" // /* MW 1 */ +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.function setup _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.src_ref 2 "reduce_base_c8.h" 218 first +.src_ref 2 "reduce_base_c8.h" 220 27 first +.src_ref 2 "reduce_base_c8.h" 290 63 +.src_ref 2 "reduce_base_c8.h" 348 46 +.function_start + 2576 "01110110" // LDA r3, [p1], #4; MOVS p3, p0; MOVX r6, #-5; MOV r0, p1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2577 "01111000" // /* MW 11 */ + 2578 "01100000" // /* MW 10 */ + 2579 "00001001" // /* MW 9 */ + 2580 "01101000" // /* MW 8 */ + 2581 "01100111" // /* MW 7 */ + 2582 "00111110" // /* MW 6 */ + 2583 "10001011" // /* MW 5 */ + 2584 "10000000" // /* MW 4 */ + 2585 "11010011" // /* MW 3 */ + 2586 "10001110" // /* MW 2 */ + 2587 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 287 40 +.src_ref 2 "reduce_base_c8.h" 287 40 +.src_ref 2 "reduce_base_c8.h" 348 46 first + 2588 "10111010" // MOVA r7, #16; MOVX r2, #-24; ADD.NC p2, r0, #28 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2589 "00001000" // /* MW 9 */ + 2590 "00000111" // /* MW 8 */ + 2591 "00110000" // /* MW 7 */ + 2592 "00001001" // /* MW 6 */ + 2593 "00100101" // /* MW 5 */ + 2594 "00111110" // /* MW 4 */ + 2595 "00000000" // /* MW 3 */ + 2596 "00000111" // /* MW 2 */ + 2597 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 293 77 +.src_ref 2 "reduce_base_c8.h" 298 44 +.src_ref 2 "reduce_base_c8.h" 299 40 +.src_ref 2 "reduce_base_c8.h" 300 59 +.src_ref 2 "reduce_base_c8.h" 326 79 + 2598 "10111010" // MOVA r30, #3; MOVX r1, #-3; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2599 "01111000" // /* MW 9 */ + 2600 "01100000" // /* MW 8 */ + 2601 "00001000" // /* MW 7 */ + 2602 "10101000" // /* MW 6 */ + 2603 "00010111" // /* MW 5 */ + 2604 "00111110" // /* MW 4 */ + 2605 "00000000" // /* MW 3 */ + 2606 "01111110" // /* MW 2 */ + 2607 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 57 +.src_ref 2 "reduce_base_c8.h" 301 81 +.src_ref 2 "reduce_base_c8.h" 305 77 + 2608 "10111010" // MOVA r5, #-1; MOVXM r4, #65528 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2609 "00010000" // /* MW 9 */ + 2610 "11111100" // /* MW 8 */ + 2611 "10001111" // /* MW 7 */ + 2612 "00111100" // /* MW 6 */ + 2613 "00000000" // /* MW 5 */ + 2614 "00000000" // /* MW 4 */ + 2615 "00000000" // /* MW 3 */ + 2616 "11100101" // /* MW 2 */ + 2617 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 218 +.src_ref 2 "reduce_base_c8.h" 280 76 +.src_ref 2 "reduce_base_c8.h" 312 98 + 2618 "10111010" // MOVA r16, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2619 "01110000" // /* MW 9 */ + 2620 "00000000" // /* MW 8 */ + 2621 "00000000" // /* MW 7 */ + 2622 "00000000" // /* MW 6 */ + 2623 "00000010" // /* MW 5 */ + 2624 "00000000" // /* MW 4 */ + 2625 "00000000" // /* MW 3 */ + 2626 "10010000" // /* MW 2 */ + 2627 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 298 44 first + 2628 "00011000" // ADD.NC p4, r0, #46 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2629 "00010111" // /* MW 3 */ + 2630 "01100000" // /* MW 2 */ + 2631 "00011100" // /* MW 1 */ + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 220 25 first + 2634 "10011000" // ST r3, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "01110001" // /* MW 3 */ + 2636 "00011100" // /* MW 2 */ + 2637 "00001000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 221 28 first + 2638 "10011000" // LDA r26, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "01010110" // /* MW 3 */ + 2640 "00011111" // /* MW 2 */ + 2641 "00000001" // /* MW 1 */ + 2642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2643 "00000000" // /* MW 1 */ + 2644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2645 "00000000" // /* MW 1 */ + 2646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2647 "00000000" // /* MW 1 */ + 2648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2649 "00000000" // /* MW 1 */ + 2650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2651 "00000000" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 221 26 +.src_ref 2 "reduce_base_c8.h" 301 81 first + 2654 "01011100" // ST r26, [p0], #4; AND r17, r26, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2655 "10001001" // /* MW 5 */ + 2656 "01000100" // /* MW 4 */ + 2657 "00111101" // /* MW 3 */ + 2658 "11101010" // /* MW 2 */ + 2659 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 222 26 first +.src_ref 2 "reduce_base_c8.h" 293 58 first +.src_ref 2 "reduce_base_c8.h" 301 81 + 2660 "10111010" // LDA r29, [p1], #4; MUL r4, r3, r26; ADD.NC r22, r17, r4 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2661 "10101000" // /* MW 9 */ + 2662 "01001000" // /* MW 8 */ + 2663 "11001100" // /* MW 7 */ + 2664 "01111110" // /* MW 6 */ + 2665 "01001101" // /* MW 5 */ + 2666 "00000110" // /* MW 4 */ + 2667 "11010000" // /* MW 3 */ + 2668 "11110110" // /* MW 2 */ + 2669 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 63 first + 2670 "10011000" // LSHL r18, r26, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2671 "01101101" // /* MW 3 */ + 2672 "10100100" // /* MW 2 */ + 2673 "00010110" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 293 77 first + 2674 "10011000" // LSHL r6, r4, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2675 "00011101" // /* MW 3 */ + 2676 "00001100" // /* MW 2 */ + 2677 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 41 first +.src_ref 2 "reduce_base_c8.h" 300 59 first + 2678 "00100100" // LSHL r17, r26, r1; ADD.NC r1, r18, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2679 "11111111" // /* MW 5 */ + 2680 "10110010" // /* MW 4 */ + 2681 "10110000" // /* MW 3 */ + 2682 "01000011" // /* MW 2 */ + 2683 "11010100" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 222 24 first +.src_ref 2 "reduce_base_c8.h" 287 40 first + 2690 "01011100" // ST r29, [p0], #4; MAC r7, r7, r29, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2691 "01001100" // /* MW 5 */ + 2692 "10011100" // /* MW 4 */ + 2693 "00111110" // /* MW 3 */ + 2694 "11110110" // /* MW 2 */ + 2695 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 223 29 first +.src_ref 2 "reduce_base_c8.h" 312 60 first + 2696 "11111010" // LDA r2, [p1], #4; ST r29, [sp, #-4]; MUL r4, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2697 "10101111" // /* MW 9 */ + 2698 "01001001" // /* MW 8 */ + 2699 "00000111" // /* MW 7 */ + 2700 "10000000" // /* MW 6 */ + 2701 "10110101" // /* MW 5 */ + 2702 "11111111" // /* MW 4 */ + 2703 "11010111" // /* MW 3 */ + 2704 "10001010" // /* MW 2 */ + 2705 "00100011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 305 57 first + 2706 "10011000" // MUL r20, r3, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11011111" // /* MW 3 */ + 2708 "11101001" // /* MW 2 */ + 2709 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 78 first + 2710 "10011000" // MUL r28, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2711 "01001111" // /* MW 3 */ + 2712 "11111000" // /* MW 2 */ + 2713 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 299 40 first + 2714 "10011000" // LSHL r21, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2715 "11101101" // /* MW 3 */ + 2716 "01101011" // /* MW 2 */ + 2717 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 57 first +.src_ref 2 "reduce_base_c8.h" 299 40 + 2718 "00100100" // LSHL r18, r29, r5; ADD.NC r27, r21, #-48 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2719 "11010000" // /* MW 5 */ + 2720 "10110101" // /* MW 4 */ + 2721 "10111101" // /* MW 3 */ + 2722 "10001011" // /* MW 2 */ + 2723 "11101100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 41 + 2724 "00011000" // ADD r23, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "11111111" // /* MW 3 */ + 2726 "10101111" // /* MW 2 */ + 2727 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 301 85 first + 2728 "10011000" // MUL r29, r29, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "01101111" // /* MW 3 */ + 2730 "01111011" // /* MW 2 */ + 2731 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 223 27 first +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2732 "01011100" // ST r2, [p0], #4; LT r24, r30, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2733 "01010101" // /* MW 5 */ + 2734 "01100000" // /* MW 4 */ + 2735 "00111111" // /* MW 3 */ + 2736 "10001010" // /* MW 2 */ + 2737 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 224 33 first + 2738 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2739 "00101110" // /* MW 3 */ + 2740 "00011100" // /* MW 2 */ + 2741 "00000001" // /* MW 1 */ + 2742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2743 "00000000" // /* MW 1 */ + 2744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2745 "00000000" // /* MW 1 */ + 2746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2747 "00000000" // /* MW 1 */ + 2748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2749 "00000000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 224 31 +.src_ref 2 "reduce_base_c8.h" 318 64 + 2754 "00000010" // ST el0, [p0], #4; MOV r31, el0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2755 "01110000" // /* MW 7 */ + 2756 "00001110" // /* MW 6 */ + 2757 "11110000" // /* MW 5 */ + 2758 "00000011" // /* MW 4 */ + 2759 "00110000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 225 34 first + 2762 "10011000" // LDA eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2763 "00001110" // /* MW 3 */ + 2764 "00000100" // /* MW 2 */ + 2765 "00000001" // /* MW 1 */ + 2766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2767 "00000000" // /* MW 1 */ + 2768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2769 "00000000" // /* MW 1 */ + 2770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2771 "00000000" // /* MW 1 */ + 2772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2773 "00000000" // /* MW 1 */ + 2774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2775 "00000000" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 225 32 +.src_ref 2 "reduce_base_c8.h" 318 64 + 2778 "00000010" // ST eh0, [p0]; MOV r25, eh0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2779 "01110000" // /* MW 7 */ + 2780 "10001110" // /* MW 6 */ + 2781 "00110000" // /* MW 5 */ + 2782 "00000011" // /* MW 4 */ + 2783 "00110000" // /* MW 3 */ + 2784 "10000001" // /* MW 2 */ + 2785 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 226 32 first + 2786 "10011000" // LDA r30, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2787 "11010110" // /* MW 3 */ + 2788 "00010111" // /* MW 2 */ + 2789 "00000001" // /* MW 1 */ + 2790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2791 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2792 "10000100" // JNZ r24, #2912 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=2912 delay_slots=5 */ + 2793 "00000001" // /* MW 5 */ + 2794 "01000000" // /* MW 4 */ + 2795 "10110000" // /* MW 3 */ + 2796 "00000101" // /* MW 2 */ + 2797 "11000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 98 first +.delay_slot + 2798 "10011000" // LSHL r19, r28, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2799 "00001101" // /* MW 3 */ + 2800 "00100111" // /* MW 2 */ + 2801 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 318 64 first +.delay_slot + 2802 "10011000" // MUL r25, r31, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2803 "10011111" // /* MW 3 */ + 2804 "11110011" // /* MW 2 */ + 2805 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 300 41 first +.src_ref 2 "reduce_base_c8.h" 305 77 first +.delay_slot + 2806 "00100100" // LSHL r20, r20, r5; ADD.NC r5, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "11111111" // /* MW 5 */ + 2808 "10110001" // /* MW 4 */ + 2809 "10110010" // /* MW 3 */ + 2810 "00001011" // /* MW 2 */ + 2811 "10100101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 280 76 first +.delay_slot + 2812 "10011000" // LSHL r16, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2813 "00001101" // /* MW 3 */ + 2814 "00100001" // /* MW 2 */ + 2815 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 226 30 first +.src_ref 2 "reduce_base_c8.h" 318 88 first +.delay_slot + 2816 "01011100" // ST r30, [p0, #4]; MUL r31, r25, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2817 "11011111" // /* MW 5 */ + 2818 "11111111" // /* MW 4 */ + 2819 "00111100" // /* MW 3 */ + 2820 "11111010" // /* MW 2 */ + 2821 "00000010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2822 "00011000" // MOVX r28, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00000101" // /* MW 3 */ + 2824 "00111000" // /* MW 2 */ + 2825 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2826 "10011000" // EQ r28, r2, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "11000111" // /* MW 3 */ + 2828 "10111001" // /* MW 2 */ + 2829 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2830 "10000100" // JNZ r28, #4032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4032 delay_slots=5 */ + 2831 "00000001" // /* MW 5 */ + 2832 "01000000" // /* MW 4 */ + 2833 "11100000" // /* MW 3 */ + 2834 "00000111" // /* MW 2 */ + 2835 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2841 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2843 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 242 41 first +.delay_slot + 2844 "00011000" // ADD r22, r3, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "11111111" // /* MW 3 */ + 2846 "11101101" // /* MW 2 */ + 2847 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2848 "00011000" // MOVX r17, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00001001" // /* MW 3 */ + 2850 "00100010" // /* MW 2 */ + 2851 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 2852 "10011000" // EQ r17, r17, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00100111" // /* MW 3 */ + 2854 "01100010" // /* MW 2 */ + 2855 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2856 "10000100" // JNZ r17, #3904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3904 delay_slots=5 */ + 2857 "00000001" // /* MW 5 */ + 2858 "01000000" // /* MW 4 */ + 2859 "10100000" // /* MW 3 */ + 2860 "00000111" // /* MW 2 */ + 2861 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2869 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.delay_slot + 2870 "00011000" // MOVX r7, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001101" // /* MW 3 */ + 2872 "00001110" // /* MW 2 */ + 2873 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2874 "10011000" // EQ r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00100111" // /* MW 3 */ + 2876 "11000100" // /* MW 2 */ + 2877 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2878 "10000100" // JNZ r2, #3744 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3744 delay_slots=5 */ + 2879 "00000001" // /* MW 5 */ + 2880 "01000000" // /* MW 4 */ + 2881 "01010000" // /* MW 3 */ + 2882 "00000111" // /* MW 2 */ + 2883 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2893 "00000000" // /* MW 1 */ + 2894 "10000100" // J #3552 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3552 delay_slots=5 */ + 2895 "00000000" // /* MW 5 */ + 2896 "00000000" // /* MW 4 */ + 2897 "11110000" // /* MW 3 */ + 2898 "00000110" // /* MW 2 */ + 2899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 2900 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "00010001" // /* MW 3 */ + 2902 "00110100" // /* MW 2 */ + 2903 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2905 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2907 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2909 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2911 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_336 +.src_ref 2 "reduce_base_c8.h" 236 8 + 2912 "00011000" // MOVX r29, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "00010101" // /* MW 3 */ + 2914 "00111010" // /* MW 2 */ + 2915 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2916 "10011000" // LT r24, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "00101010" // /* MW 3 */ + 2918 "01110000" // /* MW 2 */ + 2919 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2920 "10000100" // JNZ r24, #3232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3232 delay_slots=5 */ + 2921 "00000001" // /* MW 5 */ + 2922 "01000000" // /* MW 4 */ + 2923 "01010000" // /* MW 3 */ + 2924 "00000110" // /* MW 2 */ + 2925 "11000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2933 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 316 38 +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 2934 "00011000" // MOVX r26, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2935 "00010001" // /* MW 3 */ + 2936 "00110100" // /* MW 2 */ + 2937 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2938 "10011000" // EQ r17, r26, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2939 "00100111" // /* MW 3 */ + 2940 "10100010" // /* MW 2 */ + 2941 "00010110" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2942 "10000100" // JNZ r17, #3104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3104 delay_slots=5 */ + 2943 "00000001" // /* MW 5 */ + 2944 "01000000" // /* MW 4 */ + 2945 "00010000" // /* MW 3 */ + 2946 "00000110" // /* MW 2 */ + 2947 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2949 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2953 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2957 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2958 "10011000" // NE r2, r29, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00101000" // /* MW 3 */ + 2960 "01000100" // /* MW 2 */ + 2961 "00010111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 2962 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */ + 2963 "00000001" // /* MW 5 */ + 2964 "01000000" // /* MW 4 */ + 2965 "11110000" // /* MW 3 */ + 2966 "00000110" // /* MW 2 */ + 2967 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2969 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2977 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 286 44 first +.src_ref 2 "reduce_base_c8.h" 289 38 +.src_ref 2 "reduce_base_c8.h" 291 40 +.src_ref 2 "reduce_base_c8.h" 291 40 + 2978 "10111010" // ST.s16 r21, [p4], #2; MOVX r2, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2979 "01011000" // /* MW 9 */ + 2980 "11101100" // /* MW 8 */ + 2981 "00000111" // /* MW 7 */ + 2982 "00001000" // /* MW 6 */ + 2983 "00100010" // /* MW 5 */ + 2984 "00000000" // /* MW 4 */ + 2985 "11100000" // /* MW 3 */ + 2986 "11010110" // /* MW 2 */ + 2987 "10000011" // /* MW 1 */ + 2988 "11111000" // MOV r30, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "10100000" // /* MW 3 */ + 2990 "10011100" // /* MW 2 */ + 2991 "00011111" // /* MW 1 */ + 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2993 "00000000" // /* MW 1 */ + 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2995 "00000000" // /* MW 1 */ + 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2997 "00000000" // /* MW 1 */ + 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2999 "00000000" // /* MW 1 */ + 3000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3001 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 287 38 first + 3002 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3003 "11110111" // /* MW 3 */ + 3004 "00011100" // /* MW 2 */ + 3005 "00000100" // /* MW 1 */ + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 288 39 first + 3018 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3019 "11110111" // /* MW 3 */ + 3020 "00011110" // /* MW 2 */ + 3021 "00000100" // /* MW 1 */ + 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3023 "00000000" // /* MW 1 */ + 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3025 "00000000" // /* MW 1 */ + 3026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3027 "00000000" // /* MW 1 */ + 3028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3029 "00000000" // /* MW 1 */ + 3030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3031 "00000000" // /* MW 1 */ + 3032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3033 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 289 38 first + 3034 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "01010111" // /* MW 3 */ + 3036 "00011100" // /* MW 2 */ + 3037 "00000100" // /* MW 1 */ + 3038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3039 "00000000" // /* MW 1 */ + 3040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3041 "00000000" // /* MW 1 */ + 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3043 "00000000" // /* MW 1 */ + 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3045 "00000000" // /* MW 1 */ + 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3047 "00000000" // /* MW 1 */ + 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3049 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 290 39 first + 3050 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3051 "00110111" // /* MW 3 */ + 3052 "00011100" // /* MW 2 */ + 3053 "00000100" // /* MW 1 */ + 3054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3055 "00000000" // /* MW 1 */ + 3056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3057 "00000000" // /* MW 1 */ + 3058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3059 "00000000" // /* MW 1 */ + 3060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3061 "00000000" // /* MW 1 */ + 3062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3063 "00000000" // /* MW 1 */ + 3064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3065 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 291 40 first + 3066 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3067 "01010111" // /* MW 3 */ + 3068 "00001000" // /* MW 2 */ + 3069 "00000100" // /* MW 1 */ + 3070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3071 "00000000" // /* MW 1 */ + 3072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3073 "00000000" // /* MW 1 */ + 3074 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3075 "00000000" // /* MW 5 */ + 3076 "00000000" // /* MW 4 */ + 3077 "11101000" // /* MW 3 */ + 3078 "00000110" // /* MW 2 */ + 3079 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3081 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3085 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 292 38 first +.delay_slot + 3086 "10011000" // ST r18, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3087 "01010001" // /* MW 3 */ + 3088 "00000110" // /* MW 2 */ + 3089 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 293 38 first +.delay_slot + 3090 "00101110" // NOPA; ST r6, [p4, #4]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3091 "00011100" // /* MW 13 */ + 3092 "00000000" // /* MW 12 */ + 3093 "00000000" // /* MW 11 */ + 3094 "01010111" // /* MW 10 */ + 3095 "00011010" // /* MW 9 */ + 3096 "01000000" // /* MW 8 */ + 3097 "00000000" // /* MW 7 */ + 3098 "00000000" // /* MW 6 */ + 3099 "10100011" // /* MW 5 */ + 3100 "00101001" // /* MW 4 */ + 3101 "11111000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_528 +.src_ref 2 "reduce_base_c8.h" 274 44 first +.src_ref 2 "reduce_base_c8.h" 275 40 +.src_ref 2 "reduce_base_c8.h" 275 40 + 3104 "10111010" // ST.s16 r4, [p4], #2; MOVX r6, #-3; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00010000" // /* MW 8 */ + 3107 "01001000" // /* MW 7 */ + 3108 "10101000" // /* MW 6 */ + 3109 "01100111" // /* MW 5 */ + 3110 "00111110" // /* MW 4 */ + 3111 "11100000" // /* MW 3 */ + 3112 "10010010" // /* MW 2 */ + 3113 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 40 first +.src_ref 2 "reduce_base_c8.h" 279 40 + 3114 "10111010" // MOVA m0, #-20; MAC r2, r2, r6, r4; MOV r30, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01111000" // /* MW 9 */ + 3116 "00001110" // /* MW 8 */ + 3117 "11010000" // /* MW 7 */ + 3118 "00110011" // /* MW 6 */ + 3119 "00100010" // /* MW 5 */ + 3120 "00001100" // /* MW 4 */ + 3121 "10000000" // /* MW 3 */ + 3122 "10000000" // /* MW 2 */ + 3123 "11111101" // /* MW 1 */ + 3124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3125 "00000000" // /* MW 1 */ + 3126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3127 "00000000" // /* MW 1 */ + 3128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3129 "00000000" // /* MW 1 */ + 3130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3131 "00000000" // /* MW 1 */ + 3132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 38 + 3134 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3135 "01010111" // /* MW 3 */ + 3136 "00011100" // /* MW 2 */ + 3137 "00000100" // /* MW 1 */ + 3138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3139 "00000000" // /* MW 1 */ + 3140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3141 "00000000" // /* MW 1 */ + 3142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3143 "00000000" // /* MW 1 */ + 3144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3145 "00000000" // /* MW 1 */ + 3146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3147 "00000000" // /* MW 1 */ + 3148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3149 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 276 39 first + 3150 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3151 "11110111" // /* MW 3 */ + 3152 "00011110" // /* MW 2 */ + 3153 "00000100" // /* MW 1 */ + 3154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3155 "00000000" // /* MW 1 */ + 3156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3157 "00000000" // /* MW 1 */ + 3158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3159 "00000000" // /* MW 1 */ + 3160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3161 "00000000" // /* MW 1 */ + 3162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3163 "00000000" // /* MW 1 */ + 3164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3165 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 275 38 first +.src_ref 2 "reduce_base_c8.h" 277 38 first + 3166 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3167 "01010111" // /* MW 3 */ + 3168 "00011100" // /* MW 2 */ + 3169 "00000100" // /* MW 1 */ + 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3171 "00000000" // /* MW 1 */ + 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3173 "00000000" // /* MW 1 */ + 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3175 "00000000" // /* MW 1 */ + 3176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3177 "00000000" // /* MW 1 */ + 3178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3179 "00000000" // /* MW 1 */ + 3180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3181 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 278 39 first + 3182 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3183 "10110111" // /* MW 3 */ + 3184 "00011100" // /* MW 2 */ + 3185 "00000100" // /* MW 1 */ + 3186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3187 "00000000" // /* MW 1 */ + 3188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3189 "00000000" // /* MW 1 */ + 3190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3191 "00000000" // /* MW 1 */ + 3192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3193 "00000000" // /* MW 1 */ + 3194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3195 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3197 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 279 40 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3198 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3199 "00110111" // /* MW 3 */ + 3200 "00001000" // /* MW 2 */ + 3201 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3203 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3205 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3206 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3207 "00000000" // /* MW 5 */ + 3208 "00000000" // /* MW 4 */ + 3209 "11101000" // /* MW 3 */ + 3210 "00000110" // /* MW 2 */ + 3211 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 279 40 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3212 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3213 "01000001" // /* MW 3 */ + 3214 "00000010" // /* MW 2 */ + 3215 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3217 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3219 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 281 38 first +.delay_slot + 3220 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3221 "01110001" // /* MW 3 */ + 3222 "00010100" // /* MW 2 */ + 3223 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 280 38 first +.delay_slot + 3224 "00000010" // ST r16, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3225 "01110000" // /* MW 7 */ + 3226 "10100101" // /* MW 6 */ + 3227 "00000001" // /* MW 5 */ + 3228 "00000000" // /* MW 4 */ + 3229 "00110000" // /* MW 3 */ + 3230 "11000010" // /* MW 2 */ + 3231 "10000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_656 +.src_ref 2 "reduce_base_c8.h" 236 8 +.src_ref 2 "reduce_base_c8.h" 301 40 +.src_ref 2 "reduce_base_c8.h" 302 76 + 3232 "00101100" // LDA r3, [sp, #-4]; MOVX r4, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3233 "00110010" // /* MW 5 */ + 3234 "00010000" // /* MW 4 */ + 3235 "00100000" // /* MW 3 */ + 3236 "10001110" // /* MW 2 */ + 3237 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 first + 3238 "10011000" // EQ r4, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3239 "01000111" // /* MW 3 */ + 3240 "10001000" // /* MW 2 */ + 3241 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3242 "10000100" // JNZ r4, #3408 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3408 delay_slots=5 */ + 3243 "00000001" // /* MW 5 */ + 3244 "01000000" // /* MW 4 */ + 3245 "10101000" // /* MW 3 */ + 3246 "00000110" // /* MW 2 */ + 3247 "00100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 298 44 +.src_ref 2 "reduce_base_c8.h" 303 40 +.src_ref 2 "reduce_base_c8.h" 310 44 +.src_ref 2 "reduce_base_c8.h" 311 38 +.delay_slot + 3248 "00011000" // MOVX r1, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "01000001" // /* MW 3 */ + 3250 "00000010" // /* MW 2 */ + 3251 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3255 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3257 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3259 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3260 "00011000" // MOVX r3, #7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3261 "00011101" // /* MW 3 */ + 3262 "00000110" // /* MW 2 */ + 3263 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3264 "10011000" // NE r2, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3265 "00101000" // /* MW 3 */ + 3266 "11000100" // /* MW 2 */ + 3267 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 236 8 + 3268 "10000100" // JNZ r2, #3552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3552 delay_slots=5 */ + 3269 "00000001" // /* MW 5 */ + 3270 "01000000" // /* MW 4 */ + 3271 "11110000" // /* MW 3 */ + 3272 "00000110" // /* MW 2 */ + 3273 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3275 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3283 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 310 44 first +.src_ref 2 "reduce_base_c8.h" 312 41 first +.src_ref 2 "reduce_base_c8.h" 315 40 + 3284 "10111010" // ST.s16 r1, [p4], #2; ADD r2, r19, #-1; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3285 "01011000" // /* MW 9 */ + 3286 "11101100" // /* MW 8 */ + 3287 "00000111" // /* MW 7 */ + 3288 "11111000" // /* MW 6 */ + 3289 "00101111" // /* MW 5 */ + 3290 "00100110" // /* MW 4 */ + 3291 "11100000" // /* MW 3 */ + 3292 "10000110" // /* MW 2 */ + 3293 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 313 38 +.src_ref 2 "reduce_base_c8.h" 317 97 + 3294 "10111010" // MOVA r3, #-6; MOVXM dj0, #65536 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3295 "00010000" // /* MW 9 */ + 3296 "00000000" // /* MW 8 */ + 3297 "01000000" // /* MW 7 */ + 3298 "01000000" // /* MW 6 */ + 3299 "00000000" // /* MW 5 */ + 3300 "00000000" // /* MW 4 */ + 3301 "00000000" // /* MW 3 */ + 3302 "01000011" // /* MW 2 */ + 3303 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 315 40 +.src_ref 2 "reduce_base_c8.h" 317 97 first + 3304 "01100100" // LSHL r3, r28, r3; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3305 "00000001" // /* MW 5 */ + 3306 "00100000" // /* MW 4 */ + 3307 "10111100" // /* MW 3 */ + 3308 "11000111" // /* MW 2 */ + 3309 "11100000" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ + 3316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3317 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 311 38 first + 3318 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3319 "00110111" // /* MW 3 */ + 3320 "00011100" // /* MW 2 */ + 3321 "00000100" // /* MW 1 */ + 3322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3323 "00000000" // /* MW 1 */ + 3324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3325 "00000000" // /* MW 1 */ + 3326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3327 "00000000" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 312 39 first + 3334 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3335 "01010111" // /* MW 3 */ + 3336 "00011100" // /* MW 2 */ + 3337 "00000100" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ + 3340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3341 "00000000" // /* MW 1 */ + 3342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3343 "00000000" // /* MW 1 */ + 3344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3345 "00000000" // /* MW 1 */ + 3346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3347 "00000000" // /* MW 1 */ + 3348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3349 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 313 38 first + 3350 "10011000" // ST dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "01000001" // /* MW 3 */ + 3352 "00011100" // /* MW 2 */ + 3353 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 315 40 first + 3354 "00011000" // ST.s16 r24, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3355 "00010111" // /* MW 3 */ + 3356 "00001011" // /* MW 2 */ + 3357 "00000100" // /* MW 1 */ + 3358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3359 "00000000" // /* MW 1 */ + 3360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3361 "00000000" // /* MW 1 */ + 3362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3363 "00000000" // /* MW 1 */ + 3364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3365 "00000000" // /* MW 1 */ + 3366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3367 "00000000" // /* MW 1 */ + 3368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3369 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 316 38 first + 3370 "10011000" // ST r26, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3371 "01010001" // /* MW 3 */ + 3372 "00000111" // /* MW 2 */ + 3373 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 317 38 first + 3374 "10011000" // ST r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3375 "01110001" // /* MW 3 */ + 3376 "00010100" // /* MW 2 */ + 3377 "00001100" // /* MW 1 */ + 3378 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3379 "00000000" // /* MW 5 */ + 3380 "00000000" // /* MW 4 */ + 3381 "11101000" // /* MW 3 */ + 3382 "00000110" // /* MW 2 */ + 3383 "00000000" // /* MW 1 */ +.delay_slot + 3384 "11111000" // MOV r30, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3385 "10100000" // /* MW 3 */ + 3386 "10011111" // /* MW 2 */ + 3387 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3394 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 3395 "00011100" // /* MW 13 */ + 3396 "00000000" // /* MW 12 */ + 3397 "00000000" // /* MW 11 */ + 3398 "01010111" // /* MW 10 */ + 3399 "00011010" // /* MW 9 */ + 3400 "01000000" // /* MW 8 */ + 3401 "00000000" // /* MW 7 */ + 3402 "00000000" // /* MW 6 */ + 3403 "10110110" // /* MW 5 */ + 3404 "00000010" // /* MW 4 */ + 3405 "11110000" // /* MW 3 */ + 3406 "00101100" // /* MW 2 */ + 3407 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_832 +.src_ref 2 "reduce_base_c8.h" 298 44 first +.src_ref 2 "reduce_base_c8.h" 301 40 +.src_ref 2 "reduce_base_c8.h" 301 40 first + 3408 "10111010" // ST.s16 r1, [p4], #2; MSC r2, r2, r3, r22; MOV r2, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3409 "01011000" // /* MW 9 */ + 3410 "00010000" // /* MW 8 */ + 3411 "01001000" // /* MW 7 */ + 3412 "01110000" // /* MW 6 */ + 3413 "00101011" // /* MW 5 */ + 3414 "00000110" // /* MW 4 */ + 3415 "11100000" // /* MW 3 */ + 3416 "10000110" // /* MW 2 */ + 3417 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 76 +.src_ref 2 "reduce_base_c8.h" 303 40 +.src_ref 2 "reduce_base_c8.h" 306 62 + 3418 "10111010" // MOVA m0, #-20; MOVX r4, #-3; MOV r6, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3419 "01111000" // /* MW 9 */ + 3420 "00001110" // /* MW 8 */ + 3421 "11010000" // /* MW 7 */ + 3422 "10101000" // /* MW 6 */ + 3423 "01000111" // /* MW 5 */ + 3424 "00111110" // /* MW 4 */ + 3425 "10000000" // /* MW 3 */ + 3426 "10000000" // /* MW 2 */ + 3427 "11111101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 76 first + 3428 "10011000" // LSHL r4, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3429 "01001101" // /* MW 3 */ + 3430 "11001000" // /* MW 2 */ + 3431 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 41 +.src_ref 2 "reduce_base_c8.h" 306 62 first + 3432 "00100100" // MUL r30, r30, r6; ADD.NC r3, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3433 "11111111" // /* MW 5 */ + 3434 "10100100" // /* MW 4 */ + 3435 "11110001" // /* MW 3 */ + 3436 "10001101" // /* MW 2 */ + 3437 "11110111" // /* MW 1 */ + 3438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3439 "00000000" // /* MW 1 */ + 3440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3441 "00000000" // /* MW 1 */ + 3442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3443 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 299 38 first + 3444 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3445 "01110111" // /* MW 3 */ + 3446 "00011111" // /* MW 2 */ + 3447 "00000100" // /* MW 1 */ + 3448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3449 "00000000" // /* MW 1 */ + 3450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3451 "00000000" // /* MW 1 */ + 3452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3453 "00000000" // /* MW 1 */ + 3454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3455 "00000000" // /* MW 1 */ + 3456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3457 "00000000" // /* MW 1 */ + 3458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3459 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 300 39 first + 3460 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3461 "10110111" // /* MW 3 */ + 3462 "00011100" // /* MW 2 */ + 3463 "00000100" // /* MW 1 */ + 3464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3465 "00000000" // /* MW 1 */ + 3466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3467 "00000000" // /* MW 1 */ + 3468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3469 "00000000" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ + 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3473 "00000000" // /* MW 1 */ + 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3475 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 301 38 first + 3476 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "01010111" // /* MW 3 */ + 3478 "00011100" // /* MW 2 */ + 3479 "00000100" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 302 39 first + 3492 "00011000" // ST.s16 r3, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3493 "01110111" // /* MW 3 */ + 3494 "00011100" // /* MW 2 */ + 3495 "00000100" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ + 3506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3507 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 303 40 first + 3508 "00011000" // ST.s16 r1, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3509 "00110111" // /* MW 3 */ + 3510 "00001000" // /* MW 2 */ + 3511 "00000100" // /* MW 1 */ + 3512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3513 "00000000" // /* MW 1 */ + 3514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3515 "00000000" // /* MW 1 */ + 3516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3517 "00000000" // /* MW 1 */ + 3518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3519 "00000000" // /* MW 1 */ + 3520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3521 "00000000" // /* MW 1 */ + 3522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3523 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 304 38 first + 3524 "10011000" // ST r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3525 "00110001" // /* MW 3 */ + 3526 "00000110" // /* MW 2 */ + 3527 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 305 38 first + 3528 "00000010" // ST r20, [p4, #4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3529 "01110000" // /* MW 7 */ + 3530 "10100101" // /* MW 6 */ + 3531 "00000001" // /* MW 5 */ + 3532 "00000000" // /* MW 4 */ + 3533 "00110000" // /* MW 3 */ + 3534 "11010010" // /* MW 2 */ + 3535 "10000010" // /* MW 1 */ +.label __ll42__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv + 3536 "10111000" // MOV dj0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3537 "01000000" // /* MW 3 */ + 3538 "10000000" // /* MW 2 */ + 3539 "00011000" // /* MW 1 */ + 3540 "00110110" // ST.s16 r30, [p3, dj0]; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3541 "10000001" // /* MW 11 */ + 3542 "10101101" // /* MW 10 */ + 3543 "00000000" // /* MW 9 */ + 3544 "00000000" // /* MW 8 */ + 3545 "00000000" // /* MW 7 */ + 3546 "00000000" // /* MW 6 */ + 3547 "00100000" // /* MW 5 */ + 3548 "00000000" // /* MW 4 */ + 3549 "11100000" // /* MW 3 */ + 3550 "01111010" // /* MW 2 */ + 3551 "01100000" // /* MW 1 */ +.label __ll70__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv +.src_ref 2 "reduce_base_c8.h" 326 79 first +.src_ref 2 "reduce_base_c8.h" 329 51 + 3552 "00010100" // MOVA m2, #24; ADD.NC p0, r0, #30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3553 "00011110" // /* MW 5 */ + 3554 "11000000" // /* MW 4 */ + 3555 "10000000" // /* MW 3 */ + 3556 "00001000" // /* MW 2 */ + 3557 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.src_ref 2 "reduce_base_c8.h" 330 26 +.src_ref 3 "reduce_mean_c8_impl.h" 139 51 first + 3558 "10111010" // LDA r2, [p2], #4; MOVX r0, #16; MOV m0, #-30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3559 "01011000" // /* MW 9 */ + 3560 "11100010" // /* MW 8 */ + 3561 "00000111" // /* MW 7 */ + 3562 "00001000" // /* MW 6 */ + 3563 "00000010" // /* MW 5 */ + 3564 "00000000" // /* MW 4 */ + 3565 "11010000" // /* MW 3 */ + 3566 "10001010" // /* MW 2 */ + 3567 "01000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 331 24 +.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first + 3568 "01010100" // LDA.s16 r3, [p2]; MOV m1, #38 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3569 "10011001" // /* MW 5 */ + 3570 "00000000" // /* MW 4 */ + 3571 "01010010" // /* MW 3 */ + 3572 "10001110" // /* MW 2 */ + 3573 "01000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 141 49 first + 3574 "10011000" // LDA r1, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3575 "00110110" // /* MW 3 */ + 3576 "00010100" // /* MW 2 */ + 3577 "00000010" // /* MW 1 */ + 3578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3579 "00000000" // /* MW 1 */ + 3580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3581 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 326 28 first + 3582 "00011000" // ST.s16 r31, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3583 "11110111" // /* MW 3 */ + 3584 "00101111" // /* MW 2 */ + 3585 "00000000" // /* MW 1 */ + 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3587 "00000000" // /* MW 1 */ + 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3589 "00000000" // /* MW 1 */ + 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3591 "00000000" // /* MW 1 */ + 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3593 "00000000" // /* MW 1 */ + 3594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3595 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3597 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 327 31 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3598 "00011000" // ST.s16 r24, [p0], #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3599 "00010111" // /* MW 3 */ + 3600 "01011111" // /* MW 2 */ + 3601 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3603 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3605 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3607 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3609 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 327 31 +.src_ref 2 "reduce_base_c8.h" 328 23 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3610 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00000001" // /* MW 3 */ + 3612 "00110000" // /* MW 2 */ + 3613 "00010000" // /* MW 1 */ + 3614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3615 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 328 23 first + 3616 "00011000" // ST.s16 r24, [p0], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00010111" // /* MW 3 */ + 3618 "11001111" // /* MW 2 */ + 3619 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 51 first + 3620 "10011000" // LDA.u16 r4, [p0], m2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3621 "10011010" // /* MW 3 */ + 3622 "01001000" // /* MW 2 */ + 3623 "00000000" // /* MW 1 */ + 3624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3625 "00000000" // /* MW 1 */ + 3626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3627 "00000000" // /* MW 1 */ + 3628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3629 "00000000" // /* MW 1 */ + 3630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3633 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 28 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3634 "00011000" // ST.s16 r0, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3635 "00010111" // /* MW 3 */ + 3636 "11111100" // /* MW 2 */ + 3637 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.src_ref 2 "reduce_base_c8.h" 330 28 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3638 "00100100" // LSHL r4, r4, r26; ADD.NC r5, r4, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3639 "11111111" // /* MW 5 */ + 3640 "10100100" // /* MW 4 */ + 3641 "10110010" // /* MW 3 */ + 3642 "00110101" // /* MW 2 */ + 3643 "00100001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 329 30 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3644 "10011000" // SUB r0, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3645 "01000001" // /* MW 3 */ + 3646 "00000000" // /* MW 2 */ + 3647 "00010000" // /* MW 1 */ + 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3649 "00000000" // /* MW 1 */ + 3650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3651 "00000000" // /* MW 1 */ + 3652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3653 "00000000" // /* MW 1 */ + 3654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3655 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 330 26 first + 3656 "00011000" // ST.s16 r5, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3657 "10110111" // /* MW 3 */ + 3658 "00001000" // /* MW 2 */ + 3659 "00000000" // /* MW 1 */ + 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3661 "00000000" // /* MW 1 */ + 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3663 "00000000" // /* MW 1 */ + 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3665 "00000000" // /* MW 1 */ + 3666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3667 "00000000" // /* MW 1 */ + 3668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3669 "00000000" // /* MW 1 */ + 3670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3671 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 331 24 first + 3672 "00011000" // ST.s16 r19, [p0], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3673 "01110111" // /* MW 3 */ + 3674 "00101010" // /* MW 2 */ + 3675 "00000000" // /* MW 1 */ + 3676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3677 "00000000" // /* MW 1 */ + 3678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3679 "00000000" // /* MW 1 */ + 3680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3681 "00000000" // /* MW 1 */ + 3682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3683 "00000000" // /* MW 1 */ + 3684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3685 "00000000" // /* MW 1 */ + 3686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3687 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 139 40 first + 3688 "00011000" // ST.s8 r2, [p0], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3689 "01000111" // /* MW 3 */ + 3690 "11101100" // /* MW 2 */ + 3691 "00000000" // /* MW 1 */ + 3692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3693 "00000000" // /* MW 1 */ + 3694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3695 "00000000" // /* MW 1 */ + 3696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3697 "00000000" // /* MW 1 */ + 3698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3699 "00000000" // /* MW 1 */ + 3700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3701 "00000000" // /* MW 1 */ + 3702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3703 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 140 34 first + 3704 "00011000" // ST.s16 r3, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3705 "01110111" // /* MW 3 */ + 3706 "00000100" // /* MW 2 */ + 3707 "00000000" // /* MW 1 */ + 3708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3709 "00000000" // /* MW 1 */ + 3710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3711 "00000000" // /* MW 1 */ + 3712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3713 "00000000" // /* MW 1 */ + 3714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3715 "00000000" // /* MW 1 */ + 3716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3717 "00000000" // /* MW 1 */ + 3718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3719 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 141 38 first + 3720 "00011000" // ST.s8 r1, [p0, #-2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3721 "00100111" // /* MW 3 */ + 3722 "11100100" // /* MW 2 */ + 3723 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 349 4 first + 3724 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 3725 "00000000" // /* MW 3 */ + 3726 "00101000" // /* MW 2 */ + 3727 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 349 4 +.delay_slot + 3728 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3729 "00000001" // /* MW 5 */ + 3730 "00000000" // /* MW 4 */ + 3731 "00000000" // /* MW 3 */ + 3732 "11111000" // /* MW 2 */ + 3733 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3735 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3737 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3739 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3740 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3741 "01100111" // /* MW 3 */ + 3742 "00000001" // /* MW 2 */ + 3743 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1168 +.src_ref 2 "reduce_base_c8.h" 262 44 first +.src_ref 2 "reduce_base_c8.h" 263 77 + 3744 "10111010" // ST.s16 r21, [p4], #2; MOVXM r5, #65512 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3745 "00010000" // /* MW 9 */ + 3746 "11110100" // /* MW 8 */ + 3747 "10101111" // /* MW 7 */ + 3748 "00111100" // /* MW 6 */ + 3749 "00000000" // /* MW 5 */ + 3750 "00000000" // /* MW 4 */ + 3751 "11100000" // /* MW 3 */ + 3752 "11010110" // /* MW 2 */ + 3753 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 56 +.src_ref 2 "reduce_base_c8.h" 263 77 first +.src_ref 2 "reduce_base_c8.h" 267 40 + 3754 "10111010" // LDA r2, [sp, #-4]; ADD r7, r5, r26; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3755 "01011000" // /* MW 9 */ + 3756 "11101100" // /* MW 8 */ + 3757 "00000111" // /* MW 7 */ + 3758 "00000100" // /* MW 6 */ + 3759 "01111101" // /* MW 5 */ + 3760 "00001010" // /* MW 4 */ + 3761 "00100000" // /* MW 3 */ + 3762 "10001010" // /* MW 2 */ + 3763 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 118 +.src_ref 2 "reduce_base_c8.h" 329 30 + 3764 "10111010" // MOVA r26, #4; MOVXM r6, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3765 "10010000" // /* MW 9 */ + 3766 "11111111" // /* MW 8 */ + 3767 "11001111" // /* MW 7 */ + 3768 "00111100" // /* MW 6 */ + 3769 "00000000" // /* MW 5 */ + 3770 "00000000" // /* MW 4 */ + 3771 "00000000" // /* MW 3 */ + 3772 "10011010" // /* MW 2 */ + 3773 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 118 first + 3774 "10011000" // ADD r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3775 "01100000" // /* MW 3 */ + 3776 "11100010" // /* MW 2 */ + 3777 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 98 +.src_ref 2 "reduce_base_c8.h" 267 116 first + 3778 "00011000" // MAC r29, r29, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3779 "01000110" // /* MW 3 */ + 3780 "01111010" // /* MW 2 */ + 3781 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 60 first +.src_ref 2 "reduce_base_c8.h" 265 98 first + 3782 "00011000" // MSC r21, r21, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3783 "01001110" // /* MW 3 */ + 3784 "01101010" // /* MW 2 */ + 3785 "00010100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3787 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 38 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3788 "00011000" // ST.s16 r2, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3789 "01010111" // /* MW 3 */ + 3790 "00011100" // /* MW 2 */ + 3791 "00000100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 263 56 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3792 "10011000" // MUL r2, r7, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3793 "00101111" // /* MW 3 */ + 3794 "11000100" // /* MW 2 */ + 3795 "00010001" // /* MW 1 */ + 3796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3797 "00000000" // /* MW 1 */ + 3798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3799 "00000000" // /* MW 1 */ + 3800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3801 "00000000" // /* MW 1 */ + 3802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3803 "00000000" // /* MW 1 */ + 3804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3805 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 264 39 first + 3806 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3807 "11010111" // /* MW 3 */ + 3808 "00011110" // /* MW 2 */ + 3809 "00000100" // /* MW 1 */ + 3810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3811 "00000000" // /* MW 1 */ + 3812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3813 "00000000" // /* MW 1 */ + 3814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3815 "00000000" // /* MW 1 */ + 3816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3817 "00000000" // /* MW 1 */ + 3818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3819 "00000000" // /* MW 1 */ + 3820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3821 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 265 38 first + 3822 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3823 "10110111" // /* MW 3 */ + 3824 "00011110" // /* MW 2 */ + 3825 "00000100" // /* MW 1 */ + 3826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3827 "00000000" // /* MW 1 */ + 3828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3829 "00000000" // /* MW 1 */ + 3830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3831 "00000000" // /* MW 1 */ + 3832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3833 "00000000" // /* MW 1 */ + 3834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3835 "00000000" // /* MW 1 */ + 3836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3837 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 266 39 first + 3838 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3839 "00110111" // /* MW 3 */ + 3840 "00011100" // /* MW 2 */ + 3841 "00000100" // /* MW 1 */ + 3842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3843 "00000000" // /* MW 1 */ + 3844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3845 "00000000" // /* MW 1 */ + 3846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3847 "00000000" // /* MW 1 */ + 3848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3849 "00000000" // /* MW 1 */ + 3850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3851 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3853 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 40 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3854 "00011000" // ST.s16 r2, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3855 "01010111" // /* MW 3 */ + 3856 "00001000" // /* MW 2 */ + 3857 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3859 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3861 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3862 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 3863 "00000000" // /* MW 5 */ + 3864 "00000000" // /* MW 4 */ + 3865 "11101000" // /* MW 3 */ + 3866 "00000110" // /* MW 2 */ + 3867 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 42 +.delay_slot +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3868 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3869 "01000001" // /* MW 3 */ + 3870 "00001010" // /* MW 2 */ + 3871 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 267 42 +.delay_slot +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3872 "10011000" // SUB r2, r5, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3873 "11010001" // /* MW 3 */ + 3874 "01000101" // /* MW 2 */ + 3875 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 270 64 +.delay_slot + 3876 "11111000" // MOV r6, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3877 "00011100" // /* MW 3 */ + 3878 "10100001" // /* MW 2 */ + 3879 "00011001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 268 38 first +.delay_slot + 3880 "00000010" // ST r3, [p4]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3881 "01110000" // /* MW 7 */ + 3882 "10100101" // /* MW 6 */ + 3883 "00000001" // /* MW 5 */ + 3884 "00000000" // /* MW 4 */ + 3885 "00110000" // /* MW 3 */ + 3886 "10001110" // /* MW 2 */ + 3887 "10000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 269 38 first +.src_ref 2 "reduce_base_c8.h" 270 64 first +.delay_slot + 3888 "11100001" // NOPA; NOPB; ST r16, [p4, #4]; MUL r30, r30, r6; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3889 "00000000" // /* MW 15 */ + 3890 "00000000" // /* MW 14 */ + 3891 "01111000" // /* MW 13 */ + 3892 "10100101" // /* MW 12 */ + 3893 "00000001" // /* MW 11 */ + 3894 "01111100" // /* MW 10 */ + 3895 "11100011" // /* MW 9 */ + 3896 "10111101" // /* MW 8 */ + 3897 "00010001" // /* MW 7 */ + 3898 "00010110" // /* MW 6 */ + 3899 "00100100" // /* MW 5 */ + 3900 "00000000" // /* MW 4 */ + 3901 "11110000" // /* MW 3 */ + 3902 "00101100" // /* MW 2 */ + 3903 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1328 +.src_ref 2 "reduce_base_c8.h" 250 44 +.src_ref 2 "reduce_base_c8.h" 250 44 first +.src_ref 2 "reduce_base_c8.h" 255 40 + 3904 "10111010" // ST.s16 r4, [p4], #2; MOVX r4, #16; MOV m0, #-20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "01011000" // /* MW 9 */ + 3906 "11101100" // /* MW 8 */ + 3907 "00000111" // /* MW 7 */ + 3908 "00001000" // /* MW 6 */ + 3909 "01000010" // /* MW 5 */ + 3910 "00000000" // /* MW 4 */ + 3911 "11100000" // /* MW 3 */ + 3912 "10010010" // /* MW 2 */ + 3913 "10000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 255 113 first + 3914 "10111010" // LDA r1, [sp, #-4]; MSC r2, r2, r3, r26; MOV r2, #8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "01011000" // /* MW 9 */ + 3916 "00001000" // /* MW 8 */ + 3917 "01001000" // /* MW 7 */ + 3918 "01110000" // /* MW 6 */ + 3919 "00101101" // /* MW 5 */ + 3920 "00000110" // /* MW 4 */ + 3921 "00100000" // /* MW 3 */ + 3922 "10000110" // /* MW 2 */ + 3923 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.src_ref 2 "reduce_base_c8.h" 329 30 + 3924 "01100100" // MOVX r3, #16; MOV r26, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3925 "00010001" // /* MW 5 */ + 3926 "00100000" // /* MW 4 */ + 3927 "00101101" // /* MW 3 */ + 3928 "11001000" // /* MW 2 */ + 3929 "00000000" // /* MW 1 */ + 3930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3931 "00000000" // /* MW 1 */ + 3932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3933 "00000000" // /* MW 1 */ + 3934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3935 "00000000" // /* MW 1 */ + 3936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3937 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 251 38 first + 3938 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3939 "01110111" // /* MW 3 */ + 3940 "00011111" // /* MW 2 */ + 3941 "00000100" // /* MW 1 */ + 3942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3943 "00000000" // /* MW 1 */ + 3944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3945 "00000000" // /* MW 1 */ + 3946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3947 "00000000" // /* MW 1 */ + 3948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3949 "00000000" // /* MW 1 */ + 3950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3951 "00000000" // /* MW 1 */ + 3952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 252 39 first + 3954 "00011000" // ST.s16 r5, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3955 "10110111" // /* MW 3 */ + 3956 "00011100" // /* MW 2 */ + 3957 "00000100" // /* MW 1 */ + 3958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3959 "00000000" // /* MW 1 */ + 3960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3961 "00000000" // /* MW 1 */ + 3962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3963 "00000000" // /* MW 1 */ + 3964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3965 "00000000" // /* MW 1 */ + 3966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3967 "00000000" // /* MW 1 */ + 3968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3969 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 253 38 first + 3970 "00011000" // ST.s16 r27, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3971 "01110111" // /* MW 3 */ + 3972 "00011111" // /* MW 2 */ + 3973 "00000100" // /* MW 1 */ + 3974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3975 "00000000" // /* MW 1 */ + 3976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3977 "00000000" // /* MW 1 */ + 3978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3979 "00000000" // /* MW 1 */ + 3980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3981 "00000000" // /* MW 1 */ + 3982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3983 "00000000" // /* MW 1 */ + 3984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3985 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 254 39 first + 3986 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3987 "11010111" // /* MW 3 */ + 3988 "00011110" // /* MW 2 */ + 3989 "00000100" // /* MW 1 */ + 3990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3991 "00000000" // /* MW 1 */ + 3992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3993 "00000000" // /* MW 1 */ + 3994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3995 "00000000" // /* MW 1 */ + 3996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3997 "00000000" // /* MW 1 */ + 3998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3999 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 4000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4001 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 40 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 4002 "00011000" // ST.s16 r3, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4003 "01110111" // /* MW 3 */ + 4004 "00001000" // /* MW 2 */ + 4005 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4007 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4009 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 4010 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 4011 "00000000" // /* MW 5 */ + 4012 "00000000" // /* MW 4 */ + 4013 "11101000" // /* MW 3 */ + 4014 "00000110" // /* MW 2 */ + 4015 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 255 42 +.src_ref 2 "reduce_base_c8.h" 255 113 +.delay_slot +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4016 "00011000" // MAC r3, r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4017 "00100110" // /* MW 3 */ + 4018 "01000110" // /* MW 2 */ + 4019 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4023 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 256 38 first +.delay_slot + 4024 "10011000" // ST r6, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4025 "11010001" // /* MW 3 */ + 4026 "00000100" // /* MW 2 */ + 4027 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 257 38 first +.delay_slot + 4028 "10011000" // ST r18, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4029 "01010001" // /* MW 3 */ + 4030 "00010110" // /* MW 2 */ + 4031 "00001100" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv_1456 +.src_ref 2 "reduce_base_c8.h" 238 44 first + 4032 "00011000" // ST.s16 r21, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4033 "10110111" // /* MW 3 */ + 4034 "00011110" // /* MW 2 */ + 4035 "00000100" // /* MW 1 */ + 4036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4037 "00000000" // /* MW 1 */ + 4038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4039 "00000000" // /* MW 1 */ + 4040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4041 "00000000" // /* MW 1 */ + 4042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4043 "00000000" // /* MW 1 */ + 4044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4045 "00000000" // /* MW 1 */ + 4046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4047 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 239 38 first + 4048 "00011000" // ST.s16 r7, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4049 "11110111" // /* MW 3 */ + 4050 "00011100" // /* MW 2 */ + 4051 "00000100" // /* MW 1 */ + 4052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4053 "00000000" // /* MW 1 */ + 4054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4055 "00000000" // /* MW 1 */ + 4056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4057 "00000000" // /* MW 1 */ + 4058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4059 "00000000" // /* MW 1 */ + 4060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4061 "00000000" // /* MW 1 */ + 4062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4063 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 240 39 first + 4064 "00011000" // ST.s16 r23, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4065 "11110111" // /* MW 3 */ + 4066 "00011110" // /* MW 2 */ + 4067 "00000100" // /* MW 1 */ + 4068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4069 "00000000" // /* MW 1 */ + 4070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4071 "00000000" // /* MW 1 */ + 4072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4073 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 94 + 4074 "00011000" // LDA r3, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4075 "01110001" // /* MW 3 */ + 4076 "11111100" // /* MW 2 */ + 4077 "00000111" // /* MW 1 */ + 4078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4079 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 4080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4081 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 38 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 4082 "00011000" // ST.s16 r1, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4083 "00110111" // /* MW 3 */ + 4084 "00011100" // /* MW 2 */ + 4085 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4087 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4088 "01000100" // MOVXM r1, #65504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4089 "11000000" // /* MW 5 */ + 4090 "10111111" // /* MW 4 */ + 4091 "11110000" // /* MW 3 */ + 4092 "00000000" // /* MW 2 */ + 4093 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 4094 "10011000" // ADD r2, r1, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4095 "10100000" // /* MW 3 */ + 4096 "01000101" // /* MW 2 */ + 4097 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 40 +.src_ref 2 "reduce_base_c8.h" 241 94 +.src_ref 2 "reduce_base_c8.h" 241 94 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4098 "01100100" // MAC r1, r1, r3, r2; MOV r1, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4099 "01000001" // /* MW 5 */ + 4100 "10100000" // /* MW 4 */ + 4101 "11000000" // /* MW 3 */ + 4102 "01000100" // /* MW 2 */ + 4103 "00011000" // /* MW 1 */ + 4104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4105 "00000000" // /* MW 1 */ + 4106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4107 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 242 39 first + 4108 "00011000" // ST.s16 r22, [p4], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4109 "11010111" // /* MW 3 */ + 4110 "00011110" // /* MW 2 */ + 4111 "00000100" // /* MW 1 */ + 4112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4113 "00000000" // /* MW 1 */ + 4114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4115 "00000000" // /* MW 1 */ + 4116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4117 "00000000" // /* MW 1 */ + 4118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4119 "00000000" // /* MW 1 */ + 4120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4121 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 40 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 4122 "10111000" // MOV m0, #-20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4123 "11011000" // /* MW 3 */ + 4124 "00001111" // /* MW 2 */ + 4125 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 40 first +.aggressive_scheduled_block_id 8 +.noswbrkpt + 4126 "00011000" // ST.s16 r5, [p4], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4127 "10110111" // /* MW 3 */ + 4128 "00001000" // /* MW 2 */ + 4129 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4131 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4133 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4134 "10000100" // J #3536 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=3536 delay_slots=5 */ + 4135 "00000000" // /* MW 5 */ + 4136 "00000000" // /* MW 4 */ + 4137 "11101000" // /* MW 3 */ + 4138 "00000110" // /* MW 2 */ + 4139 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 243 42 +.src_ref 2 "reduce_base_c8.h" 243 42 +.src_ref 2 "reduce_base_c8.h" 243 91 +.src_ref 2 "reduce_base_c8.h" 243 91 +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4140 "01100100" // MSC r5, r5, r22, r4; MOV r5, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4141 "01000001" // /* MW 5 */ + 4142 "10100000" // /* MW 4 */ + 4143 "11000010" // /* MW 3 */ + 4144 "01001001" // /* MW 2 */ + 4145 "10110001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4149 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 244 38 first +.delay_slot + 4150 "10011000" // ST r20, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4151 "10010001" // /* MW 3 */ + 4152 "00000110" // /* MW 2 */ + 4153 "00001100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 245 38 first +.src_ref 2 "reduce_base_c8.h" 329 30 +.delay_slot + 4154 "00111010" // ST r17, [p4, #4]; MOVX r26, #4; MOV r30, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4155 "01111001" // /* MW 9 */ + 4156 "10001110" // /* MW 8 */ + 4157 "11010000" // /* MW 7 */ + 4158 "10001011" // /* MW 6 */ + 4159 "10100000" // /* MW 5 */ + 4160 "00000001" // /* MW 4 */ + 4161 "00110000" // /* MW 3 */ + 4162 "11000110" // /* MW 2 */ +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE5setupER18reduce_c8_params_tIS4_EPKv___func_end0 + 4163 "10000010" // /* MW 1 */ +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_begin0 +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.function pad_3d<(pad_3d_mode)0, bfloat16, 1> _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t +.src_ref 3 "pad_3d.h" 266 first +.src_ref 3 "pad_3d.h" 465 37 first +.src_ref 3 "pad_3d.h" 468 21 first +.src_ref 3 "pad_3d.h" 471 29 +.src_ref 3 "pad_3d.h" 479 21 +.function_start + 4176 "10111010" // LDA r0, [p2, #4]; MOVX r4, #-2; MOV m1, #-24 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4177 "01011000" // /* MW 9 */ + 4178 "11101000" // /* MW 8 */ + 4179 "10000111" // /* MW 7 */ + 4180 "11001000" // /* MW 6 */ + 4181 "01000111" // /* MW 5 */ + 4182 "00111110" // /* MW 4 */ + 4183 "11010000" // /* MW 3 */ + 4184 "10000010" // /* MW 2 */ + 4185 "01000010" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 469 21 first +.src_ref 3 "pad_3d.h" 478 21 +.src_ref 3 "pad_3d.h" 499 52 +.src_ref 3 "pad_3d.h" 511 25 + 4186 "10111010" // LDA r1, [p2], #8; MOVX r2, #-3; MOV r16, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4187 "01011000" // /* MW 9 */ + 4188 "00000110" // /* MW 8 */ + 4189 "00001000" // /* MW 7 */ + 4190 "10101010" // /* MW 6 */ + 4191 "00100111" // /* MW 5 */ + 4192 "00111110" // /* MW 4 */ + 4193 "11010000" // /* MW 3 */ + 4194 "10000110" // /* MW 2 */ + 4195 "01000101" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 470 21 first +.src_ref 3 "pad_3d.h" 486 26 +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 26 +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 517 22 + 4196 "10111010" // LDA r5, [p2], #28; MOVX r24, #0; MOV r3, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4197 "01111000" // /* MW 9 */ + 4198 "01100000" // /* MW 8 */ + 4199 "01101000" // /* MW 7 */ + 4200 "00001000" // /* MW 6 */ + 4201 "10000000" // /* MW 5 */ + 4202 "00000001" // /* MW 4 */ + 4203 "11010000" // /* MW 3 */ + 4204 "10010110" // /* MW 2 */ + 4205 "01001111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 471 29 first + 4206 "10011000" // LDA.s16 r18, [p2], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4207 "01010010" // /* MW 3 */ + 4208 "00101010" // /* MW 2 */ + 4209 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 472 25 first + 4210 "10011000" // LDA r6, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4211 "11010110" // /* MW 3 */ + 4212 "00011100" // /* MW 2 */ + 4213 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 473 26 first + 4214 "10011000" // LDA r7, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4215 "11110110" // /* MW 3 */ + 4216 "00101100" // /* MW 2 */ + 4217 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 475 24 first + 4218 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4219 "00110110" // /* MW 3 */ + 4220 "00000110" // /* MW 2 */ + 4221 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 479 21 first + 4222 "10011000" // ASHL r19, r0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4223 "01001110" // /* MW 3 */ + 4224 "00100110" // /* MW 2 */ + 4225 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 477 23 first + 4226 "10011000" // LDA r4, [p2, #8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4227 "10010110" // /* MW 3 */ + 4228 "00100100" // /* MW 2 */ + 4229 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 478 21 first + 4230 "10011000" // ASHL r20, r5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4231 "00101110" // /* MW 3 */ + 4232 "01101000" // /* MW 2 */ + 4233 "00010001" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 56 25 first + 4234 "11111000" // VBCST.16 x0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4235 "01110010" // /* MW 3 */ + 4236 "01001001" // /* MW 2 */ + 4237 "00011000" // /* MW 1 */ + 4238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4239 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 485 45 first + 4240 "10011000" // MUL r18, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4241 "01001111" // /* MW 3 */ + 4242 "11100101" // /* MW 2 */ + 4243 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 485 34 + 4244 "10011000" // SUB r19, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4245 "00010001" // /* MW 3 */ + 4246 "01100111" // /* MW 2 */ + 4247 "00010000" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 998 25 first + 4248 "10011000" // MUL r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4249 "00101111" // /* MW 3 */ + 4250 "11100111" // /* MW 2 */ + 4251 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 43 first + 4252 "10011000" // MUL r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4253 "00101111" // /* MW 3 */ + 4254 "01100011" // /* MW 2 */ + 4255 "00010100" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 first + 4256 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4257 "00001101" // /* MW 3 */ + 4258 "11100001" // /* MW 2 */ + 4259 "00010100" // /* MW 1 */ +.src_ref 4 "array_helpers.hpp" 950 13 +.src_ref 3 "pad_3d.h" 486 26 first + 4260 "10100100" // GE r16, r24, r17; ADD.NC p2, r3, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4261 "10000010" // /* MW 5 */ + 4262 "11000011" // /* MW 4 */ + 4263 "00110100" // /* MW 3 */ + 4264 "00100011" // /* MW 2 */ + 4265 "11000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4266 "10000100" // JNZ r16, #4416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4416 delay_slots=5 */ + 4267 "00000001" // /* MW 5 */ + 4268 "01000000" // /* MW 4 */ + 4269 "10100000" // /* MW 3 */ + 4270 "00001000" // /* MW 2 */ + 4271 "10000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 487 22 +.delay_slot + 4272 "11111000" // VMOV bmll0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "10010010" // /* MW 3 */ + 4274 "00000000" // /* MW 2 */ + 4275 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4279 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4281 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4283 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4284 "01000100" // MOVXM ls, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4285 "01100000" // /* MW 5 */ + 4286 "11100010" // /* MW 4 */ + 4287 "00010001" // /* MW 3 */ + 4288 "00000000" // /* MW 2 */ + 4289 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4290 "01000100" // MOVXM le, #4400 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4291 "01100000" // /* MW 5 */ + 4292 "11100010" // /* MW 4 */ + 4293 "00010110" // /* MW 3 */ + 4294 "00000000" // /* MW 2 */ + 4295 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 486 4 + 4296 "00000010" // NOPS; MOV lc, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4297 "01110000" // /* MW 7 */ + 4298 "01010000" // /* MW 6 */ + 4299 "10111100" // /* MW 5 */ + 4300 "00000010" // /* MW 4 */ + 4301 "01100000" // /* MW 3 */ + 4302 "00101011" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ + 4304 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "10100101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00100000" // /* MW 5 */ + 4316 "00000000" // /* MW 4 */ + 4317 "11110000" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "10100101" // /* MW 12 */ + 4325 "00000001" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ + 4336 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00100000" // /* MW 5 */ + 4348 "00000000" // /* MW 4 */ + 4349 "11110000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ + 4352 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "01011011" // /* MW 7 */ + 4362 "00000001" // /* MW 6 */ + 4363 "00100000" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ + 4368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "10100101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "01011011" // /* MW 7 */ + 4378 "00000001" // /* MW 6 */ + 4379 "00100000" // /* MW 5 */ + 4380 "00000000" // /* MW 4 */ + 4381 "11110000" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ + 4384 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "10100101" // /* MW 12 */ + 4389 "00000001" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "01011011" // /* MW 7 */ + 4394 "00000001" // /* MW 6 */ + 4395 "00100000" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_224 +.src_ref 3 "pad_3d.h" 487 22 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4400 "11100001" // NOPA; NOPB; VST bmll0, [p2], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4401 "00000000" // /* MW 15 */ + 4402 "00000000" // /* MW 14 */ + 4403 "01111000" // /* MW 13 */ + 4404 "10100101" // /* MW 12 */ + 4405 "00000001" // /* MW 11 */ + 4406 "00000000" // /* MW 10 */ + 4407 "00000000" // /* MW 9 */ + 4408 "10000000" // /* MW 8 */ + 4409 "00000110" // /* MW 7 */ + 4410 "00011100" // /* MW 6 */ + 4411 "00100010" // /* MW 5 */ + 4412 "00000000" // /* MW 4 */ + 4413 "11110000" // /* MW 3 */ + 4414 "00101100" // /* MW 2 */ + 4415 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_240 +.src_ref 3 "pad_3d.h" 495 21 +.src_ref 3 "pad_3d.h" 495 40 first +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 38 first +.loop_nesting 0 + 4416 "10111010" // MOVA r6, #4; MUL r16, r5, r1; ADD.NC r17, r7, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4417 "10101000" // /* MW 9 */ + 4418 "11001100" // /* MW 8 */ + 4419 "00101001" // /* MW 7 */ + 4420 "11111110" // /* MW 6 */ + 4421 "00000000" // /* MW 5 */ + 4422 "00001011" // /* MW 4 */ + 4423 "00000000" // /* MW 3 */ + 4424 "10000110" // /* MW 2 */ + 4425 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 40 +.src_ref 3 "pad_3d.h" 496 29 first + 4426 "00100100" // SUB r17, r0, r17; ADD.NC dn1, r7, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4427 "11111111" // /* MW 5 */ + 4428 "10000111" // /* MW 4 */ + 4429 "00110010" // /* MW 3 */ + 4430 "01100010" // /* MW 2 */ + 4431 "00000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 21 first + 4432 "10011000" // LSHL r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4433 "01101101" // /* MW 3 */ + 4434 "01100010" // /* MW 2 */ + 4435 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 495 58 +.src_ref 3 "pad_3d.h" 498 23 first + 4436 "00100100" // SUB r17, r0, r7; ADD.NC m1, r17, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4437 "00010000" // /* MW 5 */ + 4438 "00010001" // /* MW 4 */ + 4439 "00110010" // /* MW 3 */ + 4440 "01001110" // /* MW 2 */ + 4441 "00000100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 45 first + 4442 "10011000" // MUL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4443 "00001111" // /* MW 3 */ + 4444 "11100001" // /* MW 2 */ + 4445 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 498 10 first + 4446 "10011000" // LSHL r6, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4447 "01101101" // /* MW 3 */ + 4448 "01001100" // /* MW 2 */ + 4449 "00010100" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 498 10 +.src_ref 3 "pad_3d.h" 499 52 first + 4450 "10100100" // ASHL r6, r16, r2; ADD.NC p2, r3, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4451 "00110010" // /* MW 5 */ + 4452 "11000011" // /* MW 4 */ + 4453 "11010100" // /* MW 3 */ + 4454 "10000101" // /* MW 2 */ + 4455 "10000001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 26 + 4456 "10011000" // GE r7, r24, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4457 "01101001" // /* MW 3 */ + 4458 "00001110" // /* MW 2 */ + 4459 "00010110" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 4 + 4460 "10000100" // JNZ r7, #4624 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4624 delay_slots=5 */ + 4461 "00000001" // /* MW 5 */ + 4462 "01000000" // /* MW 4 */ + 4463 "00001000" // /* MW 3 */ + 4464 "00001001" // /* MW 2 */ + 4465 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4475 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 499 4 + 4476 "10111010" // MOVA dc1, #0; MOVXM ls, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4477 "00010000" // /* MW 9 */ + 4478 "00000000" // /* MW 8 */ + 4479 "01111001" // /* MW 7 */ + 4480 "00000100" // /* MW 6 */ + 4481 "00000000" // /* MW 5 */ + 4482 "00000000" // /* MW 4 */ + 4483 "10000000" // /* MW 3 */ + 4484 "00000111" // /* MW 2 */ + 4485 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 499 4 + 4486 "10111010" // MOVA dj1, #16; MOVXM le, #4608 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4487 "00010000" // /* MW 9 */ + 4488 "00000000" // /* MW 8 */ + 4489 "10111001" // /* MW 7 */ + 4490 "00000101" // /* MW 6 */ + 4491 "00000000" // /* MW 5 */ + 4492 "00000000" // /* MW 4 */ + 4493 "10000000" // /* MW 3 */ + 4494 "00000110" // /* MW 2 */ + 4495 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 499 4 + 4496 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r6; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4497 "00000000" // /* MW 15 */ + 4498 "00000000" // /* MW 14 */ + 4499 "01111000" // /* MW 13 */ + 4500 "10010000" // /* MW 12 */ + 4501 "10111001" // /* MW 11 */ + 4502 "00000010" // /* MW 10 */ + 4503 "00000000" // /* MW 9 */ + 4504 "00000000" // /* MW 8 */ + 4505 "01011011" // /* MW 7 */ + 4506 "00000001" // /* MW 6 */ + 4507 "00100000" // /* MW 5 */ + 4508 "00000000" // /* MW 4 */ + 4509 "11110000" // /* MW 3 */ + 4510 "00101100" // /* MW 2 */ + 4511 "00000000" // /* MW 1 */ + 4512 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4513 "00000000" // /* MW 15 */ + 4514 "00000000" // /* MW 14 */ + 4515 "01111000" // /* MW 13 */ + 4516 "10100101" // /* MW 12 */ + 4517 "00000001" // /* MW 11 */ + 4518 "00000000" // /* MW 10 */ + 4519 "00000000" // /* MW 9 */ + 4520 "00000000" // /* MW 8 */ + 4521 "01011011" // /* MW 7 */ + 4522 "00000001" // /* MW 6 */ + 4523 "00100000" // /* MW 5 */ + 4524 "00000000" // /* MW 4 */ + 4525 "11110000" // /* MW 3 */ + 4526 "00101100" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ + 4528 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4529 "00000000" // /* MW 15 */ + 4530 "00000000" // /* MW 14 */ + 4531 "01111000" // /* MW 13 */ + 4532 "10100101" // /* MW 12 */ + 4533 "00000001" // /* MW 11 */ + 4534 "00000000" // /* MW 10 */ + 4535 "00000000" // /* MW 9 */ + 4536 "00000000" // /* MW 8 */ + 4537 "01011011" // /* MW 7 */ + 4538 "00000001" // /* MW 6 */ + 4539 "00100000" // /* MW 5 */ + 4540 "00000000" // /* MW 4 */ + 4541 "11110000" // /* MW 3 */ + 4542 "00101100" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ + 4544 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4545 "00000000" // /* MW 15 */ + 4546 "00000000" // /* MW 14 */ + 4547 "01111000" // /* MW 13 */ + 4548 "10100101" // /* MW 12 */ + 4549 "00000001" // /* MW 11 */ + 4550 "00000000" // /* MW 10 */ + 4551 "00000000" // /* MW 9 */ + 4552 "00000000" // /* MW 8 */ + 4553 "01011011" // /* MW 7 */ + 4554 "00000001" // /* MW 6 */ + 4555 "00100000" // /* MW 5 */ + 4556 "00000000" // /* MW 4 */ + 4557 "11110000" // /* MW 3 */ + 4558 "00101100" // /* MW 2 */ + 4559 "00000000" // /* MW 1 */ + 4560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4561 "00000000" // /* MW 15 */ + 4562 "00000000" // /* MW 14 */ + 4563 "01111000" // /* MW 13 */ + 4564 "10100101" // /* MW 12 */ + 4565 "00000001" // /* MW 11 */ + 4566 "00000000" // /* MW 10 */ + 4567 "00000000" // /* MW 9 */ + 4568 "00000000" // /* MW 8 */ + 4569 "01011011" // /* MW 7 */ + 4570 "00000001" // /* MW 6 */ + 4571 "00100000" // /* MW 5 */ + 4572 "00000000" // /* MW 4 */ + 4573 "11110000" // /* MW 3 */ + 4574 "00101100" // /* MW 2 */ + 4575 "00000000" // /* MW 1 */ + 4576 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4577 "00000000" // /* MW 15 */ + 4578 "00000000" // /* MW 14 */ + 4579 "01111000" // /* MW 13 */ + 4580 "10100101" // /* MW 12 */ + 4581 "00000001" // /* MW 11 */ + 4582 "00000000" // /* MW 10 */ + 4583 "00000000" // /* MW 9 */ + 4584 "00000000" // /* MW 8 */ + 4585 "01011011" // /* MW 7 */ + 4586 "00000001" // /* MW 6 */ + 4587 "00100000" // /* MW 5 */ + 4588 "00000000" // /* MW 4 */ + 4589 "11110000" // /* MW 3 */ + 4590 "00101100" // /* MW 2 */ + 4591 "00000000" // /* MW 1 */ + 4592 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4593 "00000000" // /* MW 15 */ + 4594 "00000000" // /* MW 14 */ + 4595 "01111000" // /* MW 13 */ + 4596 "10100101" // /* MW 12 */ + 4597 "00000001" // /* MW 11 */ + 4598 "00000000" // /* MW 10 */ + 4599 "00000000" // /* MW 9 */ + 4600 "00000000" // /* MW 8 */ + 4601 "01011011" // /* MW 7 */ + 4602 "00000001" // /* MW 6 */ + 4603 "00100000" // /* MW 5 */ + 4604 "00000000" // /* MW 4 */ + 4605 "11110000" // /* MW 3 */ + 4606 "00101100" // /* MW 2 */ + 4607 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4608 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4609 "00000000" // /* MW 15 */ + 4610 "00000000" // /* MW 14 */ + 4611 "01111000" // /* MW 13 */ + 4612 "10100101" // /* MW 12 */ + 4613 "00000001" // /* MW 11 */ + 4614 "00000000" // /* MW 10 */ + 4615 "00000000" // /* MW 9 */ + 4616 "00000000" // /* MW 8 */ + 4617 "00101110" // /* MW 7 */ + 4618 "00110000" // /* MW 6 */ + 4619 "00100010" // /* MW 5 */ + 4620 "00000000" // /* MW 4 */ + 4621 "11110000" // /* MW 3 */ + 4622 "00101100" // /* MW 2 */ + 4623 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_448 +.src_ref 3 "pad_3d.h" 514 39 +.loop_nesting 0 + 4624 "01000100" // MOVXM r7, #2147483640 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "11110000" // /* MW 5 */ + 4626 "10111111" // /* MW 4 */ + 4627 "11110011" // /* MW 3 */ + 4628 "11111111" // /* MW 2 */ + 4629 "01111111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 39 first + 4630 "10011000" // AND r7, r7, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "01000100" // /* MW 3 */ + 4632 "11001110" // /* MW 2 */ + 4633 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 35 + 4634 "10011000" // SUB r7, r5, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "01110001" // /* MW 3 */ + 4636 "01001110" // /* MW 2 */ + 4637 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 + 4638 "10011000" // MUL r7, r7, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4639 "00001111" // /* MW 3 */ + 4640 "11001110" // /* MW 2 */ + 4641 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 511 25 first + 4642 "10011000" // ASHL r2, r4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4643 "00101110" // /* MW 3 */ + 4644 "00000100" // /* MW 2 */ + 4645 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 36 first + 4646 "10011000" // SUB r4, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4647 "01000001" // /* MW 3 */ + 4648 "01001000" // /* MW 2 */ + 4649 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 515 30 first + 4650 "10011000" // MUL r2, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4651 "00001111" // /* MW 3 */ + 4652 "10000100" // /* MW 2 */ + 4653 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 28 first + 4654 "10011000" // MUL r0, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4655 "00001111" // /* MW 3 */ + 4656 "00000000" // /* MW 2 */ + 4657 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 514 55 +.src_ref 3 "pad_3d.h" 517 39 first + 4658 "01100100" // MUL r1, r1, r2; MOV r6, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4659 "00000101" // /* MW 5 */ + 4660 "00100000" // /* MW 4 */ + 4661 "11110011" // /* MW 3 */ + 4662 "01000101" // /* MW 2 */ + 4663 "00001000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 first + 4664 "10011000" // LSHL r0, r0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4665 "01101101" // /* MW 3 */ + 4666 "00000000" // /* MW 2 */ + 4667 "00010000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 509 21 +.src_ref 3 "pad_3d.h" 517 22 first + 4668 "10100100" // GE r0, r24, r1; ADD.NC p2, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4669 "00000010" // /* MW 5 */ + 4670 "11000011" // /* MW 4 */ + 4671 "00110100" // /* MW 3 */ + 4672 "00000011" // /* MW 2 */ + 4673 "11000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 517 4 + 4674 "10000100" // JNZ r0, #4832 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4675 "00000001" // /* MW 5 */ + 4676 "01000000" // /* MW 4 */ + 4677 "01110000" // /* MW 3 */ + 4678 "00001001" // /* MW 2 */ + 4679 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4683 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 first +.delay_slot + 4684 "10011000" // LSHL r4, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4685 "01101101" // /* MW 3 */ + 4686 "11001000" // /* MW 2 */ + 4687 "00010001" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 514 55 +.delay_slot + 4688 "00011000" // ADD.NC m0, r4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4689 "00001000" // /* MW 3 */ + 4690 "00000010" // /* MW 2 */ + 4691 "00011000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 515 37 first +.delay_slot + 4692 "10011000" // ADD.NC dn0, r2, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4693 "01111111" // /* MW 3 */ + 4694 "01000001" // /* MW 2 */ + 4695 "00011000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 517 4 first + 4696 "10111010" // MOVA dc0, #0; MOVXM ls, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4697 "00010000" // /* MW 9 */ + 4698 "01101000" // /* MW 8 */ + 4699 "01111001" // /* MW 7 */ + 4700 "00000100" // /* MW 6 */ + 4701 "00000000" // /* MW 5 */ + 4702 "00000000" // /* MW 4 */ + 4703 "10000000" // /* MW 3 */ + 4704 "00000011" // /* MW 2 */ + 4705 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 3 "pad_3d.h" 517 4 + 4706 "10111010" // MOVA dj0, #16; MOVXM le, #4816 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4707 "00010000" // /* MW 9 */ + 4708 "01101000" // /* MW 8 */ + 4709 "10111001" // /* MW 7 */ + 4710 "00000101" // /* MW 6 */ + 4711 "00000000" // /* MW 5 */ + 4712 "00000000" // /* MW 4 */ + 4713 "10000000" // /* MW 3 */ + 4714 "00000010" // /* MW 2 */ + 4715 "00000010" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 517 4 + 4716 "11111000" // MOV lc, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4717 "10100000" // /* MW 3 */ + 4718 "01110000" // /* MW 2 */ + 4719 "00011101" // /* MW 1 */ + 4720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4721 "00000000" // /* MW 15 */ + 4722 "00000000" // /* MW 14 */ + 4723 "01111000" // /* MW 13 */ + 4724 "10100101" // /* MW 12 */ + 4725 "00000001" // /* MW 11 */ + 4726 "00000000" // /* MW 10 */ + 4727 "00000000" // /* MW 9 */ + 4728 "00000000" // /* MW 8 */ + 4729 "01011011" // /* MW 7 */ + 4730 "00000001" // /* MW 6 */ + 4731 "00100000" // /* MW 5 */ + 4732 "00000000" // /* MW 4 */ + 4733 "11110000" // /* MW 3 */ + 4734 "00101100" // /* MW 2 */ + 4735 "00000000" // /* MW 1 */ + 4736 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4737 "00000000" // /* MW 15 */ + 4738 "00000000" // /* MW 14 */ + 4739 "01111000" // /* MW 13 */ + 4740 "10100101" // /* MW 12 */ + 4741 "00000001" // /* MW 11 */ + 4742 "00000000" // /* MW 10 */ + 4743 "00000000" // /* MW 9 */ + 4744 "00000000" // /* MW 8 */ + 4745 "01011011" // /* MW 7 */ + 4746 "00000001" // /* MW 6 */ + 4747 "00100000" // /* MW 5 */ + 4748 "00000000" // /* MW 4 */ + 4749 "11110000" // /* MW 3 */ + 4750 "00101100" // /* MW 2 */ + 4751 "00000000" // /* MW 1 */ + 4752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4753 "00000000" // /* MW 15 */ + 4754 "00000000" // /* MW 14 */ + 4755 "01111000" // /* MW 13 */ + 4756 "10100101" // /* MW 12 */ + 4757 "00000001" // /* MW 11 */ + 4758 "00000000" // /* MW 10 */ + 4759 "00000000" // /* MW 9 */ + 4760 "00000000" // /* MW 8 */ + 4761 "01011011" // /* MW 7 */ + 4762 "00000001" // /* MW 6 */ + 4763 "00100000" // /* MW 5 */ + 4764 "00000000" // /* MW 4 */ + 4765 "11110000" // /* MW 3 */ + 4766 "00101100" // /* MW 2 */ + 4767 "00000000" // /* MW 1 */ + 4768 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4769 "00000000" // /* MW 15 */ + 4770 "00000000" // /* MW 14 */ + 4771 "01111000" // /* MW 13 */ + 4772 "10100101" // /* MW 12 */ + 4773 "00000001" // /* MW 11 */ + 4774 "00000000" // /* MW 10 */ + 4775 "00000000" // /* MW 9 */ + 4776 "00000000" // /* MW 8 */ + 4777 "01011011" // /* MW 7 */ + 4778 "00000001" // /* MW 6 */ + 4779 "00100000" // /* MW 5 */ + 4780 "00000000" // /* MW 4 */ + 4781 "11110000" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ + 4784 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4785 "00000000" // /* MW 15 */ + 4786 "00000000" // /* MW 14 */ + 4787 "01111000" // /* MW 13 */ + 4788 "10100101" // /* MW 12 */ + 4789 "00000001" // /* MW 11 */ + 4790 "00000000" // /* MW 10 */ + 4791 "00000000" // /* MW 9 */ + 4792 "00000000" // /* MW 8 */ + 4793 "01011011" // /* MW 7 */ + 4794 "00000001" // /* MW 6 */ + 4795 "00100000" // /* MW 5 */ + 4796 "00000000" // /* MW 4 */ + 4797 "11110000" // /* MW 3 */ + 4798 "00101100" // /* MW 2 */ + 4799 "00000000" // /* MW 1 */ + 4800 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4801 "00000000" // /* MW 15 */ + 4802 "00000000" // /* MW 14 */ + 4803 "01111000" // /* MW 13 */ + 4804 "10100101" // /* MW 12 */ + 4805 "00000001" // /* MW 11 */ + 4806 "00000000" // /* MW 10 */ + 4807 "00000000" // /* MW 9 */ + 4808 "00000000" // /* MW 8 */ + 4809 "01011011" // /* MW 7 */ + 4810 "00000001" // /* MW 6 */ + 4811 "00100000" // /* MW 5 */ + 4812 "00000000" // /* MW 4 */ + 4813 "11110000" // /* MW 3 */ + 4814 "00101100" // /* MW 2 */ + 4815 "00000000" // /* MW 1 */ +.label ZLS_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_640 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 4816 "11100001" // NOPA; NOPB; VST.2D.128 wl0, [p2], d0; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4817 "00000000" // /* MW 15 */ + 4818 "00000000" // /* MW 14 */ + 4819 "01111000" // /* MW 13 */ + 4820 "10100101" // /* MW 12 */ + 4821 "00000001" // /* MW 11 */ + 4822 "00000000" // /* MW 10 */ + 4823 "00000000" // /* MW 9 */ + 4824 "00000000" // /* MW 8 */ + 4825 "00101110" // /* MW 7 */ + 4826 "00010000" // /* MW 6 */ + 4827 "00100010" // /* MW 5 */ + 4828 "00000000" // /* MW 4 */ + 4829 "11110000" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t_656 +.src_ref 3 "pad_3d.h" 282 first +.loop_nesting 0 + 4832 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 4833 "00000000" // /* MW 3 */ + 4834 "00101000" // /* MW 2 */ + 4835 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4837 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4839 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4841 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4843 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t__end +.label __Z6pad_3dIL11pad_3d_mode0E8bfloat16Li1EEvPT0_S3_R15pad_3d_params_t___func_end0 + 4845 "00000000" // /* MW 1 */ +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_begin0 +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.function run _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 2 "reduce_base_c8.h" 352 30 +.src_ref 2 "reduce_base_c8.h" 362 first +.src_ref 2 "reduce_base_c8.h" 365 18 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 +.function_start + 4848 "11111000" // MOV r3, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4849 "11000000" // /* MW 3 */ + 4850 "11010100" // /* MW 2 */ + 4851 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 365 18 first + 4852 "00000010" // MOVS dn3, p7; ADD.NC p7, r3, #44 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4853 "00000000" // /* MW 7 */ + 4854 "11001011" // /* MW 6 */ + 4855 "10110000" // /* MW 5 */ + 4856 "00000011" // /* MW 4 */ + 4857 "01100000" // /* MW 3 */ + 4858 "10010001" // /* MW 2 */ + 4859 "01101011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 367 19 first + 4860 "10011000" // LDA.u16 r0, [p7], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4861 "00011010" // /* MW 3 */ + 4862 "10001100" // /* MW 2 */ + 4863 "00000111" // /* MW 1 */ + 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4865 "00000000" // /* MW 1 */ + 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4867 "00000000" // /* MW 1 */ + 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4869 "00000000" // /* MW 1 */ + 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4871 "00000000" // /* MW 1 */ + 4872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4873 "00000000" // /* MW 1 */ + 4874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4875 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 367 12 +.src_ref 2 "reduce_base_c8.h" 367 19 + 4876 "10000100" // JNZ r0, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */ + 4877 "00000001" // /* MW 5 */ + 4878 "01000000" // /* MW 4 */ + 4879 "11110000" // /* MW 3 */ + 4880 "00001001" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 101 18 +.src_ref 5 "broadcast.hpp" 80 25 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 2 "reduce_base_c8.h" 372 34 +.delay_slot + 4882 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4883 "00000001" // /* MW 3 */ + 4884 "00100000" // /* MW 2 */ + 4885 "00010000" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first +.delay_slot + 4886 "11111000" // VBCST.32 x1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4887 "01110010" // /* MW 3 */ + 4888 "11000010" // /* MW 2 */ + 4889 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 362 +.delay_slot + 4894 "11000100" // PADDXM [sp], #256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4895 "00000001" // /* MW 5 */ + 4896 "00000000" // /* MW 4 */ + 4897 "00000000" // /* MW 3 */ + 4898 "00100000" // /* MW 2 */ + 4899 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 43 + 4900 "10111000" // MOV dj2, #36 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4901 "01001000" // /* MW 3 */ + 4902 "10000000" // /* MW 2 */ + 4903 "00011010" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 43 first + 4904 "10011000" // LDA r1, [p2, dj2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00110110" // /* MW 3 */ + 4906 "01000000" // /* MW 2 */ + 4907 "00000010" // /* MW 1 */ + 4908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4909 "00000000" // /* MW 1 */ + 4910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4911 "00000000" // /* MW 1 */ + 4912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4913 "00000000" // /* MW 1 */ + 4914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4915 "00000000" // /* MW 1 */ + 4916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4917 "00000000" // /* MW 1 */ + 4918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4919 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 34 + 4920 "10011000" // GE r2, r16, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4921 "00011001" // /* MW 3 */ + 4922 "00000100" // /* MW 2 */ + 4923 "00010100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4924 "10000100" // JNZ r2, #5088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5088 delay_slots=5 */ + 4925 "00000001" // /* MW 5 */ + 4926 "01000000" // /* MW 4 */ + 4927 "11110000" // /* MW 3 */ + 4928 "00001001" // /* MW 2 */ + 4929 "00010000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base_c8.h" 374 29 +.delay_slot + 4930 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4931 "10010010" // /* MW 3 */ + 4932 "00000010" // /* MW 2 */ + 4933 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4941 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 2 "reduce_base_c8.h" 372 12 +.src_ref 2 "reduce_base_c8.h" 374 29 + 4942 "01110110" // NOPA; MOVS p3, p1; MOVXM ls, #5072 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4943 "00010000" // /* MW 11 */ + 4944 "11101000" // /* MW 10 */ + 4945 "01111001" // /* MW 9 */ + 4946 "00000100" // /* MW 8 */ + 4947 "00000000" // /* MW 7 */ + 4948 "00000000" // /* MW 6 */ + 4949 "10001011" // /* MW 5 */ + 4950 "10000100" // /* MW 4 */ + 4951 "11110011" // /* MW 3 */ + 4952 "00101100" // /* MW 2 */ + 4953 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4954 "01000100" // MOVXM le, #5072 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4955 "10100000" // /* MW 5 */ + 4956 "11100111" // /* MW 4 */ + 4957 "00010110" // /* MW 3 */ + 4958 "00000000" // /* MW 2 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 372 12 + 4960 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV lc, r1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4961 "00000000" // /* MW 15 */ + 4962 "00000000" // /* MW 14 */ + 4963 "01111000" // /* MW 13 */ + 4964 "01010000" // /* MW 12 */ + 4965 "10111000" // /* MW 11 */ + 4966 "00000010" // /* MW 10 */ + 4967 "00000000" // /* MW 9 */ + 4968 "00000000" // /* MW 8 */ + 4969 "01011011" // /* MW 7 */ + 4970 "00000001" // /* MW 6 */ + 4971 "00100000" // /* MW 5 */ + 4972 "00000000" // /* MW 4 */ + 4973 "11110000" // /* MW 3 */ + 4974 "00101100" // /* MW 2 */ + 4975 "00000000" // /* MW 1 */ + 4976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4977 "00000000" // /* MW 15 */ + 4978 "00000000" // /* MW 14 */ + 4979 "01111000" // /* MW 13 */ + 4980 "10100101" // /* MW 12 */ + 4981 "00000001" // /* MW 11 */ + 4982 "00000000" // /* MW 10 */ + 4983 "00000000" // /* MW 9 */ + 4984 "00000000" // /* MW 8 */ + 4985 "01011011" // /* MW 7 */ + 4986 "00000001" // /* MW 6 */ + 4987 "00100000" // /* MW 5 */ + 4988 "00000000" // /* MW 4 */ + 4989 "11110000" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ + 4992 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4993 "00000000" // /* MW 15 */ + 4994 "00000000" // /* MW 14 */ + 4995 "01111000" // /* MW 13 */ + 4996 "10100101" // /* MW 12 */ + 4997 "00000001" // /* MW 11 */ + 4998 "00000000" // /* MW 10 */ + 4999 "00000000" // /* MW 9 */ + 5000 "00000000" // /* MW 8 */ + 5001 "01011011" // /* MW 7 */ + 5002 "00000001" // /* MW 6 */ + 5003 "00100000" // /* MW 5 */ + 5004 "00000000" // /* MW 4 */ + 5005 "11110000" // /* MW 3 */ + 5006 "00101100" // /* MW 2 */ + 5007 "00000000" // /* MW 1 */ + 5008 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5009 "00000000" // /* MW 15 */ + 5010 "00000000" // /* MW 14 */ + 5011 "01111000" // /* MW 13 */ + 5012 "10100101" // /* MW 12 */ + 5013 "00000001" // /* MW 11 */ + 5014 "00000000" // /* MW 10 */ + 5015 "00000000" // /* MW 9 */ + 5016 "00000000" // /* MW 8 */ + 5017 "01011011" // /* MW 7 */ + 5018 "00000001" // /* MW 6 */ + 5019 "00100000" // /* MW 5 */ + 5020 "00000000" // /* MW 4 */ + 5021 "11110000" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ + 5024 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5025 "00000000" // /* MW 15 */ + 5026 "00000000" // /* MW 14 */ + 5027 "01111000" // /* MW 13 */ + 5028 "10100101" // /* MW 12 */ + 5029 "00000001" // /* MW 11 */ + 5030 "00000000" // /* MW 10 */ + 5031 "00000000" // /* MW 9 */ + 5032 "00000000" // /* MW 8 */ + 5033 "01011011" // /* MW 7 */ + 5034 "00000001" // /* MW 6 */ + 5035 "00100000" // /* MW 5 */ + 5036 "00000000" // /* MW 4 */ + 5037 "11110000" // /* MW 3 */ + 5038 "00101100" // /* MW 2 */ + 5039 "00000000" // /* MW 1 */ + 5040 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5041 "00000000" // /* MW 15 */ + 5042 "00000000" // /* MW 14 */ + 5043 "01111000" // /* MW 13 */ + 5044 "10100101" // /* MW 12 */ + 5045 "00000001" // /* MW 11 */ + 5046 "00000000" // /* MW 10 */ + 5047 "00000000" // /* MW 9 */ + 5048 "00000000" // /* MW 8 */ + 5049 "01011011" // /* MW 7 */ + 5050 "00000001" // /* MW 6 */ + 5051 "00100000" // /* MW 5 */ + 5052 "00000000" // /* MW 4 */ + 5053 "11110000" // /* MW 3 */ + 5054 "00101100" // /* MW 2 */ + 5055 "00000000" // /* MW 1 */ + 5056 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5057 "00000000" // /* MW 15 */ + 5058 "00000000" // /* MW 14 */ + 5059 "01111000" // /* MW 13 */ + 5060 "10100101" // /* MW 12 */ + 5061 "00000001" // /* MW 11 */ + 5062 "00000000" // /* MW 10 */ + 5063 "00000000" // /* MW 9 */ + 5064 "00000000" // /* MW 8 */ + 5065 "01011011" // /* MW 7 */ + 5066 "00000001" // /* MW 6 */ + 5067 "00100000" // /* MW 5 */ + 5068 "00000000" // /* MW 4 */ + 5069 "11110000" // /* MW 3 */ + 5070 "00101100" // /* MW 2 */ + 5071 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_224 +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 2 "reduce_base_c8.h" 374 29 first +.begin_of_loop +.end_of_loop +.loop_nesting 1 + 5072 "11100001" // NOPA; NOPB; VST bmll2, [p3], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5073 "00000000" // /* MW 15 */ + 5074 "00000000" // /* MW 14 */ + 5075 "01111000" // /* MW 13 */ + 5076 "10100101" // /* MW 12 */ + 5077 "00000001" // /* MW 11 */ + 5078 "00000000" // /* MW 10 */ + 5079 "00000000" // /* MW 9 */ + 5080 "10000000" // /* MW 8 */ + 5081 "00000110" // /* MW 7 */ + 5082 "00011101" // /* MW 6 */ + 5083 "00100011" // /* MW 5 */ + 5084 "00000000" // /* MW 4 */ + 5085 "11110000" // /* MW 3 */ + 5086 "00101100" // /* MW 2 */ + 5087 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_240 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 412 41 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.loop_nesting 0 + 5088 "10111000" // MOV m4, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5089 "01000000" // /* MW 3 */ + 5090 "00000000" // /* MW 2 */ + 5091 "00011100" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 first + 5092 "10011000" // LDA.u16 r17, [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5093 "00111010" // /* MW 3 */ + 5094 "10001010" // /* MW 2 */ + 5095 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 388 28 + 5096 "01010100" // LDA.s16 r22, [p7], #-2; MOV m5, #-58 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5097 "00011001" // /* MW 5 */ + 5098 "00011111" // /* MW 4 */ + 5099 "01011010" // /* MW 3 */ + 5100 "11011010" // /* MW 2 */ + 5101 "11111111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 570 33 + 5102 "01010100" // LDA.u16 r26, [p7], m5; MOV dj0, #46 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5103 "10111001" // /* MW 5 */ + 5104 "00000000" // /* MW 4 */ + 5105 "01010001" // /* MW 3 */ + 5106 "01101011" // /* MW 2 */ + 5107 "11110101" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 570 33 first +.src_ref 2 "reduce_base_c8.h" 594 43 + 5108 "11010100" // LDA.s16 r20, [p7, dj0]; MOV r19, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5109 "10000001" // /* MW 5 */ + 5110 "10111101" // /* MW 4 */ + 5111 "01011001" // /* MW 3 */ + 5112 "01010010" // /* MW 2 */ + 5113 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 570 33 +.src_ref 2 "reduce_base_c8.h" 594 43 first + 5114 "00010100" // LDA.s16 r19, [p7, dj0]; ADD.NC p3, r19, #56 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5115 "00111000" // /* MW 5 */ + 5116 "11010011" // /* MW 4 */ + 5117 "01010110" // /* MW 3 */ + 5118 "01001110" // /* MW 2 */ + 5119 "11100000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 594 43 + 5120 "10011000" // LDA.s16 r21, [p3], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5121 "10110010" // /* MW 3 */ + 5122 "11011110" // /* MW 2 */ + 5123 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 594 64 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 5124 "10011000" // LDA.u16 r28, [p3], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5125 "10011010" // /* MW 3 */ + 5126 "11111111" // /* MW 2 */ + 5127 "00000011" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 595 56 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 5128 "00101100" // LDA.s16 r17, [p3], #6; MOVX r7, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5129 "00010010" // /* MW 5 */ + 5130 "00011100" // /* MW 4 */ + 5131 "01010000" // /* MW 3 */ + 5132 "11000110" // /* MW 2 */ + 5133 "01100111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 596 56 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5134 "10111010" // LDA.s16 r18, [p3, #-2]; MOVX r18, #-2; MOV dc4, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5135 "01011000" // /* MW 9 */ + 5136 "00000000" // /* MW 8 */ + 5137 "01100000" // /* MW 7 */ + 5138 "11001010" // /* MW 6 */ + 5139 "00100111" // /* MW 5 */ + 5140 "00111111" // /* MW 4 */ + 5141 "01010000" // /* MW 3 */ + 5142 "11001010" // /* MW 2 */ + 5143 "01111110" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 2 "reduce_base_c8.h" 388 28 first +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 33 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5144 "01110110" // LDA.s16 r7, [p7, dj0]; MOVS dc2, dc4; LSHL r18, r17, r18; MOV r6, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5145 "01011000" // /* MW 11 */ + 5146 "00000001" // /* MW 10 */ + 5147 "11001000" // /* MW 9 */ + 5148 "01101100" // /* MW 8 */ + 5149 "00101001" // /* MW 7 */ + 5150 "00100011" // /* MW 6 */ + 5151 "01001011" // /* MW 5 */ + 5152 "00010000" // /* MW 4 */ + 5153 "01010010" // /* MW 3 */ + 5154 "00011110" // /* MW 2 */ + 5155 "11100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 388 28 +.src_ref 2 "reduce_base_c8.h" 595 75 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5156 "01110110" // LDA.u16 r27, [p3]; MOVS dn2, r26; LSHL r7, r22, r7; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5157 "01011000" // /* MW 11 */ + 5158 "00111100" // /* MW 10 */ + 5159 "01001000" // /* MW 9 */ + 5160 "11101100" // /* MW 8 */ + 5161 "01110011" // /* MW 7 */ + 5162 "00101100" // /* MW 6 */ + 5163 "00001011" // /* MW 5 */ + 5164 "01011010" // /* MW 4 */ + 5165 "01010010" // /* MW 3 */ + 5166 "11101111" // /* MW 2 */ + 5167 "01100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5168 "01110110" // MOVA dj2, #64; MOVS p3, p1; LSHL r20, r20, r6; MOV m2, r7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5169 "01111000" // /* MW 11 */ + 5170 "11010000" // /* MW 10 */ + 5171 "00000001" // /* MW 9 */ + 5172 "01101101" // /* MW 8 */ + 5173 "01000011" // /* MW 7 */ + 5174 "00101001" // /* MW 6 */ + 5175 "10001011" // /* MW 5 */ + 5176 "10000100" // /* MW 4 */ + 5177 "10000011" // /* MW 3 */ + 5178 "00001010" // /* MW 2 */ + 5179 "00001000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5180 "10111010" // VLDA.2D bmll1, [p3], d2; LSHL r19, r19, r6; MOV m5, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5181 "01111000" // /* MW 9 */ + 5182 "00010000" // /* MW 8 */ + 5183 "10000101" // /* MW 7 */ + 5184 "01101110" // /* MW 6 */ + 5185 "00110011" // /* MW 5 */ + 5186 "00100111" // /* MW 4 */ + 5187 "10110000" // /* MW 3 */ + 5188 "00010010" // /* MW 2 */ + 5189 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 5190 "10111010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; LSHL r19, r21, r6; MOV m6, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5191 "01111000" // /* MW 9 */ + 5192 "11010000" // /* MW 8 */ + 5193 "00000100" // /* MW 7 */ + 5194 "01101111" // /* MW 6 */ + 5195 "00110011" // /* MW 5 */ + 5196 "00101011" // /* MW 4 */ + 5197 "00110000" // /* MW 3 */ + 5198 "01000001" // /* MW 2 */ + 5199 "00010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 2 "reduce_base_c8.h" 391 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5200 "00100100" // LSHL r17, r17, r6; ADD.NC lc, r18, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5201 "11111110" // /* MW 5 */ + 5202 "11110010" // /* MW 4 */ + 5203 "10111010" // /* MW 3 */ + 5204 "01001101" // /* MW 2 */ + 5205 "10001100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 + 5206 "11100100" // LSHL r17, r18, r6; MOV dj0, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5207 "01000001" // /* MW 5 */ + 5208 "00010001" // /* MW 4 */ + 5209 "10110001" // /* MW 3 */ + 5210 "01001101" // /* MW 2 */ + 5211 "10010100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.src_ref 2 "reduce_base_c8.h" 570 24 first + 5212 "01110110" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOVS dc0, dc4; LSHL r6, r7, r6; MOV m0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5213 "01111000" // /* MW 11 */ + 5214 "11010000" // /* MW 10 */ + 5215 "00000100" // /* MW 9 */ + 5216 "01101100" // /* MW 8 */ + 5217 "01100011" // /* MW 7 */ + 5218 "00001110" // /* MW 6 */ + 5219 "01001011" // /* MW 5 */ + 5220 "00010000" // /* MW 4 */ + 5221 "00110000" // /* MW 3 */ + 5222 "00000001" // /* MW 2 */ + 5223 "00011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 570 24 + 5224 "01001010" // MOVS dn0, r28; MOV m7, r6; VADD.f dm4, dm1, dm4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5225 "00111101" // /* MW 9 */ + 5226 "00110000" // /* MW 8 */ + 5227 "00010100" // /* MW 7 */ + 5228 "11100100" // /* MW 6 */ + 5229 "00100000" // /* MW 5 */ + 5230 "00000011" // /* MW 4 */ + 5231 "01100111" // /* MW 3 */ + 5232 "10000001" // /* MW 2 */ + 5233 "00001011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 +.src_ref 6 "aie_core.h" 90 15 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first + 5234 "10111010" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7; MOVS dn4, r27; MOV dj4, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5235 "01110010" // /* MW 9 */ + 5236 "01010000" // /* MW 8 */ + 5237 "01000100" // /* MW 7 */ + 5238 "00000010" // /* MW 6 */ + 5239 "00001011" // /* MW 5 */ + 5240 "01011011" // /* MW 4 */ + 5241 "00110100" // /* MW 3 */ + 5242 "00100001" // /* MW 2 */ + 5243 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 + 5244 "11010100" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; MOV dc1, dc4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5245 "00000001" // /* MW 5 */ + 5246 "10010011" // /* MW 4 */ + 5247 "00110011" // /* MW 3 */ + 5248 "00110001" // /* MW 2 */ + 5249 "00000011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 5250 "01100010" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5; VADD.f dm1, dm4, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5251 "00111101" // /* MW 7 */ + 5252 "10000000" // /* MW 6 */ + 5253 "00010001" // /* MW 5 */ + 5254 "00000100" // /* MW 4 */ + 5255 "00110000" // /* MW 3 */ + 5256 "01000001" // /* MW 2 */ + 5257 "00010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5258 "10011000" // VLDA.2D bmll1, [p3], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5259 "10010101" // /* MW 3 */ + 5260 "01010000" // /* MW 2 */ + 5261 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 391 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5264 "01011010" // MOVXM ls, #5312; VADD.f dm0, dm1, dm2, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5265 "00111101" // /* MW 9 */ + 5266 "00101000" // /* MW 8 */ + 5267 "00010000" // /* MW 7 */ + 5268 "00000010" // /* MW 6 */ + 5269 "01001100" // /* MW 5 */ + 5270 "10001111" // /* MW 4 */ + 5271 "00000000" // /* MW 3 */ + 5272 "00000000" // /* MW 2 */ + 5273 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 412 41 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5274 "11010100" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6; MOV dj3, m4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5275 "00000001" // /* MW 5 */ + 5276 "00010000" // /* MW 4 */ + 5277 "00110111" // /* MW 3 */ + 5278 "00000001" // /* MW 2 */ + 5279 "00011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5280 "11101011" // MOVA dj1, #64; NOPB; MOVS p4, p1; MOVX r4, #32; MOV m1, m2; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5281 "10000001" // /* MW 15 */ + 5282 "10100001" // /* MW 14 */ + 5283 "01111000" // /* MW 13 */ + 5284 "00000000" // /* MW 12 */ + 5285 "10000010" // /* MW 11 */ + 5286 "00001000" // /* MW 10 */ + 5287 "01000100" // /* MW 9 */ + 5288 "00000000" // /* MW 8 */ + 5289 "10001011" // /* MW 7 */ + 5290 "10000100" // /* MW 6 */ + 5291 "00100100" // /* MW 5 */ + 5292 "00000000" // /* MW 4 */ + 5293 "10000000" // /* MW 3 */ + 5294 "00000110" // /* MW 2 */ + 5295 "00001000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 153 115 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 391 8 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5296 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; MOVS dn1, r26; MOVXM le, #5408; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5297 "01100001" // /* MW 15 */ + 5298 "10010000" // /* MW 14 */ + 5299 "00010000" // /* MW 13 */ + 5300 "10010000" // /* MW 12 */ + 5301 "10111010" // /* MW 11 */ + 5302 "00000101" // /* MW 10 */ + 5303 "00000000" // /* MW 9 */ + 5304 "00000000" // /* MW 8 */ + 5305 "00001011" // /* MW 7 */ + 5306 "01011010" // /* MW 6 */ + 5307 "00100001" // /* MW 5 */ + 5308 "00000000" // /* MW 4 */ + 5309 "00110000" // /* MW 3 */ + 5310 "00100001" // /* MW 2 */ + 5311 "00011101" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_464 +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5312 "10011000" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "10001001" // /* MW 3 */ + 5314 "00011001" // /* MW 2 */ + 5315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5316 "01100110" // VLDA.2D bmll1, [p3], d2; NOPB; NOPS; VADD.f dm1, dm4, dm0, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5317 "00111101" // /* MW 11 */ + 5318 "10000000" // /* MW 10 */ + 5319 "00010001" // /* MW 9 */ + 5320 "10001110" // /* MW 8 */ + 5321 "10101101" // /* MW 7 */ + 5322 "00000000" // /* MW 6 */ + 5323 "00100000" // /* MW 5 */ + 5324 "00000000" // /* MW 4 */ + 5325 "10110000" // /* MW 3 */ + 5326 "00010010" // /* MW 2 */ + 5327 "01101010" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 198 120 +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5328 "11100001" // VLDA.CONV.fp32.bf16 bmll4, [p0], m5;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5329 "00000000" // /* MW 15 */ + 5330 "00000000" // /* MW 14 */ + 5331 "01111000" // /* MW 13 */ + 5332 "10100101" // /* MW 12 */ + 5333 "00000001" // /* MW 11 */ + 5334 "00000000" // /* MW 10 */ + 5335 "00000000" // /* MW 9 */ + 5336 "00000000" // /* MW 8 */ + 5337 "01011011" // /* MW 7 */ + 5338 "00000001" // /* MW 6 */ + 5339 "00100000" // /* MW 5 */ + 5340 "00000000" // /* MW 4 */ + 5341 "00110000" // /* MW 3 */ + 5342 "01000001" // /* MW 2 */ + 5343 "00010101" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5344 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5345 "00000000" // /* MW 15 */ + 5346 "00000000" // /* MW 14 */ + 5347 "01111000" // /* MW 13 */ + 5348 "10100101" // /* MW 12 */ + 5349 "00000001" // /* MW 11 */ + 5350 "00000000" // /* MW 10 */ + 5351 "00000000" // /* MW 9 */ + 5352 "00000000" // /* MW 8 */ + 5353 "01011011" // /* MW 7 */ + 5354 "00000001" // /* MW 6 */ + 5355 "00100000" // /* MW 5 */ + 5356 "00000000" // /* MW 4 */ + 5357 "11110000" // /* MW 3 */ + 5358 "00101100" // /* MW 2 */ + 5359 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5360 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm0, dm1, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5361 "01000001" // /* MW 15 */ + 5362 "10000001" // /* MW 14 */ + 5363 "01111000" // /* MW 13 */ + 5364 "10100101" // /* MW 12 */ + 5365 "00000001" // /* MW 11 */ + 5366 "00000000" // /* MW 10 */ + 5367 "00000000" // /* MW 9 */ + 5368 "00000000" // /* MW 8 */ + 5369 "01011011" // /* MW 7 */ + 5370 "00000001" // /* MW 6 */ + 5371 "00100000" // /* MW 5 */ + 5372 "00000000" // /* MW 4 */ + 5373 "11110000" // /* MW 3 */ + 5374 "00101100" // /* MW 2 */ + 5375 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 2 "reduce_base_c8.h" 570 24 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5376 "11100001" // VLDA.CONV.fp32.bf16 bmll0, [p0], m6;NOPB; VST.2D bmll2, [p4], d1; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5377 "00000000" // /* MW 15 */ + 5378 "00000000" // /* MW 14 */ + 5379 "01111000" // /* MW 13 */ + 5380 "10100101" // /* MW 12 */ + 5381 "00000001" // /* MW 11 */ + 5382 "00000000" // /* MW 10 */ + 5383 "00000000" // /* MW 9 */ + 5384 "10000000" // /* MW 8 */ + 5385 "00000110" // /* MW 7 */ + 5386 "00110001" // /* MW 6 */ + 5387 "00100100" // /* MW 5 */ + 5388 "00000000" // /* MW 4 */ + 5389 "00110000" // /* MW 3 */ + 5390 "00000001" // /* MW 2 */ + 5391 "00011001" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5392 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm4, dm1, dm4, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5393 "10000001" // /* MW 15 */ + 5394 "10100001" // /* MW 14 */ + 5395 "01111000" // /* MW 13 */ + 5396 "10100101" // /* MW 12 */ + 5397 "00000001" // /* MW 11 */ + 5398 "00000000" // /* MW 10 */ + 5399 "00000000" // /* MW 9 */ + 5400 "00000000" // /* MW 8 */ + 5401 "01011011" // /* MW 7 */ + 5402 "00000001" // /* MW 6 */ + 5403 "00100000" // /* MW 5 */ + 5404 "00000000" // /* MW 4 */ + 5405 "11110000" // /* MW 3 */ + 5406 "00101100" // /* MW 2 */ + 5407 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_560 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 +.src_ref 2 "reduce_base_c8.h" 570 24 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5408 "11101011" // VLDA.CONV.fp32.bf16 bmll2, [p0], m7;NOPB; NOPS; NOPX; NOPM; VADD.f dm2, dm0, dm3, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5409 "01100001" // /* MW 15 */ + 5410 "10010000" // /* MW 14 */ + 5411 "01111000" // /* MW 13 */ + 5412 "10100101" // /* MW 12 */ + 5413 "00000001" // /* MW 11 */ + 5414 "00000000" // /* MW 10 */ + 5415 "00000000" // /* MW 9 */ + 5416 "00000000" // /* MW 8 */ + 5417 "01011011" // /* MW 7 */ + 5418 "00000001" // /* MW 6 */ + 5419 "00100000" // /* MW 5 */ + 5420 "00000000" // /* MW 4 */ + 5421 "00110000" // /* MW 3 */ + 5422 "00100001" // /* MW 2 */ + 5423 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 107 23 +.src_ref 2 "reduce_base_c8.h" 412 41 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 5424 "10111010" // LDA.u16 r1, [p7, dj3]; MOVXM r5, #16256 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5425 "00010000" // /* MW 9 */ + 5426 "11000000" // /* MW 8 */ + 5427 "10101111" // /* MW 7 */ + 5428 "00001100" // /* MW 6 */ + 5429 "00000000" // /* MW 5 */ + 5430 "00000000" // /* MW 4 */ + 5431 "01010000" // /* MW 3 */ + 5432 "00000111" // /* MW 2 */ + 5433 "11101100" // /* MW 1 */ +.src_ref 6 "aie_core.h" 90 15 first +.src_ref 6 "me_vmult_float_emulated.h" 107 23 first +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 943 89 first +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5434 "01001010" // VLDA.3D.CONV.fp32.bf16 bmll3, [p0], d0; VBCST.16 x4, r5; VADD.f dm1, dm4, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5435 "00111101" // /* MW 9 */ + 5436 "10000000" // /* MW 8 */ + 5437 "00010001" // /* MW 7 */ + 5438 "11100010" // /* MW 6 */ + 5439 "01110010" // /* MW 5 */ + 5440 "00010101" // /* MW 4 */ + 5441 "00110010" // /* MW 3 */ + 5442 "00110001" // /* MW 2 */ + 5443 "00000011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 101 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5444 "11111000" // VBCST.16 x0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5445 "01110010" // /* MW 3 */ + 5446 "01000001" // /* MW 2 */ + 5447 "00011000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5449 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5450 "01001000" // VADD.f dm0, dm1, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5451 "00111101" // /* MW 3 */ + 5452 "00101000" // /* MW 2 */ + 5453 "00010000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5454 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5455 "00000110" // /* MW 3 */ + 5456 "00110001" // /* MW 2 */ + 5457 "00001100" // /* MW 1 */ + 5458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5459 "00000000" // /* MW 1 */ +.src_ref 7 "add_accum.hpp" 19 92 first +.src_ref 2 "reduce_base_c8.h" 412 52 first + 5460 "01100010" // ADD r5, r1, #-1; VADD.f dm2, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5461 "00111101" // /* MW 7 */ + 5462 "00001100" // /* MW 6 */ + 5463 "00010010" // /* MW 5 */ + 5464 "11111001" // /* MW 4 */ + 5465 "01011111" // /* MW 3 */ + 5466 "00000010" // /* MW 2 */ + 5467 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 412 31 + 5468 "10011000" // NE r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5469 "00001000" // /* MW 3 */ + 5470 "01000000" // /* MW 2 */ + 5471 "00010001" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 412 16 + 5472 "10000100" // JNZ r0, #6368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6368 delay_slots=5 */ + 5473 "00000001" // /* MW 5 */ + 5474 "01000000" // /* MW 4 */ + 5475 "01110000" // /* MW 3 */ + 5476 "00001100" // /* MW 2 */ + 5477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5479 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5483 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 73 15 first +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 153 115 first +.delay_slot + 5484 "10011000" // VST.2D bmll2, [p4], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5485 "00000110" // /* MW 3 */ + 5486 "00110001" // /* MW 2 */ + 5487 "00001100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5489 "00000000" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first +.src_ref 3 "reduce_mean_c8_impl.h" 184 15 first +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5490 "00101100" // LDA r6, [p2, #12]; MOVX r5, #3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5491 "00011010" // /* MW 5 */ + 5492 "00010100" // /* MW 4 */ + 5493 "11010000" // /* MW 3 */ + 5494 "10011010" // /* MW 2 */ + 5495 "01000110" // /* MW 1 */ + 5496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5497 "00000000" // /* MW 1 */ + 5498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5499 "00000000" // /* MW 1 */ + 5500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5501 "00000000" // /* MW 1 */ + 5502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5503 "00000000" // /* MW 1 */ + 5504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5505 "00000000" // /* MW 1 */ + 5506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5507 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5508 "10011000" // GE r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5509 "01101001" // /* MW 3 */ + 5510 "01001110" // /* MW 2 */ + 5511 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5512 "10000100" // JNZ r7, #7296 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7296 delay_slots=5 */ + 5513 "00000001" // /* MW 5 */ + 5514 "01000000" // /* MW 4 */ + 5515 "01000000" // /* MW 3 */ + 5516 "00001110" // /* MW 2 */ + 5517 "00111000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 +.delay_slot + 5518 "00011000" // MOVX r0, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5519 "00010001" // /* MW 3 */ + 5520 "00000000" // /* MW 2 */ + 5521 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5529 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5530 "10011000" // NE r5, r6, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5531 "00001000" // /* MW 3 */ + 5532 "10001010" // /* MW 2 */ + 5533 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 5534 "10000100" // JNZ r5, #6512 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6512 delay_slots=5 */ + 5535 "00000001" // /* MW 5 */ + 5536 "01000000" // /* MW 4 */ + 5537 "10111000" // /* MW 3 */ + 5538 "00001100" // /* MW 2 */ + 5539 "00101000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5549 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 5550 "11100100" // MOVX r17, #257; MOV dc4, lr /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5551 "11000001" // /* MW 5 */ + 5552 "10000011" // /* MW 4 */ + 5553 "10101001" // /* MW 3 */ + 5554 "01000000" // /* MW 2 */ + 5555 "00100100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 5556 "01000100" // MOVXM r21, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5557 "11111110" // /* MW 5 */ + 5558 "10111111" // /* MW 4 */ + 5559 "11111010" // /* MW 3 */ + 5560 "00000000" // /* MW 2 */ + 5561 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 + 5562 "00101100" // NOPA; MOVX r20, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5563 "00000010" // /* MW 5 */ + 5564 "01010000" // /* MW 4 */ + 5565 "11110000" // /* MW 3 */ + 5566 "00101100" // /* MW 2 */ + 5567 "00000000" // /* MW 1 */ +.label __ll91__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 6 "me_vmult_float_emulated.h" 117 42 +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.src_ref 6 "me_vmult_float_emulated.h" 118 9 +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.src_ref 6 "me_vmult_float_emulated.h" 119 9 +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.src_ref 6 "me_vmult_float_emulated.h" 120 9 +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.src_ref 6 "me_vmult_float_emulated.h" 121 9 +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.src_ref 6 "me_vmult_float_emulated.h" 122 9 +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 123 9 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 +.src_ref 5 "add.hpp" 28 49 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 5 "add_reduce.hpp" 324 44 +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 + 5568 "01110110" // MOVA dj2, #64; MOVS p2, r3; MOVX r5, #16; MOV r2, #60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5569 "01011000" // /* MW 11 */ + 5570 "00111100" // /* MW 10 */ + 5571 "01001000" // /* MW 9 */ + 5572 "00001000" // /* MW 8 */ + 5573 "01010010" // /* MW 7 */ + 5574 "00000000" // /* MW 6 */ + 5575 "00001011" // /* MW 5 */ + 5576 "10000011" // /* MW 4 */ + 5577 "10000010" // /* MW 3 */ + 5578 "00001010" // /* MW 2 */ + 5579 "00001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first + 5580 "00101100" // LDA.s16 r6, [p2, dj2]; MOVX r4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5581 "00000010" // /* MW 5 */ + 5582 "00010001" // /* MW 4 */ + 5583 "01010000" // /* MW 3 */ + 5584 "00011010" // /* MW 2 */ + 5585 "01001000" // /* MW 1 */ + 5586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5587 "00000000" // /* MW 1 */ + 5588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5589 "00000000" // /* MW 1 */ + 5590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5591 "00000000" // /* MW 1 */ + 5592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5593 "00000000" // /* MW 1 */ + 5594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5595 "00000000" // /* MW 1 */ + 5596 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5597 "01100111" // /* MW 3 */ + 5598 "00000001" // /* MW 2 */ + 5599 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 + 5600 "11100001" // NOPA; NOPB; NOPS; ASHL r5, r6, r5; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5601 "00000000" // /* MW 15 */ + 5602 "00000000" // /* MW 14 */ + 5603 "01111000" // /* MW 13 */ + 5604 "10100101" // /* MW 12 */ + 5605 "00000001" // /* MW 11 */ + 5606 "11110100" // /* MW 10 */ + 5607 "01010010" // /* MW 9 */ + 5608 "00001100" // /* MW 8 */ + 5609 "01011011" // /* MW 7 */ + 5610 "00000001" // /* MW 6 */ + 5611 "00100000" // /* MW 5 */ + 5612 "00000000" // /* MW 4 */ + 5613 "11110000" // /* MW 3 */ + 5614 "00101100" // /* MW 2 */ + 5615 "00000000" // /* MW 1 */ +.label __ll93__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 + 5616 "01110110" // MOVA dj2, #36; ST dn3, [sp, #-4]; MOVXM p7, #509168 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5617 "00010000" // /* MW 11 */ + 5618 "01111000" // /* MW 10 */ + 5619 "10110010" // /* MW 9 */ + 5620 "11110011" // /* MW 8 */ + 5621 "00000001" // /* MW 7 */ + 5622 "10000000" // /* MW 6 */ + 5623 "10100101" // /* MW 5 */ + 5624 "11111101" // /* MW 4 */ + 5625 "10000111" // /* MW 3 */ + 5626 "10001010" // /* MW 2 */ + 5627 "00000100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1289 16 + 5628 "01110110" // LDA.s8 r23, [p7]; ST dc4, [sp, #-8]; MOVX r5, #0; VBCST.32 x2, r5 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5629 "01111000" // /* MW 11 */ + 5630 "00111001" // /* MW 10 */ + 5631 "10001011" // /* MW 9 */ + 5632 "00001000" // /* MW 8 */ + 5633 "01010000" // /* MW 7 */ + 5634 "10000000" // /* MW 6 */ + 5635 "01100101" // /* MW 5 */ + 5636 "11111010" // /* MW 4 */ + 5637 "01010111" // /* MW 3 */ + 5638 "11011100" // /* MW 2 */ + 5639 "11100000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 1280 49 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1287 41 +.src_ref 5 "vector.hpp" 1288 16 +.src_ref 5 "vector.hpp" 1289 16 +.src_ref 5 "vector.hpp" 1292 26 +.src_ref 3 "reduce_mean_c8_impl.h" 223 35 first +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 + 5640 "01110110" // LDA r6, [p2, dj2]; MOVS p7, p1; MOVX r22, #-1; VMOV bmll0, x2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5641 "01111000" // /* MW 11 */ + 5642 "01001001" // /* MW 10 */ + 5643 "00000010" // /* MW 9 */ + 5644 "11101000" // /* MW 8 */ + 5645 "01100111" // /* MW 7 */ + 5646 "00111111" // /* MW 6 */ + 5647 "10001011" // /* MW 5 */ + 5648 "10000100" // /* MW 4 */ + 5649 "11010111" // /* MW 3 */ + 5650 "00011010" // /* MW 2 */ + 5651 "01001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "vector.hpp" 1280 49 + 5652 "10111010" // MOVA r24, #31; MOVX vaddSign0, #1; VMOV bmll2, x2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5653 "01111000" // /* MW 9 */ + 5654 "01001001" // /* MW 8 */ + 5655 "00000010" // /* MW 7 */ + 5656 "00000001" // /* MW 6 */ + 5657 "11010010" // /* MW 5 */ + 5658 "00000010" // /* MW 4 */ + 5659 "00000000" // /* MW 3 */ + 5660 "11111000" // /* MW 2 */ + 5661 "00000011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 first + 5662 "10111010" // MOVA r25, #16; MOVXM ls, #5760 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5663 "00010000" // /* MW 9 */ + 5664 "01000000" // /* MW 8 */ + 5665 "01111011" // /* MW 7 */ + 5666 "00000100" // /* MW 6 */ + 5667 "00000000" // /* MW 5 */ + 5668 "00000000" // /* MW 4 */ + 5669 "00000000" // /* MW 3 */ + 5670 "00011001" // /* MW 2 */ + 5671 "00000010" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 + 5672 "10111010" // VLDA wl2, [sp, #-32]; MOVXM le, #6336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00010000" // /* MW 9 */ + 5674 "01100000" // /* MW 8 */ + 5675 "10111100" // /* MW 7 */ + 5676 "00000101" // /* MW 6 */ + 5677 "00000000" // /* MW 5 */ + 5678 "00000000" // /* MW 4 */ + 5679 "10110000" // /* MW 3 */ + 5680 "10010100" // /* MW 2 */ + 5681 "11111111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 98 + 5682 "00011000" // MOVX r26, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5683 "00000001" // /* MW 3 */ + 5684 "01110100" // /* MW 2 */ + 5685 "00010000" // /* MW 1 */ + 5686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5687 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "vector.hpp" 1286 72 +.src_ref 7 "accum.hpp" 1108 103 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5688 "00011000" // MOVX crRnd, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5689 "10000000" // /* MW 3 */ + 5690 "11111010" // /* MW 2 */ + 5691 "00010101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 first +.src_ref 6 "me_vmult_float_emulated.h" 112 19 first +.src_ref 3 "reduce_mean_c8_impl.h" 223 9 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5692 "00000010" // VCONV.bf16.fp32 wl0, bmll0; ADD.NC lc, r6, #0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5693 "00000000" // /* MW 7 */ + 5694 "10000000" // /* MW 6 */ + 5695 "10111001" // /* MW 5 */ + 5696 "00000010" // /* MW 4 */ + 5697 "11000000" // /* MW 3 */ + 5698 "00000010" // /* MW 2 */ + 5699 "00001000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5700 "11111000" // VMOV x3, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5701 "10010010" // /* MW 3 */ + 5702 "10100000" // /* MW 2 */ + 5703 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 5704 "01100010" // VMOV x5, x3; VMSC.f dm0, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5705 "10000011" // /* MW 7 */ + 5706 "01000000" // /* MW 6 */ + 5707 "00010000" // /* MW 5 */ + 5708 "11100110" // /* MW 4 */ + 5709 "10010010" // /* MW 3 */ + 5710 "10100110" // /* MW 2 */ + 5711 "00000010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 5712 "11111000" // VMOV x6, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5713 "10010010" // /* MW 3 */ + 5714 "00101010" // /* MW 2 */ + 5715 "00011011" // /* MW 1 */ + 5716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5717 "00000000" // /* MW 1 */ + 5718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5719 "00000000" // /* MW 1 */ + 5720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5721 "00000000" // /* MW 1 */ + 5722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5723 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 first +.src_ref 6 "me_vmult_float_emulated.h" 114 19 first + 5724 "00011000" // VCONV.bf16.fp32 wl3, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5725 "00010110" // /* MW 3 */ + 5726 "11000000" // /* MW 2 */ + 5727 "00001001" // /* MW 1 */ + 5728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5729 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 5730 "01001000" // VMSC.f dm0, dm0, x3, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5731 "10000011" // /* MW 3 */ + 5732 "00000110" // /* MW 2 */ + 5733 "00010000" // /* MW 1 */ + 5734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5735 "00000000" // /* MW 1 */ + 5736 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5737 "00000000" // /* MW 1 */ + 5738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5739 "00000000" // /* MW 1 */ + 5740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5741 "00000000" // /* MW 1 */ + 5742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5743 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 + 5744 "11100001" // NOPA; NOPB; VCONV.bf16.fp32 wl5, bmll0; MOVX r7, #8; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00000000" // /* MW 15 */ + 5746 "00000000" // /* MW 14 */ + 5747 "01111000" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00001000" // /* MW 10 */ + 5751 "01110001" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "00010110" // /* MW 7 */ + 5754 "11000000" // /* MW 6 */ + 5755 "00100010" // /* MW 5 */ + 5756 "00000000" // /* MW 4 */ + 5757 "11110000" // /* MW 3 */ + 5758 "00101100" // /* MW 2 */ + 5759 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_912 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 3 "reduce_mean_c8_impl.h" 226 22 first +.begin_of_loop +.loop_nesting 1 + 5760 "11110100" // VLDB x7, [p1], #64; VMOV bmhh4, x9 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5761 "00100101" // /* MW 5 */ + 5762 "10100101" // /* MW 4 */ + 5763 "10001001" // /* MW 3 */ + 5764 "10111110" // /* MW 2 */ + 5765 "00100011" // /* MW 1 */ + 5766 "11111000" // VMOV bmhh3, x11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5767 "10010010" // /* MW 3 */ + 5768 "11010110" // /* MW 2 */ + 5769 "00011011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1280 49 + 5770 "11111000" // MOV r28, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5771 "11000000" // /* MW 3 */ + 5772 "00011110" // /* MW 2 */ + 5773 "00011111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1280 49 first + 5774 "10011000" // AND r29, r28, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5775 "10000100" // /* MW 3 */ + 5776 "00111011" // /* MW 2 */ + 5777 "00010111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1285 72 first + 5778 "00100100" // LT r27, r29, r4; ADD.NC r28, r29, #-32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5779 "11100000" // /* MW 5 */ + 5780 "00111101" // /* MW 4 */ + 5781 "01011110" // /* MW 3 */ + 5782 "11001001" // /* MW 2 */ + 5783 "11101110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 + 5784 "10011000" // LSHL r30, r22, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5785 "11011101" // /* MW 3 */ + 5786 "10111101" // /* MW 2 */ + 5787 "00010101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 98 first + 5788 "10011000" // SUB r31, r26, r29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5789 "11010001" // /* MW 3 */ + 5790 "10111111" // /* MW 2 */ + 5791 "00010110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "add_reduce.hpp" 322 47 first + 5792 "10100100" // SEL.EQZ r30, r5, r30, r27; VSHIFT x8, x7, x0, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5793 "11001101" // /* MW 5 */ + 5794 "01110000" // /* MW 4 */ + 5795 "01001000" // /* MW 3 */ + 5796 "10111100" // /* MW 2 */ + 5797 "00101111" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first + 5798 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5799 "10010010" // /* MW 3 */ + 5800 "00010000" // /* MW 2 */ + 5801 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 7 "accum.hpp" 198 120 + 5802 "11111000" // VMOV wl8, wh7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5803 "00100010" // /* MW 3 */ + 5804 "01001110" // /* MW 2 */ + 5805 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 7 "accum.hpp" 198 120 first + 5806 "11111000" // VMOV wl10, wl7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5807 "00100010" // /* MW 3 */ + 5808 "01001111" // /* MW 2 */ + 5809 "00011101" // /* MW 1 */ + 5810 "11111000" // VMOV bmhl4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5811 "10010010" // /* MW 3 */ + 5812 "10010000" // /* MW 2 */ + 5813 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5814 "11111000" // VMOV bmhl3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5815 "10010010" // /* MW 3 */ + 5816 "10010100" // /* MW 2 */ + 5817 "00011011" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 5 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5818 "01100010" // VMOV cml2, cmh4; VADD.f dm3, dm1, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5819 "00111101" // /* MW 7 */ + 5820 "00101000" // /* MW 6 */ + 5821 "00010011" // /* MW 5 */ + 5822 "11100110" // /* MW 4 */ + 5823 "10001010" // /* MW 3 */ + 5824 "00010010" // /* MW 2 */ + 5825 "00000010" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5826 "11111000" // VMOV cml1, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5827 "10001010" // /* MW 3 */ + 5828 "00001110" // /* MW 2 */ + 5829 "00011001" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 5 "vector.hpp" 243 115 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5830 "01100010" // VMOV wl8, wh7; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5831 "00111101" // /* MW 7 */ + 5832 "01010000" // /* MW 6 */ + 5833 "00010010" // /* MW 5 */ + 5834 "11100110" // /* MW 4 */ + 5835 "00100010" // /* MW 3 */ + 5836 "01001110" // /* MW 2 */ + 5837 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5838 "11111000" // VMOV bmll2, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5839 "10010010" // /* MW 3 */ + 5840 "00001110" // /* MW 2 */ + 5841 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5842 "11011000" // VSHIFT x9, x8, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5843 "01100110" // /* MW 3 */ + 5844 "11000000" // /* MW 2 */ + 5845 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5846 "01100010" // VMOV bmll1, x8; VADD.f dm4, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5847 "00111101" // /* MW 7 */ + 5848 "00110000" // /* MW 6 */ + 5849 "00010100" // /* MW 5 */ + 5850 "11100110" // /* MW 4 */ + 5851 "10010010" // /* MW 3 */ + 5852 "00010000" // /* MW 2 */ + 5853 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5854 "11111000" // VMOV bmll4, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5855 "10010010" // /* MW 3 */ + 5856 "00010010" // /* MW 2 */ + 5857 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 151 136 first + 5858 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5859 "00010010" // /* MW 3 */ + 5860 "00101100" // /* MW 2 */ + 5861 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 243 115 first +.src_ref 7 "accum.hpp" 151 115 + 5862 "11111000" // VMOV wl9, wl8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5863 "00100010" // /* MW 3 */ + 5864 "11010001" // /* MW 2 */ + 5865 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5866 "11011000" // VSHIFT x8, x9, x0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5867 "01100110" // /* MW 3 */ + 5868 "01001000" // /* MW 2 */ + 5869 "00011100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5870 "01100010" // VMOV bmll1, x8; VADD.f dm1, dm3, dm1, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5871 "00111101" // /* MW 7 */ + 5872 "01100100" // /* MW 6 */ + 5873 "00010001" // /* MW 5 */ + 5874 "11100110" // /* MW 4 */ + 5875 "10010010" // /* MW 3 */ + 5876 "00010000" // /* MW 2 */ + 5877 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5878 "11111000" // VMOV bmll3, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5879 "10010010" // /* MW 3 */ + 5880 "00010010" // /* MW 2 */ + 5881 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 + 5882 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5883 "00010010" // /* MW 3 */ + 5884 "00101000" // /* MW 2 */ + 5885 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 5886 "11011000" // VSHIFT x10, x8, x0, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5887 "00011110" // /* MW 3 */ + 5888 "01000000" // /* MW 2 */ + 5889 "00011101" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5890 "01100010" // VMOV x8, bmll4; VADD.f dm2, dm2, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5891 "00111101" // /* MW 7 */ + 5892 "01001100" // /* MW 6 */ + 5893 "00010010" // /* MW 5 */ + 5894 "11100110" // /* MW 4 */ + 5895 "00010010" // /* MW 3 */ + 5896 "00110000" // /* MW 2 */ + 5897 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 5898 "11111000" // VMOV bmll3, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5899 "10010010" // /* MW 3 */ + 5900 "00010100" // /* MW 2 */ + 5901 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 5902 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm3, dm4, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5903 "00111101" // /* MW 7 */ + 5904 "10001100" // /* MW 6 */ + 5905 "00010011" // /* MW 5 */ + 5906 "11000110" // /* MW 4 */ + 5907 "00011110" // /* MW 3 */ + 5908 "01000000" // /* MW 2 */ + 5909 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5910 "11111000" // VMOV bmll3, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5911 "10010010" // /* MW 3 */ + 5912 "00010000" // /* MW 2 */ + 5913 "00011011" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 5914 "11111000" // VMOV x8, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5915 "00010010" // /* MW 3 */ + 5916 "00100100" // /* MW 2 */ + 5917 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 5918 "01100010" // VSHIFT x8, x8, x0, r7; VADD.f dm1, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5919 "00111101" // /* MW 7 */ + 5920 "00110000" // /* MW 6 */ + 5921 "00010001" // /* MW 5 */ + 5922 "11000110" // /* MW 4 */ + 5923 "00011110" // /* MW 3 */ + 5924 "01000000" // /* MW 2 */ + 5925 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5926 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5927 "10010010" // /* MW 3 */ + 5928 "00010000" // /* MW 2 */ + 5929 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 5930 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5931 "00010010" // /* MW 3 */ + 5932 "00101000" // /* MW 2 */ + 5933 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 5934 "01100010" // VSHIFT x8, x8, x0, r0; VADD.f dm2, dm2, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5935 "00111101" // /* MW 7 */ + 5936 "01010000" // /* MW 6 */ + 5937 "00010010" // /* MW 5 */ + 5938 "11000110" // /* MW 4 */ + 5939 "00000010" // /* MW 3 */ + 5940 "01000000" // /* MW 2 */ + 5941 "00000100" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5942 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5943 "10010010" // /* MW 3 */ + 5944 "00010000" // /* MW 2 */ + 5945 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 5946 "11111000" // VMOV x8, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5947 "00010010" // /* MW 3 */ + 5948 "00101100" // /* MW 2 */ + 5949 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 5950 "11011000" // VSHIFT x8, x8, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5951 "00000010" // /* MW 3 */ + 5952 "01000000" // /* MW 2 */ + 5953 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5954 "01100010" // VMOV x10, bmll1; VADD.f dm3, dm3, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5955 "00111101" // /* MW 7 */ + 5956 "01110000" // /* MW 6 */ + 5957 "00010011" // /* MW 5 */ + 5958 "11100110" // /* MW 4 */ + 5959 "00010010" // /* MW 3 */ + 5960 "00100100" // /* MW 2 */ + 5961 "00000101" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.nohwbrkpt +.noswbrkpt + 5962 "11111000" // VMOV bmll4, x8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5963 "10010010" // /* MW 3 */ + 5964 "00010000" // /* MW 2 */ + 5965 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 10 +.noswbrkpt + 5966 "01100010" // VSHIFT x10, x10, x0, r0; VADD.f dm0, dm1, dm4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5967 "00111101" // /* MW 7 */ + 5968 "00110000" // /* MW 6 */ + 5969 "00010000" // /* MW 5 */ + 5970 "11000110" // /* MW 4 */ + 5971 "00000010" // /* MW 3 */ + 5972 "01010000" // /* MW 2 */ + 5973 "00000101" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5974 "11111000" // VMOV bmll4, x10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5975 "10010010" // /* MW 3 */ + 5976 "00010100" // /* MW 2 */ + 5977 "00011100" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 5978 "11111000" // VMOV x8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5979 "00010010" // /* MW 3 */ + 5980 "00101000" // /* MW 2 */ + 5981 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first + 5982 "10111000" // VEXTRACT.32 r23, x8, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5983 "00000001" // /* MW 3 */ + 5984 "11100010" // /* MW 2 */ + 5985 "00011101" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 5986 "11111000" // VMOV x10, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5987 "00010010" // /* MW 3 */ + 5988 "00101100" // /* MW 2 */ + 5989 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first +.src_ref 5 "vector.hpp" 1288 16 first +.aggressive_scheduled_block_id 11 +.noswbrkpt + 5990 "01110100" // VLDB wh10, [p7, #32]; VEXTRACT.32 r6, x10, #0, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5991 "00000011" // /* MW 5 */ + 5992 "01010100" // /* MW 4 */ + 5993 "10000011" // /* MW 3 */ + 5994 "11010000" // /* MW 2 */ + 5995 "11100010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 5996 "11111000" // VMOV x11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5997 "00010010" // /* MW 3 */ + 5998 "10100000" // /* MW 2 */ + 5999 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first +.src_ref 5 "vector.hpp" 1287 41 first +.src_ref 5 "broadcast.hpp" 80 25 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6000 "10110100" // VLDB wl10, [p7]; VEXTBCST.32 x10, x11, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6001 "00000110" // /* MW 5 */ + 6002 "10110100" // /* MW 4 */ + 6003 "10001010" // /* MW 3 */ + 6004 "11010100" // /* MW 2 */ + 6005 "11100000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6006 "00111000" // VSEL.32 x9, x10, x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6007 "10100000" // /* MW 3 */ + 6008 "11010100" // /* MW 2 */ + 6009 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 853 46 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6010 "01111000" // VINSERT.32 x10, x2, #0, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6011 "11010001" // /* MW 3 */ + 6012 "00010000" // /* MW 2 */ + 6013 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 853 46 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6014 "01111000" // VINSERT.32 x8, x2, #0, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6015 "11110001" // /* MW 3 */ + 6016 "00010010" // /* MW 2 */ + 6017 "00011100" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 first +.src_ref 5 "vector.hpp" 1413 19 first +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6018 "11111000" // VMOV wl11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "00100010" // /* MW 3 */ + 6020 "11010011" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 5 "vector.hpp" 1413 19 +.aggressive_scheduled_block_id 11 +.nohwbrkpt +.noswbrkpt + 6022 "11111000" // VMOV wh11, wl9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "00100010" // /* MW 3 */ + 6024 "10010011" // /* MW 2 */ + 6025 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 142 95 +.src_ref 5 "vector.hpp" 1413 19 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6026 "11111000" // VMOV wh8, wl10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6027 "00100010" // /* MW 3 */ + 6028 "00010101" // /* MW 2 */ + 6029 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 first + 6030 "00111000" // VSEL.32 x8, x11, x8, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6031 "00000000" // /* MW 3 */ + 6032 "01011100" // /* MW 2 */ + 6033 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 6034 "00111000" // VSEL.32 x8, x1, x8, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6035 "00001000" // /* MW 3 */ + 6036 "00001100" // /* MW 2 */ + 6037 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 + 6038 "00111000" // VSEL.32 x7, x8, x7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6039 "10101000" // /* MW 3 */ + 6040 "11000011" // /* MW 2 */ + 6041 "00011011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 + 6042 "11111000" // VMOV bmll0, x7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6043 "10010010" // /* MW 3 */ + 6044 "00001110" // /* MW 2 */ + 6045 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6046 "11111000" // VMOV x9, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6047 "10010010" // /* MW 3 */ + 6048 "10101100" // /* MW 2 */ + 6049 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 first +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first + 6050 "00000010" // VCONV.bf16.fp32 wl6, bmll0; VMOV bmll2, x7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6051 "01110000" // /* MW 7 */ + 6052 "01001001" // /* MW 6 */ + 6053 "00000111" // /* MW 5 */ + 6054 "00000001" // /* MW 4 */ + 6055 "11000000" // /* MW 3 */ + 6056 "00000010" // /* MW 2 */ + 6057 "01101000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6058 "11111000" // VMOV x8, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6059 "10010010" // /* MW 3 */ + 6060 "00110010" // /* MW 2 */ + 6061 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1289 16 + 6062 "01011010" // LSHL r29, r22, r28; MOV r27, r29; VMSC.f dm2, dm2, x6, x4, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6063 "10000011" // /* MW 9 */ + 6064 "01001100" // /* MW 8 */ + 6065 "00010010" // /* MW 7 */ + 6066 "00001111" // /* MW 6 */ + 6067 "11101010" // /* MW 5 */ + 6068 "11101101" // /* MW 4 */ + 6069 "11001101" // /* MW 3 */ + 6070 "10111011" // /* MW 2 */ + 6071 "00000101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 9 first +.src_ref 5 "vector.hpp" 1285 72 +.src_ref 5 "vector.hpp" 1289 16 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id first + 6072 "01100010" // SEL.EQZ r19, r5, r29, r27; VMUL.f dm1, x6, x5, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "10100001" // /* MW 7 */ + 6074 "11101100" // /* MW 6 */ + 6075 "00010001" // /* MW 5 */ + 6076 "10010001" // /* MW 4 */ + 6077 "00111110" // /* MW 3 */ + 6078 "00001011" // /* MW 2 */ + 6079 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 124 9 first +.src_ref 5 "vector.hpp" 1285 72 first +.src_ref 5 "vector.hpp" 1289 16 +.aggressive_scheduled_block_id 12 +.noswbrkpt + 6080 "01011010" // SEL.EQZ r18, r22, r30, r27; VMOV x6, x8; VMUL.f dm0, x6, x3, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6081 "01100001" // /* MW 9 */ + 6082 "11101100" // /* MW 8 */ + 6083 "00010000" // /* MW 7 */ + 6084 "00101111" // /* MW 6 */ + 6085 "00001001" // /* MW 5 */ + 6086 "00110011" // /* MW 4 */ + 6087 "11100010" // /* MW 3 */ + 6088 "10100101" // /* MW 2 */ + 6089 "00000101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 9 first +.aggressive_scheduled_block_id 12 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6090 "01001000" // VMUL.f dm3, x6, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6091 "00000001" // /* MW 3 */ + 6092 "11101100" // /* MW 2 */ + 6093 "00010011" // /* MW 1 */ + 6094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6095 "00000000" // /* MW 1 */ + 6096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6097 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first +.src_ref 6 "me_vmult_float_emulated.h" 110 6 first +.src_ref 6 "me_vmult_float_emulated.h" 110 19 first + 6098 "00011000" // VCONV.bf16.fp32 wl9, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6099 "00010110" // /* MW 3 */ + 6100 "11000001" // /* MW 2 */ + 6101 "00001100" // /* MW 1 */ + 6102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6103 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 6104 "01001000" // VMSC.f dm2, dm2, x9, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "10000011" // /* MW 3 */ + 6106 "01010010" // /* MW 2 */ + 6107 "00010010" // /* MW 1 */ + 6108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6109 "00000000" // /* MW 1 */ + 6110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6111 "00000000" // /* MW 1 */ + 6112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6113 "00000000" // /* MW 1 */ + 6114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6115 "00000000" // /* MW 1 */ + 6116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6117 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6118 "00011000" // VCONV.bf16.fp32 wl8, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6119 "00010110" // /* MW 3 */ + 6120 "01000001" // /* MW 2 */ + 6121 "00001100" // /* MW 1 */ + 6122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6123 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first + 6124 "01001000" // VMUL.f dm4, x8, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6125 "10100001" // /* MW 3 */ + 6126 "11110000" // /* MW 2 */ + 6127 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 9 first + 6128 "01001000" // VMUL.f dm2, x8, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6129 "01100001" // /* MW 3 */ + 6130 "11110000" // /* MW 2 */ + 6131 "00010010" // /* MW 1 */ + 6132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6133 "00000000" // /* MW 1 */ + 6134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6135 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id first + 6136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6137 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 9 first +.aggressive_scheduled_block_id 13 +.noswbrkpt + 6138 "01001000" // VMUL.f dm2, x9, x5, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6139 "10100001" // /* MW 3 */ + 6140 "11110010" // /* MW 2 */ + 6141 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6142 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6143 "00010010" // /* MW 3 */ + 6144 "01110000" // /* MW 2 */ + 6145 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 13 +.nohwbrkpt +.noswbrkpt + 6146 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6147 "00111101" // /* MW 3 */ + 6148 "10001000" // /* MW 2 */ + 6149 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.aggressive_scheduled_block_id 13 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6150 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6151 "10010010" // /* MW 3 */ + 6152 "00000101" // /* MW 2 */ + 6153 "00011100" // /* MW 1 */ + 6154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6155 "00000000" // /* MW 1 */ + 6156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6157 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id first + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 9 first +.aggressive_scheduled_block_id 14 +.noswbrkpt + 6160 "01001000" // VMUL.f dm2, x0, x8, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6161 "00000001" // /* MW 3 */ + 6162 "11100001" // /* MW 2 */ + 6163 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6164 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6165 "00010010" // /* MW 3 */ + 6166 "01110000" // /* MW 2 */ + 6167 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 14 +.nohwbrkpt +.noswbrkpt + 6168 "01001000" // VADD.f dm4, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6169 "00111101" // /* MW 3 */ + 6170 "10001000" // /* MW 2 */ + 6171 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.aggressive_scheduled_block_id 14 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6172 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6173 "10010010" // /* MW 3 */ + 6174 "00000001" // /* MW 2 */ + 6175 "00011100" // /* MW 1 */ + 6176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6177 "00000000" // /* MW 1 */ + 6178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6179 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id first + 6180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6181 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 9 first +.aggressive_scheduled_block_id 15 +.noswbrkpt + 6182 "01001000" // VMUL.f dm1, x9, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6183 "01100001" // /* MW 3 */ + 6184 "11110010" // /* MW 2 */ + 6185 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6186 "11111000" // VMOV lfl0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6187 "00010010" // /* MW 3 */ + 6188 "01110000" // /* MW 2 */ + 6189 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 15 +.nohwbrkpt +.noswbrkpt + 6190 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6191 "00111101" // /* MW 3 */ + 6192 "10000100" // /* MW 2 */ + 6193 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 15 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6194 "11111000" // VMOV bmll4, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6195 "10010010" // /* MW 3 */ + 6196 "00000101" // /* MW 2 */ + 6197 "00011100" // /* MW 1 */ + 6198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6199 "00000000" // /* MW 1 */ + 6200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6201 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id first + 6202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6203 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 9 first +.aggressive_scheduled_block_id 16 +.noswbrkpt + 6204 "01001000" // VMUL.f dm1, x9, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6205 "00000001" // /* MW 3 */ + 6206 "11110010" // /* MW 2 */ + 6207 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6208 "11111000" // VMOV lfh0, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6209 "00010010" // /* MW 3 */ + 6210 "01110000" // /* MW 2 */ + 6211 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 16 +.nohwbrkpt +.noswbrkpt + 6212 "01001000" // VADD.f dm4, dm4, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6213 "00111101" // /* MW 3 */ + 6214 "10000100" // /* MW 2 */ + 6215 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 16 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6216 "11111000" // VMOV bmll4, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6217 "10010010" // /* MW 3 */ + 6218 "00000001" // /* MW 2 */ + 6219 "00011100" // /* MW 1 */ + 6220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6221 "00000000" // /* MW 1 */ + 6222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6223 "00000000" // /* MW 1 */ + 6224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6225 "00000000" // /* MW 1 */ + 6226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6227 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id first + 6228 "11111000" // VMOV lfl1, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6229 "00010010" // /* MW 3 */ + 6230 "01110000" // /* MW 2 */ + 6231 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 17 +.noswbrkpt + 6232 "01001000" // VADD.f dm2, dm4, dm2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6233 "00111101" // /* MW 3 */ + 6234 "10001000" // /* MW 2 */ + 6235 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 17 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6236 "11111000" // VMOV bmll4, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6237 "10010010" // /* MW 3 */ + 6238 "00010101" // /* MW 2 */ + 6239 "00011100" // /* MW 1 */ + 6240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6241 "00000000" // /* MW 1 */ + 6242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6243 "00000000" // /* MW 1 */ + 6244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6245 "00000000" // /* MW 1 */ + 6246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6247 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id first + 6248 "11111000" // VMOV lfh1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6249 "00010010" // /* MW 3 */ + 6250 "01101000" // /* MW 2 */ + 6251 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 first +.aggressive_scheduled_block_id 18 +.noswbrkpt + 6252 "01001000" // VADD.f dm2, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6253 "00111101" // /* MW 3 */ + 6254 "01000100" // /* MW 2 */ + 6255 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 18 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6256 "11111000" // VMOV bmll2, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "10010010" // /* MW 3 */ + 6258 "00010001" // /* MW 2 */ + 6259 "00011010" // /* MW 1 */ + 6260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6261 "00000000" // /* MW 1 */ + 6262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6263 "00000000" // /* MW 1 */ + 6264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6265 "00000000" // /* MW 1 */ + 6266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6267 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id first + 6268 "11111000" // VMOV lfl1, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "00010010" // /* MW 3 */ + 6270 "01101000" // /* MW 2 */ + 6271 "00011101" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 19 +.noswbrkpt + 6272 "01001000" // VADD.f dm0, dm1, dm0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "00111101" // /* MW 3 */ + 6274 "00100000" // /* MW 2 */ + 6275 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.aggressive_scheduled_block_id 19 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6276 "11111000" // VMOV bmll1, lfl1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "10010010" // /* MW 3 */ + 6278 "00010101" // /* MW 2 */ + 6279 "00011001" // /* MW 1 */ + 6280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6281 "00000000" // /* MW 1 */ + 6282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6283 "00000000" // /* MW 1 */ + 6284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6285 "00000000" // /* MW 1 */ + 6286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6287 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id first + 6288 "11111000" // VMOV lfh1, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6289 "00010010" // /* MW 3 */ + 6290 "01100000" // /* MW 2 */ + 6291 "00011100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 first +.aggressive_scheduled_block_id 20 +.noswbrkpt + 6292 "01001000" // VADD.f dm0, dm0, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6293 "00111101" // /* MW 3 */ + 6294 "00001100" // /* MW 2 */ + 6295 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.aggressive_scheduled_block_id 20 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6296 "11111000" // VMOV bmll0, lfh1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6297 "10010010" // /* MW 3 */ + 6298 "00010001" // /* MW 2 */ + 6299 "00011000" // /* MW 1 */ + 6300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6301 "00000000" // /* MW 1 */ + 6302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6303 "00000000" // /* MW 1 */ + 6304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6305 "00000000" // /* MW 1 */ + 6306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6307 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 72 first +.src_ref 7 "accum.hpp" 1108 103 first + 6308 "00011000" // VCONV.bf16.fp32 wl11, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6309 "00010110" // /* MW 3 */ + 6310 "11000000" // /* MW 2 */ + 6311 "00001101" // /* MW 1 */ + 6312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6313 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1286 41 + 6314 "11011000" // VSHIFT x11, x0, x11, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "11111110" // /* MW 3 */ + 6316 "10000101" // /* MW 2 */ + 6317 "00011101" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1289 16 first + 6318 "00111000" // VSEL.8 x11, x10, x11, r19:r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6319 "11001100" // /* MW 3 */ + 6320 "11010101" // /* MW 2 */ + 6321 "00011101" // /* MW 1 */ + 6322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6323 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 first +.src_ref 5 "vector.hpp" 1292 26 first + 6324 "00110110" // NOPA; NOPB; VST wh11, [p7, #32]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6325 "01000001" // /* MW 11 */ + 6326 "01100101" // /* MW 10 */ + 6327 "10001011" // /* MW 9 */ + 6328 "00000011" // /* MW 8 */ + 6329 "00000000" // /* MW 7 */ + 6330 "00000000" // /* MW 6 */ + 6331 "00100000" // /* MW 5 */ + 6332 "00000000" // /* MW 4 */ + 6333 "11110000" // /* MW 3 */ + 6334 "00101100" // /* MW 2 */ + 6335 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1488 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 first +.end_of_loop + 6336 "11100001" // NOPA; NOPB; VST wl11, [p7], m4; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6337 "00000000" // /* MW 15 */ + 6338 "00000000" // /* MW 14 */ + 6339 "01111000" // /* MW 13 */ + 6340 "10100101" // /* MW 12 */ + 6341 "00000001" // /* MW 11 */ + 6342 "00000000" // /* MW 10 */ + 6343 "00000000" // /* MW 9 */ + 6344 "10000000" // /* MW 8 */ + 6345 "11101010" // /* MW 7 */ + 6346 "10001010" // /* MW 6 */ + 6347 "00100111" // /* MW 5 */ + 6348 "00000000" // /* MW 4 */ + 6349 "11110000" // /* MW 3 */ + 6350 "00101100" // /* MW 2 */ + 6351 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6352 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */ + 6353 "00000000" // /* MW 5 */ + 6354 "00000000" // /* MW 4 */ + 6355 "01111000" // /* MW 3 */ + 6356 "00001100" // /* MW 2 */ + 6357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6359 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6361 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6367 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1520 + 6368 "01011100" // ST dn3, [sp, #-4]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "10000000" // /* MW 5 */ + 6370 "10110100" // /* MW 4 */ + 6371 "10110000" // /* MW 3 */ + 6372 "10110100" // /* MW 2 */ + 6373 "11111111" // /* MW 1 */ + 6374 "01111010" // NOPA; ST lr, [sp, #-8]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6375 "00000000" // /* MW 9 */ + 6376 "00000000" // /* MW 8 */ + 6377 "00000000" // /* MW 7 */ + 6378 "10000000" // /* MW 6 */ + 6379 "00111101" // /* MW 5 */ + 6380 "11111000" // /* MW 4 */ + 6381 "11110111" // /* MW 3 */ + 6382 "00101100" // /* MW 2 */ + 6383 "00000000" // /* MW 1 */ +.label __ll133__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 2 "reduce_base_c8.h" 352 30 first + 6384 "00011000" // ADD.NC p7, r3, #34 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6385 "10010001" // /* MW 3 */ + 6386 "01100001" // /* MW 2 */ + 6387 "00011111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 + 6388 "11010100" // LDA.u16 r3, [p7]; MOV crMCDEn, vaddSign0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6389 "11000001" // /* MW 5 */ + 6390 "01100100" // /* MW 4 */ + 6391 "01011011" // /* MW 3 */ + 6392 "10001111" // /* MW 2 */ + 6393 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id first + 6394 "11111000" // MOV crSCDEn, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6395 "01100000" // /* MW 3 */ + 6396 "01111011" // /* MW 2 */ + 6397 "00011000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 +.aggressive_scheduled_block_id 21 +.noswbrkpt + 6398 "00011000" // ST.s16 r3, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6399 "01110111" // /* MW 3 */ + 6400 "00000100" // /* MW 2 */ + 6401 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 353 57 first +.aggressive_scheduled_block_id 21 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6402 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 6403 "00000001" // /* MW 5 */ + 6404 "00000000" // /* MW 4 */ + 6405 "11111000" // /* MW 3 */ + 6406 "00010011" // /* MW 2 */ + 6407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 21 +.nohwbrkpt +.noswbrkpt + 6412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6413 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 first +.delay_slot +.aggressive_scheduled_block_id 21 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6414 "00011000" // ADD r3, r3, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6415 "00000111" // /* MW 3 */ + 6416 "11000110" // /* MW 2 */ + 6417 "00010000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 352 30 +.delay_slot + 6418 "01111110" // NOPA; NOPB; NOPS; EXTEND.u16 r0, r3; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6419 "01100000" // /* MW 13 */ + 6420 "00101011" // /* MW 12 */ + 6421 "00000000" // /* MW 11 */ + 6422 "10101111" // /* MW 10 */ + 6423 "00110100" // /* MW 9 */ + 6424 "00000000" // /* MW 8 */ + 6425 "10110000" // /* MW 7 */ + 6426 "11000000" // /* MW 6 */ + 6427 "00100000" // /* MW 5 */ + 6428 "00000000" // /* MW 4 */ + 6429 "11110000" // /* MW 3 */ + 6430 "00101100" // /* MW 2 */ + 6431 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 +.return_address + 6432 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "00111001" // /* MW 3 */ + 6434 "11111000" // /* MW 2 */ + 6435 "00000111" // /* MW 1 */ + 6436 "00011000" // LDA p1, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "10011001" // /* MW 3 */ + 6438 "11111100" // /* MW 2 */ + 6439 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 353 23 first + 6440 "00011000" // ST.s16 r3, [p7, #10] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "01110111" // /* MW 3 */ + 6442 "01010100" // /* MW 2 */ + 6443 "00000111" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 first + 6444 "11000100" // PADDXM [sp], #-256 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6445 "00000001" // /* MW 5 */ + 6446 "00000000" // /* MW 4 */ + 6447 "00000000" // /* MW 3 */ + 6448 "11100000" // /* MW 2 */ + 6449 "11111111" // /* MW 1 */ + 6450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6451 "00000000" // /* MW 1 */ + 6452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6453 "00000000" // /* MW 1 */ + 6454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6455 "00000000" // /* MW 1 */ +.src_ref 2 "reduce_base_c8.h" 420 4 + 6456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6457 "00000000" // /* MW 3 */ + 6458 "00101000" // /* MW 2 */ + 6459 "00010000" // /* MW 1 */ +.delay_slot + 6460 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6461 "11000000" // /* MW 3 */ + 6462 "01100010" // /* MW 2 */ + 6463 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6470 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6471 "01111110" // /* MW 9 */ + 6472 "10100101" // /* MW 8 */ + 6473 "00000001" // /* MW 7 */ + 6474 "00000000" // /* MW 6 */ + 6475 "00010000" // /* MW 5 */ + 6476 "00000000" // /* MW 4 */ + 6477 "11110000" // /* MW 3 */ + 6478 "00101100" // /* MW 2 */ + 6479 "00000000" // /* MW 1 */ +.label __ll135__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 5 "blend.hpp" 163 48 + 6480 "10111010" // MOVA r20, #255; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 6481 "00100000" // /* MW 9 */ + 6482 "00000000" // /* MW 8 */ + 6483 "00000000" // /* MW 7 */ + 6484 "10111000" // /* MW 6 */ + 6485 "00000010" // /* MW 5 */ + 6486 "00000000" // /* MW 4 */ + 6487 "00000000" // /* MW 3 */ + 6488 "11110100" // /* MW 2 */ + 6489 "00011111" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 6490 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6491 "00000001" // /* MW 3 */ + 6492 "00101010" // /* MW 2 */ + 6493 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6500 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6501 "10000001" // /* MW 11 */ + 6502 "10101101" // /* MW 10 */ + 6503 "00000000" // /* MW 9 */ + 6504 "00000000" // /* MW 8 */ + 6505 "00000000" // /* MW 7 */ + 6506 "00000000" // /* MW 6 */ + 6507 "00100000" // /* MW 5 */ + 6508 "00000000" // /* MW 4 */ + 6509 "11110000" // /* MW 3 */ + 6510 "00101100" // /* MW 2 */ + 6511 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1664 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6512 "00011000" // MOVX r5, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6513 "00010101" // /* MW 3 */ + 6514 "00001010" // /* MW 2 */ + 6515 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first + 6516 "10011000" // EQ r5, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6517 "01100111" // /* MW 3 */ + 6518 "01001010" // /* MW 2 */ + 6519 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6520 "10000100" // JNZ r5, #7264 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7264 delay_slots=5 */ + 6521 "00000001" // /* MW 5 */ + 6522 "01000000" // /* MW 4 */ + 6523 "00110000" // /* MW 3 */ + 6524 "00001110" // /* MW 2 */ + 6525 "00101000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6527 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6535 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6536 "00011000" // MOVX r7, #6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6537 "00011001" // /* MW 3 */ + 6538 "00001110" // /* MW 2 */ + 6539 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6540 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6541 "01100111" // /* MW 3 */ + 6542 "11001110" // /* MW 2 */ + 6543 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 6544 "10000100" // JNZ r7, #7504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7504 delay_slots=5 */ + 6545 "00000001" // /* MW 5 */ + 6546 "01000000" // /* MW 4 */ + 6547 "10101000" // /* MW 3 */ + 6548 "00001110" // /* MW 2 */ + 6549 "00111000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.delay_slot + 6550 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6551 "01000001" // /* MW 3 */ + 6552 "00001010" // /* MW 2 */ + 6553 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6555 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6557 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6559 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6560 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6561 "00000000" // /* MW 15 */ + 6562 "00000000" // /* MW 14 */ + 6563 "01111000" // /* MW 13 */ + 6564 "10100101" // /* MW 12 */ + 6565 "00000001" // /* MW 11 */ + 6566 "00000000" // /* MW 10 */ + 6567 "00000000" // /* MW 9 */ + 6568 "00000000" // /* MW 8 */ + 6569 "01011011" // /* MW 7 */ + 6570 "00000001" // /* MW 6 */ + 6571 "00100000" // /* MW 5 */ + 6572 "00000000" // /* MW 4 */ + 6573 "11110000" // /* MW 3 */ + 6574 "00101100" // /* MW 2 */ + 6575 "00000000" // /* MW 1 */ +.label __ll67__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 3 "reduce_mean_c8_impl.h" 200 65 first +.src_ref 3 "reduce_mean_c8_impl.h" 202 30 + 6576 "10111010" // LDA.s16 r7, [p2, dj2]; MOVX r17, #7; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6577 "01111000" // /* MW 9 */ + 6578 "11110000" // /* MW 8 */ + 6579 "01100000" // /* MW 7 */ + 6580 "11101010" // /* MW 6 */ + 6581 "00010000" // /* MW 5 */ + 6582 "00000001" // /* MW 4 */ + 6583 "01010000" // /* MW 3 */ + 6584 "00011110" // /* MW 2 */ + 6585 "01001000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 202 30 first + 6586 "01100100" // NE r6, r17, r6; MOV r17, #257 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6587 "00000101" // /* MW 5 */ + 6588 "10100100" // /* MW 4 */ + 6589 "00011000" // /* MW 3 */ + 6590 "10001101" // /* MW 2 */ + 6591 "10001001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 202 12 + 6592 "10000100" // JNZ r6, #7232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7232 delay_slots=5 */ + 6593 "00000001" // /* MW 5 */ + 6594 "01000000" // /* MW 4 */ + 6595 "00100000" // /* MW 3 */ + 6596 "00001110" // /* MW 2 */ + 6597 "00110000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6605 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 first +.delay_slot + 6606 "10011000" // ASHL r5, r7, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6607 "01011110" // /* MW 3 */ + 6608 "11001010" // /* MW 2 */ + 6609 "00010001" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 7 "accum.hpp" 199 120 +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first +.src_ref 3 "reduce_mean_c8_impl.h" 206 35 +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 + 6610 "01110110" // MOVA dj2, #36; MOVS p0, p1; MOVXM ls, #6672 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6611 "00010000" // /* MW 11 */ + 6612 "00001000" // /* MW 10 */ + 6613 "01111101" // /* MW 9 */ + 6614 "00000100" // /* MW 8 */ + 6615 "00000000" // /* MW 7 */ + 6616 "00000000" // /* MW 6 */ + 6617 "10001011" // /* MW 5 */ + 6618 "10000100" // /* MW 4 */ + 6619 "10000000" // /* MW 3 */ + 6620 "10001010" // /* MW 2 */ + 6621 "00000100" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 +.src_ref 3 "reduce_mean_c8_impl.h" 206 35 + 6622 "01110110" // LDA r7, [p2, dj2]; ST dn3, [sp, #-4]; MOVXM le, #6768 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6623 "00010000" // /* MW 11 */ + 6624 "00111000" // /* MW 10 */ + 6625 "10111101" // /* MW 9 */ + 6626 "00000101" // /* MW 8 */ + 6627 "00000000" // /* MW 7 */ + 6628 "10000000" // /* MW 6 */ + 6629 "10100101" // /* MW 5 */ + 6630 "11111101" // /* MW 4 */ + 6631 "11010111" // /* MW 3 */ + 6632 "00011110" // /* MW 2 */ + 6633 "01001000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first + 6634 "10011000" // VLDA bmll2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6635 "00010101" // /* MW 3 */ + 6636 "00011101" // /* MW 2 */ + 6637 "00000000" // /* MW 1 */ + 6638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6639 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 + 6640 "11111000" // VMOV bmhh4, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6641 "10010010" // /* MW 3 */ + 6642 "11000010" // /* MW 2 */ + 6643 "00011100" // /* MW 1 */ + 6644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6645 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 199 120 +.src_ref 5 "add.hpp" 28 49 first + 6646 "01100010" // VMOV bmll3, bmhh4; VADD.f dm1, dm3, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6647 "00111101" // /* MW 7 */ + 6648 "01101000" // /* MW 6 */ + 6649 "00010001" // /* MW 5 */ + 6650 "11100110" // /* MW 4 */ + 6651 "00010010" // /* MW 3 */ + 6652 "00010011" // /* MW 2 */ + 6653 "00000011" // /* MW 1 */ + 6654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6655 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 206 9 first + 6656 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC lc, r7, #-1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6657 "00000000" // /* MW 15 */ + 6658 "00000000" // /* MW 14 */ + 6659 "11001000" // /* MW 13 */ + 6660 "11111111" // /* MW 12 */ + 6661 "10111001" // /* MW 11 */ + 6662 "00000010" // /* MW 10 */ + 6663 "00000000" // /* MW 9 */ + 6664 "00000000" // /* MW 8 */ + 6665 "01011011" // /* MW 7 */ + 6666 "00000001" // /* MW 6 */ + 6667 "00100000" // /* MW 5 */ + 6668 "00000000" // /* MW 4 */ + 6669 "11110000" // /* MW 3 */ + 6670 "00101100" // /* MW 2 */ + 6671 "00000000" // /* MW 1 */ +.label ZLS_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1824 +.src_ref 5 "vector.hpp" 1139 17 first +.src_ref 7 "accum.hpp" 199 120 first +.src_ref 3 "reduce_mean_c8_impl.h" 209 22 first +.begin_of_loop +.loop_nesting 1 + 6672 "11100001" // VLDA bmll2, [p0], #64; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6673 "00000000" // /* MW 15 */ + 6674 "00000000" // /* MW 14 */ + 6675 "01111000" // /* MW 13 */ + 6676 "10100101" // /* MW 12 */ + 6677 "00000001" // /* MW 11 */ + 6678 "00000000" // /* MW 10 */ + 6679 "00000000" // /* MW 9 */ + 6680 "00000000" // /* MW 8 */ + 6681 "01011011" // /* MW 7 */ + 6682 "00000001" // /* MW 6 */ + 6683 "00100000" // /* MW 5 */ + 6684 "00000000" // /* MW 4 */ + 6685 "10110000" // /* MW 3 */ + 6686 "10100010" // /* MW 2 */ + 6687 "00000011" // /* MW 1 */ + 6688 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6689 "00000000" // /* MW 15 */ + 6690 "00000000" // /* MW 14 */ + 6691 "01111000" // /* MW 13 */ + 6692 "10100101" // /* MW 12 */ + 6693 "00000001" // /* MW 11 */ + 6694 "00000000" // /* MW 10 */ + 6695 "00000000" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "01011011" // /* MW 7 */ + 6698 "00000001" // /* MW 6 */ + 6699 "00100000" // /* MW 5 */ + 6700 "00000000" // /* MW 4 */ + 6701 "11110000" // /* MW 3 */ + 6702 "00101100" // /* MW 2 */ + 6703 "00000000" // /* MW 1 */ + 6704 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6705 "00000000" // /* MW 15 */ + 6706 "00000000" // /* MW 14 */ + 6707 "01111000" // /* MW 13 */ + 6708 "10100101" // /* MW 12 */ + 6709 "00000001" // /* MW 11 */ + 6710 "00000000" // /* MW 10 */ + 6711 "00000000" // /* MW 9 */ + 6712 "00000000" // /* MW 8 */ + 6713 "01011011" // /* MW 7 */ + 6714 "00000001" // /* MW 6 */ + 6715 "00100000" // /* MW 5 */ + 6716 "00000000" // /* MW 4 */ + 6717 "11110000" // /* MW 3 */ + 6718 "00101100" // /* MW 2 */ + 6719 "00000000" // /* MW 1 */ + 6720 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6721 "00000000" // /* MW 15 */ + 6722 "00000000" // /* MW 14 */ + 6723 "01111000" // /* MW 13 */ + 6724 "10100101" // /* MW 12 */ + 6725 "00000001" // /* MW 11 */ + 6726 "00000000" // /* MW 10 */ + 6727 "00000000" // /* MW 9 */ + 6728 "00000000" // /* MW 8 */ + 6729 "01011011" // /* MW 7 */ + 6730 "00000001" // /* MW 6 */ + 6731 "00100000" // /* MW 5 */ + 6732 "00000000" // /* MW 4 */ + 6733 "11110000" // /* MW 3 */ + 6734 "00101100" // /* MW 2 */ + 6735 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 150 115 first +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id first + 6736 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmhh4, bmll1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6737 "00000000" // /* MW 15 */ + 6738 "00000000" // /* MW 14 */ + 6739 "01111000" // /* MW 13 */ + 6740 "00001001" // /* MW 12 */ + 6741 "01100010" // /* MW 11 */ + 6742 "00000010" // /* MW 10 */ + 6743 "00000000" // /* MW 9 */ + 6744 "00000000" // /* MW 8 */ + 6745 "01011011" // /* MW 7 */ + 6746 "00000001" // /* MW 6 */ + 6747 "00100000" // /* MW 5 */ + 6748 "00000000" // /* MW 4 */ + 6749 "11110000" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 5 "add.hpp" 28 49 first +.aggressive_scheduled_block_id 22 +.noswbrkpt + 6752 "11101011" // NOPA; NOPB; NOPS; NOPX; NOPM; VADD.f dm1, dm3, dm2, r2 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "01000001" // /* MW 15 */ + 6754 "10001011" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "01011011" // /* MW 7 */ + 6762 "00000001" // /* MW 6 */ + 6763 "00100000" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label ZLE_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_1920 +.src_ref 7 "accum.hpp" 199 120 first +.end_of_loop +.aggressive_scheduled_block_id 22 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6768 "11100001" // NOPA; NOPB; NOPS; NOPX; VMOV bmll3, bmhh4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6769 "00000000" // /* MW 15 */ + 6770 "00000000" // /* MW 14 */ + 6771 "01111000" // /* MW 13 */ + 6772 "10001001" // /* MW 12 */ + 6773 "10001001" // /* MW 11 */ + 6774 "00000001" // /* MW 10 */ + 6775 "00000000" // /* MW 9 */ + 6776 "00000000" // /* MW 8 */ + 6777 "01011011" // /* MW 7 */ + 6778 "00000001" // /* MW 6 */ + 6779 "00100000" // /* MW 5 */ + 6780 "00000000" // /* MW 4 */ + 6781 "11110000" // /* MW 3 */ + 6782 "00101100" // /* MW 2 */ + 6783 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id first +.loop_nesting 0 + 6784 "10111010" // MOVA r16, #16; MOVXM p7, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6785 "00010000" // /* MW 9 */ + 6786 "01111000" // /* MW 8 */ + 6787 "10110010" // /* MW 7 */ + 6788 "11110011" // /* MW 6 */ + 6789 "00000001" // /* MW 5 */ + 6790 "00000000" // /* MW 4 */ + 6791 "00000000" // /* MW 3 */ + 6792 "00010000" // /* MW 2 */ + 6793 "00000010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 5 "vector.hpp" 915 23 +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6794 "10111010" // LDA.s8 r4, [p7]; MOVX r6, #8; MOV vaddSign0, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6795 "01011000" // /* MW 9 */ + 6796 "00000001" // /* MW 8 */ + 6797 "10011000" // /* MW 7 */ + 6798 "00001000" // /* MW 6 */ + 6799 "01100001" // /* MW 5 */ + 6800 "00000000" // /* MW 4 */ + 6801 "01010000" // /* MW 3 */ + 6802 "10010000" // /* MW 2 */ + 6803 "11100000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6804 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6805 "00000101" // /* MW 3 */ + 6806 "00100010" // /* MW 2 */ + 6807 "00010000" // /* MW 1 */ +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6809 "00000000" // /* MW 1 */ +.src_ref 7 "accum.hpp" 150 115 first +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6810 "11111000" // VMOV bmhh4, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6811 "00010010" // /* MW 3 */ + 6812 "11000100" // /* MW 2 */ + 6813 "00011100" // /* MW 1 */ +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6816 "11111000" // VMOV x2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6817 "00010010" // /* MW 3 */ + 6818 "00110011" // /* MW 2 */ + 6819 "00011001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.aggressive_scheduled_block_id 23 +.nohwbrkpt +.noswbrkpt + 6820 "11011000" // VSHIFT x2, x2, x0, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6821 "00010010" // /* MW 3 */ + 6822 "00010000" // /* MW 2 */ + 6823 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "vector.hpp" 1159 33 +.src_ref 7 "accum.hpp" 198 120 first +.src_ref 7 "accum.hpp" 1108 103 +.src_ref 5 "add_reduce.hpp" 324 44 first +.aggressive_scheduled_block_id 23 +.noswbrkpt + 6824 "01011010" // MOVX crRnd, r4; VMOV bmll0, x2; VADD.f dm0, dm2, dm0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6825 "00111101" // /* MW 9 */ + 6826 "01000000" // /* MW 8 */ + 6827 "00010000" // /* MW 7 */ + 6828 "00101111" // /* MW 6 */ + 6829 "01001001" // /* MW 5 */ + 6830 "00000000" // /* MW 4 */ + 6831 "10000000" // /* MW 3 */ + 6832 "00111010" // /* MW 2 */ + 6833 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 +.aggressive_scheduled_block_id 23 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6834 "11111000" // VMOV bmll2, bmhh4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6835 "00010010" // /* MW 3 */ + 6836 "00010011" // /* MW 2 */ + 6837 "00011010" // /* MW 1 */ +.src_ref 5 "broadcast.hpp" 80 25 first + 6838 "11111000" // VBCST.32 x2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6839 "01110010" // /* MW 3 */ + 6840 "00010110" // /* MW 2 */ + 6841 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 + 6842 "11111000" // VMOV bmll1, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6843 "10010010" // /* MW 3 */ + 6844 "00000100" // /* MW 2 */ + 6845 "00011001" // /* MW 1 */ + 6846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6847 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first + 6848 "11111000" // VMOV bmll2, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "00010010" // /* MW 3 */ + 6850 "00000100" // /* MW 2 */ + 6851 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id first + 6852 "11111000" // VMOV x2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "00010010" // /* MW 3 */ + 6854 "00100000" // /* MW 2 */ + 6855 "00011001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 24 +.noswbrkpt + 6856 "01100010" // VSHIFT x2, x2, x0, r16; VADD.f dm0, dm0, dm3, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6857 "00111101" // /* MW 7 */ + 6858 "00001100" // /* MW 6 */ + 6859 "00010000" // /* MW 5 */ + 6860 "11000110" // /* MW 4 */ + 6861 "01000010" // /* MW 3 */ + 6862 "00010000" // /* MW 2 */ + 6863 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 24 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6864 "11111000" // VMOV bmll3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6865 "10010010" // /* MW 3 */ + 6866 "00000100" // /* MW 2 */ + 6867 "00011011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 6868 "11111000" // VMOV x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6869 "10010010" // /* MW 3 */ + 6870 "00100000" // /* MW 2 */ + 6871 "00011001" // /* MW 1 */ + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6873 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 112 6 first +.src_ref 6 "me_vmult_float_emulated.h" 112 19 first + 6874 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6875 "10010110" // /* MW 3 */ + 6876 "01000000" // /* MW 2 */ + 6877 "00001000" // /* MW 1 */ + 6878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6879 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id first + 6880 "01100010" // VMOV x3, bmll0; VMSC.f dm4, dm2, x0, x4, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6881 "10000011" // /* MW 7 */ + 6882 "01000000" // /* MW 6 */ + 6883 "00010100" // /* MW 5 */ + 6884 "11100110" // /* MW 4 */ + 6885 "00010010" // /* MW 3 */ + 6886 "10100000" // /* MW 2 */ + 6887 "00000001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 25 +.noswbrkpt + 6888 "01100010" // VSHIFT x3, x3, x0, r6; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6889 "00111101" // /* MW 7 */ + 6890 "00001000" // /* MW 6 */ + 6891 "00010000" // /* MW 5 */ + 6892 "11000110" // /* MW 4 */ + 6893 "00011010" // /* MW 3 */ + 6894 "10011000" // /* MW 2 */ + 6895 "00000001" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 25 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6896 "11111000" // VMOV bmll2, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6897 "10010010" // /* MW 3 */ + 6898 "00000110" // /* MW 2 */ + 6899 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 6900 "11111000" // VMOV x3, x2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6901 "10010010" // /* MW 3 */ + 6902 "10100100" // /* MW 2 */ + 6903 "00011001" // /* MW 1 */ + 6904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6905 "00000000" // /* MW 1 */ + 6906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6907 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 113 21 first +.src_ref 6 "me_vmult_float_emulated.h" 114 6 first +.src_ref 6 "me_vmult_float_emulated.h" 114 19 first + 6908 "00011000" // VCONV.bf16.fp32 wl2, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6909 "00010110" // /* MW 3 */ + 6910 "01000010" // /* MW 2 */ + 6911 "00001001" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id first + 6912 "11111000" // VMOV x5, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6913 "00010010" // /* MW 3 */ + 6914 "10100000" // /* MW 2 */ + 6915 "00011010" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 322 47 first +.src_ref 5 "add_reduce.hpp" 324 44 +.aggressive_scheduled_block_id 26 +.noswbrkpt + 6916 "01100010" // VSHIFT x6, x5, x0, r0; VADD.f dm0, dm0, dm2, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6917 "00111101" // /* MW 7 */ + 6918 "00001000" // /* MW 6 */ + 6919 "00010000" // /* MW 5 */ + 6920 "11000110" // /* MW 4 */ + 6921 "00000010" // /* MW 3 */ + 6922 "00101000" // /* MW 2 */ + 6923 "00000011" // /* MW 1 */ +.src_ref 7 "accum.hpp" 198 120 first +.aggressive_scheduled_block_id 26 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6924 "11111000" // VMOV bmll2, x6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10010010" // /* MW 3 */ + 6926 "00001100" // /* MW 2 */ + 6927 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6928 "11111000" // VMOV x5, x3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "10010010" // /* MW 3 */ + 6930 "10100110" // /* MW 2 */ + 6931 "00011010" // /* MW 1 */ + 6932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6933 "00000000" // /* MW 1 */ + 6934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6935 "00000000" // /* MW 1 */ + 6936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6937 "00000000" // /* MW 1 */ +.src_ref 5 "add_reduce.hpp" 324 22 first + 6938 "11111000" // VMOV x6, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6939 "00010010" // /* MW 3 */ + 6940 "00100000" // /* MW 2 */ + 6941 "00011011" // /* MW 1 */ +.src_ref 5 "vector.hpp" 915 23 first + 6942 "10111000" // VEXTRACT.32 r0, x6, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6943 "00000001" // /* MW 3 */ + 6944 "00011010" // /* MW 2 */ + 6945 "00011000" // /* MW 1 */ + 6946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6947 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 856 23 first + 6948 "01111000" // VINSERT.32 x6, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6949 "00010001" // /* MW 3 */ + 6950 "00000000" // /* MW 2 */ + 6951 "00011011" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 first + 6952 "00111000" // VSEL.32 x1, x1, x6, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6953 "00001000" // /* MW 3 */ + 6954 "10001011" // /* MW 2 */ + 6955 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 + 6956 "11111000" // VMOV bmll2, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6957 "10010010" // /* MW 3 */ + 6958 "00000010" // /* MW 2 */ + 6959 "00011010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6960 "11111000" // VMOV x1, x5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6961 "10010010" // /* MW 3 */ + 6962 "10101010" // /* MW 2 */ + 6963 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 108 6 first +.src_ref 6 "me_vmult_float_emulated.h" 108 19 first +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 + 6964 "00000010" // VCONV.bf16.fp32 wl5, bmll2; VMOV x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6965 "01110000" // /* MW 7 */ + 6966 "01001001" // /* MW 6 */ + 6967 "10010001" // /* MW 5 */ + 6968 "00000001" // /* MW 4 */ + 6969 "11000000" // /* MW 3 */ + 6970 "00100010" // /* MW 2 */ + 6971 "01011000" // /* MW 1 */ + 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6973 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first + 6974 "01001000" // VMSC.f dm1, dm2, x5, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6975 "10000011" // /* MW 3 */ + 6976 "01001010" // /* MW 2 */ + 6977 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 9 first + 6978 "01001000" // VMUL.f dm0, x5, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6979 "01000001" // /* MW 3 */ + 6980 "11101010" // /* MW 2 */ + 6981 "00010000" // /* MW 1 */ + 6982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6983 "00000000" // /* MW 1 */ + 6984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6985 "00000000" // /* MW 1 */ + 6986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6987 "00000000" // /* MW 1 */ + 6988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6989 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 first +.src_ref 6 "me_vmult_float_emulated.h" 110 6 first +.src_ref 6 "me_vmult_float_emulated.h" 110 19 first + 6990 "00011000" // VCONV.bf16.fp32 wl1, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6991 "10010110" // /* MW 3 */ + 6992 "11000000" // /* MW 2 */ + 6993 "00001000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 6994 "01001000" // VMSC.f dm4, dm4, x2, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6995 "10000011" // /* MW 3 */ + 6996 "10000100" // /* MW 2 */ + 6997 "00010100" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 6998 "01001000" // VMSC.f dm3, dm1, x1, x4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6999 "10000011" // /* MW 3 */ + 7000 "00100010" // /* MW 2 */ + 7001 "00010011" // /* MW 1 */ + 7002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7003 "00000000" // /* MW 1 */ + 7004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7005 "00000000" // /* MW 1 */ + 7006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7007 "00000000" // /* MW 1 */ + 7008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7009 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 115 6 first +.src_ref 6 "me_vmult_float_emulated.h" 115 19 first +.src_ref 6 "me_vmult_float_emulated.h" 115 34 first + 7010 "00011000" // VCONV.bf16.fp32 wl3, bmll4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7011 "00010110" // /* MW 3 */ + 7012 "11000010" // /* MW 2 */ + 7013 "00001001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 111 6 first +.src_ref 6 "me_vmult_float_emulated.h" 111 19 first +.src_ref 6 "me_vmult_float_emulated.h" 111 34 first + 7014 "00011000" // VCONV.bf16.fp32 wl6, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7015 "10010110" // /* MW 3 */ + 7016 "01000001" // /* MW 2 */ + 7017 "00001011" // /* MW 1 */ + 7018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7019 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first + 7020 "01001000" // VMUL.f dm2, x6, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7021 "01100001" // /* MW 3 */ + 7022 "11101100" // /* MW 2 */ + 7023 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 9 first + 7024 "01001000" // VMUL.f dm3, x6, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7025 "01000001" // /* MW 3 */ + 7026 "11101100" // /* MW 2 */ + 7027 "00010011" // /* MW 1 */ + 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7029 "00000000" // /* MW 1 */ + 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7031 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id first + 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7033 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 9 first +.aggressive_scheduled_block_id 27 +.noswbrkpt + 7034 "01001000" // VMUL.f dm3, x1, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7035 "01100001" // /* MW 3 */ + 7036 "11100010" // /* MW 2 */ + 7037 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 117 42 first +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7038 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "00010010" // /* MW 3 */ + 7040 "01101000" // /* MW 2 */ + 7041 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 27 +.nohwbrkpt +.noswbrkpt + 7042 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7043 "00111101" // /* MW 3 */ + 7044 "01001100" // /* MW 2 */ + 7045 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 +.aggressive_scheduled_block_id 27 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7046 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7047 "10010010" // /* MW 3 */ + 7048 "00000101" // /* MW 2 */ + 7049 "00011010" // /* MW 1 */ + 7050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7051 "00000000" // /* MW 1 */ + 7052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7053 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id first + 7054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7055 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 9 first +.aggressive_scheduled_block_id 28 +.noswbrkpt + 7056 "01001000" // VMUL.f dm3, x5, x3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7057 "01100001" // /* MW 3 */ + 7058 "11101010" // /* MW 2 */ + 7059 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 118 6 first +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7060 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00010010" // /* MW 3 */ + 7062 "01101000" // /* MW 2 */ + 7063 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 28 +.nohwbrkpt +.noswbrkpt + 7064 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7065 "00111101" // /* MW 3 */ + 7066 "01001100" // /* MW 2 */ + 7067 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 +.aggressive_scheduled_block_id 28 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7068 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7069 "10010010" // /* MW 3 */ + 7070 "00000001" // /* MW 2 */ + 7071 "00011010" // /* MW 1 */ + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ + 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7075 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id first + 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7077 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 9 first +.aggressive_scheduled_block_id 29 +.noswbrkpt + 7078 "01001000" // VMUL.f dm3, x1, x2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7079 "01000001" // /* MW 3 */ + 7080 "11100010" // /* MW 2 */ + 7081 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 119 6 first +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7082 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7083 "00010010" // /* MW 3 */ + 7084 "01101000" // /* MW 2 */ + 7085 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 first +.aggressive_scheduled_block_id 29 +.nohwbrkpt +.noswbrkpt + 7086 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7087 "00111101" // /* MW 3 */ + 7088 "01001100" // /* MW 2 */ + 7089 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 29 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7090 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7091 "10010010" // /* MW 3 */ + 7092 "00000101" // /* MW 2 */ + 7093 "00011010" // /* MW 1 */ + 7094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7095 "00000000" // /* MW 1 */ + 7096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7097 "00000000" // /* MW 1 */ + 7098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7099 "00000000" // /* MW 1 */ + 7100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7101 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 120 6 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id first + 7102 "11111000" // VMOV lfh0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7103 "00010010" // /* MW 3 */ + 7104 "01101000" // /* MW 2 */ + 7105 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 30 +.noswbrkpt + 7106 "01001000" // VADD.f dm2, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7107 "00111101" // /* MW 3 */ + 7108 "01001100" // /* MW 2 */ + 7109 "00010010" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 +.aggressive_scheduled_block_id 30 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7110 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7111 "10010010" // /* MW 3 */ + 7112 "00000001" // /* MW 2 */ + 7113 "00011010" // /* MW 1 */ + 7114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7115 "00000000" // /* MW 1 */ + 7116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7117 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 9 first + 7118 "01001000" // VMUL.f dm3, x0, x6, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "11000001" // /* MW 3 */ + 7120 "11100000" // /* MW 2 */ + 7121 "00010011" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 121 6 first +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id first + 7124 "11111000" // VMOV lfl0, bmll2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7125 "00010010" // /* MW 3 */ + 7126 "01101000" // /* MW 2 */ + 7127 "00011001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 31 +.noswbrkpt + 7128 "01001000" // VADD.f dm3, dm2, dm3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7129 "00111101" // /* MW 3 */ + 7130 "01001100" // /* MW 2 */ + 7131 "00010011" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 +.aggressive_scheduled_block_id 31 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7132 "11111000" // VMOV bmll2, lfl0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7133 "10010010" // /* MW 3 */ + 7134 "00000101" // /* MW 2 */ + 7135 "00011010" // /* MW 1 */ + 7136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7137 "00000000" // /* MW 1 */ + 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7139 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 9 first + 7140 "01001000" // VMUL.f dm1, x1, x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00000001" // /* MW 3 */ + 7142 "11100010" // /* MW 2 */ + 7143 "00010001" // /* MW 1 */ + 7144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7145 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 122 6 first +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id first + 7146 "11111000" // VMOV lfh0, bmll3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00010010" // /* MW 3 */ + 7148 "01101100" // /* MW 2 */ + 7149 "00011000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 first +.aggressive_scheduled_block_id 32 +.noswbrkpt + 7150 "01001000" // VADD.f dm1, dm2, dm1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "00111101" // /* MW 3 */ + 7152 "01000100" // /* MW 2 */ + 7153 "00010001" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.aggressive_scheduled_block_id 32 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7154 "11111000" // VMOV bmll2, lfh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "10010010" // /* MW 3 */ + 7156 "00000001" // /* MW 2 */ + 7157 "00011010" // /* MW 1 */ + 7158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7159 "00000000" // /* MW 1 */ + 7160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7161 "00000000" // /* MW 1 */ + 7162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7163 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id first + 7164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7165 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 123 6 +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 33 +.noswbrkpt + 7166 "01100010" // VMOV x0, bmll1; VADD.f dm0, dm2, dm0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7167 "00111101" // /* MW 7 */ + 7168 "01000000" // /* MW 6 */ + 7169 "00010000" // /* MW 5 */ + 7170 "11100110" // /* MW 4 */ + 7171 "00010010" // /* MW 3 */ + 7172 "00100100" // /* MW 2 */ + 7173 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 +.src_ref 6 "me_vmult_float_emulated.h" 125 9 first +.aggressive_scheduled_block_id 33 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7174 "01100010" // VMOV bmll2, x0; VMUL.f dm4, x5, x0, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7175 "00000001" // /* MW 7 */ + 7176 "11101010" // /* MW 6 */ + 7177 "00010100" // /* MW 5 */ + 7178 "11100110" // /* MW 4 */ + 7179 "10010010" // /* MW 3 */ + 7180 "00000000" // /* MW 2 */ + 7181 "00000010" // /* MW 1 */ + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id first + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 +.aggressive_scheduled_block_id 34 +.noswbrkpt + 7188 "01001000" // VADD.f dm0, dm2, dm4, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00111101" // /* MW 3 */ + 7190 "01010000" // /* MW 2 */ + 7191 "00010000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 124 6 first +.aggressive_scheduled_block_id 34 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010010" // /* MW 3 */ + 7194 "00000000" // /* MW 2 */ + 7195 "00011010" // /* MW 1 */ + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ + 7198 "10000100" // J #6384 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6384 delay_slots=5 */ + 7199 "00000000" // /* MW 5 */ + 7200 "00000000" // /* MW 4 */ + 7201 "01111000" // /* MW 3 */ + 7202 "00001100" // /* MW 2 */ + 7203 "00000000" // /* MW 1 */ +.delay_slot + 7204 "10011000" // ST dc4, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7205 "01100101" // /* MW 3 */ + 7206 "11111010" // /* MW 2 */ + 7207 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7209 "00000000" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 125 6 first +.delay_slot + 7210 "11111000" // VMOV bmll2, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7211 "00010010" // /* MW 3 */ + 7212 "00000000" // /* MW 2 */ + 7213 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7215 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 1159 33 first +.src_ref 7 "accum.hpp" 1108 103 first +.delay_slot + 7216 "11100001" // NOPA; NOPB; VST.CONV.bf16.fp32 bmll2, [p1];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7217 "00000000" // /* MW 15 */ + 7218 "00000000" // /* MW 14 */ + 7219 "01111000" // /* MW 13 */ + 7220 "10100101" // /* MW 12 */ + 7221 "00000001" // /* MW 11 */ + 7222 "00000000" // /* MW 10 */ + 7223 "00000000" // /* MW 9 */ + 7224 "10000000" // /* MW 8 */ + 7225 "00010010" // /* MW 7 */ + 7226 "00000101" // /* MW 6 */ + 7227 "00100001" // /* MW 5 */ + 7228 "00000000" // /* MW 4 */ + 7229 "11110000" // /* MW 3 */ + 7230 "00101100" // /* MW 2 */ + 7231 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2384 +.src_ref 5 "blend.hpp" 163 48 + 7232 "10111010" // MOVA r20, #0; J #5616 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5616 delay_slots=5 */ + 7233 "00100000" // /* MW 9 */ + 7234 "00000000" // /* MW 8 */ + 7235 "00000000" // /* MW 7 */ + 7236 "10111110" // /* MW 6 */ + 7237 "00000010" // /* MW 5 */ + 7238 "00000000" // /* MW 4 */ + 7239 "00000000" // /* MW 3 */ + 7240 "00010100" // /* MW 2 */ + 7241 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7242 "00011000" // MOVX r21, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7243 "00000001" // /* MW 3 */ + 7244 "00101010" // /* MW 2 */ + 7245 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7247 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7252 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7253 "10000001" // /* MW 11 */ + 7254 "10101101" // /* MW 10 */ + 7255 "00000000" // /* MW 9 */ + 7256 "00000000" // /* MW 8 */ + 7257 "00000000" // /* MW 7 */ + 7258 "00000000" // /* MW 6 */ + 7259 "00100000" // /* MW 5 */ + 7260 "00000000" // /* MW 4 */ + 7261 "11110000" // /* MW 3 */ + 7262 "00101100" // /* MW 2 */ + 7263 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2416 + 7264 "10000100" // J #7456 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7265 "00000000" // /* MW 5 */ + 7266 "00000000" // /* MW 4 */ + 7267 "10010000" // /* MW 3 */ + 7268 "00001110" // /* MW 2 */ + 7269 "00000000" // /* MW 1 */ +.delay_slot + 7270 "00000010" // ST p1, [sp, #-4]; MOV dc4, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7271 "01110000" // /* MW 7 */ + 7272 "11110000" // /* MW 6 */ + 7273 "01100000" // /* MW 5 */ + 7274 "00000010" // /* MW 4 */ + 7275 "10110000" // /* MW 3 */ + 7276 "10010011" // /* MW 2 */ + 7277 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7279 "00000000" // /* MW 1 */ +.delay_slot + 7280 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7281 "00110011" // /* MW 3 */ + 7282 "11110000" // /* MW 2 */ + 7283 "00001111" // /* MW 1 */ +.delay_slot + 7284 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7285 "00110011" // /* MW 3 */ + 7286 "11110101" // /* MW 2 */ + 7287 "00001111" // /* MW 1 */ +.delay_slot + 7288 "00000010" // VST x1, [sp, #-128]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7289 "01110000" // /* MW 7 */ + 7290 "10100101" // /* MW 6 */ + 7291 "00000001" // /* MW 5 */ + 7292 "00000000" // /* MW 4 */ + 7293 "01100000" // /* MW 3 */ + 7294 "00001110" // /* MW 2 */ + 7295 "11111111" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2448 +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7296 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7297 "00000101" // /* MW 3 */ + 7298 "00100010" // /* MW 2 */ + 7299 "00010000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 first + 7300 "10011000" // EQ r17, r17, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7301 "01100111" // /* MW 3 */ + 7302 "01100010" // /* MW 2 */ + 7303 "00010100" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7304 "10000100" // JNZ r17, #7456 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7456 delay_slots=5 */ + 7305 "00000001" // /* MW 5 */ + 7306 "01000000" // /* MW 4 */ + 7307 "10010000" // /* MW 3 */ + 7308 "00001110" // /* MW 2 */ + 7309 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ +.delay_slot + 7312 "00011000" // VST x0, [sp, #-256] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7313 "00110011" // /* MW 3 */ + 7314 "11110000" // /* MW 2 */ + 7315 "00001111" // /* MW 1 */ +.delay_slot + 7316 "00011000" // VST x4, [sp, #-192] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7317 "00110011" // /* MW 3 */ + 7318 "11110101" // /* MW 2 */ + 7319 "00001111" // /* MW 1 */ +.delay_slot + 7320 "00011000" // VST x1, [sp, #-128] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "01110011" // /* MW 3 */ + 7322 "11111000" // /* MW 2 */ + 7323 "00001111" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 +.delay_slot + 7324 "00111010" // ST p1, [sp, #-4]; MOVX r7, #2; MOV dc4, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7325 "01111001" // /* MW 9 */ + 7326 "11110000" // /* MW 8 */ + 7327 "01100000" // /* MW 7 */ + 7328 "01001010" // /* MW 6 */ + 7329 "01110000" // /* MW 5 */ + 7330 "00000000" // /* MW 4 */ + 7331 "10110000" // /* MW 3 */ + 7332 "10010011" // /* MW 2 */ + 7333 "11111111" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7334 "10011000" // EQ r7, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7335 "01100111" // /* MW 3 */ + 7336 "11001110" // /* MW 2 */ + 7337 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7338 "10000100" // JNZ r7, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7339 "00000001" // /* MW 5 */ + 7340 "01000000" // /* MW 4 */ + 7341 "10000000" // /* MW 3 */ + 7342 "00001110" // /* MW 2 */ + 7343 "00111000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7345 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7347 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7348 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7353 "00000000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7354 "10011000" // EQ r7, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7355 "01100111" // /* MW 3 */ + 7356 "01001110" // /* MW 2 */ + 7357 "00010001" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 184 32 + 7358 "10000100" // JNZ r7, #7392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7392 delay_slots=5 */ + 7359 "00000001" // /* MW 5 */ + 7360 "01000000" // /* MW 4 */ + 7361 "01110000" // /* MW 3 */ + 7362 "00001110" // /* MW 2 */ + 7363 "00111000" // /* MW 1 */ +.src_ref 3 "reduce_mean_c8_impl.h" 200 49 +.delay_slot + 7364 "00011000" // MOVX r5, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7365 "01000001" // /* MW 3 */ + 7366 "00001010" // /* MW 2 */ + 7367 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7369 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7371 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7375 "00000000" // /* MW 1 */ + 7376 "10000100" // J #6576 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6576 delay_slots=5 */ + 7377 "00000000" // /* MW 5 */ + 7378 "00000000" // /* MW 4 */ + 7379 "11011000" // /* MW 3 */ + 7380 "00001100" // /* MW 2 */ + 7381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7391 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2544 +.src_ref 5 "blend.hpp" 170 36 + 7392 "10111010" // MOVA r17, #257; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 7393 "00100000" // /* MW 9 */ + 7394 "00000000" // /* MW 8 */ + 7395 "00000000" // /* MW 7 */ + 7396 "10111000" // /* MW 6 */ + 7397 "00000010" // /* MW 5 */ + 7398 "00000000" // /* MW 4 */ + 7399 "00000000" // /* MW 3 */ + 7400 "00110001" // /* MW 2 */ + 7401 "00100000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7402 "01100100" // MOVX r21, #0; MOV m4, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7403 "01000001" // /* MW 5 */ + 7404 "00000000" // /* MW 4 */ + 7405 "00101000" // /* MW 3 */ + 7406 "01000000" // /* MW 2 */ + 7407 "00000101" // /* MW 1 */ +.src_ref 5 "blend.hpp" 163 48 +.delay_slot + 7408 "00011000" // MOVX r20, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00000001" // /* MW 3 */ + 7410 "00101000" // /* MW 2 */ + 7411 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7416 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7417 "00011100" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00000000" // /* MW 5 */ + 7420 "00000100" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2576 + 7424 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 7425 "00000000" // /* MW 5 */ + 7426 "00000000" // /* MW 4 */ + 7427 "10101000" // /* MW 3 */ + 7428 "00001100" // /* MW 2 */ + 7429 "00000000" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7430 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7431 "11111110" // /* MW 5 */ + 7432 "10111111" // /* MW 4 */ + 7433 "11111000" // /* MW 3 */ + 7434 "00000000" // /* MW 2 */ + 7435 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7436 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7437 "00100000" // /* MW 3 */ + 7438 "00000000" // /* MW 2 */ + 7439 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7444 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7445 "10000001" // /* MW 11 */ + 7446 "10101101" // /* MW 10 */ + 7447 "00000000" // /* MW 9 */ + 7448 "00000000" // /* MW 8 */ + 7449 "00000000" // /* MW 7 */ + 7450 "00000000" // /* MW 6 */ + 7451 "00100000" // /* MW 5 */ + 7452 "00000000" // /* MW 4 */ + 7453 "11110000" // /* MW 3 */ + 7454 "00101100" // /* MW 2 */ + 7455 "00000000" // /* MW 1 */ +.label __ll128__ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E +.src_ref 6 "me_vmult_float_emulated.h" 108 6 +.src_ref 6 "me_vmult_float_emulated.h" 108 19 +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 110 6 +.src_ref 6 "me_vmult_float_emulated.h" 110 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 6 +.src_ref 6 "me_vmult_float_emulated.h" 111 19 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 112 6 +.src_ref 6 "me_vmult_float_emulated.h" 112 19 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 114 6 +.src_ref 6 "me_vmult_float_emulated.h" 114 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 6 +.src_ref 6 "me_vmult_float_emulated.h" 115 19 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 + 7456 "10111010" // VLDA x0, [sp, #-256]; J #5568 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=5568 delay_slots=5 */ + 7457 "00100000" // /* MW 9 */ + 7458 "00000000" // /* MW 8 */ + 7459 "00000000" // /* MW 7 */ + 7460 "10111000" // /* MW 6 */ + 7461 "00000010" // /* MW 5 */ + 7462 "00000000" // /* MW 4 */ + 7463 "01110000" // /* MW 3 */ + 7464 "00000111" // /* MW 2 */ + 7465 "11111110" // /* MW 1 */ +.src_ref 6 "me_vmult_float_emulated.h" 109 21 +.src_ref 6 "me_vmult_float_emulated.h" 111 34 +.src_ref 6 "me_vmult_float_emulated.h" 113 21 +.src_ref 6 "me_vmult_float_emulated.h" 115 34 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "add_reduce.hpp" 322 47 +.src_ref 5 "blend.hpp" 163 48 +.delay_slot + 7466 "10111010" // VLDA x4, [sp, #-192]; MOVX r0, #4; MOV r20, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7467 "01011000" // /* MW 9 */ + 7468 "00000000" // /* MW 8 */ + 7469 "10001000" // /* MW 7 */ + 7470 "10001010" // /* MW 6 */ + 7471 "00000000" // /* MW 5 */ + 7472 "00000000" // /* MW 4 */ + 7473 "01110000" // /* MW 3 */ + 7474 "10100111" // /* MW 2 */ + 7475 "11111110" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "vector.hpp" 1139 17 +.src_ref 5 "vector.hpp" 1280 49 +.src_ref 5 "vector.hpp" 1287 41 +.src_ref 5 "vector.hpp" 1288 16 +.src_ref 5 "vector.hpp" 1292 26 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 226 22 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7476 "10111010" // LDA p1, [sp, #-4]; MOVXM r16, #65535 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7477 "10010000" // /* MW 9 */ + 7478 "11111111" // /* MW 8 */ + 7479 "00001111" // /* MW 7 */ + 7480 "00111110" // /* MW 6 */ + 7481 "00000000" // /* MW 5 */ + 7482 "00000000" // /* MW 4 */ + 7483 "00100000" // /* MW 3 */ + 7484 "10010011" // /* MW 2 */ + 7485 "11111111" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 5 "blend.hpp" 170 36 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7486 "01100100" // MOVX r21, #0; MOV m4, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7487 "10000001" // /* MW 5 */ + 7488 "00000000" // /* MW 4 */ + 7489 "00101000" // /* MW 3 */ + 7490 "01000000" // /* MW 2 */ + 7491 "00000101" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7492 "00011000" // MOVX r17, #257 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7493 "00000101" // /* MW 3 */ + 7494 "00100010" // /* MW 2 */ + 7495 "00010001" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7496 "00100010" // VLDA x1, [sp, #-128]; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7497 "00011100" // /* MW 7 */ + 7498 "00000000" // /* MW 6 */ + 7499 "00000000" // /* MW 5 */ + 7500 "00000100" // /* MW 4 */ + 7501 "01110000" // /* MW 3 */ + 7502 "00001111" // /* MW 2 */ + 7503 "11111111" // /* MW 1 */ +.label TGT_F_ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E_2656 + 7504 "10000100" // J #6480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6480 delay_slots=5 */ + 7505 "00000000" // /* MW 5 */ + 7506 "00000000" // /* MW 4 */ + 7507 "10101000" // /* MW 3 */ + 7508 "00001100" // /* MW 2 */ + 7509 "00000000" // /* MW 1 */ +.delay_slot + 7510 "11111000" // MOV dc4, lr /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7511 "11100000" // /* MW 3 */ + 7512 "11000001" // /* MW 2 */ + 7513 "00011100" // /* MW 1 */ +.src_ref 5 "blend.hpp" 170 36 +.delay_slot + 7514 "01000100" // MOVXM r17, #65535 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7515 "11111110" // /* MW 5 */ + 7516 "10111111" // /* MW 4 */ + 7517 "11111000" // /* MW 3 */ + 7518 "00000000" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.src_ref 5 "vector.hpp" 57 98 +.src_ref 3 "reduce_mean_c8_impl.h" 268 19 +.delay_slot + 7520 "10111000" // MOV m4, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7521 "00100000" // /* MW 3 */ + 7522 "00000000" // /* MW 2 */ + 7523 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E__end +.label __ZN18reduce_skeleton_c8I8bfloat1619reduce_mean_c8_implIS0_E23reduce_mean_c8_params_tIS0_EE3runEPS0_S6_R18reduce_c8_params_tIS4_E___func_end0 + 7527 "00000000" // /* MW 1 */ +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_reduce_mean_c8 _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 8 "superkernels.cpp" 472 +.src_ref 8 "superkernels.cpp" 472 first +.function_start + 7536 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7537 "00000001" // /* MW 5 */ + 7538 "00000000" // /* MW 4 */ + 7539 "00000000" // /* MW 3 */ + 7540 "00010000" // /* MW 2 */ + 7541 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7542 "00111010" // ST p7, [sp, #-20]; MOVXM p7, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7543 "00010001" // /* MW 9 */ + 7544 "01100000" // /* MW 8 */ + 7545 "10110010" // /* MW 7 */ + 7546 "11110011" // /* MW 6 */ + 7547 "00000001" // /* MW 5 */ + 7548 "00000000" // /* MW 4 */ + 7549 "10110000" // /* MW 3 */ + 7550 "11110011" // /* MW 2 */ + 7551 "11111101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7552 "10111010" // LDA r16, [p7]; ST p6, [sp, #-28]; MOV r16, CORE_ID /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7553 "01110010" // /* MW 9 */ + 7554 "01110000" // /* MW 8 */ + 7555 "00001101" // /* MW 7 */ + 7556 "10000010" // /* MW 6 */ + 7557 "00011101" // /* MW 5 */ + 7558 "11100111" // /* MW 4 */ + 7559 "11010111" // /* MW 3 */ + 7560 "11000010" // /* MW 2 */ + 7561 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 22 first +.src_ref 8 "superkernels.cpp" 569 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7562 "00111010" // ST r11, [sp, #-8]; EXTEND.u8 r16, r16; MOV r11, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7563 "01111001" // /* MW 9 */ + 7564 "11110000" // /* MW 8 */ + 7565 "01101000" // /* MW 7 */ + 7566 "10000001" // /* MW 6 */ + 7567 "00000100" // /* MW 5 */ + 7568 "00100001" // /* MW 4 */ + 7569 "10110000" // /* MW 3 */ + 7570 "00101110" // /* MW 2 */ + 7571 "11111111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 30 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7572 "01011100" // ST r15, [sp, #-16]; ADD r17, r16, #-2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7573 "11110110" // /* MW 5 */ + 7574 "01000111" // /* MW 4 */ + 7575 "10111000" // /* MW 3 */ + 7576 "00111110" // /* MW 2 */ + 7577 "11111110" // /* MW 1 */ + 7578 "10011000" // ST r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7579 "10110101" // /* MW 3 */ + 7580 "11101001" // /* MW 2 */ + 7581 "00001111" // /* MW 1 */ + 7582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7583 "00000000" // /* MW 1 */ + 7584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7585 "00000000" // /* MW 1 */ + 7586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7587 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 477 6 first +.src_ref 8 "superkernels.cpp" 477 16 first + 7588 "10000100" // JNZ r16, #8160 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8160 delay_slots=5 */ + 7589 "00000001" // /* MW 5 */ + 7590 "01000000" // /* MW 4 */ + 7591 "11110000" // /* MW 3 */ + 7592 "00001111" // /* MW 2 */ + 7593 "10000000" // /* MW 1 */ +.delay_slot + 7594 "10011000" // ST r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7595 "10010101" // /* MW 3 */ + 7596 "11111101" // /* MW 2 */ + 7597 "00001111" // /* MW 1 */ +.delay_slot + 7598 "10011000" // ST r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7599 "11010101" // /* MW 3 */ + 7600 "11110101" // /* MW 2 */ + 7601 "00001111" // /* MW 1 */ +.delay_slot + 7602 "10011000" // ST p0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7603 "00011101" // /* MW 3 */ + 7604 "11100000" // /* MW 2 */ + 7605 "00001111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 11 +.delay_slot + 7606 "01000100" // MOVXM p6, #509128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7607 "10010000" // /* MW 5 */ + 7608 "11001001" // /* MW 4 */ + 7609 "11001100" // /* MW 3 */ + 7610 "00000111" // /* MW 2 */ + 7611 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 474 11 first +.delay_slot + 7612 "10011000" // ST r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7613 "00110001" // /* MW 3 */ + 7614 "00000110" // /* MW 2 */ + 7615 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 5 "tile.hpp" 74 8 +.src_ref 5 "tile.hpp" 74 8 + 7616 "01110110" // MOVA r17, #1; MOVS p7, p2; MOVXM p2, #509164 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7617 "00010000" // /* MW 11 */ + 7618 "01110110" // /* MW 10 */ + 7619 "00110010" // /* MW 9 */ + 7620 "11110001" // /* MW 8 */ + 7621 "00000001" // /* MW 7 */ + 7622 "00000000" // /* MW 6 */ + 7623 "10001011" // /* MW 5 */ + 7624 "10001000" // /* MW 4 */ + 7625 "00000111" // /* MW 3 */ + 7626 "00110001" // /* MW 2 */ + 7627 "00000000" // /* MW 1 */ +.src_ref 5 "tile.hpp" 74 8 first +.src_ref 5 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7628 "00111010" // ST r17, [p2]; MOVXM p2, #509168 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7629 "00010001" // /* MW 9 */ + 7630 "01111000" // /* MW 8 */ + 7631 "00110010" // /* MW 7 */ + 7632 "11110001" // /* MW 6 */ + 7633 "00000001" // /* MW 5 */ + 7634 "00000000" // /* MW 4 */ + 7635 "00110000" // /* MW 3 */ + 7636 "11000110" // /* MW 2 */ + 7637 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 44 +.src_ref 5 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7638 "11010100" // ST.s8 r16, [p2]; MOV p6, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7639 "10000001" // /* MW 5 */ + 7640 "11000101" // /* MW 4 */ + 7641 "11101100" // /* MW 3 */ + 7642 "11000000" // /* MW 2 */ + 7643 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 480 4 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7644 "00000100" // JL #2576 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2576 delay_slots=5 */ + 7645 "00000001" // /* MW 5 */ + 7646 "00000000" // /* MW 4 */ + 7647 "00001000" // /* MW 3 */ + 7648 "00000101" // /* MW 2 */ + 7649 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 480 4 +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7650 "01000100" // MOVXM p0, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7651 "10000000" // /* MW 5 */ + 7652 "11001000" // /* MW 4 */ + 7653 "11000000" // /* MW 3 */ + 7654 "00000111" // /* MW 2 */ + 7655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7659 "00000000" // /* MW 1 */ +.src_ref 5 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7660 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7661 "00110001" // /* MW 3 */ + 7662 "00100000" // /* MW 2 */ + 7663 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7664 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7665 "00000000" // /* MW 15 */ + 7666 "00000000" // /* MW 14 */ + 7667 "01111000" // /* MW 13 */ + 7668 "10100101" // /* MW 12 */ + 7669 "00000001" // /* MW 11 */ + 7670 "00000000" // /* MW 10 */ + 7671 "00000000" // /* MW 9 */ + 7672 "00000000" // /* MW 8 */ + 7673 "01011011" // /* MW 7 */ + 7674 "00000001" // /* MW 6 */ + 7675 "00100000" // /* MW 5 */ + 7676 "00000000" // /* MW 4 */ + 7677 "11110000" // /* MW 3 */ + 7678 "00101100" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 51 +.src_ref 8 "superkernels.cpp" 487 47 +.return_address + 7680 "10111010" // MOVA r17, #0; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7681 "00010000" // /* MW 9 */ + 7682 "00100000" // /* MW 8 */ + 7683 "00110010" // /* MW 7 */ + 7684 "11110001" // /* MW 6 */ + 7685 "00000001" // /* MW 5 */ + 7686 "00000000" // /* MW 4 */ + 7687 "00000000" // /* MW 3 */ + 7688 "00010001" // /* MW 2 */ + 7689 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 18 +.src_ref 8 "superkernels.cpp" 481 51 first + 7690 "10111010" // LDA r14, [p2]; MOVXM p2, #509128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7691 "00010000" // /* MW 9 */ + 7692 "01100100" // /* MW 8 */ + 7693 "00110010" // /* MW 7 */ + 7694 "11110001" // /* MW 6 */ + 7695 "00000001" // /* MW 5 */ + 7696 "00000000" // /* MW 4 */ + 7697 "11010000" // /* MW 3 */ + 7698 "10111010" // /* MW 2 */ + 7699 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 18 +.src_ref 8 "superkernels.cpp" 481 85 + 7700 "10111010" // LDA r18, [p2]; MOVXM p2, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7701 "00010000" // /* MW 9 */ + 7702 "00100010" // /* MW 8 */ + 7703 "00110010" // /* MW 7 */ + 7704 "11110001" // /* MW 6 */ + 7705 "00000001" // /* MW 5 */ + 7706 "00000000" // /* MW 4 */ + 7707 "11010000" // /* MW 3 */ + 7708 "11001010" // /* MW 2 */ + 7709 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 85 +.src_ref 8 "superkernels.cpp" 482 16 + 7710 "10111010" // LDA r13, [p2], #4; MOVXM p3, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7711 "00010000" // /* MW 9 */ + 7712 "01101000" // /* MW 8 */ + 7713 "10110010" // /* MW 7 */ + 7714 "11110001" // /* MW 6 */ + 7715 "00000001" // /* MW 5 */ + 7716 "00000000" // /* MW 4 */ + 7717 "11010000" // /* MW 3 */ + 7718 "10110110" // /* MW 2 */ + 7719 "01000011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 16 +.src_ref 8 "superkernels.cpp" 482 40 first + 7720 "10111010" // LDA el0, [p2, #4]; MOVXM p1, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7721 "00010000" // /* MW 9 */ + 7722 "01100110" // /* MW 8 */ + 7723 "10110010" // /* MW 7 */ + 7724 "11110000" // /* MW 6 */ + 7725 "00000001" // /* MW 5 */ + 7726 "00000000" // /* MW 4 */ + 7727 "11010000" // /* MW 3 */ + 7728 "10000101" // /* MW 2 */ + 7729 "01000010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 120 first +.src_ref 8 "superkernels.cpp" 483 44 + 7730 "11010100" // LDA r15, [p2]; MOV r16, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7731 "10000001" // /* MW 5 */ + 7732 "00111001" // /* MW 4 */ + 7733 "11011000" // /* MW 3 */ + 7734 "10111110" // /* MW 2 */ + 7735 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 44 + 7736 "00011000" // ADD.NC p2, r16, #40 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7737 "00010100" // /* MW 3 */ + 7738 "01101000" // /* MW 2 */ + 7739 "00011010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7740 "01000100" // MOVXM p6, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7741 "00000000" // /* MW 5 */ + 7742 "11001010" // /* MW 4 */ + 7743 "11001100" // /* MW 3 */ + 7744 "00000111" // /* MW 2 */ + 7745 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 13 + 7746 "01000100" // MOVXM p0, #509160 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7747 "11010000" // /* MW 5 */ + 7748 "11001001" // /* MW 4 */ + 7749 "11000000" // /* MW 3 */ + 7750 "00000111" // /* MW 2 */ + 7751 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 27 + 7752 "10011000" // MUL r18, r14, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7753 "00101111" // /* MW 3 */ + 7754 "10100101" // /* MW 2 */ + 7755 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7756 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7757 "00000000" // /* MW 5 */ + 7758 "00100000" // /* MW 4 */ + 7759 "00001000" // /* MW 3 */ + 7760 "00000000" // /* MW 2 */ + 7761 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 61 +.src_ref 8 "superkernels.cpp" 482 16 first + 7762 "01011100" // ST el0, [p3]; MUL r18, r13, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7763 "01011111" // /* MW 5 */ + 7764 "11001010" // /* MW 4 */ + 7765 "00110110" // /* MW 3 */ + 7766 "10000101" // /* MW 2 */ + 7767 "01100000" // /* MW 1 */ + 7768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7769 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 96 first + 7770 "10011000" // MUL r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7771 "00101111" // /* MW 3 */ + 7772 "11100101" // /* MW 2 */ + 7773 "00010011" // /* MW 1 */ + 7774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7775 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 481 16 + 7776 "10011000" // ST r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7777 "01010001" // /* MW 3 */ + 7778 "00000110" // /* MW 2 */ + 7779 "00001001" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 15 first + 7780 "10011000" // LDA el0, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7781 "00101110" // /* MW 3 */ + 7782 "01001100" // /* MW 2 */ + 7783 "00000010" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 first + 7784 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7785 "00110001" // /* MW 3 */ + 7786 "00011110" // /* MW 2 */ + 7787 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7788 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7789 "00110001" // /* MW 3 */ + 7790 "00011110" // /* MW 2 */ + 7791 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7792 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7793 "00110001" // /* MW 3 */ + 7794 "00011110" // /* MW 2 */ + 7795 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7796 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7797 "00110001" // /* MW 3 */ + 7798 "00011110" // /* MW 2 */ + 7799 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7800 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7801 "00110001" // /* MW 3 */ + 7802 "00011110" // /* MW 2 */ + 7803 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7804 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7805 "00110001" // /* MW 3 */ + 7806 "00011110" // /* MW 2 */ + 7807 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 483 13 first + 7808 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7809 "00101001" // /* MW 3 */ + 7810 "00000100" // /* MW 2 */ + 7811 "00001000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 first + 7812 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7813 "00110001" // /* MW 3 */ + 7814 "00011110" // /* MW 2 */ + 7815 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7816 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7817 "00110001" // /* MW 3 */ + 7818 "00011110" // /* MW 2 */ + 7819 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 487 47 + 7820 "10011000" // ST r17, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7821 "00110001" // /* MW 3 */ + 7822 "00011110" // /* MW 2 */ + 7823 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 first + 7824 "10011000" // LDA r1, [p2], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7825 "00110110" // /* MW 3 */ + 7826 "11011100" // /* MW 2 */ + 7827 "00000010" // /* MW 1 */ + 7828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7829 "00000000" // /* MW 1 */ + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ + 7836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7837 "00000000" // /* MW 1 */ + 7838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7839 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7840 "10011000" // GEU r17, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7841 "00001011" // /* MW 3 */ + 7842 "01100011" // /* MW 2 */ + 7843 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 + 7844 "10000100" // JNZ r17, #7920 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7920 delay_slots=5 */ + 7845 "00000001" // /* MW 5 */ + 7846 "01000000" // /* MW 4 */ + 7847 "01111000" // /* MW 3 */ + 7848 "00001111" // /* MW 2 */ + 7849 "10001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 +.delay_slot + 7850 "11111000" // MOV r12, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7851 "11000000" // /* MW 3 */ + 7852 "00011110" // /* MW 2 */ + 7853 "00011011" // /* MW 1 */ +.delay_slot + 7854 "10011000" // ST p2, [sp, #-40] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7855 "00011101" // /* MW 3 */ + 7856 "11011001" // /* MW 2 */ + 7857 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7859 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7863 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.no_stack_arguments + 7864 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */ + 7865 "00000001" // /* MW 5 */ + 7866 "00000000" // /* MW 4 */ + 7867 "01010000" // /* MW 3 */ + 7868 "00010101" // /* MW 2 */ + 7869 "00000000" // /* MW 1 */ +.delay_slot + 7870 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7871 "10010101" // /* MW 3 */ + 7872 "11011101" // /* MW 2 */ + 7873 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7880 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7881 "00011100" // /* MW 7 */ + 7882 "00000000" // /* MW 6 */ + 7883 "00000000" // /* MW 5 */ + 7884 "00000100" // /* MW 4 */ + 7885 "11110000" // /* MW 3 */ + 7886 "00101100" // /* MW 2 */ + 7887 "00000000" // /* MW 1 */ +.return_address + 7888 "10000100" // J #7984 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7984 delay_slots=5 */ + 7889 "00000000" // /* MW 5 */ + 7890 "00000000" // /* MW 4 */ + 7891 "10011000" // /* MW 3 */ + 7892 "00001111" // /* MW 2 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7894 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7895 "11100000" // /* MW 5 */ + 7896 "11001001" // /* MW 4 */ + 7897 "11001110" // /* MW 3 */ + 7898 "00000111" // /* MW 2 */ + 7899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7903 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7905 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7906 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7907 "00011100" // /* MW 13 */ + 7908 "00000000" // /* MW 12 */ + 7909 "00000000" // /* MW 11 */ + 7910 "01010111" // /* MW 10 */ + 7911 "00011010" // /* MW 9 */ + 7912 "01000000" // /* MW 8 */ + 7913 "00000000" // /* MW 7 */ + 7914 "00000000" // /* MW 6 */ + 7915 "10110110" // /* MW 5 */ + 7916 "00000010" // /* MW 4 */ + 7917 "11110000" // /* MW 3 */ + 7918 "00101100" // /* MW 2 */ + 7919 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_384 +.src_ref 8 "superkernels.cpp" 491 40 +.no_stack_arguments + 7920 "00000100" // JL #10912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10912 delay_slots=5 */ + 7921 "00000001" // /* MW 5 */ + 7922 "00000000" // /* MW 4 */ + 7923 "01010000" // /* MW 3 */ + 7924 "00010101" // /* MW 2 */ + 7925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7932 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7933 "01100111" // /* MW 3 */ + 7934 "00000001" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7936 "11100001" // NOPA; NOPB; NOPS; SUB r1, r1, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7937 "00000000" // /* MW 15 */ + 7938 "00000000" // /* MW 14 */ + 7939 "01111000" // /* MW 13 */ + 7940 "10100101" // /* MW 12 */ + 7941 "00000001" // /* MW 11 */ + 7942 "00001100" // /* MW 10 */ + 7943 "00011000" // /* MW 9 */ + 7944 "00000010" // /* MW 8 */ + 7945 "01011011" // /* MW 7 */ + 7946 "00000001" // /* MW 6 */ + 7947 "00100000" // /* MW 5 */ + 7948 "00000000" // /* MW 4 */ + 7949 "11110000" // /* MW 3 */ + 7950 "00101100" // /* MW 2 */ + 7951 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.return_address +.no_stack_arguments + 7952 "00000100" // JL #12416 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12416 delay_slots=5 */ + 7953 "00000001" // /* MW 5 */ + 7954 "00000000" // /* MW 4 */ + 7955 "01000000" // /* MW 3 */ + 7956 "00011000" // /* MW 2 */ + 7957 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7958 "11111000" // MOV r1, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7959 "00100000" // /* MW 3 */ + 7960 "01010000" // /* MW 2 */ + 7961 "00011000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7962 "01000100" // MOVXM p7, #509168 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7963 "11100000" // /* MW 5 */ + 7964 "11001001" // /* MW 4 */ + 7965 "11001110" // /* MW 3 */ + 7966 "00000111" // /* MW 2 */ + 7967 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.delay_slot + 7968 "01000100" // MOVXM r2, #1325400064 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7969 "00000000" // /* MW 5 */ + 7970 "00100000" // /* MW 4 */ + 7971 "00000001" // /* MW 3 */ + 7972 "00000000" // /* MW 2 */ + 7973 "01001111" // /* MW 1 */ +.delay_slot + 7974 "10011000" // ST r12, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "10010101" // /* MW 3 */ + 7976 "11011101" // /* MW 2 */ + 7977 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7978 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7979 "00100000" // /* MW 5 */ + 7980 "00000000" // /* MW 4 */ + 7981 "11110000" // /* MW 3 */ + 7982 "00101100" // /* MW 2 */ + 7983 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 491 40 +.return_address + 7984 "10111010" // LDA.s8 r16, [p7]; MOVX vaddSign0, #1; VINSERT.32 x0, x0, #0, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7985 "10111000" // /* MW 9 */ + 7986 "00001000" // /* MW 8 */ + 7987 "00000000" // /* MW 7 */ + 7988 "00000000" // /* MW 6 */ + 7989 "11010010" // /* MW 5 */ + 7990 "00000010" // /* MW 4 */ + 7991 "01010000" // /* MW 3 */ + 7992 "11000000" // /* MW 2 */ + 7993 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.src_ref 8 "superkernels.cpp" 492 38 +.src_ref 8 "superkernels.cpp" 492 38 + 7994 "10111010" // MOVA m0, #-38; MOVX r24, #0; VMOV bmll0, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7995 "01111000" // /* MW 9 */ + 7996 "01001001" // /* MW 8 */ + 7997 "00000000" // /* MW 7 */ + 7998 "00001000" // /* MW 6 */ + 7999 "10000000" // /* MW 5 */ + 8000 "00000001" // /* MW 4 */ + 8001 "10000000" // /* MW 3 */ + 8002 "01000000" // /* MW 2 */ + 8003 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 13 +.src_ref 8 "superkernels.cpp" 498 15 + 8004 "10111010" // LDA p2, [sp, #-40]; MOVXM p3, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8005 "00010000" // /* MW 9 */ + 8006 "01101010" // /* MW 8 */ + 8007 "10110010" // /* MW 7 */ + 8008 "11110001" // /* MW 6 */ + 8009 "00000001" // /* MW 5 */ + 8010 "00000000" // /* MW 4 */ + 8011 "00100000" // /* MW 3 */ + 8012 "00100011" // /* MW 2 */ + 8013 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 14 + 8014 "01000100" // MOVXM p1, #509144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8015 "10110000" // /* MW 5 */ + 8016 "11001001" // /* MW 4 */ + 8017 "11000010" // /* MW 3 */ + 8018 "00000111" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 7 +.src_ref 8 "superkernels.cpp" 508 7 +.src_ref 8 "superkernels.cpp" 511 7 + 8020 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8021 "10100000" // /* MW 5 */ + 8022 "11001001" // /* MW 4 */ + 8023 "11001110" // /* MW 3 */ + 8024 "00000111" // /* MW 2 */ + 8025 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 8026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8027 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 38 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 8028 "00011000" // ST.s16 r16, [p6], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8029 "00010111" // /* MW 3 */ + 8030 "00011110" // /* MW 2 */ + 8031 "00000110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8032 "00011000" // MOVX crRnd, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "10000000" // /* MW 3 */ + 8034 "00111010" // /* MW 2 */ + 8035 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8036 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8037 "00010110" // /* MW 3 */ + 8038 "01000000" // /* MW 2 */ + 8039 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8041 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 491 40 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8042 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8043 "00000001" // /* MW 3 */ + 8044 "00000001" // /* MW 2 */ + 8045 "00011100" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ + 8048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8049 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 492 38 first + 8050 "00011000" // ST.s8 r24, [p6], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8051 "00000111" // /* MW 3 */ + 8052 "00001011" // /* MW 2 */ + 8053 "00000110" // /* MW 1 */ + 8054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8055 "00000000" // /* MW 1 */ + 8056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8057 "00000000" // /* MW 1 */ + 8058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8059 "00000000" // /* MW 1 */ + 8060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8061 "00000000" // /* MW 1 */ + 8062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8063 "00000000" // /* MW 1 */ + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 494 25 first + 8066 "10011000" // ST r14, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8067 "11010001" // /* MW 3 */ + 8068 "00011101" // /* MW 2 */ + 8069 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 495 24 first + 8070 "10011000" // ST r15, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8071 "11110001" // /* MW 3 */ + 8072 "00000101" // /* MW 2 */ + 8073 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 496 24 first + 8074 "10011000" // ST r13, [p6, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8075 "10110001" // /* MW 3 */ + 8076 "00010101" // /* MW 2 */ + 8077 "00001110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 15 first + 8078 "10011000" // LDA el0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8079 "00101110" // /* MW 3 */ + 8080 "00011100" // /* MW 2 */ + 8081 "00000010" // /* MW 1 */ + 8082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8083 "00000000" // /* MW 1 */ + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ + 8086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8087 "00000000" // /* MW 1 */ + 8088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8089 "00000000" // /* MW 1 */ + 8090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8091 "00000000" // /* MW 1 */ + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 498 13 + 8094 "10011000" // ST el0, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "00101001" // /* MW 3 */ + 8096 "00000100" // /* MW 2 */ + 8097 "00001011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 16 first + 8098 "10011000" // LDA el0, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8099 "00101110" // /* MW 3 */ + 8100 "00000100" // /* MW 2 */ + 8101 "00000010" // /* MW 1 */ + 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8103 "00000000" // /* MW 1 */ + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8105 "00000000" // /* MW 1 */ + 8106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8107 "00000000" // /* MW 1 */ + 8108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8109 "00000000" // /* MW 1 */ + 8110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8111 "00000000" // /* MW 1 */ + 8112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8113 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 499 14 + 8114 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8115 "00101001" // /* MW 3 */ + 8116 "00000100" // /* MW 2 */ + 8117 "00001001" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 15 first + 8118 "10011000" // LDA el0, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8119 "00101110" // /* MW 3 */ + 8120 "00010100" // /* MW 2 */ + 8121 "00000010" // /* MW 1 */ + 8122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8123 "00000000" // /* MW 1 */ + 8124 "10000100" // J #8176 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8176 delay_slots=5 */ + 8125 "00000000" // /* MW 5 */ + 8126 "00000000" // /* MW 4 */ + 8127 "11111000" // /* MW 3 */ + 8128 "00001111" // /* MW 2 */ + 8129 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 13 +.delay_slot + 8130 "01000100" // MOVXM p0, #509148 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8131 "10111000" // /* MW 5 */ + 8132 "11001001" // /* MW 4 */ + 8133 "11000000" // /* MW 3 */ + 8134 "00000111" // /* MW 2 */ + 8135 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8140 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8141 "01100111" // /* MW 3 */ + 8142 "00000001" // /* MW 2 */ + 8143 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 500 13 +.delay_slot + 8144 "11100001" // NOPA; NOPB; ST el0, [p0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8145 "00000000" // /* MW 15 */ + 8146 "00000000" // /* MW 14 */ + 8147 "01111000" // /* MW 13 */ + 8148 "10100101" // /* MW 12 */ + 8149 "00000001" // /* MW 11 */ + 8150 "00000000" // /* MW 10 */ + 8151 "00000000" // /* MW 9 */ + 8152 "10000000" // /* MW 8 */ + 8153 "00101001" // /* MW 7 */ + 8154 "00000100" // /* MW 6 */ + 8155 "00100000" // /* MW 5 */ + 8156 "00000000" // /* MW 4 */ + 8157 "11110000" // /* MW 3 */ + 8158 "00101100" // /* MW 2 */ + 8159 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_624 +.src_ref 8 "superkernels.cpp" 505 7 +.src_ref 8 "superkernels.cpp" 508 7 +.src_ref 8 "superkernels.cpp" 511 7 + 8160 "00111010" // ST p2, [sp, #-36]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8161 "00010001" // /* MW 9 */ + 8162 "01101000" // /* MW 8 */ + 8163 "10110010" // /* MW 7 */ + 8164 "11110011" // /* MW 6 */ + 8165 "00000001" // /* MW 5 */ + 8166 "00000000" // /* MW 4 */ + 8167 "10110000" // /* MW 3 */ + 8168 "10100011" // /* MW 2 */ + 8169 "11111011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 8170 "11010100" // NOPA; MOV r12, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8171 "10000001" // /* MW 5 */ + 8172 "00101001" // /* MW 4 */ + 8173 "11110110" // /* MW 3 */ + 8174 "00101100" // /* MW 2 */ + 8175 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_640 +.src_ref 8 "superkernels.cpp" 505 7 first +.src_ref 8 "superkernels.cpp" 505 19 + 8176 "00101100" // LDA r16, [p7]; MOVX r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8177 "00001010" // /* MW 5 */ + 8178 "01000100" // /* MW 4 */ + 8179 "11010000" // /* MW 3 */ + 8180 "11000010" // /* MW 2 */ + 8181 "11100000" // /* MW 1 */ + 8182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8183 "00000000" // /* MW 1 */ + 8184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8185 "00000000" // /* MW 1 */ + 8186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8187 "00000000" // /* MW 1 */ + 8188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8189 "00000000" // /* MW 1 */ + 8190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8191 "00000000" // /* MW 1 */ + 8192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8193 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 19 + 8194 "10011000" // NE r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8195 "00001000" // /* MW 3 */ + 8196 "01100011" // /* MW 2 */ + 8197 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 25 + 8198 "10000100" // JNZ r17, #8368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8368 delay_slots=5 */ + 8199 "00000001" // /* MW 5 */ + 8200 "01000000" // /* MW 4 */ + 8201 "01011000" // /* MW 3 */ + 8202 "00010000" // /* MW 2 */ + 8203 "10001000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first +.delay_slot + 8204 "00011000" // ADD.NC p6, r12, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8205 "00000110" // /* MW 3 */ + 8206 "01100110" // /* MW 2 */ + 8207 "00011110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8211 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8213 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8215 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 29 + 8216 "01000100" // MOVXM p2, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8217 "10001000" // /* MW 5 */ + 8218 "11001001" // /* MW 4 */ + 8219 "11000100" // /* MW 3 */ + 8220 "00000111" // /* MW 2 */ + 8221 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 29 first +.src_ref 8 "superkernels.cpp" 505 65 + 8222 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8223 "00010000" // /* MW 9 */ + 8224 "00110000" // /* MW 8 */ + 8225 "00110010" // /* MW 7 */ + 8226 "11110001" // /* MW 6 */ + 8227 "00000001" // /* MW 5 */ + 8228 "00000000" // /* MW 4 */ + 8229 "11010000" // /* MW 3 */ + 8230 "11000010" // /* MW 2 */ + 8231 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 65 + 8232 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8233 "00111010" // /* MW 3 */ + 8234 "00000100" // /* MW 2 */ + 8235 "00000010" // /* MW 1 */ + 8236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8237 "00000000" // /* MW 1 */ + 8238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8239 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.no_stack_arguments + 8240 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8241 "00000001" // /* MW 5 */ + 8242 "00000000" // /* MW 4 */ + 8243 "11111000" // /* MW 3 */ + 8244 "00010011" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8246 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8247 "00000001" // /* MW 3 */ + 8248 "00011010" // /* MW 2 */ + 8249 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8251 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8252 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8253 "11011010" // /* MW 3 */ + 8254 "00110110" // /* MW 2 */ + 8255 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8256 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8257 "01000001" // /* MW 5 */ + 8258 "10111011" // /* MW 4 */ + 8259 "00110111" // /* MW 3 */ + 8260 "01100000" // /* MW 2 */ + 8261 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.delay_slot + 8262 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8263 "00010010" // /* MW 9 */ + 8264 "00000001" // /* MW 8 */ + 8265 "00000100" // /* MW 7 */ + 8266 "00000000" // /* MW 6 */ + 8267 "01011011" // /* MW 5 */ + 8268 "00000001" // /* MW 4 */ + 8269 "11110000" // /* MW 3 */ + 8270 "00101100" // /* MW 2 */ + 8271 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 +.src_ref 8 "superkernels.cpp" 505 41 +.return_address + 8272 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8273 "01000001" // /* MW 5 */ + 8274 "10101111" // /* MW 4 */ + 8275 "00111101" // /* MW 3 */ + 8276 "00000110" // /* MW 2 */ + 8277 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 41 + 8278 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8279 "00000010" // /* MW 3 */ + 8280 "11100001" // /* MW 2 */ + 8281 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 505 6 +.src_ref 8 "superkernels.cpp" 505 76 + 8282 "10000100" // JNZ r16, #8352 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8352 delay_slots=5 */ + 8283 "00000001" // /* MW 5 */ + 8284 "01000000" // /* MW 4 */ + 8285 "01010000" // /* MW 3 */ + 8286 "00010000" // /* MW 2 */ + 8287 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 395 8 + 8298 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8299 "10000001" // /* MW 5 */ + 8300 "11011001" // /* MW 4 */ + 8301 "10100100" // /* MW 3 */ + 8302 "00011111" // /* MW 2 */ + 8303 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 8304 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8305 "01110110" // /* MW 3 */ + 8306 "11111111" // /* MW 2 */ + 8307 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8308 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8309 "00110110" // /* MW 3 */ + 8310 "11111110" // /* MW 2 */ + 8311 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8312 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8313 "01010110" // /* MW 3 */ + 8314 "11111110" // /* MW 2 */ + 8315 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 8316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8317 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 4 +.noswbrkpt + 8318 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8319 "00110110" // /* MW 3 */ + 8320 "01000110" // /* MW 2 */ + 8321 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8323 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8325 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8327 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8329 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 8330 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8331 "00010010" // /* MW 3 */ + 8332 "10100011" // /* MW 2 */ + 8333 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8334 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8335 "00110001" // /* MW 3 */ + 8336 "00000110" // /* MW 2 */ + 8337 "00001010" // /* MW 1 */ + 8338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8339 "00000000" // /* MW 1 */ + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ + 8344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8345 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8346 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8347 "00010000" // /* MW 5 */ + 8348 "10100110" // /* MW 4 */ + 8349 "11111000" // /* MW 3 */ + 8350 "00101100" // /* MW 2 */ + 8351 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_816 + 8352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8353 "00000000" // /* MW 1 */ + 8354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8355 "00000000" // /* MW 1 */ + 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8357 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 7 first + 8358 "10111010" // LDA r16, [p7]; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8359 "01111110" // /* MW 9 */ + 8360 "10100101" // /* MW 8 */ + 8361 "00000001" // /* MW 7 */ + 8362 "00000000" // /* MW 6 */ + 8363 "00010000" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11010000" // /* MW 3 */ + 8366 "11000010" // /* MW 2 */ + 8367 "11100000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_832 +.src_ref 8 "superkernels.cpp" 508 19 +.src_ref 8 "superkernels.cpp" 522 6 +.src_ref 8 "superkernels.cpp" 558 19 + 8368 "00011000" // MOVX r14, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8369 "00001001" // /* MW 3 */ + 8370 "00011100" // /* MW 2 */ + 8371 "00010000" // /* MW 1 */ + 8372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8373 "00000000" // /* MW 1 */ + 8374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8375 "00000000" // /* MW 1 */ + 8376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8377 "00000000" // /* MW 1 */ + 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8379 "00000000" // /* MW 1 */ + 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8381 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 19 + 8382 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8383 "00001000" // /* MW 3 */ + 8384 "10100001" // /* MW 2 */ + 8385 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 25 + 8386 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8387 "00000001" // /* MW 5 */ + 8388 "01000000" // /* MW 4 */ + 8389 "10110000" // /* MW 3 */ + 8390 "00010000" // /* MW 2 */ + 8391 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8401 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 29 + 8402 "01000100" // MOVXM p2, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8403 "11000000" // /* MW 5 */ + 8404 "11001001" // /* MW 4 */ + 8405 "11000100" // /* MW 3 */ + 8406 "00000111" // /* MW 2 */ + 8407 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 29 +.src_ref 8 "superkernels.cpp" 508 65 + 8408 "10111010" // LDA r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8409 "00010000" // /* MW 9 */ + 8410 "00110000" // /* MW 8 */ + 8411 "00110010" // /* MW 7 */ + 8412 "11110001" // /* MW 6 */ + 8413 "00000001" // /* MW 5 */ + 8414 "00000000" // /* MW 4 */ + 8415 "11010000" // /* MW 3 */ + 8416 "11000010" // /* MW 2 */ + 8417 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 65 + 8418 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8419 "00111010" // /* MW 3 */ + 8420 "00000100" // /* MW 2 */ + 8421 "00000010" // /* MW 1 */ + 8422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8423 "00000000" // /* MW 1 */ + 8424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8425 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.no_stack_arguments + 8426 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8427 "00000001" // /* MW 5 */ + 8428 "00000000" // /* MW 4 */ + 8429 "11111000" // /* MW 3 */ + 8430 "00010011" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8432 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8433 "00000001" // /* MW 3 */ + 8434 "00011010" // /* MW 2 */ + 8435 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8437 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8438 "10011000" // LT r27, r16, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8439 "11011010" // /* MW 3 */ + 8440 "00110110" // /* MW 2 */ + 8441 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8442 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8443 "01000001" // /* MW 5 */ + 8444 "10111011" // /* MW 4 */ + 8445 "00110111" // /* MW 3 */ + 8446 "01100000" // /* MW 2 */ + 8447 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.delay_slot + 8448 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "01111000" // /* MW 13 */ + 8452 "10100101" // /* MW 12 */ + 8453 "00000001" // /* MW 11 */ + 8454 "10010000" // /* MW 10 */ + 8455 "00001000" // /* MW 9 */ + 8456 "00100000" // /* MW 8 */ + 8457 "01011011" // /* MW 7 */ + 8458 "00000001" // /* MW 6 */ + 8459 "00100000" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 +.src_ref 8 "superkernels.cpp" 508 41 +.return_address + 8464 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8465 "01000001" // /* MW 5 */ + 8466 "10101111" // /* MW 4 */ + 8467 "00111101" // /* MW 3 */ + 8468 "00000110" // /* MW 2 */ + 8469 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 41 + 8470 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8471 "00000010" // /* MW 3 */ + 8472 "11100001" // /* MW 2 */ + 8473 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 508 6 +.src_ref 8 "superkernels.cpp" 508 76 + 8474 "10000100" // JNZ r16, #8544 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8544 delay_slots=5 */ + 8475 "00000001" // /* MW 5 */ + 8476 "01000000" // /* MW 4 */ + 8477 "10110000" // /* MW 3 */ + 8478 "00010000" // /* MW 2 */ + 8479 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8481 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8483 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8485 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8487 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8489 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 395 8 + 8490 "11100100" // MOVX r16, #-1; MOV p2, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8491 "10000001" // /* MW 5 */ + 8492 "11011001" // /* MW 4 */ + 8493 "10100100" // /* MW 3 */ + 8494 "00011111" // /* MW 2 */ + 8495 "11111100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 8496 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8497 "01110110" // /* MW 3 */ + 8498 "11111111" // /* MW 2 */ + 8499 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8500 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8501 "00110110" // /* MW 3 */ + 8502 "11111110" // /* MW 2 */ + 8503 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8504 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "01010110" // /* MW 3 */ + 8506 "11111110" // /* MW 2 */ + 8507 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 8508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 5 +.noswbrkpt + 8510 "10011000" // LDA r17, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8511 "00110110" // /* MW 3 */ + 8512 "01000110" // /* MW 2 */ + 8513 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8515 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 8522 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00010010" // /* MW 3 */ + 8524 "10100011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8526 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8527 "00110001" // /* MW 3 */ + 8528 "00000110" // /* MW 2 */ + 8529 "00001010" // /* MW 1 */ + 8530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8531 "00000000" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ + 8536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8537 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8538 "00101100" // NOPA; ACQ r17, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8539 "00010000" // /* MW 5 */ + 8540 "10100110" // /* MW 4 */ + 8541 "11111000" // /* MW 3 */ + 8542 "00101100" // /* MW 2 */ + 8543 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1008 + 8544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8545 "00000000" // /* MW 1 */ + 8546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8547 "00000000" // /* MW 1 */ + 8548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8549 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 7 first +.src_ref 8 "superkernels.cpp" 511 29 + 8550 "10111010" // LDA r16, [p7]; MOVXM p7, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8551 "00010000" // /* MW 9 */ + 8552 "01110010" // /* MW 8 */ + 8553 "10110010" // /* MW 7 */ + 8554 "11110011" // /* MW 6 */ + 8555 "00000001" // /* MW 5 */ + 8556 "00000000" // /* MW 4 */ + 8557 "11010000" // /* MW 3 */ + 8558 "11000010" // /* MW 2 */ + 8559 "11100000" // /* MW 1 */ + 8560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8561 "00000000" // /* MW 1 */ + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ + 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8567 "00000000" // /* MW 1 */ + 8568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8569 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 19 + 8570 "00011000" // MOVX r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8571 "00010001" // /* MW 3 */ + 8572 "00100100" // /* MW 2 */ + 8573 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 19 + 8574 "10011000" // NE r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8575 "00001000" // /* MW 3 */ + 8576 "10100001" // /* MW 2 */ + 8577 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 25 + 8578 "10000100" // JNZ r16, #8768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8768 delay_slots=5 */ + 8579 "00000001" // /* MW 5 */ + 8580 "01000000" // /* MW 4 */ + 8581 "00100000" // /* MW 3 */ + 8582 "00010001" // /* MW 2 */ + 8583 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 66 +.delay_slot + 8584 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8585 "11000000" // /* MW 5 */ + 8586 "11001000" // /* MW 4 */ + 8587 "11000100" // /* MW 3 */ + 8588 "00000111" // /* MW 2 */ + 8589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8591 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8593 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8595 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8596 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8597 "00000001" // /* MW 3 */ + 8598 "00100010" // /* MW 2 */ + 8599 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 29 +.src_ref 8 "superkernels.cpp" 511 42 + 8600 "00101100" // LDA r16, [p7]; MOVX r13, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8601 "00000010" // /* MW 5 */ + 8602 "00110100" // /* MW 4 */ + 8603 "11010000" // /* MW 3 */ + 8604 "11000010" // /* MW 2 */ + 8605 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 66 + 8606 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8607 "00111010" // /* MW 3 */ + 8608 "00000100" // /* MW 2 */ + 8609 "00000010" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.no_stack_arguments + 8614 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 8615 "00000001" // /* MW 5 */ + 8616 "00000000" // /* MW 4 */ + 8617 "11111000" // /* MW 3 */ + 8618 "00010011" // /* MW 2 */ + 8619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8621 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8623 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8624 "10011000" // LT r27, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8625 "00011010" // /* MW 3 */ + 8626 "00110111" // /* MW 2 */ + 8627 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8628 "11100100" // SUB r17, r17, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8629 "01000001" // /* MW 5 */ + 8630 "10111011" // /* MW 4 */ + 8631 "00110111" // /* MW 3 */ + 8632 "01100000" // /* MW 2 */ + 8633 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.delay_slot + 8634 "00101100" // NOPA; SEL.EQZ r0, r16, r17, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8635 "00100100" // /* MW 5 */ + 8636 "00000010" // /* MW 4 */ + 8637 "11111000" // /* MW 3 */ + 8638 "00101100" // /* MW 2 */ + 8639 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 +.src_ref 8 "superkernels.cpp" 511 42 +.return_address + 8640 "11100100" // SUB r16, r13, r3; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8641 "01000001" // /* MW 5 */ + 8642 "10101111" // /* MW 4 */ + 8643 "00111101" // /* MW 3 */ + 8644 "00000110" // /* MW 2 */ + 8645 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 42 + 8646 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00000010" // /* MW 3 */ + 8648 "11100001" // /* MW 2 */ + 8649 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 511 6 +.src_ref 8 "superkernels.cpp" 511 77 + 8650 "10000100" // JNZ r16, #8736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8736 delay_slots=5 */ + 8651 "00000001" // /* MW 5 */ + 8652 "01000000" // /* MW 4 */ + 8653 "00010000" // /* MW 3 */ + 8654 "00010001" // /* MW 2 */ + 8655 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 +.src_ref 1 "io_buffer_main.h" 218 49 first +.src_ref 1 "io_buffer_main.h" 395 8 + 8666 "10111010" // LDA r27, [p6], #-4; MOVX r17, #-1; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8667 "01011000" // /* MW 9 */ + 8668 "00000001" // /* MW 8 */ + 8669 "00001000" // /* MW 7 */ + 8670 "11101010" // /* MW 6 */ + 8671 "00010111" // /* MW 5 */ + 8672 "00111111" // /* MW 4 */ + 8673 "11010000" // /* MW 3 */ + 8674 "11101110" // /* MW 2 */ + 8675 "11011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8676 "10011000" // LDA r18, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8677 "01010110" // /* MW 3 */ + 8678 "11111110" // /* MW 2 */ + 8679 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8680 "10011000" // LDA r19, [p6], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8681 "01110110" // /* MW 3 */ + 8682 "11111110" // /* MW 2 */ + 8683 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 8684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8685 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 8686 "10011000" // LDA r18, [p6, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8687 "01010110" // /* MW 3 */ + 8688 "01000110" // /* MW 2 */ + 8689 "00000110" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8691 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8695 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 8698 "00011000" // SEL.EQZ r18, r19, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8699 "00100010" // /* MW 3 */ + 8700 "11100101" // /* MW 2 */ + 8701 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8702 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "01010001" // /* MW 3 */ + 8704 "00000110" // /* MW 2 */ + 8705 "00001110" // /* MW 1 */ + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ + 8708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8709 "00000000" // /* MW 1 */ + 8710 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8711 "00000000" // /* MW 5 */ + 8712 "00000000" // /* MW 4 */ + 8713 "00101000" // /* MW 3 */ + 8714 "00010001" // /* MW 2 */ + 8715 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8717 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first +.delay_slot + 8718 "00011000" // ACQ r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8719 "00011000" // /* MW 3 */ + 8720 "10010011" // /* MW 2 */ + 8721 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8727 "01111110" // /* MW 9 */ + 8728 "10100101" // /* MW 8 */ + 8729 "00000001" // /* MW 7 */ + 8730 "00000000" // /* MW 6 */ + 8731 "00010000" // /* MW 5 */ + 8732 "00000000" // /* MW 4 */ + 8733 "11110000" // /* MW 3 */ + 8734 "00101100" // /* MW 2 */ + 8735 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1200 + 8736 "10000100" // J #8784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8784 delay_slots=5 */ + 8737 "00000000" // /* MW 5 */ + 8738 "00000000" // /* MW 4 */ + 8739 "00101000" // /* MW 3 */ + 8740 "00010001" // /* MW 2 */ + 8741 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 +.delay_slot + 8742 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8743 "00000101" // /* MW 3 */ + 8744 "00100000" // /* MW 2 */ + 8745 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8752 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8753 "00000000" // /* MW 15 */ + 8754 "00000000" // /* MW 14 */ + 8755 "01111000" // /* MW 13 */ + 8756 "10100101" // /* MW 12 */ + 8757 "00000001" // /* MW 11 */ + 8758 "00000000" // /* MW 10 */ + 8759 "00000000" // /* MW 9 */ + 8760 "00000000" // /* MW 8 */ + 8761 "01011011" // /* MW 7 */ + 8762 "00000001" // /* MW 6 */ + 8763 "00100000" // /* MW 5 */ + 8764 "00000000" // /* MW 4 */ + 8765 "11110000" // /* MW 3 */ + 8766 "00101100" // /* MW 2 */ + 8767 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1232 +.src_ref 8 "superkernels.cpp" 516 45 +.src_ref 8 "superkernels.cpp" 522 6 + 8768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #1; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8769 "00000000" // /* MW 15 */ + 8770 "00000000" // /* MW 14 */ + 8771 "01111000" // /* MW 13 */ + 8772 "10100101" // /* MW 12 */ + 8773 "00000001" // /* MW 11 */ + 8774 "00101000" // /* MW 10 */ + 8775 "00000000" // /* MW 9 */ + 8776 "00000001" // /* MW 8 */ + 8777 "01011011" // /* MW 7 */ + 8778 "00000001" // /* MW 6 */ + 8779 "00100000" // /* MW 5 */ + 8780 "00000000" // /* MW 4 */ + 8781 "11110000" // /* MW 3 */ + 8782 "00101100" // /* MW 2 */ + 8783 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1248 +.src_ref 8 "superkernels.cpp" 516 47 +.src_ref 1 "io_buffer_main.h" 125 25 + 8784 "10111010" // LDA p7, [sp, #-32]; MOVXM p6, #509132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8785 "00010000" // /* MW 9 */ + 8786 "01100110" // /* MW 8 */ + 8787 "00110010" // /* MW 7 */ + 8788 "11110011" // /* MW 6 */ + 8789 "00000001" // /* MW 5 */ + 8790 "00000000" // /* MW 4 */ + 8791 "00100000" // /* MW 3 */ + 8792 "01110011" // /* MW 2 */ + 8793 "11111100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 47 first +.src_ref 8 "superkernels.cpp" 522 6 + 8794 "10111010" // LDA r21, [p6]; MOVXM p2, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8795 "00010000" // /* MW 9 */ + 8796 "01101000" // /* MW 8 */ + 8797 "00110010" // /* MW 7 */ + 8798 "11110001" // /* MW 6 */ + 8799 "00000001" // /* MW 5 */ + 8800 "00000000" // /* MW 4 */ + 8801 "11010000" // /* MW 3 */ + 8802 "11010110" // /* MW 2 */ + 8803 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 +.src_ref 8 "superkernels.cpp" 522 6 first + 8804 "10111010" // LDA r17, [p2]; MOVXM p6, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8805 "00010000" // /* MW 9 */ + 8806 "01100000" // /* MW 8 */ + 8807 "00110010" // /* MW 7 */ + 8808 "11110011" // /* MW 6 */ + 8809 "00000001" // /* MW 5 */ + 8810 "00000000" // /* MW 4 */ + 8811 "11010000" // /* MW 3 */ + 8812 "11000110" // /* MW 2 */ + 8813 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 first + 8814 "10011000" // LDA r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "10010110" // /* MW 3 */ + 8816 "00000110" // /* MW 2 */ + 8817 "00000110" // /* MW 1 */ + 8818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8819 "00000000" // /* MW 1 */ + 8820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8821 "00000000" // /* MW 1 */ + 8822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8823 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8824 "10011000" // LDA r19, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8825 "01110110" // /* MW 3 */ + 8826 "00000110" // /* MW 2 */ + 8827 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 first + 8828 "10011000" // LSHL r21, r21, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8829 "00001101" // /* MW 3 */ + 8830 "01101011" // /* MW 2 */ + 8831 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 first + 8832 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8833 "00000111" // /* MW 3 */ + 8834 "01100001" // /* MW 2 */ + 8835 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8836 "10000100" // JNZ r16, #9232 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9232 delay_slots=5 */ + 8837 "00000001" // /* MW 5 */ + 8838 "01000000" // /* MW 4 */ + 8839 "00001000" // /* MW 3 */ + 8840 "00010010" // /* MW 2 */ + 8841 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 first +.delay_slot + 8842 "00011000" // ADD r20, r20, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8843 "00000111" // /* MW 3 */ + 8844 "00101000" // /* MW 2 */ + 8845 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 514 2 +.delay_slot + 8846 "10011000" // ST r20, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8847 "10010001" // /* MW 3 */ + 8848 "00000110" // /* MW 2 */ + 8849 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8851 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 45 first +.delay_slot + 8852 "01011000" // ADD.NC p0, r19, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8853 "11010101" // /* MW 3 */ + 8854 "01101001" // /* MW 2 */ + 8855 "00011000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 516 12 +.src_ref 8 "superkernels.cpp" 522 6 +.delay_slot + 8856 "01011100" // ST p0, [sp, #-68]; MOVX r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8857 "00100010" // /* MW 5 */ + 8858 "01001000" // /* MW 4 */ + 8859 "10110000" // /* MW 3 */ + 8860 "10000011" // /* MW 2 */ + 8861 "11110111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 first + 8862 "10011000" // EQ r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8863 "00100111" // /* MW 3 */ + 8864 "01100001" // /* MW 2 */ + 8865 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8866 "10000100" // JNZ r16, #9088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9088 delay_slots=5 */ + 8867 "00000001" // /* MW 5 */ + 8868 "01000000" // /* MW 4 */ + 8869 "11000000" // /* MW 3 */ + 8870 "00010001" // /* MW 2 */ + 8871 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8873 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8877 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8879 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8881 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8882 "10011000" // NE r16, r17, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8883 "11101000" // /* MW 3 */ + 8884 "01100000" // /* MW 2 */ + 8885 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 522 6 + 8886 "10000100" // JNZ r16, #9040 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9040 delay_slots=5 */ + 8887 "00000001" // /* MW 5 */ + 8888 "01000000" // /* MW 4 */ + 8889 "10101000" // /* MW 3 */ + 8890 "00010001" // /* MW 2 */ + 8891 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 26 +.delay_slot + 8892 "01000100" // MOVXM p6, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8893 "11000000" // /* MW 5 */ + 8894 "11001001" // /* MW 4 */ + 8895 "11001100" // /* MW 3 */ + 8896 "00000111" // /* MW 2 */ + 8897 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8901 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8903 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8905 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 26 first +.src_ref 8 "superkernels.cpp" 523 61 + 8906 "10111010" // LDA r18, [p6]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8907 "00010000" // /* MW 9 */ + 8908 "00100100" // /* MW 8 */ + 8909 "00110010" // /* MW 7 */ + 8910 "11110011" // /* MW 6 */ + 8911 "00000001" // /* MW 5 */ + 8912 "00000000" // /* MW 4 */ + 8913 "11010000" // /* MW 3 */ + 8914 "11001010" // /* MW 2 */ + 8915 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 61 +.src_ref 8 "superkernels.cpp" 524 44 + 8916 "10111010" // LDA r16, [p6]; MOVXM p6, #509140 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8917 "00010000" // /* MW 9 */ + 8918 "01101010" // /* MW 8 */ + 8919 "00110010" // /* MW 7 */ + 8920 "11110011" // /* MW 6 */ + 8921 "00000001" // /* MW 5 */ + 8922 "00000000" // /* MW 4 */ + 8923 "11010000" // /* MW 3 */ + 8924 "11000010" // /* MW 2 */ + 8925 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 +.src_ref 8 "superkernels.cpp" 524 44 first + 8926 "00101100" // LDA r17, [p6]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8927 "00000010" // /* MW 5 */ + 8928 "01100000" // /* MW 4 */ + 8929 "11010000" // /* MW 3 */ + 8930 "11000110" // /* MW 2 */ + 8931 "11000000" // /* MW 1 */ + 8932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8933 "00000000" // /* MW 1 */ + 8934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8935 "00000000" // /* MW 1 */ + 8936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8937 "00000000" // /* MW 1 */ + 8938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8939 "00000000" // /* MW 1 */ + 8940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8941 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 523 37 first + 8942 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8943 "00001111" // /* MW 3 */ + 8944 "10100101" // /* MW 2 */ + 8945 "00010100" // /* MW 1 */ + 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8947 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 first +.src_ref 8 "superkernels.cpp" 524 30 first + 8948 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8949 "10000010" // /* MW 5 */ + 8950 "00110010" // /* MW 4 */ + 8951 "00111010" // /* MW 3 */ + 8952 "11100100" // /* MW 2 */ + 8953 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8954 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8955 "00011100" // /* MW 3 */ + 8956 "00110111" // /* MW 2 */ + 8957 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8958 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8959 "00000010" // /* MW 3 */ + 8960 "11100111" // /* MW 2 */ + 8961 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 42 + 8962 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8963 "00011100" // /* MW 3 */ + 8964 "10110111" // /* MW 2 */ + 8965 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 524 30 + 8966 "00011000" // SEL.EQZ r17, r24, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8967 "00110010" // /* MW 3 */ + 8968 "00100011" // /* MW 2 */ + 8969 "00010110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 65 first + 8970 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8971 "00010001" // /* MW 3 */ + 8972 "00100101" // /* MW 2 */ + 8973 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 526 36 first + 8974 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8975 "00001000" // /* MW 3 */ + 8976 "01100001" // /* MW 2 */ + 8977 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 8978 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */ + 8979 "00000001" // /* MW 5 */ + 8980 "01000000" // /* MW 4 */ + 8981 "01000000" // /* MW 3 */ + 8982 "00010010" // /* MW 2 */ + 8983 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 32 +.delay_slot + 8984 "01000100" // MOVXM p6, #509200 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8985 "00100000" // /* MW 5 */ + 8986 "11001010" // /* MW 4 */ + 8987 "11001100" // /* MW 3 */ + 8988 "00000111" // /* MW 2 */ + 8989 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 525 32 first +.delay_slot + 8990 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8991 "01010001" // /* MW 3 */ + 8992 "00000110" // /* MW 2 */ + 8993 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8999 "00000000" // /* MW 1 */ + 9000 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */ + 9001 "00000000" // /* MW 5 */ + 9002 "00000000" // /* MW 4 */ + 9003 "11111000" // /* MW 3 */ + 9004 "00010001" // /* MW 2 */ + 9005 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.src_ref 8 "superkernels.cpp" 558 19 +.delay_slot + 9006 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9007 "00010000" // /* MW 9 */ + 9008 "01101000" // /* MW 8 */ + 9009 "10110010" // /* MW 7 */ + 9010 "11110011" // /* MW 6 */ + 9011 "00000001" // /* MW 5 */ + 9012 "00000000" // /* MW 4 */ + 9013 "00000000" // /* MW 3 */ + 9014 "01001110" // /* MW 2 */ + 9015 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9016 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9017 "00010000" // /* MW 9 */ + 9018 "00100000" // /* MW 8 */ + 9019 "00110010" // /* MW 7 */ + 9020 "11110001" // /* MW 6 */ + 9021 "00000001" // /* MW 5 */ + 9022 "00000000" // /* MW 4 */ + 9023 "00000000" // /* MW 3 */ + 9024 "00101111" // /* MW 2 */ + 9025 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9026 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9027 "00000001" // /* MW 3 */ + 9028 "00011010" // /* MW 2 */ + 9029 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9032 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9033 "00011100" // /* MW 7 */ + 9034 "00000000" // /* MW 6 */ + 9035 "00000000" // /* MW 5 */ + 9036 "00000100" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1504 + 9040 "10000100" // J #9200 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9200 delay_slots=5 */ + 9041 "00000000" // /* MW 5 */ + 9042 "00000000" // /* MW 4 */ + 9043 "11111000" // /* MW 3 */ + 9044 "00010001" // /* MW 2 */ + 9045 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.src_ref 8 "superkernels.cpp" 558 19 +.delay_slot + 9046 "10111010" // MOVA r14, #2; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9047 "00010000" // /* MW 9 */ + 9048 "01101000" // /* MW 8 */ + 9049 "10110010" // /* MW 7 */ + 9050 "11110011" // /* MW 6 */ + 9051 "00000001" // /* MW 5 */ + 9052 "00000000" // /* MW 4 */ + 9053 "00000000" // /* MW 3 */ + 9054 "01001110" // /* MW 2 */ + 9055 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9056 "10111010" // MOVA r15, #1; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9057 "00010000" // /* MW 9 */ + 9058 "00100000" // /* MW 8 */ + 9059 "00110010" // /* MW 7 */ + 9060 "11110001" // /* MW 6 */ + 9061 "00000001" // /* MW 5 */ + 9062 "00000000" // /* MW 4 */ + 9063 "00000000" // /* MW 3 */ + 9064 "00101111" // /* MW 2 */ + 9065 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9066 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9067 "00000001" // /* MW 3 */ + 9068 "00011010" // /* MW 2 */ + 9069 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9072 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9073 "00000000" // /* MW 15 */ + 9074 "00000000" // /* MW 14 */ + 9075 "01111000" // /* MW 13 */ + 9076 "10100101" // /* MW 12 */ + 9077 "00000001" // /* MW 11 */ + 9078 "00000000" // /* MW 10 */ + 9079 "00000000" // /* MW 9 */ + 9080 "00000000" // /* MW 8 */ + 9081 "01011011" // /* MW 7 */ + 9082 "00000001" // /* MW 6 */ + 9083 "00100000" // /* MW 5 */ + 9084 "00000000" // /* MW 4 */ + 9085 "11110000" // /* MW 3 */ + 9086 "00101100" // /* MW 2 */ + 9087 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1552 +.src_ref 8 "superkernels.cpp" 532 27 +.src_ref 8 "superkernels.cpp" 533 31 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 + 9088 "10111010" // MOVA r13, #0; MOVXM p6, #509156 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9089 "00010000" // /* MW 9 */ + 9090 "01110010" // /* MW 8 */ + 9091 "00110010" // /* MW 7 */ + 9092 "11110011" // /* MW 6 */ + 9093 "00000001" // /* MW 5 */ + 9094 "00000000" // /* MW 4 */ + 9095 "00000000" // /* MW 3 */ + 9096 "00001101" // /* MW 2 */ + 9097 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 27 first +.src_ref 8 "superkernels.cpp" 532 63 +.src_ref 8 "superkernels.cpp" 552 2 + 9098 "10111010" // LDA r18, [p6]; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9099 "00010000" // /* MW 9 */ + 9100 "00100000" // /* MW 8 */ + 9101 "00110010" // /* MW 7 */ + 9102 "11110001" // /* MW 6 */ + 9103 "00000001" // /* MW 5 */ + 9104 "00000000" // /* MW 4 */ + 9105 "11010000" // /* MW 3 */ + 9106 "11001010" // /* MW 2 */ + 9107 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 63 +.src_ref 8 "superkernels.cpp" 533 46 + 9108 "10111010" // LDA r16, [p2]; MOVXM p6, #509144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9109 "00010000" // /* MW 9 */ + 9110 "01101100" // /* MW 8 */ + 9111 "00110010" // /* MW 7 */ + 9112 "11110011" // /* MW 6 */ + 9113 "00000001" // /* MW 5 */ + 9114 "00000000" // /* MW 4 */ + 9115 "11010000" // /* MW 3 */ + 9116 "11000010" // /* MW 2 */ + 9117 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 46 first +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 + 9118 "00101100" // LDA r17, [p6]; MOVX r15, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9119 "00001010" // /* MW 5 */ + 9120 "00111100" // /* MW 4 */ + 9121 "11010000" // /* MW 3 */ + 9122 "11000110" // /* MW 2 */ + 9123 "11000000" // /* MW 1 */ + 9124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9125 "00000000" // /* MW 1 */ + 9126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9127 "00000000" // /* MW 1 */ + 9128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9129 "00000000" // /* MW 1 */ + 9130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9131 "00000000" // /* MW 1 */ + 9132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9133 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 532 39 first + 9134 "10011000" // MUL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9135 "00001111" // /* MW 3 */ + 9136 "10100101" // /* MW 2 */ + 9137 "00010100" // /* MW 1 */ + 9138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9139 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 first +.src_ref 8 "superkernels.cpp" 533 31 first + 9140 "10100100" // SUB r19, r17, r18; ADD.NC r20, r18, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9141 "10000010" // /* MW 5 */ + 9142 "00110010" // /* MW 4 */ + 9143 "00111010" // /* MW 3 */ + 9144 "11100100" // /* MW 2 */ + 9145 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9146 "10011000" // LTU r27, r20, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9147 "00011100" // /* MW 3 */ + 9148 "00110111" // /* MW 2 */ + 9149 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9150 "00011000" // SEL.EQZ r19, r19, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9151 "00000010" // /* MW 3 */ + 9152 "11100111" // /* MW 2 */ + 9153 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 44 + 9154 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "00011100" // /* MW 3 */ + 9156 "10110111" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 533 31 + 9158 "00011000" // SEL.EQZ r17, r13, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "00110010" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 67 first + 9162 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00010001" // /* MW 3 */ + 9164 "00100101" // /* MW 2 */ + 9165 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 535 37 first + 9166 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9167 "00001000" // /* MW 3 */ + 9168 "01100001" // /* MW 2 */ + 9169 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 9170 "10000100" // JNZ r16, #9344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9344 delay_slots=5 */ + 9171 "00000001" // /* MW 5 */ + 9172 "01000000" // /* MW 4 */ + 9173 "01000000" // /* MW 3 */ + 9174 "00010010" // /* MW 2 */ + 9175 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 33 +.delay_slot + 9176 "01000100" // MOVXM p6, #509208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9177 "00110000" // /* MW 5 */ + 9178 "11001010" // /* MW 4 */ + 9179 "11001100" // /* MW 3 */ + 9180 "00000111" // /* MW 2 */ + 9181 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 534 33 first +.delay_slot + 9182 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9183 "01010001" // /* MW 3 */ + 9184 "00000110" // /* MW 2 */ + 9185 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9189 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.delay_slot + 9190 "10111010" // NOPA; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9191 "00010000" // /* MW 9 */ + 9192 "01101000" // /* MW 8 */ + 9193 "10110010" // /* MW 7 */ + 9194 "11110011" // /* MW 6 */ + 9195 "00000001" // /* MW 5 */ + 9196 "00000000" // /* MW 4 */ + 9197 "11110000" // /* MW 3 */ + 9198 "00101100" // /* MW 2 */ + 9199 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1664 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 9200 "00111010" // MOVS p6, r12; J #9408 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */ + 9201 "00100001" // /* MW 9 */ + 9202 "00000000" // /* MW 8 */ + 9203 "00000000" // /* MW 7 */ + 9204 "10011000" // /* MW 6 */ + 9205 "00000100" // /* MW 5 */ + 9206 "00000000" // /* MW 4 */ + 9207 "01100000" // /* MW 3 */ + 9208 "10000001" // /* MW 2 */ + 9209 "11010001" // /* MW 1 */ +.delay_slot + 9210 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9211 "10010001" // /* MW 3 */ + 9212 "11100101" // /* MW 2 */ + 9213 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9217 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9220 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9221 "10000001" // /* MW 11 */ + 9222 "10101101" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "00000000" // /* MW 8 */ + 9225 "00000000" // /* MW 7 */ + 9226 "00000000" // /* MW 6 */ + 9227 "00100000" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_1696 +.src_ref 8 "superkernels.cpp" 541 26 + 9232 "01000100" // MOVXM p6, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10001000" // /* MW 5 */ + 9234 "11001001" // /* MW 4 */ + 9235 "11001100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 26 first +.src_ref 8 "superkernels.cpp" 541 61 + 9238 "10111010" // LDA r19, [p6]; MOVXM p6, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "00100010" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110011" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11001110" // /* MW 2 */ + 9247 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 61 +.src_ref 8 "superkernels.cpp" 542 44 + 9248 "10111010" // LDA r16, [p6]; MOVXM p6, #509148 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "01101110" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110011" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000010" // /* MW 2 */ + 9257 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 44 first + 9258 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9259 "01010110" // /* MW 3 */ + 9260 "00000110" // /* MW 2 */ + 9261 "00000110" // /* MW 1 */ + 9262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9263 "00000000" // /* MW 1 */ + 9264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9265 "00000000" // /* MW 1 */ + 9266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9267 "00000000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 541 37 first + 9272 "10011000" // MUL r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9273 "00001111" // /* MW 3 */ + 9274 "11100111" // /* MW 2 */ + 9275 "00010100" // /* MW 1 */ + 9276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 first +.src_ref 8 "superkernels.cpp" 542 30 first + 9278 "10100100" // SUB r20, r18, r19; ADD.NC r21, r19, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "10000010" // /* MW 5 */ + 9280 "10110011" // /* MW 4 */ + 9281 "00111010" // /* MW 3 */ + 9282 "00100110" // /* MW 2 */ + 9283 "10010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9284 "10011000" // LTU r27, r21, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9285 "00101100" // /* MW 3 */ + 9286 "01110111" // /* MW 2 */ + 9287 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9288 "00011000" // SEL.EQZ r20, r20, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9289 "00000010" // /* MW 3 */ + 9290 "00101001" // /* MW 2 */ + 9291 "00010101" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 +.src_ref 8 "superkernels.cpp" 542 42 + 9292 "01100100" // LTU r27, r19, r18; MOV r17, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9293 "00000001" // /* MW 5 */ + 9294 "10100000" // /* MW 4 */ + 9295 "10011000" // /* MW 3 */ + 9296 "11100101" // /* MW 2 */ + 9297 "10011110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 542 30 + 9298 "00011000" // SEL.EQZ r17, r17, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9299 "01000010" // /* MW 3 */ + 9300 "01100011" // /* MW 2 */ + 9301 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 69 first + 9302 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9303 "00010001" // /* MW 3 */ + 9304 "00100101" // /* MW 2 */ + 9305 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 544 38 first + 9306 "10011000" // EQ r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9307 "00000111" // /* MW 3 */ + 9308 "01100001" // /* MW 2 */ + 9309 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 547 6 first + 9310 "10000100" // JNZ r16, #10176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10176 delay_slots=5 */ + 9311 "00000001" // /* MW 5 */ + 9312 "01000000" // /* MW 4 */ + 9313 "11100000" // /* MW 3 */ + 9314 "00010011" // /* MW 2 */ + 9315 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 34 +.delay_slot + 9316 "01000100" // MOVXM p6, #509216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9317 "01000000" // /* MW 5 */ + 9318 "11001010" // /* MW 4 */ + 9319 "11001100" // /* MW 3 */ + 9320 "00000111" // /* MW 2 */ + 9321 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 543 34 first +.delay_slot + 9322 "10011000" // ST r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9323 "01010001" // /* MW 3 */ + 9324 "00000110" // /* MW 2 */ + 9325 "00001110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9327 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 9330 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9331 "00011100" // /* MW 13 */ + 9332 "00000000" // /* MW 12 */ + 9333 "00000000" // /* MW 11 */ + 9334 "01010111" // /* MW 10 */ + 9335 "00011010" // /* MW 9 */ + 9336 "01000000" // /* MW 8 */ + 9337 "00000000" // /* MW 7 */ + 9338 "00000000" // /* MW 6 */ + 9339 "10110110" // /* MW 5 */ + 9340 "00000010" // /* MW 4 */ + 9341 "11110000" // /* MW 3 */ + 9342 "00101100" // /* MW 2 */ + 9343 "00000000" // /* MW 1 */ +.label __ll65__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 8 "superkernels.cpp" 558 19 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 9344 "01110110" // LDA p0, [sp, #-68]; MOVS p6, r12; MOVX r14, #2; MOV r15, #1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9345 "01011000" // /* MW 11 */ + 9346 "00000001" // /* MW 10 */ + 9347 "11101000" // /* MW 9 */ + 9348 "01001001" // /* MW 8 */ + 9349 "11100000" // /* MW 7 */ + 9350 "00000000" // /* MW 6 */ + 9351 "00001011" // /* MW 5 */ + 9352 "10001100" // /* MW 4 */ + 9353 "00100110" // /* MW 3 */ + 9354 "10000011" // /* MW 2 */ + 9355 "11110111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9356 "00011000" // LDA p1, [sp, #-68] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9357 "10011001" // /* MW 3 */ + 9358 "10111100" // /* MW 2 */ + 9359 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 9360 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9361 "10010001" // /* MW 3 */ + 9362 "11100101" // /* MW 2 */ + 9363 "00000111" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 287 11 first +.aggressive_scheduled_block_id 7 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9364 "00000100" // JL #4176 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4176 delay_slots=5 */ + 9365 "00000001" // /* MW 5 */ + 9366 "00000000" // /* MW 4 */ + 9367 "00101000" // /* MW 3 */ + 9368 "00001000" // /* MW 2 */ + 9369 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.delay_slot +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9370 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9371 "11000000" // /* MW 3 */ + 9372 "01100000" // /* MW 2 */ + 9373 "00011111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.delay_slot + 9374 "00011000" // MOVX r13, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9375 "00000001" // /* MW 3 */ + 9376 "00011010" // /* MW 2 */ + 9377 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ +.src_ref 3 "pad_3d.h" 287 11 +.delay_slot + 9382 "10111010" // NOPA; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9383 "00010000" // /* MW 9 */ + 9384 "10000000" // /* MW 8 */ + 9385 "00110010" // /* MW 7 */ + 9386 "11110001" // /* MW 6 */ + 9387 "00000001" // /* MW 5 */ + 9388 "00000000" // /* MW 4 */ + 9389 "11110000" // /* MW 3 */ + 9390 "00101100" // /* MW 2 */ + 9391 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.src_ref 8 "superkernels.cpp" 552 2 +.return_address + 9392 "00111010" // MOVS p0, p7; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9393 "00010001" // /* MW 9 */ + 9394 "00100000" // /* MW 8 */ + 9395 "00110010" // /* MW 7 */ + 9396 "11110001" // /* MW 6 */ + 9397 "00000001" // /* MW 5 */ + 9398 "00000000" // /* MW 4 */ + 9399 "01100000" // /* MW 3 */ + 9400 "10010001" // /* MW 2 */ + 9401 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 + 9402 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9403 "10100000" // /* MW 5 */ + 9404 "11001001" // /* MW 4 */ + 9405 "11001110" // /* MW 3 */ + 9406 "00000111" // /* MW 2 */ + 9407 "00000000" // /* MW 1 */ +.label __ll95__Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9408 "10011000" // LDA p1, [p6], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9409 "10011110" // /* MW 3 */ + 9410 "01011100" // /* MW 2 */ + 9411 "00000110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 first +.no_stack_arguments + 9412 "00000100" // JL #4848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4848 delay_slots=5 */ + 9413 "00000001" // /* MW 5 */ + 9414 "00000000" // /* MW 4 */ + 9415 "01111000" // /* MW 3 */ + 9416 "00001001" // /* MW 2 */ + 9417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9427 "00011100" // /* MW 13 */ + 9428 "00000000" // /* MW 12 */ + 9429 "00000000" // /* MW 11 */ + 9430 "01010111" // /* MW 10 */ + 9431 "00011010" // /* MW 9 */ + 9432 "01000000" // /* MW 8 */ + 9433 "00000000" // /* MW 7 */ + 9434 "00000000" // /* MW 6 */ + 9435 "10110110" // /* MW 5 */ + 9436 "00000010" // /* MW 4 */ + 9437 "11110000" // /* MW 3 */ + 9438 "00101100" // /* MW 2 */ + 9439 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 first +.return_address + 9440 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9441 "00010110" // /* MW 3 */ + 9442 "00000110" // /* MW 2 */ + 9443 "00000111" // /* MW 1 */ + 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9445 "00000000" // /* MW 1 */ + 9446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9447 "00000000" // /* MW 1 */ + 9448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9449 "00000000" // /* MW 1 */ + 9450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9451 "00000000" // /* MW 1 */ + 9452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9453 "00000000" // /* MW 1 */ + 9454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 19 + 9456 "10011000" // NE r17, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9457 "00001000" // /* MW 3 */ + 9458 "11100011" // /* MW 2 */ + 9459 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 25 + 9460 "10000100" // JNZ r17, #9664 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9664 delay_slots=5 */ + 9461 "00000001" // /* MW 5 */ + 9462 "01000000" // /* MW 4 */ + 9463 "11100000" // /* MW 3 */ + 9464 "00010010" // /* MW 2 */ + 9465 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 555 15 + 9476 "01000100" // MOVXM p7, #509124 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9477 "10001000" // /* MW 5 */ + 9478 "11001001" // /* MW 4 */ + 9479 "11001110" // /* MW 3 */ + 9480 "00000111" // /* MW 2 */ + 9481 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 554 67 + 9482 "10111010" // LDA r16, [p7]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9483 "00010000" // /* MW 9 */ + 9484 "00110000" // /* MW 8 */ + 9485 "00110010" // /* MW 7 */ + 9486 "11110001" // /* MW 6 */ + 9487 "00000001" // /* MW 5 */ + 9488 "00000000" // /* MW 4 */ + 9489 "11010000" // /* MW 3 */ + 9490 "11000010" // /* MW 2 */ + 9491 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 67 + 9492 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9493 "00111010" // /* MW 3 */ + 9494 "00000100" // /* MW 2 */ + 9495 "00000010" // /* MW 1 */ + 9496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9497 "00000000" // /* MW 1 */ + 9498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9499 "00000000" // /* MW 1 */ + 9500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9501 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.no_stack_arguments + 9502 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9503 "00000001" // /* MW 5 */ + 9504 "00000000" // /* MW 4 */ + 9505 "11111000" // /* MW 3 */ + 9506 "00010011" // /* MW 2 */ + 9507 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.delay_slot + 9510 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00000111" // /* MW 3 */ + 9512 "00100000" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 29 +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9514 "01011100" // ST r16, [p7]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9515 "10110101" // /* MW 5 */ + 9516 "01101101" // /* MW 4 */ + 9517 "00111000" // /* MW 3 */ + 9518 "11000010" // /* MW 2 */ + 9519 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9520 "11100100" // SUB r17, r13, r16; MOV r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9521 "01000001" // /* MW 5 */ + 9522 "10111011" // /* MW 4 */ + 9523 "00110111" // /* MW 3 */ + 9524 "01100000" // /* MW 2 */ + 9525 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.delay_slot + 9526 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9527 "00010010" // /* MW 9 */ + 9528 "00000001" // /* MW 8 */ + 9529 "00000100" // /* MW 7 */ + 9530 "00000000" // /* MW 6 */ + 9531 "01011011" // /* MW 5 */ + 9532 "00000001" // /* MW 4 */ + 9533 "11110000" // /* MW 3 */ + 9534 "00101100" // /* MW 2 */ + 9535 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 9536 "10111010" // LDA p2, [sp, #-36]; SUB r16, r13, r3; MOV r27, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9537 "01111000" // /* MW 9 */ + 9538 "11010000" // /* MW 8 */ + 9539 "01101011" // /* MW 7 */ + 9540 "10001111" // /* MW 6 */ + 9541 "00000001" // /* MW 5 */ + 9542 "00011011" // /* MW 4 */ + 9543 "00100000" // /* MW 3 */ + 9544 "10100011" // /* MW 2 */ + 9545 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 43 + 9546 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9547 "00000010" // /* MW 3 */ + 9548 "11100001" // /* MW 2 */ + 9549 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 6 +.src_ref 8 "superkernels.cpp" 554 78 + 9550 "10000100" // JNZ r16, #9632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9632 delay_slots=5 */ + 9551 "00000001" // /* MW 5 */ + 9552 "01000000" // /* MW 4 */ + 9553 "11010000" // /* MW 3 */ + 9554 "00010010" // /* MW 2 */ + 9555 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 9556 "00011000" // MOVX r15, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00000101" // /* MW 3 */ + 9558 "00011110" // /* MW 2 */ + 9559 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9567 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 555 15 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9568 "00001100" // LDA r16, [p2, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9569 "01100011" // /* MW 5 */ + 9570 "00001011" // /* MW 4 */ + 9571 "11011110" // /* MW 3 */ + 9572 "11000010" // /* MW 2 */ + 9573 "01001010" // /* MW 1 */ + 9574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9575 "00000000" // /* MW 1 */ + 9576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9577 "00000000" // /* MW 1 */ + 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9579 "00000000" // /* MW 1 */ + 9580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9581 "00000000" // /* MW 1 */ + 9582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9583 "00000000" // /* MW 1 */ + 9584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9585 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9586 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9587 "11111000" // /* MW 3 */ + 9588 "00010000" // /* MW 2 */ + 9589 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 7 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9590 "10111010" // LDA r16, [p6, #-8]; MOVXM p7, #509136 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9591 "00010000" // /* MW 9 */ + 9592 "01101000" // /* MW 8 */ + 9593 "10110010" // /* MW 7 */ + 9594 "11110011" // /* MW 6 */ + 9595 "00000001" // /* MW 5 */ + 9596 "00000000" // /* MW 4 */ + 9597 "11010000" // /* MW 3 */ + 9598 "11000010" // /* MW 2 */ + 9599 "11011100" // /* MW 1 */ + 9600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9601 "00000000" // /* MW 1 */ + 9602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9603 "00000000" // /* MW 1 */ + 9604 "10000100" // J #9648 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9648 delay_slots=5 */ + 9605 "00000000" // /* MW 5 */ + 9606 "00000000" // /* MW 4 */ + 9607 "11011000" // /* MW 3 */ + 9608 "00010010" // /* MW 2 */ + 9609 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.delay_slot + 9616 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9617 "00000001" // /* MW 3 */ + 9618 "11100001" // /* MW 2 */ + 9619 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.delay_slot + 9620 "00110110" // NOPA; NOPB; ST r16, [p6, #-8]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9621 "11000001" // /* MW 11 */ + 9622 "00001000" // /* MW 10 */ + 9623 "01110011" // /* MW 9 */ + 9624 "00000011" // /* MW 8 */ + 9625 "00000000" // /* MW 7 */ + 9626 "00000000" // /* MW 6 */ + 9627 "00100000" // /* MW 5 */ + 9628 "00000000" // /* MW 4 */ + 9629 "11110000" // /* MW 3 */ + 9630 "00101100" // /* MW 2 */ + 9631 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2096 +.src_ref 8 "superkernels.cpp" 558 7 + 9632 "11100001" // NOPA; NOPB; NOPS; MOVXM p7, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9633 "00000000" // /* MW 15 */ + 9634 "00000000" // /* MW 14 */ + 9635 "00010000" // /* MW 13 */ + 9636 "01101000" // /* MW 12 */ + 9637 "10110010" // /* MW 11 */ + 9638 "11110011" // /* MW 10 */ + 9639 "00000001" // /* MW 9 */ + 9640 "00000000" // /* MW 8 */ + 9641 "01011011" // /* MW 7 */ + 9642 "00000001" // /* MW 6 */ + 9643 "00100000" // /* MW 5 */ + 9644 "00000000" // /* MW 4 */ + 9645 "11110000" // /* MW 3 */ + 9646 "00101100" // /* MW 2 */ + 9647 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2112 +.src_ref 8 "superkernels.cpp" 558 7 first + 9648 "11100001" // LDA r16, [p7]; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9649 "00000000" // /* MW 15 */ + 9650 "00000000" // /* MW 14 */ + 9651 "01111000" // /* MW 13 */ + 9652 "10100101" // /* MW 12 */ + 9653 "00000001" // /* MW 11 */ + 9654 "00000000" // /* MW 10 */ + 9655 "00000000" // /* MW 9 */ + 9656 "00000000" // /* MW 8 */ + 9657 "01011011" // /* MW 7 */ + 9658 "00000001" // /* MW 6 */ + 9659 "00100000" // /* MW 5 */ + 9660 "00000000" // /* MW 4 */ + 9661 "11010000" // /* MW 3 */ + 9662 "11000010" // /* MW 2 */ + 9663 "11100000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2128 +.src_ref 8 "superkernels.cpp" 558 43 + 9664 "00011000" // MOVX r17, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00000001" // /* MW 3 */ + 9666 "00100010" // /* MW 2 */ + 9667 "00010000" // /* MW 1 */ + 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9669 "00000000" // /* MW 1 */ + 9670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9671 "00000000" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 19 + 9678 "10011000" // NE r16, r14, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9679 "00001000" // /* MW 3 */ + 9680 "10100001" // /* MW 2 */ + 9681 "00010011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 25 + 9682 "10000100" // JNZ r16, #9872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9872 delay_slots=5 */ + 9683 "00000001" // /* MW 5 */ + 9684 "01000000" // /* MW 4 */ + 9685 "01001000" // /* MW 3 */ + 9686 "00010011" // /* MW 2 */ + 9687 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.src_ref 8 "superkernels.cpp" 559 15 +.delay_slot + 9688 "01000100" // MOVXM p7, #509152 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9689 "11000000" // /* MW 5 */ + 9690 "11001001" // /* MW 4 */ + 9691 "11001110" // /* MW 3 */ + 9692 "00000111" // /* MW 2 */ + 9693 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 67 +.delay_slot + 9694 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9695 "11000000" // /* MW 5 */ + 9696 "11001000" // /* MW 4 */ + 9697 "11000100" // /* MW 3 */ + 9698 "00000111" // /* MW 2 */ + 9699 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9701 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9703 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9705 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 + 9706 "10011000" // LDA r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00010110" // /* MW 3 */ + 9708 "00000110" // /* MW 2 */ + 9709 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 67 + 9710 "10011000" // LDA.u16 r1, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "00111010" // /* MW 3 */ + 9712 "00000100" // /* MW 2 */ + 9713 "00000010" // /* MW 1 */ + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9715 "00000000" // /* MW 1 */ + 9716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9717 "00000000" // /* MW 1 */ + 9718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9719 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.no_stack_arguments + 9720 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9721 "00000001" // /* MW 5 */ + 9722 "00000000" // /* MW 4 */ + 9723 "11111000" // /* MW 3 */ + 9724 "00010011" // /* MW 2 */ + 9725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9727 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.delay_slot + 9728 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9729 "00000111" // /* MW 3 */ + 9730 "00100000" // /* MW 2 */ + 9731 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 29 +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9732 "01011100" // ST r16, [p7]; LT r27, r16, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9733 "00110101" // /* MW 5 */ + 9734 "01101110" // /* MW 4 */ + 9735 "00111000" // /* MW 3 */ + 9736 "11000010" // /* MW 2 */ + 9737 "11100000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9738 "11100100" // SUB r17, r17, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9739 "01000001" // /* MW 5 */ + 9740 "00111011" // /* MW 4 */ + 9741 "00110111" // /* MW 3 */ + 9742 "01100000" // /* MW 2 */ + 9743 "10001100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.delay_slot + 9744 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9745 "00000000" // /* MW 15 */ + 9746 "00000000" // /* MW 14 */ + 9747 "01111000" // /* MW 13 */ + 9748 "10100101" // /* MW 12 */ + 9749 "00000001" // /* MW 11 */ + 9750 "10010000" // /* MW 10 */ + 9751 "00001000" // /* MW 9 */ + 9752 "00100000" // /* MW 8 */ + 9753 "01011011" // /* MW 7 */ + 9754 "00000001" // /* MW 6 */ + 9755 "00100000" // /* MW 5 */ + 9756 "00000000" // /* MW 4 */ + 9757 "11110000" // /* MW 3 */ + 9758 "00101100" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 1 "io_buffer_main.h" 324 51 +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 9760 "10111010" // LDA p1, [sp, #-36]; SUB r16, r13, r3; MOV r27, r14 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9761 "01111000" // /* MW 9 */ + 9762 "10010000" // /* MW 8 */ + 9763 "01101011" // /* MW 7 */ + 9764 "10001111" // /* MW 6 */ + 9765 "00000001" // /* MW 5 */ + 9766 "00011011" // /* MW 4 */ + 9767 "00100000" // /* MW 3 */ + 9768 "10010011" // /* MW 2 */ + 9769 "11111011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 43 + 9770 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9771 "00000010" // /* MW 3 */ + 9772 "11100001" // /* MW 2 */ + 9773 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 558 6 +.src_ref 8 "superkernels.cpp" 558 78 + 9774 "10000100" // JNZ r16, #9840 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9840 delay_slots=5 */ + 9775 "00000001" // /* MW 5 */ + 9776 "01000000" // /* MW 4 */ + 9777 "00111000" // /* MW 3 */ + 9778 "00010011" // /* MW 2 */ + 9779 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 7 +.delay_slot + 9780 "01000100" // MOVXM p2, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9781 "10100000" // /* MW 5 */ + 9782 "11001001" // /* MW 4 */ + 9783 "11000100" // /* MW 3 */ + 9784 "00000111" // /* MW 2 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9789 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9791 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9793 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 559 15 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 9794 "00001100" // LDA r16, [p1, #20]; ST r13, [p7] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9795 "01100011" // /* MW 5 */ + 9796 "00001011" // /* MW 4 */ + 9797 "11011110" // /* MW 3 */ + 9798 "11000010" // /* MW 2 */ + 9799 "00101010" // /* MW 1 */ + 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9801 "00000000" // /* MW 1 */ + 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9803 "00000000" // /* MW 1 */ + 9804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9805 "00000000" // /* MW 1 */ + 9806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9807 "00000000" // /* MW 1 */ + 9808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9809 "00000000" // /* MW 1 */ + 9810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9812 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "11111000" // /* MW 3 */ + 9814 "00010000" // /* MW 2 */ + 9815 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 9816 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "00010110" // /* MW 3 */ + 9818 "11100110" // /* MW 2 */ + 9819 "00000110" // /* MW 1 */ + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ + 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9823 "00000000" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 9832 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00000001" // /* MW 3 */ + 9834 "11100001" // /* MW 2 */ + 9835 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9836 "10011000" // ST r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9837 "00010001" // /* MW 3 */ + 9838 "11100110" // /* MW 2 */ + 9839 "00001110" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2304 + 9840 "10000100" // J #9888 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */ + 9841 "00000000" // /* MW 5 */ + 9842 "00000000" // /* MW 4 */ + 9843 "01010000" // /* MW 3 */ + 9844 "00010011" // /* MW 2 */ + 9845 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.delay_slot + 9846 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9847 "11000000" // /* MW 3 */ + 9848 "01100010" // /* MW 2 */ + 9849 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9855 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9856 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9857 "00000000" // /* MW 15 */ + 9858 "00000000" // /* MW 14 */ + 9859 "01111000" // /* MW 13 */ + 9860 "10100101" // /* MW 12 */ + 9861 "00000001" // /* MW 11 */ + 9862 "00000000" // /* MW 10 */ + 9863 "00000000" // /* MW 9 */ + 9864 "00000000" // /* MW 8 */ + 9865 "01011011" // /* MW 7 */ + 9866 "00000001" // /* MW 6 */ + 9867 "00100000" // /* MW 5 */ + 9868 "00000000" // /* MW 4 */ + 9869 "11110000" // /* MW 3 */ + 9870 "00101100" // /* MW 2 */ + 9871 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2336 +.src_ref 8 "superkernels.cpp" 562 7 +.src_ref 1 "io_buffer_main.h" 324 51 + 9872 "11100001" // LDA p7, [sp, #-36]; NOPB; NOPS; MOVXM p2, #509136; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9873 "00000000" // /* MW 15 */ + 9874 "00000000" // /* MW 14 */ + 9875 "00010000" // /* MW 13 */ + 9876 "01101000" // /* MW 12 */ + 9877 "00110010" // /* MW 11 */ + 9878 "11110001" // /* MW 10 */ + 9879 "00000001" // /* MW 9 */ + 9880 "00000000" // /* MW 8 */ + 9881 "01011011" // /* MW 7 */ + 9882 "00000001" // /* MW 6 */ + 9883 "00100000" // /* MW 5 */ + 9884 "00000000" // /* MW 4 */ + 9885 "00100000" // /* MW 3 */ + 9886 "11110011" // /* MW 2 */ + 9887 "11111011" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2352 +.src_ref 8 "superkernels.cpp" 562 7 first +.src_ref 8 "superkernels.cpp" 562 19 + 9888 "00101100" // LDA r16, [p2]; MOVX r17, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00100010" // /* MW 5 */ + 9890 "01000100" // /* MW 4 */ + 9891 "11010000" // /* MW 3 */ + 9892 "11000010" // /* MW 2 */ + 9893 "01000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 19 + 9906 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9907 "00001000" // /* MW 3 */ + 9908 "01100001" // /* MW 2 */ + 9909 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 25 + 9910 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */ + 9911 "00000001" // /* MW 5 */ + 9912 "01000000" // /* MW 4 */ + 9913 "10101000" // /* MW 3 */ + 9914 "00010011" // /* MW 2 */ + 9915 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.delay_slot + 9916 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11001000" // /* MW 5 */ + 9918 "11001001" // /* MW 4 */ + 9919 "11000100" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9929 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.src_ref 8 "superkernels.cpp" 562 68 + 9930 "10111010" // LDA r16, [p2]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9931 "00010000" // /* MW 9 */ + 9932 "00110000" // /* MW 8 */ + 9933 "10110010" // /* MW 7 */ + 9934 "11110000" // /* MW 6 */ + 9935 "00000001" // /* MW 5 */ + 9936 "00000000" // /* MW 4 */ + 9937 "11010000" // /* MW 3 */ + 9938 "11000010" // /* MW 2 */ + 9939 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 68 + 9940 "10011000" // LDA.u16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9941 "00111010" // /* MW 3 */ + 9942 "00000100" // /* MW 2 */ + 9943 "00000001" // /* MW 1 */ + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ + 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9947 "00000000" // /* MW 1 */ + 9948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9949 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.no_stack_arguments + 9950 "00000100" // JL #10224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10224 delay_slots=5 */ + 9951 "00000001" // /* MW 5 */ + 9952 "00000000" // /* MW 4 */ + 9953 "11111000" // /* MW 3 */ + 9954 "00010011" // /* MW 2 */ + 9955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9957 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.delay_slot + 9958 "00011000" // ADD r16, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9959 "00000111" // /* MW 3 */ + 9960 "00100000" // /* MW 2 */ + 9961 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 29 +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9962 "01011100" // ST r16, [p2]; LT r27, r16, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9963 "10110101" // /* MW 5 */ + 9964 "01101101" // /* MW 4 */ + 9965 "00111000" // /* MW 3 */ + 9966 "11000010" // /* MW 2 */ + 9967 "01000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9968 "11100100" // SUB r17, r13, r16; MOV r14, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9969 "01000001" // /* MW 5 */ + 9970 "00111011" // /* MW 4 */ + 9971 "00110111" // /* MW 3 */ + 9972 "01100000" // /* MW 2 */ + 9973 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.delay_slot + 9974 "01111010" // NOPA; NOPS; SEL.EQZ r0, r16, r17, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9975 "00010010" // /* MW 9 */ + 9976 "00000001" // /* MW 8 */ + 9977 "00000100" // /* MW 7 */ + 9978 "00000000" // /* MW 6 */ + 9979 "01011011" // /* MW 5 */ + 9980 "00000001" // /* MW 4 */ + 9981 "11110000" // /* MW 3 */ + 9982 "00101100" // /* MW 2 */ + 9983 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 562 44 +.return_address + 9984 "11100100" // SUB r16, r13, r3; MOV r27, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9985 "01000001" // /* MW 5 */ + 9986 "10101110" // /* MW 4 */ + 9987 "00111101" // /* MW 3 */ + 9988 "00000110" // /* MW 2 */ + 9989 "01101100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 44 + 9990 "00011000" // SEL.EQZ r16, r3, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9991 "00000010" // /* MW 3 */ + 9992 "11100001" // /* MW 2 */ + 9993 "00010000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 562 6 +.src_ref 8 "superkernels.cpp" 562 79 + 9994 "10000100" // JNZ r16, #10064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10064 delay_slots=5 */ + 9995 "00000001" // /* MW 5 */ + 9996 "01000000" // /* MW 4 */ + 9997 "10101000" // /* MW 3 */ + 9998 "00010011" // /* MW 2 */ + 9999 "10000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 563 16 +.delay_slot + 10000 "01000100" // MOVXM p2, #509156 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10001 "11001000" // /* MW 5 */ + 10002 "11001001" // /* MW 4 */ + 10003 "11000100" // /* MW 3 */ + 10004 "00000111" // /* MW 2 */ + 10005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10013 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 563 16 first +.src_ref 1 "io_buffer_main.h" 324 51 first + 10014 "00001100" // LDA r16, [p7, #20]; ST r13, [p2] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "01100011" // /* MW 5 */ + 10016 "00001011" // /* MW 4 */ + 10017 "11010100" // /* MW 3 */ + 10018 "11000010" // /* MW 2 */ + 10019 "11101010" // /* MW 1 */ + 10020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10021 "00000000" // /* MW 1 */ + 10022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10023 "00000000" // /* MW 1 */ + 10024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10025 "00000000" // /* MW 1 */ + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10032 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10033 "11111000" // /* MW 3 */ + 10034 "00010000" // /* MW 2 */ + 10035 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 10036 "10011000" // LDA r16, [p6, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10037 "00010110" // /* MW 3 */ + 10038 "11100110" // /* MW 2 */ + 10039 "00000110" // /* MW 1 */ + 10040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10041 "00000000" // /* MW 1 */ + 10042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10043 "00000000" // /* MW 1 */ + 10044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10045 "00000000" // /* MW 1 */ + 10046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10047 "00000000" // /* MW 1 */ + 10048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10049 "00000000" // /* MW 1 */ + 10050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10051 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 10052 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10053 "00000001" // /* MW 3 */ + 10054 "11100001" // /* MW 2 */ + 10055 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10056 "00000010" // ST r16, [p6, #-8]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10057 "01110000" // /* MW 7 */ + 10058 "10100101" // /* MW 6 */ + 10059 "00000001" // /* MW 5 */ + 10060 "00000000" // /* MW 4 */ + 10061 "00110000" // /* MW 3 */ + 10062 "11000010" // /* MW 2 */ + 10063 "11011100" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2528 +.src_ref 8 "superkernels.cpp" 566 6 +.src_ref 8 "superkernels.cpp" 567 14 + 10064 "01000100" // MOVXM p6, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10065 "10000000" // /* MW 5 */ + 10066 "11001001" // /* MW 4 */ + 10067 "11001100" // /* MW 3 */ + 10068 "00000111" // /* MW 2 */ + 10069 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 6 first +.src_ref 8 "superkernels.cpp" 566 19 + 10070 "10111010" // LDA r16, [p6]; MOVXM p2, #509160 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10071 "00010000" // /* MW 9 */ + 10072 "01110100" // /* MW 8 */ + 10073 "00110010" // /* MW 7 */ + 10074 "11110001" // /* MW 6 */ + 10075 "00000001" // /* MW 5 */ + 10076 "00000000" // /* MW 4 */ + 10077 "11010000" // /* MW 3 */ + 10078 "11000010" // /* MW 2 */ + 10079 "11000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 19 + 10080 "10011000" // LDA r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10081 "00110110" // /* MW 3 */ + 10082 "00000110" // /* MW 2 */ + 10083 "00000010" // /* MW 1 */ + 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10085 "00000000" // /* MW 1 */ + 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10087 "00000000" // /* MW 1 */ + 10088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10089 "00000000" // /* MW 1 */ + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ + 10092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10093 "00000000" // /* MW 1 */ + 10094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10095 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 16 + 10096 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10097 "00001000" // /* MW 3 */ + 10098 "01100001" // /* MW 2 */ + 10099 "00010100" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 566 6 + 10100 "10000100" // JNZ r16, #10128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10128 delay_slots=5 */ + 10101 "00000001" // /* MW 5 */ + 10102 "01000000" // /* MW 4 */ + 10103 "11001000" // /* MW 3 */ + 10104 "00010011" // /* MW 2 */ + 10105 "10000000" // /* MW 1 */ +.delay_slot + 10106 "00011000" // LDA p7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10107 "10011001" // /* MW 3 */ + 10108 "11101111" // /* MW 2 */ + 10109 "00000111" // /* MW 1 */ +.delay_slot + 10110 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10111 "11110001" // /* MW 3 */ + 10112 "11110001" // /* MW 2 */ + 10113 "00000111" // /* MW 1 */ +.delay_slot + 10114 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10115 "11010001" // /* MW 3 */ + 10116 "11110101" // /* MW 2 */ + 10117 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 567 14 first + 10122 "00001100" // NOPA; ST r13, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100011" // /* MW 5 */ + 10124 "00001011" // /* MW 4 */ + 10125 "11111100" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2592 +.src_ref 8 "superkernels.cpp" 569 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 10128 "11010100" // LDA r11, [sp, #-8]; MOV lr, r11 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10129 "01000001" // /* MW 5 */ + 10130 "11101011" // /* MW 4 */ + 10131 "00101110" // /* MW 3 */ + 10132 "00101110" // /* MW 2 */ + 10133 "11111111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.noswbrkpt + 10134 "00011000" // LDA r12, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10135 "10010001" // /* MW 3 */ + 10136 "11111101" // /* MW 2 */ + 10137 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10138 "00011000" // LDA r13, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10139 "10110001" // /* MW 3 */ + 10140 "11101001" // /* MW 2 */ + 10141 "00000111" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 569 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 10142 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10143 "00000000" // /* MW 3 */ + 10144 "00101000" // /* MW 2 */ + 10145 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10146 "11111000" // MOV p6, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10147 "00100000" // /* MW 3 */ + 10148 "01100110" // /* MW 2 */ + 10149 "00011110" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 569 +.delay_slot + 10150 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10151 "00000001" // /* MW 5 */ + 10152 "00000000" // /* MW 4 */ + 10153 "00000000" // /* MW 3 */ + 10154 "11110000" // /* MW 2 */ + 10155 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10161 "00000000" // /* MW 15 */ + 10162 "00000000" // /* MW 14 */ + 10163 "01111000" // /* MW 13 */ + 10164 "10100101" // /* MW 12 */ + 10165 "00000001" // /* MW 11 */ + 10166 "00000000" // /* MW 10 */ + 10167 "00000000" // /* MW 9 */ + 10168 "00000000" // /* MW 8 */ + 10169 "01011011" // /* MW 7 */ + 10170 "00000001" // /* MW 6 */ + 10171 "00100000" // /* MW 5 */ + 10172 "00000000" // /* MW 4 */ + 10173 "11110000" // /* MW 3 */ + 10174 "00101100" // /* MW 2 */ + 10175 "00000000" // /* MW 1 */ +.label TGT_F_Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_2640 +.src_ref 8 "superkernels.cpp" 554 43 +.src_ref 8 "superkernels.cpp" 555 15 +.src_ref 8 "superkernels.cpp" 558 43 +.src_ref 8 "superkernels.cpp" 559 15 +.src_ref 8 "superkernels.cpp" 562 44 +.src_ref 8 "superkernels.cpp" 563 16 +.src_ref 8 "superkernels.cpp" 567 14 +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 10176 "01110110" // MOVA r13, #0; MOVS p6, r12; J #9408 /* MW 12 */ /* control_operation: words=12 jump unconditional cycles_taken=1 direct absolute target_address=9408 delay_slots=5 */ + 10177 "00100000" // /* MW 11 */ + 10178 "00000000" // /* MW 10 */ + 10179 "00000000" // /* MW 9 */ + 10180 "10011000" // /* MW 8 */ + 10181 "00000100" // /* MW 7 */ + 10182 "00000000" // /* MW 6 */ + 10183 "00001011" // /* MW 5 */ + 10184 "10001100" // /* MW 4 */ + 10185 "00000110" // /* MW 3 */ + 10186 "00001101" // /* MW 2 */ + 10187 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 19 +.src_ref 8 "superkernels.cpp" 558 19 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.delay_slot + 10188 "01100100" // MOVX r15, #1; MOV r14, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10189 "00001001" // /* MW 5 */ + 10190 "00100000" // /* MW 4 */ + 10191 "10100111" // /* MW 3 */ + 10192 "11000000" // /* MW 2 */ + 10193 "00000011" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 552 2 +.delay_slot + 10194 "01000100" // MOVXM p2, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10195 "10000000" // /* MW 5 */ + 10196 "11001000" // /* MW 4 */ + 10197 "11000100" // /* MW 3 */ + 10198 "00000111" // /* MW 2 */ + 10199 "00000000" // /* MW 1 */ +.src_ref 8 "superkernels.cpp" 554 7 +.delay_slot + 10200 "01000100" // MOVXM p7, #509136 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10201 "10100000" // /* MW 5 */ + 10202 "11001001" // /* MW 4 */ + 10203 "11001110" // /* MW 3 */ + 10204 "00000111" // /* MW 2 */ + 10205 "00000000" // /* MW 1 */ +.delay_slot + 10206 "00011000" // LDA r12, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10207 "10010001" // /* MW 3 */ + 10208 "11100101" // /* MW 2 */ + 10209 "00000111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z26superkernel_reduce_mean_c8RN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10211 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 9 "me_div.c" 108 19 +.src_ref 9 "me_div.c" 108 19 +.src_ref 9 "me_div.c" 115 4 first +.function_start + 10224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10225 "01000001" // /* MW 5 */ + 10226 "10100000" // /* MW 4 */ + 10227 "00101111" // /* MW 3 */ + 10228 "11000000" // /* MW 2 */ + 10229 "00000000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10231 "00011100" // /* MW 3 */ + 10232 "11000110" // /* MW 2 */ + 10233 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10235 "00011100" // /* MW 3 */ + 10236 "11000110" // /* MW 2 */ + 10237 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10239 "00011100" // /* MW 3 */ + 10240 "11000110" // /* MW 2 */ + 10241 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10243 "00011100" // /* MW 3 */ + 10244 "11000110" // /* MW 2 */ + 10245 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10247 "00011100" // /* MW 3 */ + 10248 "11000110" // /* MW 2 */ + 10249 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10251 "00011100" // /* MW 3 */ + 10252 "11000110" // /* MW 2 */ + 10253 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10255 "00011100" // /* MW 3 */ + 10256 "11000110" // /* MW 2 */ + 10257 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10259 "00011100" // /* MW 3 */ + 10260 "11000110" // /* MW 2 */ + 10261 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10263 "00011100" // /* MW 3 */ + 10264 "11000110" // /* MW 2 */ + 10265 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10267 "00011100" // /* MW 3 */ + 10268 "11000110" // /* MW 2 */ + 10269 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10271 "00011100" // /* MW 3 */ + 10272 "11000110" // /* MW 2 */ + 10273 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10275 "00011100" // /* MW 3 */ + 10276 "11000110" // /* MW 2 */ + 10277 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10279 "00011100" // /* MW 3 */ + 10280 "11000110" // /* MW 2 */ + 10281 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10283 "00011100" // /* MW 3 */ + 10284 "11000110" // /* MW 2 */ + 10285 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10287 "00011100" // /* MW 3 */ + 10288 "11000110" // /* MW 2 */ + 10289 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10291 "00011100" // /* MW 3 */ + 10292 "11000110" // /* MW 2 */ + 10293 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10295 "00011100" // /* MW 3 */ + 10296 "11000110" // /* MW 2 */ + 10297 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10299 "00011100" // /* MW 3 */ + 10300 "11000110" // /* MW 2 */ + 10301 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10303 "00011100" // /* MW 3 */ + 10304 "11000110" // /* MW 2 */ + 10305 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10307 "00011100" // /* MW 3 */ + 10308 "11000110" // /* MW 2 */ + 10309 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "00011100" // /* MW 3 */ + 10312 "11000110" // /* MW 2 */ + 10313 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10315 "00011100" // /* MW 3 */ + 10316 "11000110" // /* MW 2 */ + 10317 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10319 "00011100" // /* MW 3 */ + 10320 "11000110" // /* MW 2 */ + 10321 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10323 "00011100" // /* MW 3 */ + 10324 "11000110" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "00011100" // /* MW 3 */ + 10328 "11000110" // /* MW 2 */ + 10329 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "00011100" // /* MW 3 */ + 10332 "11000110" // /* MW 2 */ + 10333 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10335 "00011100" // /* MW 3 */ + 10336 "11000110" // /* MW 2 */ + 10337 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 + 10338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10339 "00011100" // /* MW 3 */ + 10340 "11000110" // /* MW 2 */ + 10341 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 119 first + 10342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10343 "00000000" // /* MW 3 */ + 10344 "00101000" // /* MW 2 */ + 10345 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 first +.delay_slot + 10346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10347 "00011100" // /* MW 3 */ + 10348 "11000110" // /* MW 2 */ + 10349 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10351 "00011100" // /* MW 3 */ + 10352 "11000110" // /* MW 2 */ + 10353 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10355 "00011100" // /* MW 3 */ + 10356 "11000110" // /* MW 2 */ + 10357 "00010000" // /* MW 1 */ +.src_ref 9 "me_div.c" 108 19 +.delay_slot + 10358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10359 "00011100" // /* MW 3 */ + 10360 "11000110" // /* MW 2 */ + 10361 "00010000" // /* MW 1 */ +.delay_slot + 10362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10363 "10100000" // /* MW 3 */ + 10364 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 10365 "00011000" // /* MW 1 */ +.label _ZL19propagateFloat32NaNjj +.function propagateFloat32NaN _ZL19propagateFloat32NaNjj +.src_ref 10 "softfloat-specialize" 78 24 +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 143 4 first +.function_start + 10368 "10111010" // MOVA r3, #-22; MOVXM r18, #-16777216 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10369 "00010000" // /* MW 9 */ + 10370 "00000000" // /* MW 8 */ + 10371 "01001000" // /* MW 7 */ + 10372 "00000010" // /* MW 6 */ + 10373 "11000000" // /* MW 5 */ + 10374 "00111111" // /* MW 4 */ + 10375 "00000000" // /* MW 3 */ + 10376 "01000011" // /* MW 2 */ + 10377 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 140 6 +.src_ref 10 "softfloat-specialize" 141 6 + 10378 "10111010" // MOVA r7, #511; MOVXM r0, #4194304 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10379 "00010000" // /* MW 9 */ + 10380 "00000000" // /* MW 8 */ + 10381 "00001000" // /* MW 7 */ + 10382 "00000000" // /* MW 6 */ + 10383 "00010000" // /* MW 5 */ + 10384 "00000000" // /* MW 4 */ + 10385 "00000000" // /* MW 3 */ + 10386 "11100111" // /* MW 2 */ + 10387 "00111111" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 38 +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 +.src_ref 10 "softfloat-specialize" 140 6 first + 10388 "10111010" // MOVA r16, #1; OR r4, r1, r0; MOV r5, #510 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10389 "01011000" // /* MW 9 */ + 10390 "11111110" // /* MW 8 */ + 10391 "10101001" // /* MW 7 */ + 10392 "00101100" // /* MW 6 */ + 10393 "01000000" // /* MW 5 */ + 10394 "00000010" // /* MW 4 */ + 10395 "00000000" // /* MW 3 */ + 10396 "00110000" // /* MW 2 */ + 10397 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 141 6 first + 10398 "10011000" // OR r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10399 "00000101" // /* MW 3 */ + 10400 "10000000" // /* MW 2 */ + 10401 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10402 "10011000" // LSHL r6, r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10403 "00111101" // /* MW 3 */ + 10404 "01001100" // /* MW 2 */ + 10405 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first + 10406 "10011000" // LSHL r3, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10407 "00111101" // /* MW 3 */ + 10408 "10000110" // /* MW 2 */ + 10409 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 + 10410 "10011000" // AND r3, r7, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10411 "00110100" // /* MW 3 */ + 10412 "11000110" // /* MW 2 */ + 10413 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10414 "10011000" // AND r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10415 "01100100" // /* MW 3 */ + 10416 "11001100" // /* MW 2 */ + 10417 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 + 10418 "10011000" // EQ r6, r5, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10419 "01100111" // /* MW 3 */ + 10420 "01001100" // /* MW 2 */ + 10421 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 38 first + 10422 "10011000" // LSHL r17, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10423 "00001101" // /* MW 3 */ + 10424 "10100011" // /* MW 2 */ + 10425 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 78 24 + 10426 "10011000" // LTU r27, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10427 "00011100" // /* MW 3 */ + 10428 "10110111" // /* MW 2 */ + 10429 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 62 first + 10430 "00011000" // SEL.EQZ r17, r4, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10431 "00000010" // /* MW 3 */ + 10432 "00100010" // /* MW 2 */ + 10433 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 +.src_ref 10 "softfloat-specialize" 139 22 + 10434 "01000100" // MOVXM r16, #4194303 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10435 "11111110" // /* MW 5 */ + 10436 "00111111" // /* MW 4 */ + 10437 "11111000" // /* MW 3 */ + 10438 "00111111" // /* MW 2 */ + 10439 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first + 10440 "10011000" // AND r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10441 "00000100" // /* MW 3 */ + 10442 "10000101" // /* MW 2 */ + 10443 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 + 10444 "00011000" // NEZ r2, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10445 "11110000" // /* MW 3 */ + 10446 "10000100" // /* MW 2 */ + 10447 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first + 10448 "10011000" // AND r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10449 "00000100" // /* MW 3 */ + 10450 "01000011" // /* MW 2 */ + 10451 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 + 10452 "00011000" // NEZ r1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10453 "11110000" // /* MW 3 */ + 10454 "01000010" // /* MW 2 */ + 10455 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 4 first + 10456 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10457 "00000000" // /* MW 3 */ + 10458 "00101000" // /* MW 2 */ + 10459 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 137 22 first +.delay_slot + 10460 "10011000" // AND r27, r1, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10461 "01100100" // /* MW 3 */ + 10462 "01110110" // /* MW 2 */ + 10463 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first +.delay_slot + 10464 "10011000" // EQ r1, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10465 "01010111" // /* MW 3 */ + 10466 "11000010" // /* MW 2 */ + 10467 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 49 first +.delay_slot + 10468 "00011000" // SEL.EQZ r3, r17, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10469 "01000010" // /* MW 3 */ + 10470 "01000110" // /* MW 2 */ + 10471 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 139 22 first +.delay_slot + 10472 "10011000" // AND r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10473 "00100100" // /* MW 3 */ + 10474 "01110110" // /* MW 2 */ + 10475 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-specialize" 143 27 first +.delay_slot + 10476 "00011000" // SEL.EQZ r0, r3, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10477 "00000010" // /* MW 3 */ + 10478 "11000000" // /* MW 2 */ +.label _ZL19propagateFloat32NaNjj__end + 10479 "00010000" // /* MW 1 */ +.label _ZL19roundAndPackFloat32iij +.function roundAndPackFloat32 _ZL19roundAndPackFloat32iij +.src_ref 10 "softfloat.c" 154 first +.src_ref 10 "softfloat.c" 161 19 +.src_ref 10 "softfloat.c" 203 30 +.function_start + 10480 "10111010" // MOVA r0, #64; MOVXM p0, #509172 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10481 "00010000" // /* MW 9 */ + 10482 "01111010" // /* MW 8 */ + 10483 "00110010" // /* MW 7 */ + 10484 "11110000" // /* MW 6 */ + 10485 "00000001" // /* MW 5 */ + 10486 "00000000" // /* MW 4 */ + 10487 "00000000" // /* MW 3 */ + 10488 "00000000" // /* MW 2 */ + 10489 "00001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 161 19 first +.src_ref 10 "softfloat.c" 171 16 +.src_ref 10 "softfloat.c" 174 16 +.src_ref 10 "softfloat.c" 178 21 +.src_ref 10 "softfloat.c" 194 29 + 10490 "00101100" // LDA r4, [p0]; MOVX r6, #127 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10491 "11111010" // /* MW 5 */ + 10492 "10011001" // /* MW 4 */ + 10493 "11010000" // /* MW 3 */ + 10494 "10010010" // /* MW 2 */ + 10495 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10501 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10503 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10505 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 10506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10507 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 162 36 first +.src_ref 10 "softfloat.c" 164 4 first + 10508 "10000100" // JZ r4, #10576 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10576 delay_slots=5 */ + 10509 "00000001" // /* MW 5 */ + 10510 "00000000" // /* MW 4 */ + 10511 "10101000" // /* MW 3 */ + 10512 "00010100" // /* MW 2 */ + 10513 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 40 +.src_ref 10 "softfloat.c" 185 68 +.src_ref 10 "softfloat.c" 202 18 +.delay_slot + 10514 "00011000" // MOVX r5, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10515 "00000001" // /* MW 3 */ + 10516 "01001010" // /* MW 2 */ + 10517 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10519 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10521 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10523 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10525 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 8 +.src_ref 10 "softfloat.c" 171 16 +.src_ref 10 "softfloat.c" 171 34 +.src_ref 10 "softfloat.c" 174 16 +.src_ref 10 "softfloat.c" 174 34 + 10526 "10111010" // MOVA r16, #3; MOVX r7, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10527 "01011000" // /* MW 9 */ + 10528 "00000000" // /* MW 8 */ + 10529 "00001000" // /* MW 7 */ + 10530 "01001011" // /* MW 6 */ + 10531 "01110000" // /* MW 5 */ + 10532 "00000000" // /* MW 4 */ + 10533 "00000000" // /* MW 3 */ + 10534 "01110000" // /* MW 2 */ + 10535 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 26 +.src_ref 10 "softfloat.c" 171 34 first + 10536 "01100100" // EQ r27, r7, r4; MOV r5, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10537 "00000101" // /* MW 5 */ + 10538 "10100000" // /* MW 4 */ + 10539 "11110010" // /* MW 3 */ + 10540 "11001000" // /* MW 2 */ + 10541 "00111110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 171 16 + 10542 "00011000" // SEL.EQZ r7, r6, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10543 "10000010" // /* MW 3 */ + 10544 "10001111" // /* MW 2 */ + 10545 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 174 34 first + 10546 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10547 "00000111" // /* MW 3 */ + 10548 "00110111" // /* MW 2 */ + 10549 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 170 12 +.src_ref 10 "softfloat.c" 174 16 + 10550 "11100100" // SEL.EQZ r16, r6, r24, r27; MOV r27, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10551 "01000001" // /* MW 5 */ + 10552 "10100001" // /* MW 4 */ + 10553 "01001101" // /* MW 3 */ + 10554 "00110000" // /* MW 2 */ + 10555 "00110100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 170 12 first +.src_ref 10 "softfloat.c" 170 12 first + 10556 "00011000" // SEL.EQZ r7, r16, r7, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10557 "01110010" // /* MW 3 */ + 10558 "00001110" // /* MW 2 */ + 10559 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 26 first + 10560 "10011000" // EQ r27, r5, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10561 "01000111" // /* MW 3 */ + 10562 "01110110" // /* MW 2 */ + 10563 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 165 8 + 10564 "00110110" // NOPA; NOPB; NOPS; SEL.EQZ r5, r7, r24, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10565 "10000001" // /* MW 11 */ + 10566 "10101101" // /* MW 10 */ + 10567 "00000000" // /* MW 9 */ + 10568 "00010000" // /* MW 8 */ + 10569 "01011100" // /* MW 7 */ + 10570 "00001110" // /* MW 6 */ + 10571 "00100000" // /* MW 5 */ + 10572 "00000000" // /* MW 4 */ + 10573 "11110000" // /* MW 3 */ + 10574 "00101100" // /* MW 2 */ + 10575 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_96 +.src_ref 10 "softfloat.c" 179 14 +.src_ref 10 "softfloat.c" 179 17 first +.src_ref 10 "softfloat.c" 180 23 +.src_ref 10 "softfloat.c" 181 28 + 10576 "01100100" // EXTEND.u16 r18, r2; MOV r16, #253 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10577 "11110101" // /* MW 5 */ + 10578 "00100011" // /* MW 4 */ + 10579 "00001000" // /* MW 3 */ + 10580 "10010110" // /* MW 2 */ + 10581 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 179 14 + 10582 "10011000" // LT r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10583 "00001010" // /* MW 3 */ + 10584 "10100101" // /* MW 2 */ + 10585 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 179 4 + 10586 "10000100" // JNZ r18, #10768 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10768 delay_slots=5 */ + 10587 "00000001" // /* MW 5 */ + 10588 "01000000" // /* MW 4 */ + 10589 "00001000" // /* MW 3 */ + 10590 "00010101" // /* MW 2 */ + 10591 "10010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 178 21 first +.delay_slot + 10592 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10593 "01100100" // /* MW 3 */ + 10594 "11100010" // /* MW 2 */ + 10595 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 +.src_ref 10 "softfloat.c" 128 31 +.delay_slot + 10596 "00011000" // MOVX r7, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10597 "01111101" // /* MW 3 */ + 10598 "00001110" // /* MW 2 */ + 10599 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 10600 "10011000" // LSHL r1, r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10601 "01111101" // /* MW 3 */ + 10602 "01000010" // /* MW 2 */ + 10603 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10607 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 181 28 first +.src_ref 10 "softfloat.c" 182 40 first +.src_ref 10 "softfloat.c" 182 59 + 10608 "10111010" // MOVA r18, #0; EQ r19, r2, r16; ADD.NC r20, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10609 "10101000" // /* MW 9 */ + 10610 "11001010" // /* MW 8 */ + 10611 "10001000" // /* MW 7 */ + 10612 "00111110" // /* MW 6 */ + 10613 "00111000" // /* MW 5 */ + 10614 "00000101" // /* MW 4 */ + 10615 "00000000" // /* MW 3 */ + 10616 "00010010" // /* MW 2 */ + 10617 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 59 + 10618 "10011000" // LT r20, r20, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10619 "00101010" // /* MW 3 */ + 10620 "00101001" // /* MW 2 */ + 10621 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 180 23 first + 10622 "10011000" // LT r16, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10623 "00101010" // /* MW 3 */ + 10624 "00100000" // /* MW 2 */ + 10625 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 182 18 first + 10626 "10011000" // AND r19, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10627 "01000100" // /* MW 3 */ + 10628 "11100111" // /* MW 2 */ + 10629 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 181 13 first + 10630 "10011000" // OR r19, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10631 "00000101" // /* MW 3 */ + 10632 "11100111" // /* MW 2 */ + 10633 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 180 8 first + 10634 "10000100" // JNZ r19, #10848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10848 delay_slots=5 */ + 10635 "00000001" // /* MW 5 */ + 10636 "01000000" // /* MW 4 */ + 10637 "00110000" // /* MW 3 */ + 10638 "00010101" // /* MW 2 */ + 10639 "10011000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 187 18 +.src_ref 10 "softfloat.c" 192 39 +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 +.delay_slot + 10640 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10641 "00000001" // /* MW 3 */ + 10642 "00100000" // /* MW 2 */ + 10643 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10647 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10649 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10651 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 187 18 first + 10652 "10011000" // GE r19, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10653 "00001001" // /* MW 3 */ + 10654 "10100111" // /* MW 2 */ + 10655 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 187 8 + 10656 "10000100" // JNZ r19, #10784 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10784 delay_slots=5 */ + 10657 "00000001" // /* MW 5 */ + 10658 "01000000" // /* MW 4 */ + 10659 "00010000" // /* MW 3 */ + 10660 "00010101" // /* MW 2 */ + 10661 "10011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10671 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 192 39 first + 10672 "10011000" // SUB r2, r16, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10673 "00100001" // /* MW 3 */ + 10674 "00000100" // /* MW 2 */ + 10675 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 10676 "10000100" // JZ r2, #10736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10736 delay_slots=5 */ + 10677 "00000001" // /* MW 5 */ + 10678 "00000000" // /* MW 4 */ + 10679 "11111000" // /* MW 3 */ + 10680 "00010100" // /* MW 2 */ + 10681 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10683 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10691 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 10692 "01100100" // SUB r17, r16, r2; MOV r19, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10693 "10000001" // /* MW 5 */ + 10694 "10100000" // /* MW 4 */ + 10695 "00111001" // /* MW 3 */ + 10696 "01000100" // /* MW 2 */ + 10697 "10000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 10698 "10011000" // AND r7, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10699 "00010100" // /* MW 3 */ + 10700 "11001111" // /* MW 2 */ + 10701 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 10702 "10011000" // LSHL r7, r3, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10703 "01111101" // /* MW 3 */ + 10704 "11001110" // /* MW 2 */ + 10705 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 10706 "10011000" // LSHL r17, r3, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10707 "00011101" // /* MW 3 */ + 10708 "11100011" // /* MW 2 */ + 10709 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 10710 "10011000" // LT r27, r2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10711 "00111010" // /* MW 3 */ + 10712 "10110111" // /* MW 2 */ + 10713 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 10714 "00011000" // NEZ r7, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10715 "11110000" // /* MW 3 */ + 10716 "11001110" // /* MW 2 */ + 10717 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 10718 "00011000" // NEZ r3, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10719 "11110000" // /* MW 3 */ + 10720 "11000110" // /* MW 2 */ + 10721 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 10722 "10011000" // OR r2, r7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10723 "00010101" // /* MW 3 */ + 10724 "11000101" // /* MW 2 */ + 10725 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 10726 "01111010" // NOPA; NOPS; SEL.EQZ r3, r3, r2, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10727 "00100010" // /* MW 9 */ + 10728 "11000110" // /* MW 8 */ + 10729 "00000000" // /* MW 7 */ + 10730 "00000000" // /* MW 6 */ + 10731 "01011011" // /* MW 5 */ + 10732 "00000001" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_256 + 10736 "10000100" // J #10784 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10784 delay_slots=5 */ + 10737 "00000000" // /* MW 5 */ + 10738 "00000000" // /* MW 4 */ + 10739 "00010000" // /* MW 3 */ + 10740 "00010101" // /* MW 2 */ + 10741 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 194 29 first +.delay_slot + 10742 "10011000" // AND r17, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10743 "01100100" // /* MW 3 */ + 10744 "11100010" // /* MW 2 */ + 10745 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 +.delay_slot + 10746 "00011000" // MOVX r2, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10747 "00000001" // /* MW 3 */ + 10748 "00000100" // /* MW 2 */ + 10749 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10751 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10754 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10755 "00011100" // /* MW 13 */ + 10756 "00000000" // /* MW 12 */ + 10757 "00000000" // /* MW 11 */ + 10758 "01010111" // /* MW 10 */ + 10759 "00011010" // /* MW 9 */ + 10760 "01000000" // /* MW 8 */ + 10761 "00000000" // /* MW 7 */ + 10762 "00000000" // /* MW 6 */ + 10763 "10110110" // /* MW 5 */ + 10764 "00000010" // /* MW 4 */ + 10765 "11110000" // /* MW 3 */ + 10766 "00101100" // /* MW 2 */ + 10767 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_288 +.src_ref 10 "softfloat.c" 204 4 +.src_ref 10 "softfloat.c" 204 14 + 10768 "11100001" // NOPA; NOPB; NOPS; MOVX r16, #0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10769 "00000000" // /* MW 15 */ + 10770 "00000000" // /* MW 14 */ + 10771 "01111000" // /* MW 13 */ + 10772 "10100101" // /* MW 12 */ + 10773 "00000001" // /* MW 11 */ + 10774 "00001000" // /* MW 10 */ + 10775 "00000000" // /* MW 9 */ + 10776 "00000001" // /* MW 8 */ + 10777 "01011011" // /* MW 7 */ + 10778 "00000001" // /* MW 6 */ + 10779 "00100000" // /* MW 5 */ + 10780 "00000000" // /* MW 4 */ + 10781 "11110000" // /* MW 3 */ + 10782 "00101100" // /* MW 2 */ + 10783 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_304 +.src_ref 10 "softfloat.c" 202 18 first +.src_ref 10 "softfloat.c" 202 36 +.src_ref 10 "softfloat.c" 203 30 first + 10784 "10111010" // MOVA r0, #-7; XOR r3, r17, r0; ADD.NC r5, r3, r5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10785 "10101000" // /* MW 9 */ + 10786 "11001010" // /* MW 8 */ + 10787 "10101000" // /* MW 7 */ + 10788 "00110100" // /* MW 6 */ + 10789 "00110000" // /* MW 5 */ + 10790 "00100010" // /* MW 4 */ + 10791 "00000000" // /* MW 3 */ + 10792 "00100000" // /* MW 2 */ + 10793 "11111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 59 +.src_ref 10 "softfloat.c" 203 12 +.src_ref 10 "softfloat.c" 203 46 + 10794 "10111010" // MOVA r3, #23; OR r6, r3, r4; MOV r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10795 "01011000" // /* MW 9 */ + 10796 "11111111" // /* MW 8 */ + 10797 "10001111" // /* MW 7 */ + 10798 "00101100" // /* MW 6 */ + 10799 "01100010" // /* MW 5 */ + 10800 "00000110" // /* MW 4 */ + 10801 "00000000" // /* MW 3 */ + 10802 "11100011" // /* MW 2 */ + 10803 "00000010" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 46 + 10804 "00011000" // EQZ r6, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10805 "11010000" // /* MW 3 */ + 10806 "10001100" // /* MW 2 */ + 10807 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 202 36 + 10808 "10011000" // LSHL r0, r5, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00001101" // /* MW 3 */ + 10810 "01000000" // /* MW 2 */ + 10811 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 12 + 10812 "10011000" // XOR r4, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10813 "01000110" // /* MW 3 */ + 10814 "10001000" // /* MW 2 */ + 10815 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 205 4 first + 10816 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10817 "00000000" // /* MW 3 */ + 10818 "00101000" // /* MW 2 */ + 10819 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 203 9 first +.delay_slot + 10820 "10011000" // AND r27, r4, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10821 "00000100" // /* MW 3 */ + 10822 "00110110" // /* MW 2 */ + 10823 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 204 4 first +.src_ref 10 "softfloat.c" 204 14 first +.delay_slot + 10824 "00011000" // SEL.EQZ r2, r16, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10825 "00100010" // /* MW 3 */ + 10826 "00000100" // /* MW 2 */ + 10827 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 59 first +.delay_slot + 10828 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10829 "00111101" // /* MW 3 */ + 10830 "10000100" // /* MW 2 */ + 10831 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 10832 "10011000" // ADD r2, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10833 "00100000" // /* MW 3 */ + 10834 "01000100" // /* MW 2 */ + 10835 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 66 +.delay_slot + 10836 "00110110" // NOPA; NOPB; NOPS; ADD r0, r27, r2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10837 "10000001" // /* MW 11 */ + 10838 "10101101" // /* MW 10 */ + 10839 "00000000" // /* MW 9 */ + 10840 "00000100" // /* MW 8 */ + 10841 "00000001" // /* MW 7 */ + 10842 "00110110" // /* MW 6 */ + 10843 "00100000" // /* MW 5 */ + 10844 "00000000" // /* MW 4 */ + 10845 "11110000" // /* MW 3 */ + 10846 "00101100" // /* MW 2 */ + 10847 "00000000" // /* MW 1 */ +.label TGT_F_ZL19roundAndPackFloat32iij_368 +.src_ref 10 "softfloat.c" 185 12 first + 10848 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10849 "00000000" // /* MW 3 */ + 10850 "00101000" // /* MW 2 */ + 10851 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 10852 "01000100" // MOVXM r2, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10853 "00000000" // /* MW 5 */ + 10854 "00100000" // /* MW 4 */ + 10855 "00000001" // /* MW 3 */ + 10856 "10000000" // /* MW 2 */ + 10857 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 first +.delay_slot + 10858 "10011000" // ADD r3, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10859 "00100000" // /* MW 3 */ + 10860 "01000110" // /* MW 2 */ + 10861 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 185 68 first +.delay_slot + 10862 "00011000" // EQZ r2, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10863 "11010000" // /* MW 3 */ + 10864 "01000100" // /* MW 2 */ + 10865 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 185 49 +.delay_slot + 10866 "10011000" // SUB r0, r3, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10867 "00100001" // /* MW 3 */ + 10868 "11000000" // /* MW 2 */ + 10869 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19roundAndPackFloat32iij__end + 10871 "00000000" // /* MW 1 */ +.label _ZL28normalizeRoundAndPackFloat32iij +.function normalizeRoundAndPackFloat32 _ZL28normalizeRoundAndPackFloat32iij +.src_ref 10 "softfloat.c" 218 first +.src_ref 10 "softfloat.c" 224 11 first +.tail_call +.function_start + 10880 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10881 "00000000" // /* MW 5 */ + 10882 "00000000" // /* MW 4 */ + 10883 "01111000" // /* MW 3 */ + 10884 "00010100" // /* MW 2 */ + 10885 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 552 53 first +.delay_slot + 10886 "00011000" // CLZ r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10887 "00110000" // /* MW 3 */ + 10888 "11100000" // /* MW 2 */ + 10889 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 223 45 first +.delay_slot + 10890 "00011000" // ADD r16, r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10891 "11111111" // /* MW 3 */ + 10892 "00100001" // /* MW 2 */ + 10893 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 224 44 first +.delay_slot + 10894 "10011000" // SUB r2, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10895 "00000001" // /* MW 3 */ + 10896 "10000101" // /* MW 2 */ + 10897 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 224 62 +.delay_slot + 10898 "10011000" // LSHL r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10899 "00001101" // /* MW 3 */ + 10900 "11000111" // /* MW 2 */ + 10901 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL28normalizeRoundAndPackFloat32iij__end + 10903 "00000000" // /* MW 1 */ +.label int32_to_float32 +.function int32_to_float32 int32_to_float32 +.src_ref 10 "softfloat.c" 477 first +.src_ref 10 "softfloat.c" 481 4 +.src_ref 10 "softfloat.c" 481 11 first +.function_start + 10912 "10000100" // JZ r1, #10992 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10992 delay_slots=5 */ + 10913 "00000001" // /* MW 5 */ + 10914 "00000000" // /* MW 4 */ + 10915 "01111000" // /* MW 3 */ + 10916 "00010101" // /* MW 2 */ + 10917 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10919 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 11 + 10928 "01000100" // MOVXM r16, #-2147483648 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10929 "00000000" // /* MW 5 */ + 10930 "00100000" // /* MW 4 */ + 10931 "00001000" // /* MW 3 */ + 10932 "00000000" // /* MW 2 */ + 10933 "10000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 11 first + 10934 "10011000" // EQ r16, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10935 "00000111" // /* MW 3 */ + 10936 "01100001" // /* MW 2 */ + 10937 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 482 4 + 10938 "10000100" // JNZ r16, #11008 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11008 delay_slots=5 */ + 10939 "00000001" // /* MW 5 */ + 10940 "01000000" // /* MW 4 */ + 10941 "10000000" // /* MW 3 */ + 10942 "00010101" // /* MW 2 */ + 10943 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10949 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 484 11 +.src_ref 10 "softfloat.c" 484 11 first +.tail_call + 10954 "10111010" // MOVA r2, #156; J #10880 /* MW 10 */ /* control_operation: words=10 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */ + 10955 "00100000" // /* MW 9 */ + 10956 "00000000" // /* MW 8 */ + 10957 "00000000" // /* MW 7 */ + 10958 "01010000" // /* MW 6 */ + 10959 "00000101" // /* MW 5 */ + 10960 "00000000" // /* MW 4 */ + 10961 "00000000" // /* MW 3 */ + 10962 "10000010" // /* MW 2 */ + 10963 "00010011" // /* MW 1 */ +.src_ref 10 "softfloat.c" 484 60 +.src_ref 10 "softfloat.c" 484 62 +.delay_slot + 10964 "00011000" // ABS r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00010000" // /* MW 3 */ + 10966 "01000111" // /* MW 2 */ + 10967 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 483 16 +.delay_slot + 10968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10969 "00000001" // /* MW 3 */ + 10970 "00100000" // /* MW 2 */ + 10971 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 483 16 first +.delay_slot + 10972 "10011000" // LT r1, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10973 "00001010" // /* MW 3 */ + 10974 "01000011" // /* MW 2 */ + 10975 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10978 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 10979 "00011100" // /* MW 13 */ + 10980 "00000000" // /* MW 12 */ + 10981 "00000000" // /* MW 11 */ + 10982 "01010111" // /* MW 10 */ + 10983 "00011010" // /* MW 9 */ + 10984 "01000000" // /* MW 8 */ + 10985 "00000000" // /* MW 7 */ + 10986 "00000000" // /* MW 6 */ + 10987 "10110110" // /* MW 5 */ + 10988 "00000010" // /* MW 4 */ + 10989 "11110000" // /* MW 3 */ + 10990 "00101100" // /* MW 2 */ + 10991 "00000000" // /* MW 1 */ +.label TGT_Fint32_to_float32_80 +.src_ref 10 "softfloat.c" 481 18 first +.return_address + 10992 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10993 "00000000" // /* MW 3 */ + 10994 "00101000" // /* MW 2 */ + 10995 "00010000" // /* MW 1 */ +.delay_slot + 10996 "00011000" // MOVX r0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10997 "00000001" // /* MW 3 */ + 10998 "00000000" // /* MW 2 */ + 10999 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11001 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11007 "00000000" // /* MW 1 */ +.label TGT_Fint32_to_float32_96 +.src_ref 10 "softfloat.c" 482 37 first + 11008 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11009 "00000000" // /* MW 3 */ + 11010 "00101000" // /* MW 2 */ + 11011 "00010000" // /* MW 1 */ +.delay_slot + 11012 "01000100" // MOVXM r0, #-822083584 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11013 "00000000" // /* MW 5 */ + 11014 "00100000" // /* MW 4 */ + 11015 "00000000" // /* MW 3 */ + 11016 "00000000" // /* MW 2 */ + 11017 "11001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11019 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label int32_to_float32__end + 11025 "00000000" // /* MW 1 */ +.label _ZL14addFloat32Sigsjji +.function addFloat32Sigs _ZL14addFloat32Sigsjji +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 81 14 +.src_ref 10 "softfloat.c" 734 first +.function_start + 11040 "10111010" // MOVA r18, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11041 "10010000" // /* MW 9 */ + 11042 "11111111" // /* MW 8 */ + 11043 "00001111" // /* MW 7 */ + 11044 "11111110" // /* MW 6 */ + 11045 "00011111" // /* MW 5 */ + 11046 "00000000" // /* MW 4 */ + 11047 "00000000" // /* MW 3 */ + 11048 "00110010" // /* MW 2 */ + 11049 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 first + 11050 "10011000" // LSHL r17, r1, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11051 "00101101" // /* MW 3 */ + 11052 "01100011" // /* MW 2 */ + 11053 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 + 11054 "10011000" // LSHL r4, r2, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11055 "00101101" // /* MW 3 */ + 11056 "10001001" // /* MW 2 */ + 11057 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11058 "00011000" // EXTEND.u8 r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11059 "10010000" // /* MW 3 */ + 11060 "01110110" // /* MW 2 */ + 11061 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11062 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11063 "10010000" // /* MW 3 */ + 11064 "00110010" // /* MW 2 */ + 11065 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 744 19 first +.src_ref 10 "softfloat.c" 747 11 +.src_ref 10 "softfloat.c" 761 22 +.src_ref 10 "softfloat.c" 772 35 +.src_ref 10 "softfloat.c" 788 24 + 11066 "01100100" // SUB r17, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11067 "00000001" // /* MW 5 */ + 11068 "00100000" // /* MW 4 */ + 11069 "00111100" // /* MW 3 */ + 11070 "01110010" // /* MW 2 */ + 11071 "11011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 747 11 first + 11072 "10011000" // LT r4, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11073 "00011010" // /* MW 3 */ + 11074 "00001001" // /* MW 2 */ + 11075 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 747 4 + 11076 "10000100" // JNZ r4, #11248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11248 delay_slots=5 */ + 11077 "00000001" // /* MW 5 */ + 11078 "01000000" // /* MW 4 */ + 11079 "11111000" // /* MW 3 */ + 11080 "00010101" // /* MW 2 */ + 11081 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first +.delay_slot + 11082 "10011000" // AND r19, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11083 "00000100" // /* MW 3 */ + 11084 "01100111" // /* MW 2 */ + 11085 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 745 9 +.src_ref 10 "softfloat.c" 746 9 +.delay_slot + 11086 "01100100" // AND r16, r2, r16; MOV r0, #6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11087 "00011001" // /* MW 5 */ + 11088 "00100000" // /* MW 4 */ + 11089 "10010000" // /* MW 3 */ + 11090 "00100000" // /* MW 2 */ + 11091 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 745 9 first +.delay_slot + 11092 "10011000" // LSHL r19, r19, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11093 "00001101" // /* MW 3 */ + 11094 "11100110" // /* MW 2 */ + 11095 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 746 9 first +.src_ref 10 "softfloat.c" 748 18 +.src_ref 10 "softfloat.c" 762 18 +.delay_slot + 11096 "01100100" // LSHL r16, r16, r0; MOV r20, #255 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11097 "11111101" // /* MW 5 */ + 11098 "00100011" // /* MW 4 */ + 11099 "10111010" // /* MW 3 */ + 11100 "00000001" // /* MW 2 */ + 11101 "10000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 +.src_ref 10 "softfloat.c" 128 31 +.src_ref 10 "softfloat.c" 748 18 first +.delay_slot + 11102 "01100100" // EQ r0, r27, r20; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11103 "01111101" // /* MW 5 */ + 11104 "00100000" // /* MW 4 */ + 11105 "11111001" // /* MW 3 */ + 11106 "00101000" // /* MW 2 */ + 11107 "11011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 761 22 first + 11108 "10011000" // GE r5, r17, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11109 "10001001" // /* MW 3 */ + 11110 "01001011" // /* MW 2 */ + 11111 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 761 9 + 11112 "10000100" // JNZ r5, #11440 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11440 delay_slots=5 */ + 11113 "00000001" // /* MW 5 */ + 11114 "01000000" // /* MW 4 */ + 11115 "01011000" // /* MW 3 */ + 11116 "00010110" // /* MW 2 */ + 11117 "00101000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 11118 "10011000" // LSHL r4, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11119 "00101101" // /* MW 3 */ + 11120 "11001001" // /* MW 2 */ + 11121 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11127 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11129 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 762 18 first + 11130 "10011000" // EQ r20, r25, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11131 "01000111" // /* MW 3 */ + 11132 "01101001" // /* MW 2 */ + 11133 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 762 8 + 11134 "10000100" // JNZ r20, #11392 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11392 delay_slots=5 */ + 11135 "00000001" // /* MW 5 */ + 11136 "01000000" // /* MW 4 */ + 11137 "01000000" // /* MW 3 */ + 11138 "00010110" // /* MW 2 */ + 11139 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11143 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11149 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11150 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11151 "10100000" // /* MW 3 */ + 11152 "01010001" // /* MW 2 */ + 11153 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 787 4 + 11154 "11111000" // MOV r2, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11155 "10100000" // /* MW 3 */ + 11156 "10011100" // /* MW 2 */ + 11157 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 767 12 first + 11158 "00011000" // ADD r0, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11159 "00000111" // /* MW 3 */ + 11160 "01000000" // /* MW 2 */ + 11161 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 766 8 first + 11162 "00011000" // SEL.EQZ r17, r0, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11163 "00010010" // /* MW 3 */ + 11164 "00100011" // /* MW 2 */ + 11165 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 772 35 first + 11166 "10011000" // SUB r17, r24, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11167 "00010001" // /* MW 3 */ + 11168 "00100011" // /* MW 2 */ + 11169 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11170 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */ + 11171 "00000001" // /* MW 5 */ + 11172 "00000000" // /* MW 4 */ + 11173 "00101000" // /* MW 3 */ + 11174 "00010110" // /* MW 2 */ + 11175 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 770 17 +.src_ref 10 "softfloat.c" 785 9 +.delay_slot + 11176 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11177 "00000000" // /* MW 5 */ + 11178 "00100000" // /* MW 4 */ + 11179 "00001010" // /* MW 3 */ + 11180 "00000000" // /* MW 2 */ + 11181 "00100000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 770 17 first +.delay_slot + 11182 "10011000" // OR r3, r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11183 "01000101" // /* MW 3 */ + 11184 "11000111" // /* MW 2 */ + 11185 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 766 8 first +.delay_slot + 11186 "00011000" // SEL.EQZ r19, r19, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11187 "00110010" // /* MW 3 */ + 11188 "11100110" // /* MW 2 */ + 11189 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11191 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11193 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 11194 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11195 "10000001" // /* MW 5 */ + 11196 "00100000" // /* MW 4 */ + 11197 "00110000" // /* MW 3 */ + 11198 "11100010" // /* MW 2 */ + 11199 "11000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11200 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11201 "00100100" // /* MW 3 */ + 11202 "11100101" // /* MW 2 */ + 11203 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11204 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11205 "00101101" // /* MW 3 */ + 11206 "11100101" // /* MW 2 */ + 11207 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11208 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11209 "00001010" // /* MW 3 */ + 11210 "01110110" // /* MW 2 */ + 11211 "00010100" // /* MW 1 */ + 11212 "10000100" // J #11344 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11344 delay_slots=5 */ + 11213 "00000000" // /* MW 5 */ + 11214 "00000000" // /* MW 4 */ + 11215 "00101000" // /* MW 3 */ + 11216 "00010110" // /* MW 2 */ + 11217 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 first +.delay_slot + 11218 "10011000" // LSHL r3, r19, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11219 "00111101" // /* MW 3 */ + 11220 "11000110" // /* MW 2 */ + 11221 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 +.delay_slot + 11222 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11223 "11110000" // /* MW 3 */ + 11224 "10100100" // /* MW 2 */ + 11225 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first +.delay_slot + 11226 "00011000" // NEZ r17, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11227 "11110000" // /* MW 3 */ + 11228 "11100010" // /* MW 2 */ + 11229 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first +.delay_slot + 11230 "10011000" // OR r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11231 "00100101" // /* MW 3 */ + 11232 "11100101" // /* MW 2 */ + 11233 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first +.delay_slot + 11234 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r19, r17, r18, r27; NOPM /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11235 "01100000" // /* MW 13 */ + 11236 "00101011" // /* MW 12 */ + 11237 "00000000" // /* MW 11 */ + 11238 "10101111" // /* MW 10 */ + 11239 "00110100" // /* MW 9 */ + 11240 "00000000" // /* MW 8 */ + 11241 "00100010" // /* MW 7 */ + 11242 "01100111" // /* MW 6 */ + 11243 "00100100" // /* MW 5 */ + 11244 "00000000" // /* MW 4 */ + 11245 "11110000" // /* MW 3 */ + 11246 "00101100" // /* MW 2 */ + 11247 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_208 +.src_ref 10 "softfloat.c" 748 8 first + 11248 "10000100" // JNZ r0, #11504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11504 delay_slots=5 */ + 11249 "00000001" // /* MW 5 */ + 11250 "01000000" // /* MW 4 */ + 11251 "01111000" // /* MW 3 */ + 11252 "00010110" // /* MW 2 */ + 11253 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 756 17 +.src_ref 10 "softfloat.c" 785 9 +.delay_slot + 11254 "01000100" // MOVXM r20, #536870912 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11255 "00000000" // /* MW 5 */ + 11256 "00100000" // /* MW 4 */ + 11257 "00001010" // /* MW 3 */ + 11258 "00000000" // /* MW 2 */ + 11259 "00100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11261 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11263 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11265 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11267 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11268 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11269 "10100000" // /* MW 3 */ + 11270 "01010001" // /* MW 2 */ + 11271 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 753 12 first +.src_ref 10 "softfloat.c" 787 4 + 11272 "11100100" // ADD r3, r17, #-1; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11273 "01000001" // /* MW 5 */ + 11274 "00111011" // /* MW 4 */ + 11275 "11100001" // /* MW 3 */ + 11276 "11111111" // /* MW 2 */ + 11277 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 +.src_ref 10 "softfloat.c" 752 18 + 11278 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "10100000" // /* MW 3 */ + 11280 "11011100" // /* MW 2 */ + 11281 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 first +.src_ref 10 "softfloat.c" 752 18 first + 11282 "00011000" // SEL.EQZ r17, r3, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11283 "00010010" // /* MW 3 */ + 11284 "11100011" // /* MW 2 */ + 11285 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11286 "10000100" // JZ r17, #11344 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11344 delay_slots=5 */ + 11287 "00000001" // /* MW 5 */ + 11288 "00000000" // /* MW 4 */ + 11289 "00101000" // /* MW 3 */ + 11290 "00010110" // /* MW 2 */ + 11291 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 756 17 first +.delay_slot + 11292 "10011000" // OR r0, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11293 "00000101" // /* MW 3 */ + 11294 "00000001" // /* MW 2 */ + 11295 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 752 8 first +.src_ref 10 "softfloat.c" 752 18 first +.delay_slot + 11296 "00011000" // SEL.EQZ r16, r16, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11297 "00000010" // /* MW 3 */ + 11298 "00100000" // /* MW 2 */ + 11299 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11303 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11305 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first + 11306 "01100100" // SUB r3, r24, r17; MOV r0, #32 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11307 "10000001" // /* MW 5 */ + 11308 "00100000" // /* MW 4 */ + 11309 "00110000" // /* MW 3 */ + 11310 "11100010" // /* MW 2 */ + 11311 "11000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11312 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "00100100" // /* MW 3 */ + 11314 "11100101" // /* MW 2 */ + 11315 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11316 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00101101" // /* MW 3 */ + 11318 "00100101" // /* MW 2 */ + 11319 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 11320 "10011000" // LSHL r3, r16, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "00111101" // /* MW 3 */ + 11322 "00000110" // /* MW 2 */ + 11323 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11324 "10011000" // LT r27, r17, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11325 "00001010" // /* MW 3 */ + 11326 "01110110" // /* MW 2 */ + 11327 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 11328 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11329 "11110000" // /* MW 3 */ + 11330 "10100100" // /* MW 2 */ + 11331 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11332 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11333 "11110000" // /* MW 3 */ + 11334 "00100000" // /* MW 2 */ + 11335 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 11336 "10011000" // OR r17, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11337 "00100101" // /* MW 3 */ + 11338 "11100011" // /* MW 2 */ + 11339 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 11340 "00011000" // SEL.EQZ r16, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11341 "00010010" // /* MW 3 */ + 11342 "00100001" // /* MW 2 */ + 11343 "00010100" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_304 +.src_ref 10 "softfloat.c" 785 9 first +.src_ref 10 "softfloat.c" 786 26 +.src_ref 10 "softfloat.c" 787 4 first + 11344 "10111010" // MOVA r18, #1; OR r19, r19, r20; ADD.NC r17, r2, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11345 "11001000" // /* MW 9 */ + 11346 "10111111" // /* MW 8 */ + 11347 "00101000" // /* MW 7 */ + 11348 "00101110" // /* MW 6 */ + 11349 "00111010" // /* MW 5 */ + 11350 "00100111" // /* MW 4 */ + 11351 "00000000" // /* MW 3 */ + 11352 "00110010" // /* MW 2 */ + 11353 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 786 18 first +.src_ref 10 "softfloat.c" 790 8 first + 11354 "00100100" // ADD r19, r19, r16; ADD.NC r16, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11355 "00000001" // /* MW 5 */ + 11356 "00110001" // /* MW 4 */ + 11357 "00011000" // /* MW 3 */ + 11358 "11100000" // /* MW 2 */ + 11359 "10011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 786 26 + 11360 "10011000" // LSHL r18, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11361 "00101101" // /* MW 3 */ + 11362 "11100101" // /* MW 2 */ + 11363 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 24 first + 11364 "10011000" // LT r27, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11365 "10001010" // /* MW 3 */ + 11366 "10110111" // /* MW 2 */ + 11367 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 4 + 11368 "00011000" // SEL.EQZ r2, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11369 "00000010" // /* MW 3 */ + 11370 "01000101" // /* MW 2 */ + 11371 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 788 4 + 11372 "00011000" // SEL.EQZ r3, r18, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11373 "00110010" // /* MW 3 */ + 11374 "10000111" // /* MW 2 */ + 11375 "00010100" // /* MW 1 */ +.label __ll1__ZL14addFloat32Sigsjji +.src_ref 10 "softfloat.c" 793 11 first +.tail_call + 11376 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 11377 "00000000" // /* MW 5 */ + 11378 "00000000" // /* MW 4 */ + 11379 "01111000" // /* MW 3 */ + 11380 "00010100" // /* MW 2 */ + 11381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11385 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11391 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_352 +.src_ref 10 "softfloat.c" 763 12 first +.return_address + 11392 "10000100" // JNZ r16, #11536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11536 delay_slots=5 */ + 11393 "00000001" // /* MW 5 */ + 11394 "01000000" // /* MW 4 */ + 11395 "10001000" // /* MW 3 */ + 11396 "00010110" // /* MW 2 */ + 11397 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11399 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11407 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 764 12 first + 11408 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11409 "00000000" // /* MW 3 */ + 11410 "00101000" // /* MW 2 */ + 11411 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 11412 "01000100" // MOVXM r16, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11413 "00000000" // /* MW 5 */ + 11414 "00100000" // /* MW 4 */ + 11415 "00001000" // /* MW 3 */ + 11416 "10000000" // /* MW 2 */ + 11417 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 first +.delay_slot + 11418 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11419 "00000000" // /* MW 3 */ + 11420 "00000001" // /* MW 2 */ + 11421 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11423 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11424 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11426 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11427 "00011100" // /* MW 13 */ + 11428 "00000000" // /* MW 12 */ + 11429 "00000000" // /* MW 11 */ + 11430 "01010111" // /* MW 10 */ + 11431 "00011010" // /* MW 9 */ + 11432 "01000000" // /* MW 8 */ + 11433 "00000000" // /* MW 7 */ + 11434 "00000000" // /* MW 6 */ + 11435 "10110110" // /* MW 5 */ + 11436 "00000010" // /* MW 4 */ + 11437 "11110000" // /* MW 3 */ + 11438 "00101100" // /* MW 2 */ + 11439 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_400 +.src_ref 10 "softfloat.c" 776 8 first + 11440 "10000100" // JNZ r0, #11552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11552 delay_slots=5 */ + 11441 "00000001" // /* MW 5 */ + 11442 "01000000" // /* MW 4 */ + 11443 "10010000" // /* MW 3 */ + 11444 "00010110" // /* MW 2 */ + 11445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11447 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11449 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11455 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 8 first + 11456 "10000100" // JZ r27, #11600 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11600 delay_slots=5 */ + 11457 "00000001" // /* MW 5 */ + 11458 "00000000" // /* MW 4 */ + 11459 "10101000" // /* MW 3 */ + 11460 "00010110" // /* MW 2 */ + 11461 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11471 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 793 11 + 11472 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11473 "10100000" // /* MW 3 */ + 11474 "01010001" // /* MW 2 */ + 11475 "00011000" // /* MW 1 */ + 11476 "10000100" // J #11376 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11376 delay_slots=5 */ + 11477 "00000000" // /* MW 5 */ + 11478 "00000000" // /* MW 4 */ + 11479 "00111000" // /* MW 3 */ + 11480 "00010110" // /* MW 2 */ + 11481 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 26 +.delay_slot + 11482 "01000100" // MOVXM r17, #1073741824 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11483 "00000000" // /* MW 5 */ + 11484 "10100000" // /* MW 4 */ + 11485 "00001000" // /* MW 3 */ + 11486 "00000000" // /* MW 2 */ + 11487 "01000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 26 first +.src_ref 10 "softfloat.c" 793 11 +.delay_slot + 11488 "11100100" // ADD r17, r19, r17; MOV r2, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11489 "01000001" // /* MW 5 */ + 11490 "00111011" // /* MW 4 */ + 11491 "00010001" // /* MW 3 */ + 11492 "01100010" // /* MW 2 */ + 11493 "10011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 781 33 +.delay_slot + 11494 "10011000" // ADD r3, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11495 "00000000" // /* MW 3 */ + 11496 "01000111" // /* MW 2 */ + 11497 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11500 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11501 "01100111" // /* MW 3 */ + 11502 "00000001" // /* MW 2 */ + 11503 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_464 +.src_ref 10 "softfloat.c" 749 12 first + 11504 "10000100" // JNZ r19, #11632 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11632 delay_slots=5 */ + 11505 "00000001" // /* MW 5 */ + 11506 "01000000" // /* MW 4 */ + 11507 "10111000" // /* MW 3 */ + 11508 "00010110" // /* MW 2 */ + 11509 "10011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11517 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11519 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 750 12 first + 11520 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11521 "00000000" // /* MW 3 */ + 11522 "00101000" // /* MW 2 */ + 11523 "00010000" // /* MW 1 */ +.delay_slot + 11524 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "10100000" // /* MW 3 */ + 11526 "00010000" // /* MW 2 */ + 11527 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11535 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_496 +.src_ref 10 "softfloat.c" 763 31 first +.tail_call + 11536 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11537 "00000000" // /* MW 5 */ + 11538 "00000000" // /* MW 4 */ + 11539 "01000000" // /* MW 3 */ + 11540 "00010100" // /* MW 2 */ + 11541 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11543 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11545 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11547 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11549 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11551 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_512 +.src_ref 10 "softfloat.c" 777 22 first +.return_address + 11552 "10011000" // OR r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11553 "00000101" // /* MW 3 */ + 11554 "11100001" // /* MW 2 */ + 11555 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 777 12 + 11556 "10000100" // JNZ r16, #11648 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11648 delay_slots=5 */ + 11557 "00000001" // /* MW 5 */ + 11558 "01000000" // /* MW 4 */ + 11559 "11000000" // /* MW 3 */ + 11560 "00010110" // /* MW 2 */ + 11561 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11567 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11571 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 778 12 first + 11572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11573 "00000000" // /* MW 3 */ + 11574 "00101000" // /* MW 2 */ + 11575 "00010000" // /* MW 1 */ +.delay_slot + 11576 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11577 "10100000" // /* MW 3 */ + 11578 "00010000" // /* MW 2 */ + 11579 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11583 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11585 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11586 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11587 "00011100" // /* MW 13 */ + 11588 "00000000" // /* MW 12 */ + 11589 "00000000" // /* MW 11 */ + 11590 "01010111" // /* MW 10 */ + 11591 "00011010" // /* MW 9 */ + 11592 "01000000" // /* MW 8 */ + 11593 "00000000" // /* MW 7 */ + 11594 "00000000" // /* MW 6 */ + 11595 "10110110" // /* MW 5 */ + 11596 "00000010" // /* MW 4 */ + 11597 "11110000" // /* MW 3 */ + 11598 "00101100" // /* MW 2 */ + 11599 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_560 +.src_ref 10 "softfloat.c" 780 25 first +.src_ref 10 "softfloat.c" 780 62 first + 11600 "10100100" // RET lr; ADD.NC r16, r19, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11601 "10000010" // /* MW 5 */ + 11602 "00110011" // /* MW 4 */ + 11603 "00001000" // /* MW 3 */ + 11604 "00000000" // /* MW 2 */ + 11605 "00000101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 70 +.delay_slot + 11606 "00011000" // MOVX r17, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11607 "11101001" // /* MW 3 */ + 11608 "11100010" // /* MW 2 */ + 11609 "00010111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 780 70 +.delay_slot + 11610 "10011000" // LSHL r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11611 "00011101" // /* MW 3 */ + 11612 "00100001" // /* MW 2 */ + 11613 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 66 first +.delay_slot + 11614 "10011000" // ADD r0, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11615 "00000000" // /* MW 3 */ + 11616 "00000001" // /* MW 2 */ + 11617 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11620 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11621 "10000001" // /* MW 11 */ + 11622 "10101101" // /* MW 10 */ + 11623 "00000000" // /* MW 9 */ + 11624 "00000000" // /* MW 8 */ + 11625 "00000000" // /* MW 7 */ + 11626 "00000000" // /* MW 6 */ + 11627 "00100000" // /* MW 5 */ + 11628 "00000000" // /* MW 4 */ + 11629 "11110000" // /* MW 3 */ + 11630 "00101100" // /* MW 2 */ + 11631 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_592 +.src_ref 10 "softfloat.c" 749 31 first +.tail_call + 11632 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11633 "00000000" // /* MW 5 */ + 11634 "00000000" // /* MW 4 */ + 11635 "01000000" // /* MW 3 */ + 11636 "00010100" // /* MW 2 */ + 11637 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11639 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11641 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11643 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11645 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11647 "00000000" // /* MW 1 */ +.label TGT_F_ZL14addFloat32Sigsjji_608 +.src_ref 10 "softfloat.c" 777 38 first +.tail_call +.return_address + 11648 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 11649 "00000000" // /* MW 5 */ + 11650 "00000000" // /* MW 4 */ + 11651 "01000000" // /* MW 3 */ + 11652 "00010100" // /* MW 2 */ + 11653 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL14addFloat32Sigsjji__end + 11663 "00000000" // /* MW 1 */ +.label _ZL14subFloat32Sigsjji +.function subFloat32Sigs _ZL14subFloat32Sigsjji +.src_ref 10 "softfloat.c" 70 13 +.src_ref 10 "softfloat.c" 81 14 +.src_ref 10 "softfloat.c" 805 first +.function_start + 11664 "10111010" // MOVA r17, #-23; MOVXM r16, #8388607 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11665 "10010000" // /* MW 9 */ + 11666 "11111111" // /* MW 8 */ + 11667 "00001111" // /* MW 7 */ + 11668 "11111110" // /* MW 6 */ + 11669 "00011111" // /* MW 5 */ + 11670 "00000000" // /* MW 4 */ + 11671 "00000000" // /* MW 3 */ + 11672 "00110001" // /* MW 2 */ + 11673 "11111101" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 first + 11674 "10011000" // LSHL r4, r2, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11675 "00011101" // /* MW 3 */ + 11676 "10001001" // /* MW 2 */ + 11677 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 14 + 11678 "10011000" // LSHL r18, r1, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11679 "00011101" // /* MW 3 */ + 11680 "01100101" // /* MW 2 */ + 11681 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first + 11682 "10011000" // AND r20, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11683 "00000100" // /* MW 3 */ + 11684 "01101001" // /* MW 2 */ + 11685 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 first + 11686 "00011000" // EXTEND.u8 r25, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11687 "10010000" // /* MW 3 */ + 11688 "00110010" // /* MW 2 */ + 11689 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 81 21 + 11690 "00011000" // EXTEND.u8 r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11691 "10010000" // /* MW 3 */ + 11692 "10110110" // /* MW 2 */ + 11693 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 70 13 first +.src_ref 10 "softfloat.c" 816 9 +.src_ref 10 "softfloat.c" 817 9 + 11694 "01100100" // AND r16, r2, r16; MOV r19, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11695 "00011101" // /* MW 5 */ + 11696 "10100000" // /* MW 4 */ + 11697 "10011001" // /* MW 3 */ + 11698 "00100000" // /* MW 2 */ + 11699 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 816 9 first + 11700 "10011000" // LSHL r17, r20, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11701 "00111101" // /* MW 3 */ + 11702 "00100011" // /* MW 2 */ + 11703 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 38 +.src_ref 10 "softfloat.c" 815 19 first +.src_ref 10 "softfloat.c" 818 11 +.src_ref 10 "softfloat.c" 819 17 +.src_ref 10 "softfloat.c" 843 31 + 11704 "01100100" // SUB r18, r27, r25; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11705 "00000001" // /* MW 5 */ + 11706 "00100000" // /* MW 4 */ + 11707 "00111100" // /* MW 3 */ + 11708 "10110010" // /* MW 2 */ + 11709 "11011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 818 11 first + 11710 "10011000" // LT r5, r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11711 "00101010" // /* MW 3 */ + 11712 "00001011" // /* MW 2 */ + 11713 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 818 4 + 11714 "10000100" // JNZ r5, #11904 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11904 delay_slots=5 */ + 11715 "00000001" // /* MW 5 */ + 11716 "01000000" // /* MW 4 */ + 11717 "01000000" // /* MW 3 */ + 11718 "00010111" // /* MW 2 */ + 11719 "00101000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 817 9 first +.delay_slot + 11720 "10011000" // LSHL r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11721 "00111101" // /* MW 3 */ + 11722 "00100001" // /* MW 2 */ + 11723 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 14 +.src_ref 10 "softfloat.c" 851 14 +.src_ref 10 "softfloat.c" 859 13 +.src_ref 10 "softfloat.c" 862 9 +.delay_slot + 11724 "10111010" // MOVA r0, #255; MOVXM r4, #1073741824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11725 "00010000" // /* MW 9 */ + 11726 "00000000" // /* MW 8 */ + 11727 "10001000" // /* MW 7 */ + 11728 "00000000" // /* MW 6 */ + 11729 "00000000" // /* MW 5 */ + 11730 "00010000" // /* MW 4 */ + 11731 "00000000" // /* MW 3 */ + 11732 "11100000" // /* MW 2 */ + 11733 "00011111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 851 14 first +.delay_slot + 11734 "10011000" // EQ r20, r27, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "00000111" // /* MW 3 */ + 11736 "11101000" // /* MW 2 */ + 11737 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 862 9 first +.delay_slot + 11738 "10011000" // OR r19, r17, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11739 "01000101" // /* MW 3 */ + 11740 "01100110" // /* MW 2 */ + 11741 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 859 13 first +.delay_slot + 11742 "10011000" // OR r4, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11743 "00000101" // /* MW 3 */ + 11744 "00001001" // /* MW 2 */ + 11745 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 819 17 first + 11746 "10011000" // GE r6, r18, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11747 "10001001" // /* MW 3 */ + 11748 "10001101" // /* MW 2 */ + 11749 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 819 4 + 11750 "10000100" // JNZ r6, #12064 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12064 delay_slots=5 */ + 11751 "00000001" // /* MW 5 */ + 11752 "01000000" // /* MW 4 */ + 11753 "10010000" // /* MW 3 */ + 11754 "00010111" // /* MW 2 */ + 11755 "00110000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.src_ref 10 "softfloat.c" 835 34 +.delay_slot + 11756 "00011000" // MOVX r5, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11757 "00000101" // /* MW 3 */ + 11758 "00001010" // /* MW 2 */ + 11759 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 835 34 first +.delay_slot + 11760 "10011000" // XOR r7, r3, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11761 "01010110" // /* MW 3 */ + 11762 "11001110" // /* MW 2 */ + 11763 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11767 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11769 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 14 first + 11770 "10011000" // EQ r20, r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11771 "00000111" // /* MW 3 */ + 11772 "01101000" // /* MW 2 */ + 11773 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 833 4 + 11774 "10000100" // JNZ r20, #12176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12176 delay_slots=5 */ + 11775 "00000001" // /* MW 5 */ + 11776 "01000000" // /* MW 4 */ + 11777 "11001000" // /* MW 3 */ + 11778 "00010111" // /* MW 2 */ + 11779 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11781 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 + 11790 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11791 "10100000" // /* MW 3 */ + 11792 "01010011" // /* MW 2 */ + 11793 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 838 8 first + 11794 "00011000" // ADD r16, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11795 "00000111" // /* MW 3 */ + 11796 "10100000" // /* MW 2 */ + 11797 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 837 4 first + 11798 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11799 "00100010" // /* MW 3 */ + 11800 "00100001" // /* MW 2 */ + 11801 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 843 31 first + 11802 "10011000" // SUB r16, r24, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11803 "00000001" // /* MW 3 */ + 11804 "00100001" // /* MW 2 */ + 11805 "00010110" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11806 "10000100" // JZ r16, #11872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11872 delay_slots=5 */ + 11807 "00000001" // /* MW 5 */ + 11808 "00000000" // /* MW 4 */ + 11809 "00110000" // /* MW 3 */ + 11810 "00010111" // /* MW 2 */ + 11811 "10000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 837 4 first +.delay_slot + 11812 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00110010" // /* MW 3 */ + 11814 "01100011" // /* MW 2 */ + 11815 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11817 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11819 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11821 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11823 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first +.src_ref 10 "softfloat-macros" 50 48 + 11824 "10111010" // MOVA r20, #32; SUB r3, r24, r16; MOV r18, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11825 "01011000" // /* MW 9 */ + 11826 "00011111" // /* MW 8 */ + 11827 "01001000" // /* MW 7 */ + 11828 "00001110" // /* MW 6 */ + 11829 "00111000" // /* MW 5 */ + 11830 "00110000" // /* MW 4 */ + 11831 "00000000" // /* MW 3 */ + 11832 "00010100" // /* MW 2 */ + 11833 "00000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11834 "10011000" // AND r18, r3, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11835 "00100100" // /* MW 3 */ + 11836 "11100101" // /* MW 2 */ + 11837 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11838 "10011000" // LSHL r18, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11839 "00101101" // /* MW 3 */ + 11840 "01100101" // /* MW 2 */ + 11841 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11842 "00011000" // NEZ r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11843 "11110000" // /* MW 3 */ + 11844 "01100110" // /* MW 2 */ + 11845 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11846 "10011000" // LT r27, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11847 "01001010" // /* MW 3 */ + 11848 "00110111" // /* MW 2 */ + 11849 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 first + 11850 "10011000" // LSHL r17, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11851 "00111101" // /* MW 3 */ + 11852 "01100010" // /* MW 2 */ + 11853 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 + 11854 "00011000" // NEZ r18, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11855 "11110000" // /* MW 3 */ + 11856 "10100100" // /* MW 2 */ + 11857 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 + 11858 "10011000" // OR r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11859 "00100101" // /* MW 3 */ + 11860 "01100001" // /* MW 2 */ + 11861 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 11862 "01111010" // NOPA; NOPS; SEL.EQZ r17, r19, r16, r27 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11863 "00000010" // /* MW 9 */ + 11864 "11100011" // /* MW 8 */ + 11865 "00000100" // /* MW 7 */ + 11866 "00000000" // /* MW 6 */ + 11867 "01011011" // /* MW 5 */ + 11868 "00000001" // /* MW 4 */ + 11869 "11110000" // /* MW 3 */ + 11870 "00101100" // /* MW 2 */ + 11871 "00000000" // /* MW 1 */ +.label __ll2__ZL14subFloat32Sigsjji + 11872 "10000100" // J #12032 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */ + 11873 "00000000" // /* MW 5 */ + 11874 "00000000" // /* MW 4 */ + 11875 "10000000" // /* MW 3 */ + 11876 "00010111" // /* MW 2 */ + 11877 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 846 16 first +.delay_slot + 11878 "10011000" // SUB r3, r4, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11879 "00010001" // /* MW 3 */ + 11880 "00000111" // /* MW 2 */ + 11881 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11889 "00000000" // /* MW 15 */ + 11890 "00000000" // /* MW 14 */ + 11891 "01111000" // /* MW 13 */ + 11892 "10100101" // /* MW 12 */ + 11893 "00000001" // /* MW 11 */ + 11894 "00000000" // /* MW 10 */ + 11895 "00000000" // /* MW 9 */ + 11896 "00000000" // /* MW 8 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00000001" // /* MW 6 */ + 11899 "00100000" // /* MW 5 */ + 11900 "00000000" // /* MW 4 */ + 11901 "11110000" // /* MW 3 */ + 11902 "00101100" // /* MW 2 */ + 11903 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_240 +.src_ref 10 "softfloat.c" 851 4 first + 11904 "10000100" // JNZ r20, #12224 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12224 delay_slots=5 */ + 11905 "00000001" // /* MW 5 */ + 11906 "01000000" // /* MW 4 */ + 11907 "11100000" // /* MW 3 */ + 11908 "00010111" // /* MW 2 */ + 11909 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11911 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11913 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11919 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 + 11920 "11111000" // MOV r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11921 "10100000" // /* MW 3 */ + 11922 "00011101" // /* MW 2 */ + 11923 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 + 11924 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "10100000" // /* MW 3 */ + 11926 "01010001" // /* MW 2 */ + 11927 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 +.src_ref 10 "softfloat.c" 855 14 + 11928 "11111000" // MOV r27, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "10100000" // /* MW 3 */ + 11930 "11011100" // /* MW 2 */ + 11931 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 + 11932 "11111000" // MOV r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11933 "00100000" // /* MW 3 */ + 11934 "01010000" // /* MW 2 */ + 11935 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 856 8 first + 11936 "00011000" // ADD r17, r18, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11937 "11111111" // /* MW 3 */ + 11938 "10100011" // /* MW 2 */ + 11939 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 first +.src_ref 10 "softfloat.c" 855 14 first + 11940 "00011000" // SEL.EQZ r17, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11941 "00100010" // /* MW 3 */ + 11942 "01100011" // /* MW 2 */ + 11943 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 46 4 first +.src_ref 10 "softfloat-macros" 46 15 first + 11944 "10000100" // JZ r17, #12016 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12016 delay_slots=5 */ + 11945 "00000001" // /* MW 5 */ + 11946 "00000000" // /* MW 4 */ + 11947 "01111000" // /* MW 3 */ + 11948 "00010111" // /* MW 2 */ + 11949 "10001000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 855 4 first +.src_ref 10 "softfloat.c" 855 14 first +.delay_slot + 11950 "00011000" // SEL.EQZ r16, r16, r4, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11951 "01000010" // /* MW 3 */ + 11952 "00100000" // /* MW 2 */ + 11953 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11954 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11955 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11956 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11957 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11959 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11961 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 +.src_ref 10 "softfloat-macros" 50 38 first +.src_ref 10 "softfloat-macros" 50 48 + 11962 "10111010" // MOVA r3, #32; SUB r18, r24, r17; MOV r20, #31 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11963 "01011000" // /* MW 9 */ + 11964 "00011111" // /* MW 8 */ + 11965 "10001000" // /* MW 7 */ + 11966 "10001110" // /* MW 6 */ + 11967 "00101000" // /* MW 5 */ + 11968 "00110001" // /* MW 4 */ + 11969 "00000000" // /* MW 3 */ + 11970 "00000011" // /* MW 2 */ + 11971 "00000100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 48 + 11972 "10011000" // AND r20, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11973 "01000100" // /* MW 3 */ + 11974 "10101001" // /* MW 2 */ + 11975 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 32 + 11976 "10011000" // LSHL r20, r16, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11977 "01001101" // /* MW 3 */ + 11978 "00101001" // /* MW 2 */ + 11979 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 15 + 11980 "10011000" // LSHL r18, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11981 "00101101" // /* MW 3 */ + 11982 "00100101" // /* MW 2 */ + 11983 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 20 first + 11984 "10011000" // LT r27, r17, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11985 "00111010" // /* MW 3 */ + 11986 "01110110" // /* MW 2 */ + 11987 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 57 first + 11988 "00011000" // NEZ r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11989 "11110000" // /* MW 3 */ + 11990 "00101000" // /* MW 2 */ + 11991 "00010101" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 53 16 first + 11992 "00011000" // NEZ r16, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11993 "11110000" // /* MW 3 */ + 11994 "00100000" // /* MW 2 */ + 11995 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 50 25 first + 11996 "10011000" // OR r17, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11997 "01000101" // /* MW 3 */ + 11998 "10100011" // /* MW 2 */ + 11999 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat-macros" 49 9 first + 12000 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r16, r16, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12001 "00000000" // /* MW 15 */ + 12002 "00000000" // /* MW 14 */ + 12003 "01111000" // /* MW 13 */ + 12004 "10100101" // /* MW 12 */ + 12005 "00000001" // /* MW 11 */ + 12006 "10010000" // /* MW 10 */ + 12007 "00001000" // /* MW 9 */ + 12008 "00100001" // /* MW 8 */ + 12009 "01011011" // /* MW 7 */ + 12010 "00000001" // /* MW 6 */ + 12011 "00100000" // /* MW 5 */ + 12012 "00000000" // /* MW 4 */ + 12013 "11110000" // /* MW 3 */ + 12014 "00101100" // /* MW 2 */ + 12015 "00000000" // /* MW 1 */ +.label __ll1__ZL14subFloat32Sigsjji +.src_ref 10 "softfloat.c" 864 16 first + 12016 "11100001" // NOPA; NOPB; NOPS; SUB r3, r19, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12017 "00000000" // /* MW 15 */ + 12018 "00000000" // /* MW 14 */ + 12019 "01111000" // /* MW 13 */ + 12020 "10100101" // /* MW 12 */ + 12021 "00000001" // /* MW 11 */ + 12022 "00001100" // /* MW 10 */ + 12023 "00111000" // /* MW 9 */ + 12024 "00100110" // /* MW 8 */ + 12025 "01011011" // /* MW 7 */ + 12026 "00000001" // /* MW 6 */ + 12027 "00100000" // /* MW 5 */ + 12028 "00000000" // /* MW 4 */ + 12029 "11110000" // /* MW 3 */ + 12030 "00101100" // /* MW 2 */ + 12031 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_368 +.src_ref 10 "softfloat.c" 868 11 first +.tail_call + 12032 "10000100" // J #10880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10880 delay_slots=5 */ + 12033 "00000000" // /* MW 5 */ + 12034 "00000000" // /* MW 4 */ + 12035 "01000000" // /* MW 3 */ + 12036 "00010101" // /* MW 2 */ + 12037 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 first +.delay_slot + 12038 "00011000" // ADD r2, r25, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "11111111" // /* MW 3 */ + 12040 "01000101" // /* MW 2 */ + 12041 "00010110" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12043 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12045 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12048 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12049 "00000000" // /* MW 15 */ + 12050 "00000000" // /* MW 14 */ + 12051 "01111000" // /* MW 13 */ + 12052 "10100101" // /* MW 12 */ + 12053 "00000001" // /* MW 11 */ + 12054 "00000000" // /* MW 10 */ + 12055 "00000000" // /* MW 9 */ + 12056 "00000000" // /* MW 8 */ + 12057 "01011011" // /* MW 7 */ + 12058 "00000001" // /* MW 6 */ + 12059 "00100000" // /* MW 5 */ + 12060 "00000000" // /* MW 4 */ + 12061 "11110000" // /* MW 3 */ + 12062 "00101100" // /* MW 2 */ + 12063 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_400 +.src_ref 10 "softfloat.c" 820 4 first +.return_address + 12064 "10000100" // JNZ r20, #12256 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12256 delay_slots=5 */ + 12065 "00000001" // /* MW 5 */ + 12066 "01000000" // /* MW 4 */ + 12067 "11110000" // /* MW 3 */ + 12068 "00010111" // /* MW 2 */ + 12069 "10100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12071 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12073 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12075 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12077 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12079 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 829 14 first + 12080 "10011000" // LTU r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12081 "00011100" // /* MW 3 */ + 12082 "00100111" // /* MW 2 */ + 12083 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 829 4 + 12084 "10000100" // JNZ r19, #12304 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12304 delay_slots=5 */ + 12085 "00000001" // /* MW 5 */ + 12086 "01000000" // /* MW 4 */ + 12087 "00001000" // /* MW 3 */ + 12088 "00011000" // /* MW 2 */ + 12089 "10011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 first +.delay_slot + 12090 "00011000" // SEL.EQZ r24, r5, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12091 "10010010" // /* MW 3 */ + 12092 "01110001" // /* MW 2 */ + 12093 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.delay_slot + 12094 "11111000" // MOV r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12095 "10100000" // /* MW 3 */ + 12096 "10011101" // /* MW 2 */ + 12097 "00011100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 825 4 +.delay_slot + 12098 "00011000" // SEL.EQZ r25, r5, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12099 "00100010" // /* MW 3 */ + 12100 "01110011" // /* MW 2 */ + 12101 "00010001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12105 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 830 14 first + 12106 "10011000" // LTU r18, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00001100" // /* MW 3 */ + 12108 "01100101" // /* MW 2 */ + 12109 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 830 4 + 12110 "10000100" // JNZ r18, #12336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12336 delay_slots=5 */ + 12111 "00000001" // /* MW 5 */ + 12112 "01000000" // /* MW 4 */ + 12113 "00011000" // /* MW 3 */ + 12114 "00011000" // /* MW 2 */ + 12115 "10010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12125 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 + 12126 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12127 "01111101" // /* MW 3 */ + 12128 "00100000" // /* MW 2 */ + 12129 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 24 + 12130 "01000100" // MOVXM p0, #509172 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12131 "11101000" // /* MW 5 */ + 12132 "11001001" // /* MW 4 */ + 12133 "11000000" // /* MW 3 */ + 12134 "00000111" // /* MW 2 */ + 12135 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 24 first + 12136 "10011000" // LDA r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "01010110" // /* MW 3 */ + 12138 "00000110" // /* MW 2 */ + 12139 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12141 "00000000" // /* MW 1 */ +.swstall __RAW__R_1948 + 12142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12143 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 4 + 12144 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12145 "00000000" // /* MW 3 */ + 12146 "00101000" // /* MW 2 */ + 12147 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 44 +.delay_slot + 12148 "00011000" // MOVX r17, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12149 "00001101" // /* MW 3 */ + 12150 "00100010" // /* MW 2 */ + 12151 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12153 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12155 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 831 44 +.delay_slot + 12156 "10011000" // EQ r17, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12157 "00100111" // /* MW 3 */ + 12158 "01100011" // /* MW 2 */ + 12159 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 12160 "11100001" // NOPA; NOPB; NOPS; LSHL r0, r17, r16; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12161 "00000000" // /* MW 15 */ + 12162 "00000000" // /* MW 14 */ + 12163 "01111000" // /* MW 13 */ + 12164 "10100101" // /* MW 12 */ + 12165 "00000001" // /* MW 11 */ + 12166 "01101100" // /* MW 10 */ + 12167 "00001000" // /* MW 9 */ + 12168 "00100010" // /* MW 8 */ + 12169 "01011011" // /* MW 7 */ + 12170 "00000001" // /* MW 6 */ + 12171 "00100000" // /* MW 5 */ + 12172 "00000000" // /* MW 4 */ + 12173 "11110000" // /* MW 3 */ + 12174 "00101100" // /* MW 2 */ + 12175 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_512 +.src_ref 10 "softfloat.c" 834 8 first + 12176 "10000100" // JNZ r16, #12368 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12368 delay_slots=5 */ + 12177 "00000001" // /* MW 5 */ + 12178 "01000000" // /* MW 4 */ + 12179 "00101000" // /* MW 3 */ + 12180 "00011000" // /* MW 2 */ + 12181 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12191 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 + 12192 "00011000" // MOVX r16, #31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12193 "01111101" // /* MW 3 */ + 12194 "00100000" // /* MW 2 */ + 12195 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 835 8 first + 12196 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12197 "00000000" // /* MW 3 */ + 12198 "00101000" // /* MW 2 */ + 12199 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 31 first +.delay_slot + 12200 "10011000" // LSHL r16, r7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12201 "00001101" // /* MW 3 */ + 12202 "11100001" // /* MW 2 */ + 12203 "00010001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 12204 "01000100" // MOVXM r17, #2139095040 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12205 "00000000" // /* MW 5 */ + 12206 "10100000" // /* MW 4 */ + 12207 "00001000" // /* MW 3 */ + 12208 "10000000" // /* MW 2 */ + 12209 "01111111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 128 38 +.delay_slot + 12210 "10011000" // ADD r0, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12211 "00000000" // /* MW 3 */ + 12212 "01000001" // /* MW 2 */ + 12213 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12216 "00100010" // NOPA; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12217 "00011100" // /* MW 7 */ + 12218 "00000000" // /* MW 6 */ + 12219 "00000000" // /* MW 5 */ + 12220 "00000100" // /* MW 4 */ + 12221 "11110000" // /* MW 3 */ + 12222 "00101100" // /* MW 2 */ + 12223 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_560 +.src_ref 10 "softfloat.c" 852 8 first + 12224 "10000100" // JNZ r17, #12384 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12384 delay_slots=5 */ + 12225 "00000001" // /* MW 5 */ + 12226 "01000000" // /* MW 4 */ + 12227 "00110000" // /* MW 3 */ + 12228 "00011000" // /* MW 2 */ + 12229 "10001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12233 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12235 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12236 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12237 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12238 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12239 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 853 8 first + 12240 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12241 "00000000" // /* MW 3 */ + 12242 "00101000" // /* MW 2 */ + 12243 "00010000" // /* MW 1 */ +.delay_slot + 12244 "11111000" // MOV r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12245 "10100000" // /* MW 3 */ + 12246 "00010000" // /* MW 2 */ + 12247 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12249 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12251 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_592 +.src_ref 10 "softfloat.c" 821 18 first + 12256 "10011000" // OR r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12257 "00000101" // /* MW 3 */ + 12258 "01100001" // /* MW 2 */ + 12259 "00010100" // /* MW 1 */ +.src_ref 10 "softfloat.c" 821 8 + 12260 "10000100" // JNZ r16, #12400 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12400 delay_slots=5 */ + 12261 "00000001" // /* MW 5 */ + 12262 "01000000" // /* MW 4 */ + 12263 "00111000" // /* MW 3 */ + 12264 "00011000" // /* MW 2 */ + 12265 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12267 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12269 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12271 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12275 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 823 8 first + 12276 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12277 "00000000" // /* MW 3 */ + 12278 "00101000" // /* MW 2 */ + 12279 "00010000" // /* MW 1 */ +.delay_slot + 12280 "01000100" // MOVXM r0, #2147483647 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12281 "11111110" // /* MW 5 */ + 12282 "00111111" // /* MW 4 */ + 12283 "11110000" // /* MW 3 */ + 12284 "11111111" // /* MW 2 */ + 12285 "01111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12287 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12289 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12292 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12293 "10000001" // /* MW 11 */ + 12294 "10101101" // /* MW 10 */ + 12295 "00000000" // /* MW 9 */ + 12296 "00000000" // /* MW 8 */ + 12297 "00000000" // /* MW 7 */ + 12298 "00000000" // /* MW 6 */ + 12299 "00100000" // /* MW 5 */ + 12300 "00000000" // /* MW 4 */ + 12301 "11110000" // /* MW 3 */ + 12302 "00101100" // /* MW 2 */ + 12303 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_640 + 12304 "10000100" // J #12016 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12016 delay_slots=5 */ + 12305 "00000000" // /* MW 5 */ + 12306 "00000000" // /* MW 4 */ + 12307 "01111000" // /* MW 3 */ + 12308 "00010111" // /* MW 2 */ + 12309 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 +.delay_slot + 12310 "11111000" // MOV r1, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12311 "10100000" // /* MW 3 */ + 12312 "01010001" // /* MW 2 */ + 12313 "00011000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 864 16 +.delay_slot + 12314 "11111000" // MOV r19, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12315 "10100000" // /* MW 3 */ + 12316 "11011000" // /* MW 2 */ + 12317 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12322 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12323 "00011100" // /* MW 13 */ + 12324 "00000000" // /* MW 12 */ + 12325 "00000000" // /* MW 11 */ + 12326 "01010111" // /* MW 10 */ + 12327 "00011010" // /* MW 9 */ + 12328 "01000000" // /* MW 8 */ + 12329 "00000000" // /* MW 7 */ + 12330 "00000000" // /* MW 6 */ + 12331 "10110110" // /* MW 5 */ + 12332 "00000010" // /* MW 4 */ + 12333 "11110000" // /* MW 3 */ + 12334 "00101100" // /* MW 2 */ + 12335 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_672 + 12336 "10000100" // J #11872 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11872 delay_slots=5 */ + 12337 "00000000" // /* MW 5 */ + 12338 "00000000" // /* MW 4 */ + 12339 "00110000" // /* MW 3 */ + 12340 "00010111" // /* MW 2 */ + 12341 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 846 16 +.delay_slot + 12342 "11111000" // MOV r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12343 "00100000" // /* MW 3 */ + 12344 "00011000" // /* MW 2 */ + 12345 "00011001" // /* MW 1 */ +.src_ref 10 "softfloat.c" 867 4 +.delay_slot + 12346 "11111000" // MOV r25, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12347 "00100000" // /* MW 3 */ + 12348 "01011100" // /* MW 2 */ + 12349 "00011110" // /* MW 1 */ +.src_ref 10 "softfloat.c" 868 11 +.delay_slot + 12350 "11111000" // MOV r1, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12351 "10100000" // /* MW 3 */ + 12352 "01010011" // /* MW 2 */ + 12353 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12356 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12357 "10000001" // /* MW 11 */ + 12358 "10101101" // /* MW 10 */ + 12359 "00000000" // /* MW 9 */ + 12360 "00000000" // /* MW 8 */ + 12361 "00000000" // /* MW 7 */ + 12362 "00000000" // /* MW 6 */ + 12363 "00100000" // /* MW 5 */ + 12364 "00000000" // /* MW 4 */ + 12365 "11110000" // /* MW 3 */ + 12366 "00101100" // /* MW 2 */ + 12367 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_704 +.src_ref 10 "softfloat.c" 834 27 first +.tail_call + 12368 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12369 "00000000" // /* MW 5 */ + 12370 "00000000" // /* MW 4 */ + 12371 "01000000" // /* MW 3 */ + 12372 "00010100" // /* MW 2 */ + 12373 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12375 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12377 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12383 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_720 +.src_ref 10 "softfloat.c" 852 27 first +.tail_call +.return_address + 12384 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12385 "00000000" // /* MW 5 */ + 12386 "00000000" // /* MW 4 */ + 12387 "01000000" // /* MW 3 */ + 12388 "00010100" // /* MW 2 */ + 12389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12393 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12394 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12395 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12397 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12399 "00000000" // /* MW 1 */ +.label TGT_F_ZL14subFloat32Sigsjji_736 +.src_ref 10 "softfloat.c" 821 34 first +.tail_call +.return_address + 12400 "10000100" // J #10368 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10368 delay_slots=5 */ + 12401 "00000000" // /* MW 5 */ + 12402 "00000000" // /* MW 4 */ + 12403 "01000000" // /* MW 3 */ + 12404 "00010100" // /* MW 2 */ + 12405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12407 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12408 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12409 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12410 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL14subFloat32Sigsjji__end + 12415 "00000000" // /* MW 1 */ +.label float32_add +.function float32_add float32_add +.src_ref 10 "softfloat.c" 92 12 +.src_ref 10 "softfloat.c" 878 first +.function_start + 12416 "00011000" // MOVX r16, #-31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12417 "10000101" // /* MW 3 */ + 12418 "11100000" // /* MW 2 */ + 12419 "00010111" // /* MW 1 */ +.src_ref 10 "softfloat.c" 92 12 first + 12420 "10011000" // LSHL r3, r1, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12421 "00001101" // /* MW 3 */ + 12422 "01000111" // /* MW 2 */ + 12423 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 92 12 + 12424 "10011000" // LSHL r16, r2, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12425 "00001101" // /* MW 3 */ + 12426 "10100001" // /* MW 2 */ + 12427 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 884 15 first + 12428 "10011000" // EQ r16, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12429 "00000111" // /* MW 3 */ + 12430 "11100001" // /* MW 2 */ + 12431 "00010000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 884 4 + 12432 "10000100" // JNZ r16, #12464 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12464 delay_slots=5 */ + 12433 "00000001" // /* MW 5 */ + 12434 "01000000" // /* MW 4 */ + 12435 "01011000" // /* MW 3 */ + 12436 "00011000" // /* MW 2 */ + 12437 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12447 "00000000" // /* MW 1 */ +.src_ref 10 "softfloat.c" 888 15 first +.tail_call + 12448 "10000100" // J #11664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11664 delay_slots=5 */ + 12449 "00000000" // /* MW 5 */ + 12450 "00000000" // /* MW 4 */ + 12451 "11001000" // /* MW 3 */ + 12452 "00010110" // /* MW 2 */ + 12453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12461 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12463 "00000000" // /* MW 1 */ +.label TGT_Ffloat32_add_48 +.src_ref 10 "softfloat.c" 885 15 first +.tail_call +.return_address + 12464 "10000100" // J #11040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */ + 12465 "00000000" // /* MW 5 */ + 12466 "00000000" // /* MW 4 */ + 12467 "10010000" // /* MW 3 */ + 12468 "00010101" // /* MW 2 */ + 12469 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12471 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12473 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12475 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12477 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label float32_add__end + 12479 "00000000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 8 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 9 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/softfloat" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.txt new file mode 100644 index 0000000000000000000000000000000000000000..2d1d5946a6747db932adeab9e7d141d4fd318d32 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/Release/0_2_reloadable8.txt @@ -0,0 +1,2975 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 218 0xa10 x +reduce_base_c8.h 220 0xa10 1 x +reduce_base_c8.h 290 0xa10 2 +reduce_base_c8.h 348 0xa10 3 +reduce_base_c8.h 287 0xa1c +reduce_base_c8.h 287 0xa1c 1 +reduce_base_c8.h 348 0xa1c 2 x +reduce_base_c8.h 236 0xa26 +reduce_base_c8.h 293 0xa26 1 +reduce_base_c8.h 298 0xa26 2 +reduce_base_c8.h 299 0xa26 3 +reduce_base_c8.h 300 0xa26 4 +reduce_base_c8.h 326 0xa26 5 +reduce_base_c8.h 276 0xa30 +reduce_base_c8.h 301 0xa30 1 +reduce_base_c8.h 305 0xa30 2 +reduce_base_c8.h 218 0xa3a +reduce_base_c8.h 280 0xa3a 1 +reduce_base_c8.h 312 0xa3a 2 +reduce_base_c8.h 298 0xa44 x +reduce_base_c8.h 220 0xa4a x +reduce_base_c8.h 221 0xa4e x +reduce_base_c8.h 221 0xa5e +reduce_base_c8.h 301 0xa5e 1 x +reduce_base_c8.h 222 0xa64 x +reduce_base_c8.h 293 0xa64 1 x +reduce_base_c8.h 301 0xa64 2 +reduce_base_c8.h 290 0xa6e x +reduce_base_c8.h 293 0xa72 x +reduce_base_c8.h 290 0xa76 x +reduce_base_c8.h 300 0xa76 1 x +reduce_base_c8.h 222 0xa82 x +reduce_base_c8.h 287 0xa82 1 x +reduce_base_c8.h 223 0xa88 x +reduce_base_c8.h 312 0xa88 1 x +reduce_base_c8.h 305 0xa92 x +reduce_base_c8.h 312 0xa96 x +reduce_base_c8.h 299 0xa9a x +reduce_base_c8.h 276 0xa9e x +reduce_base_c8.h 299 0xa9e 1 +reduce_base_c8.h 276 0xaa4 +reduce_base_c8.h 301 0xaa8 x +reduce_base_c8.h 223 0xaac x +reduce_base_c8.h 236 0xaac 1 x +reduce_base_c8.h 224 0xab2 x +reduce_base_c8.h 224 0xac2 +reduce_base_c8.h 318 0xac2 1 +reduce_base_c8.h 225 0xaca x +reduce_base_c8.h 225 0xada +reduce_base_c8.h 318 0xada 1 +reduce_base_c8.h 226 0xae2 x +reduce_base_c8.h 236 0xae8 x +reduce_base_c8.h 312 0xaee x +reduce_base_c8.h 318 0xaf2 x +reduce_base_c8.h 300 0xaf6 x +reduce_base_c8.h 305 0xaf6 1 x +reduce_base_c8.h 280 0xafc x +reduce_base_c8.h 226 0xb00 x +reduce_base_c8.h 318 0xb00 1 x +reduce_base_c8.h 236 0xb06 +reduce_base_c8.h 236 0xb0a x +reduce_base_c8.h 236 0xb0e +reduce_base_c8.h 242 0xb1c x +reduce_base_c8.h 236 0xb20 +reduce_base_c8.h 236 0xb24 x +reduce_base_c8.h 236 0xb28 +reduce_base_c8.h 236 0xb36 +reduce_base_c8.h 236 0xb3a +reduce_base_c8.h 236 0xb3e +reduce_base_c8.h 329 0xb54 +reduce_base_c8.h 236 0xb60 +reduce_base_c8.h 236 0xb64 +reduce_base_c8.h 236 0xb68 +reduce_base_c8.h 236 0xb76 +reduce_base_c8.h 316 0xb76 1 +reduce_base_c8.h 329 0xb76 2 +reduce_base_c8.h 236 0xb7a +reduce_base_c8.h 236 0xb7e +reduce_base_c8.h 236 0xb8e +reduce_base_c8.h 236 0xb92 +reduce_base_c8.h 286 0xba2 x +reduce_base_c8.h 289 0xba2 1 +reduce_base_c8.h 291 0xba2 2 +reduce_base_c8.h 291 0xba2 3 +reduce_base_c8.h 287 0xbba x +reduce_base_c8.h 288 0xbca x +reduce_base_c8.h 289 0xbda x +reduce_base_c8.h 290 0xbea x +reduce_base_c8.h 291 0xbfa x +reduce_base_c8.h 292 0xc0e x +reduce_base_c8.h 293 0xc12 x +reduce_base_c8.h 274 0xc20 x +reduce_base_c8.h 275 0xc20 1 +reduce_base_c8.h 275 0xc20 2 +reduce_base_c8.h 275 0xc2a x +reduce_base_c8.h 279 0xc2a 1 +reduce_base_c8.h 275 0xc3e +reduce_base_c8.h 276 0xc4e x +reduce_base_c8.h 275 0xc5e x +reduce_base_c8.h 277 0xc5e 1 x +reduce_base_c8.h 278 0xc6e x +reduce_base_c8.h 279 0xc7e x +reduce_base_c8.h 279 0xc8c +reduce_base_c8.h 281 0xc94 x +reduce_base_c8.h 280 0xc98 x +reduce_base_c8.h 236 0xca0 +reduce_base_c8.h 301 0xca0 1 +reduce_base_c8.h 302 0xca0 2 +reduce_base_c8.h 236 0xca6 x +reduce_base_c8.h 236 0xcaa +reduce_base_c8.h 298 0xcb0 +reduce_base_c8.h 303 0xcb0 1 +reduce_base_c8.h 310 0xcb0 2 +reduce_base_c8.h 311 0xcb0 3 +reduce_base_c8.h 236 0xcbc +reduce_base_c8.h 236 0xcc0 +reduce_base_c8.h 236 0xcc4 +reduce_base_c8.h 310 0xcd4 x +reduce_base_c8.h 312 0xcd4 1 x +reduce_base_c8.h 315 0xcd4 2 +reduce_base_c8.h 313 0xcde +reduce_base_c8.h 317 0xcde 1 +reduce_base_c8.h 315 0xce8 +reduce_base_c8.h 317 0xce8 1 x +reduce_base_c8.h 311 0xcf6 x +reduce_base_c8.h 312 0xd06 x +reduce_base_c8.h 313 0xd16 x +reduce_base_c8.h 315 0xd1a x +reduce_base_c8.h 316 0xd2a x +reduce_base_c8.h 317 0xd2e x +reduce_base_c8.h 298 0xd50 x +reduce_base_c8.h 301 0xd50 1 +reduce_base_c8.h 301 0xd50 2 x +reduce_base_c8.h 302 0xd5a +reduce_base_c8.h 303 0xd5a 1 +reduce_base_c8.h 306 0xd5a 2 +reduce_base_c8.h 302 0xd64 x +reduce_base_c8.h 302 0xd68 +reduce_base_c8.h 306 0xd68 1 x +reduce_base_c8.h 299 0xd74 x +reduce_base_c8.h 300 0xd84 x +reduce_base_c8.h 301 0xd94 x +reduce_base_c8.h 302 0xda4 x +reduce_base_c8.h 303 0xdb4 x +reduce_base_c8.h 304 0xdc4 x +reduce_base_c8.h 305 0xdc8 x +reduce_base_c8.h 326 0xde0 x +reduce_base_c8.h 329 0xde0 1 +reduce_base_c8.h 329 0xde6 +reduce_base_c8.h 330 0xde6 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 139 0xde6 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 331 0xdf0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 140 0xdf0 1 x +reduce_mean_c8_impl.h 141 0xdf6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 326 0xdfe x +reduce_base_c8.h 327 0xe0e x +reduce_base_c8.h 327 0xe1a +reduce_base_c8.h 328 0xe1a 1 +reduce_base_c8.h 328 0xe20 x +reduce_base_c8.h 329 0xe24 x +reduce_base_c8.h 329 0xe32 +reduce_base_c8.h 329 0xe36 +reduce_base_c8.h 330 0xe36 1 +reduce_base_c8.h 329 0xe3c +reduce_base_c8.h 330 0xe48 x +reduce_base_c8.h 331 0xe58 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 139 0xe68 x +reduce_mean_c8_impl.h 140 0xe78 x +reduce_mean_c8_impl.h 141 0xe88 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 349 0xe8c x +reduce_base_c8.h 349 0xe90 +reduce_base_c8.h 262 0xea0 x +reduce_base_c8.h 263 0xea0 1 +reduce_base_c8.h 263 0xeaa +reduce_base_c8.h 263 0xeaa 1 x +reduce_base_c8.h 267 0xeaa 2 +reduce_base_c8.h 265 0xeb4 +reduce_base_c8.h 329 0xeb4 1 +reduce_base_c8.h 265 0xebe x +reduce_base_c8.h 265 0xec2 +reduce_base_c8.h 267 0xec2 1 x +reduce_base_c8.h 265 0xec6 x +reduce_base_c8.h 265 0xec6 1 x +reduce_base_c8.h 263 0xecc x +reduce_base_c8.h 263 0xed0 +reduce_base_c8.h 264 0xede x +reduce_base_c8.h 265 0xeee x +reduce_base_c8.h 266 0xefe x +reduce_base_c8.h 267 0xf0e x +reduce_base_c8.h 267 0xf1c +reduce_base_c8.h 267 0xf20 +reduce_base_c8.h 270 0xf24 +reduce_base_c8.h 268 0xf28 x +reduce_base_c8.h 269 0xf30 x +reduce_base_c8.h 270 0xf30 1 x +reduce_base_c8.h 250 0xf40 +reduce_base_c8.h 250 0xf40 1 x +reduce_base_c8.h 255 0xf40 2 +reduce_base_c8.h 255 0xf4a +reduce_base_c8.h 255 0xf4a 1 +reduce_base_c8.h 255 0xf4a 2 +reduce_base_c8.h 255 0xf4a 3 x +reduce_base_c8.h 255 0xf54 +reduce_base_c8.h 255 0xf54 1 +reduce_base_c8.h 329 0xf54 2 +reduce_base_c8.h 251 0xf62 x +reduce_base_c8.h 252 0xf72 x +reduce_base_c8.h 253 0xf82 x +reduce_base_c8.h 254 0xf92 x +reduce_base_c8.h 255 0xfa2 x +reduce_base_c8.h 255 0xfb0 +reduce_base_c8.h 255 0xfb0 1 +reduce_base_c8.h 256 0xfb8 x +reduce_base_c8.h 257 0xfbc x +reduce_base_c8.h 238 0xfc0 x +reduce_base_c8.h 239 0xfd0 x +reduce_base_c8.h 240 0xfe0 x +reduce_base_c8.h 241 0xfea +reduce_base_c8.h 241 0xfea 1 +reduce_base_c8.h 241 0xff2 x +reduce_base_c8.h 241 0xff8 +reduce_base_c8.h 241 0xffe +reduce_base_c8.h 241 0x1002 +reduce_base_c8.h 241 0x1002 1 +reduce_base_c8.h 241 0x1002 2 +reduce_base_c8.h 241 0x1002 3 +reduce_base_c8.h 242 0x100c x +reduce_base_c8.h 243 0x101a +reduce_base_c8.h 243 0x101e x +reduce_base_c8.h 243 0x102c +reduce_base_c8.h 243 0x102c 1 +reduce_base_c8.h 243 0x102c 2 +reduce_base_c8.h 243 0x102c 3 +reduce_base_c8.h 244 0x1036 x +reduce_base_c8.h 245 0x103a x +reduce_base_c8.h 329 0x103a 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 266 0x1050 x +pad_3d.h 465 0x1050 1 x +pad_3d.h 468 0x1050 2 x +pad_3d.h 471 0x1050 3 +pad_3d.h 479 0x1050 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 950 0x105a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 469 0x105a 1 x +pad_3d.h 478 0x105a 2 +pad_3d.h 499 0x105a 3 +pad_3d.h 511 0x105a 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 950 0x1064 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 470 0x1064 1 x +pad_3d.h 486 0x1064 2 +pad_3d.h 498 0x1064 3 +pad_3d.h 499 0x1064 4 +pad_3d.h 509 0x1064 5 +pad_3d.h 517 0x1064 6 +pad_3d.h 471 0x106e x +pad_3d.h 472 0x1072 x +pad_3d.h 473 0x1076 x +pad_3d.h 475 0x107a x +pad_3d.h 479 0x107e x +pad_3d.h 477 0x1082 x +pad_3d.h 478 0x1086 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x108a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 485 0x1090 x +pad_3d.h 485 0x1094 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 998 0x1098 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 486 0x109c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/../detail/array_helpers.hpp: +array_helpers.hpp 950 0x10a0 x +array_helpers.hpp 950 0x10a4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 486 0x10a4 1 x +pad_3d.h 486 0x10aa +pad_3d.h 487 0x10b0 +pad_3d.h 486 0x10bc +pad_3d.h 486 0x10c2 +pad_3d.h 486 0x10c8 +pad_3d.h 487 0x1130 x +pad_3d.h 495 0x1140 +pad_3d.h 495 0x1140 1 x +pad_3d.h 498 0x1140 2 +pad_3d.h 499 0x1140 3 x +pad_3d.h 495 0x114a +pad_3d.h 496 0x114a 1 x +pad_3d.h 495 0x1150 x +pad_3d.h 495 0x1154 +pad_3d.h 498 0x1154 1 x +pad_3d.h 499 0x115a x +pad_3d.h 498 0x115e x +pad_3d.h 498 0x1162 +pad_3d.h 499 0x1162 1 x +pad_3d.h 499 0x1168 +pad_3d.h 499 0x116c + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x117c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x117c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 499 0x117c 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1186 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1186 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 499 0x1186 2 +pad_3d.h 499 0x1190 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1200 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1200 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 514 0x1210 +pad_3d.h 514 0x1216 x +pad_3d.h 514 0x121a +pad_3d.h 514 0x121e +pad_3d.h 511 0x1222 x +pad_3d.h 509 0x1226 x +pad_3d.h 515 0x122a x +pad_3d.h 509 0x122e x +pad_3d.h 509 0x1232 +pad_3d.h 514 0x1232 1 +pad_3d.h 517 0x1232 2 x +pad_3d.h 509 0x1238 x +pad_3d.h 509 0x123c +pad_3d.h 517 0x123c 1 x +pad_3d.h 517 0x1242 +pad_3d.h 514 0x124c x +pad_3d.h 514 0x1250 +pad_3d.h 515 0x1254 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1258 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1258 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 517 0x1258 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x1262 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1262 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 517 0x1262 2 +pad_3d.h 517 0x126c + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x12d0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x12d0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 282 0x12e0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 352 0x12f0 +reduce_base_c8.h 362 0x12f0 1 x +reduce_base_c8.h 365 0x12f0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 200 0x12f0 3 +reduce_mean_c8_impl.h 223 0x12f0 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 365 0x12f4 x +reduce_base_c8.h 367 0x12fc x +reduce_base_c8.h 367 0x130c +reduce_base_c8.h 367 0x130c 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 101 0x1312 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1312 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1312 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 372 0x1312 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1316 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 362 0x131e +reduce_base_c8.h 372 0x1324 +reduce_base_c8.h 372 0x1328 x +reduce_base_c8.h 372 0x1338 +reduce_base_c8.h 372 0x133c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1342 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 374 0x1342 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x134e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 372 0x134e 1 +reduce_base_c8.h 374 0x134e 2 +reduce_base_c8.h 372 0x135a +reduce_base_c8.h 372 0x1360 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x13d0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 374 0x13d0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x13e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x13e0 1 +reduce_base_c8.h 412 0x13e0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x13e0 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x13e4 x +reduce_base_c8.h 388 0x13e8 +reduce_base_c8.h 388 0x13e8 1 +reduce_base_c8.h 388 0x13ee +reduce_base_c8.h 570 0x13ee 1 +reduce_base_c8.h 570 0x13ee 2 +reduce_base_c8.h 570 0x13ee 3 +reduce_base_c8.h 570 0x13f4 x +reduce_base_c8.h 594 0x13f4 1 +reduce_base_c8.h 570 0x13fa +reduce_base_c8.h 594 0x13fa 1 x +reduce_base_c8.h 594 0x1400 +reduce_base_c8.h 594 0x1404 +reduce_base_c8.h 388 0x1408 +reduce_base_c8.h 595 0x1408 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x140e +aie_core.h 73 0x140e 1 +aie_core.h 90 0x140e 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x140e 3 +vector.hpp 1139 0x140e 4 +vector.hpp 1159 0x140e 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x140e 6 +accum.hpp 198 0x140e 7 +accum.hpp 198 0x140e 8 +accum.hpp 943 0x140e 9 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x140e 10 +reduce_base_c8.h 596 0x140e 11 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x1418 +aie_core.h 90 0x1418 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1418 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1418 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x1418 4 x +reduce_base_c8.h 570 0x1418 5 +reduce_base_c8.h 570 0x1418 6 +reduce_base_c8.h 570 0x1418 7 +reduce_base_c8.h 570 0x1418 8 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x1424 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 109 0x1424 1 +me_vmult_float_emulated.h 109 0x1424 2 +me_vmult_float_emulated.h 111 0x1424 3 +me_vmult_float_emulated.h 111 0x1424 4 +me_vmult_float_emulated.h 113 0x1424 5 +me_vmult_float_emulated.h 113 0x1424 6 +me_vmult_float_emulated.h 115 0x1424 7 +me_vmult_float_emulated.h 115 0x1424 8 +me_vmult_float_emulated.h 117 0x1424 9 +me_vmult_float_emulated.h 117 0x1424 10 +me_vmult_float_emulated.h 118 0x1424 11 +me_vmult_float_emulated.h 118 0x1424 12 +me_vmult_float_emulated.h 118 0x1424 13 +me_vmult_float_emulated.h 118 0x1424 14 +me_vmult_float_emulated.h 119 0x1424 15 +me_vmult_float_emulated.h 119 0x1424 16 +me_vmult_float_emulated.h 119 0x1424 17 +me_vmult_float_emulated.h 119 0x1424 18 +me_vmult_float_emulated.h 120 0x1424 19 +me_vmult_float_emulated.h 120 0x1424 20 +me_vmult_float_emulated.h 120 0x1424 21 +me_vmult_float_emulated.h 120 0x1424 22 +me_vmult_float_emulated.h 121 0x1424 23 +me_vmult_float_emulated.h 121 0x1424 24 +me_vmult_float_emulated.h 121 0x1424 25 +me_vmult_float_emulated.h 121 0x1424 26 +me_vmult_float_emulated.h 122 0x1424 27 +me_vmult_float_emulated.h 122 0x1424 28 +me_vmult_float_emulated.h 122 0x1424 29 +me_vmult_float_emulated.h 122 0x1424 30 +me_vmult_float_emulated.h 123 0x1424 31 +me_vmult_float_emulated.h 123 0x1424 32 +me_vmult_float_emulated.h 123 0x1424 33 +me_vmult_float_emulated.h 123 0x1424 34 +me_vmult_float_emulated.h 124 0x1424 35 +me_vmult_float_emulated.h 124 0x1424 36 +me_vmult_float_emulated.h 124 0x1424 37 +me_vmult_float_emulated.h 124 0x1424 38 +me_vmult_float_emulated.h 125 0x1424 39 +me_vmult_float_emulated.h 125 0x1424 40 +me_vmult_float_emulated.h 125 0x1424 41 +me_vmult_float_emulated.h 125 0x1424 42 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1424 43 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1424 44 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x1424 45 +add.hpp 28 0x1424 46 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1424 47 +add_reduce.hpp 324 0x1424 48 +add_reduce.hpp 324 0x1424 49 +add_reduce.hpp 324 0x1424 50 +add_reduce.hpp 324 0x1424 51 +add_reduce.hpp 324 0x1424 52 +add_reduce.hpp 324 0x1424 53 +add_reduce.hpp 324 0x1424 54 +add_reduce.hpp 324 0x1424 55 +add_reduce.hpp 324 0x1424 56 +add_reduce.hpp 324 0x1424 57 +add_reduce.hpp 324 0x1424 58 +add_reduce.hpp 324 0x1424 59 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1424 60 +add_accum.hpp 19 0x1424 61 +add_accum.hpp 19 0x1424 62 +add_accum.hpp 19 0x1424 63 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 388 0x1424 64 +reduce_base_c8.h 595 0x1424 65 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x1430 +aie_core.h 73 0x1430 1 +aie_core.h 73 0x1430 2 +aie_core.h 73 0x1430 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1430 4 +vector.hpp 1139 0x1430 5 +vector.hpp 1139 0x1430 6 +vector.hpp 1159 0x1430 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x1430 8 +accum.hpp 198 0x1430 9 +accum.hpp 198 0x1430 10 +accum.hpp 198 0x1430 11 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1430 12 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 200 0x1430 13 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x143c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x143c 1 +vector.hpp 1139 0x143c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x143c 3 +accum.hpp 198 0x143c 4 x +accum.hpp 943 0x143c 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x143c 6 +reduce_base_c8.h 570 0x143c 7 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1446 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1446 1 +vector.hpp 1139 0x1446 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1446 3 +accum.hpp 198 0x1446 4 +accum.hpp 943 0x1446 5 +accum.hpp 943 0x1446 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1446 7 +reduce_base_c8.h 570 0x1446 8 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1450 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 391 0x1450 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1456 +aie_core.h 90 0x1456 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1456 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1456 3 +accum.hpp 943 0x1456 4 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x145c +aie_core.h 90 0x145c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x145c 2 +vector.hpp 1139 0x145c 3 +vector.hpp 1139 0x145c 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x145c 5 +accum.hpp 198 0x145c 6 +accum.hpp 198 0x145c 7 x +accum.hpp 943 0x145c 8 +accum.hpp 943 0x145c 9 +accum.hpp 943 0x145c 10 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x145c 11 x +reduce_base_c8.h 570 0x145c 12 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1468 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1468 1 +vector.hpp 1139 0x1468 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1468 3 +accum.hpp 198 0x1468 4 +accum.hpp 943 0x1468 5 +accum.hpp 943 0x1468 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1468 7 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1468 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x1472 +aie_core.h 90 0x1472 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1472 2 +vector.hpp 1139 0x1472 3 +vector.hpp 1139 0x1472 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1472 5 +accum.hpp 198 0x1472 6 +accum.hpp 198 0x1472 7 x +accum.hpp 943 0x1472 8 +accum.hpp 943 0x1472 9 +accum.hpp 943 0x1472 10 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1472 11 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x147c +aie_core.h 90 0x147c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x147c 2 +vector.hpp 1159 0x147c 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x147c 4 +accum.hpp 198 0x147c 5 +accum.hpp 943 0x147c 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1482 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1482 1 x +accum.hpp 943 0x1482 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1482 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1482 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x148a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x148a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x148a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1490 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 391 0x1490 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x149a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x149a 1 x +accum.hpp 943 0x149a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 412 0x149a 3 +reduce_base_c8.h 570 0x149a 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x14a0 +aie_core.h 73 0x14a0 1 +aie_core.h 73 0x14a0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x14a0 3 +vector.hpp 1159 0x14a0 4 +vector.hpp 1159 0x14a0 5 +vector.hpp 1285 0x14a0 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x14a0 7 +accum.hpp 153 0x14a0 8 +accum.hpp 153 0x14a0 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x14a0 10 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x14a0 11 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x14b0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14b0 1 x +vector.hpp 1159 0x14b0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x14b0 3 +accum.hpp 198 0x14b0 4 x +accum.hpp 943 0x14b0 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x14b0 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 391 0x14b0 7 x +reduce_base_c8.h 570 0x14b0 8 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x14c0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14c0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x14c0 2 +accum.hpp 943 0x14c0 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x14c4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14c4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x14c4 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x14c4 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14d0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x14d0 1 +accum.hpp 943 0x14d0 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x14d0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x14f0 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x1500 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1500 1 x +vector.hpp 1159 0x1500 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x1500 3 x +accum.hpp 198 0x1500 4 x +accum.hpp 943 0x1500 5 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1500 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1510 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1520 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1520 1 x +accum.hpp 943 0x1520 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1520 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 570 0x1520 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 107 0x1530 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 412 0x1530 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 90 0x153a x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 107 0x153a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x153a 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x153a 3 x +accum.hpp 943 0x153a 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x153a 5 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 101 0x1544 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x154a x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x154e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x154e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x154e 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1554 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 412 0x1554 1 x +reduce_base_c8.h 412 0x155c +reduce_base_c8.h 412 0x1560 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 73 0x156c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x156c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 153 0x156c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1572 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 184 0x1572 1 x +reduce_mean_c8_impl.h 184 0x1572 2 +reduce_mean_c8_impl.h 184 0x1584 +reduce_mean_c8_impl.h 184 0x1588 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x158e +add_reduce.hpp 322 0x158e 1 +add_reduce.hpp 322 0x158e 2 +add_reduce.hpp 322 0x158e 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 184 0x158e 4 +reduce_mean_c8_impl.h 184 0x159a +reduce_mean_c8_impl.h 184 0x159e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x15ae +blend.hpp 170 0x15b4 +blend.hpp 163 0x15ba + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 109 0x15c0 +me_vmult_float_emulated.h 111 0x15c0 1 +me_vmult_float_emulated.h 113 0x15c0 2 +me_vmult_float_emulated.h 115 0x15c0 3 +me_vmult_float_emulated.h 117 0x15c0 4 +me_vmult_float_emulated.h 118 0x15c0 5 +me_vmult_float_emulated.h 118 0x15c0 6 +me_vmult_float_emulated.h 119 0x15c0 7 +me_vmult_float_emulated.h 119 0x15c0 8 +me_vmult_float_emulated.h 120 0x15c0 9 +me_vmult_float_emulated.h 120 0x15c0 10 +me_vmult_float_emulated.h 121 0x15c0 11 +me_vmult_float_emulated.h 121 0x15c0 12 +me_vmult_float_emulated.h 122 0x15c0 13 +me_vmult_float_emulated.h 122 0x15c0 14 +me_vmult_float_emulated.h 123 0x15c0 15 +me_vmult_float_emulated.h 123 0x15c0 16 +me_vmult_float_emulated.h 124 0x15c0 17 +me_vmult_float_emulated.h 124 0x15c0 18 +me_vmult_float_emulated.h 125 0x15c0 19 +me_vmult_float_emulated.h 125 0x15c0 20 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x15c0 21 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x15c0 22 +add_reduce.hpp 324 0x15c0 23 +add_reduce.hpp 324 0x15c0 24 +add_reduce.hpp 324 0x15c0 25 +add_reduce.hpp 324 0x15c0 26 +add_reduce.hpp 324 0x15c0 27 +add_reduce.hpp 324 0x15c0 28 +add_reduce.hpp 324 0x15c0 29 +add_reduce.hpp 324 0x15c0 30 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 200 0x15c0 31 +reduce_mean_c8_impl.h 200 0x15c0 32 +reduce_mean_c8_impl.h 200 0x15c0 33 +reduce_mean_c8_impl.h 223 0x15c0 34 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x15cc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 200 0x15cc 1 x +reduce_mean_c8_impl.h 200 0x15e0 +reduce_mean_c8_impl.h 223 0x15f0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x15fc +vector.hpp 1289 0x15fc 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 112 0x1608 +me_vmult_float_emulated.h 112 0x1608 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1608 2 +vector.hpp 57 0x1608 3 +vector.hpp 1280 0x1608 4 +vector.hpp 1285 0x1608 5 +vector.hpp 1287 0x1608 6 +vector.hpp 1288 0x1608 7 +vector.hpp 1289 0x1608 8 +vector.hpp 1292 0x1608 9 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 223 0x1608 10 x +reduce_mean_c8_impl.h 268 0x1608 11 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1614 +vector.hpp 915 0x1614 1 +vector.hpp 1280 0x1614 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x161e +add_reduce.hpp 322 0x161e 1 +add_reduce.hpp 322 0x161e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 223 0x161e 3 x +reduce_mean_c8_impl.h 223 0x1628 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1286 0x1632 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1638 +me_vmult_float_emulated.h 108 0x1638 1 +me_vmult_float_emulated.h 109 0x1638 2 +me_vmult_float_emulated.h 110 0x1638 3 +me_vmult_float_emulated.h 110 0x1638 4 +me_vmult_float_emulated.h 111 0x1638 5 +me_vmult_float_emulated.h 111 0x1638 6 +me_vmult_float_emulated.h 111 0x1638 7 +me_vmult_float_emulated.h 112 0x1638 8 +me_vmult_float_emulated.h 112 0x1638 9 +me_vmult_float_emulated.h 113 0x1638 10 +me_vmult_float_emulated.h 114 0x1638 11 +me_vmult_float_emulated.h 114 0x1638 12 +me_vmult_float_emulated.h 115 0x1638 13 +me_vmult_float_emulated.h 115 0x1638 14 +me_vmult_float_emulated.h 115 0x1638 15 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1286 0x1638 16 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1108 0x1638 17 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 112 0x163c x +me_vmult_float_emulated.h 112 0x163c 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 223 0x163c 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1644 +me_vmult_float_emulated.h 108 0x1644 1 +me_vmult_float_emulated.h 109 0x1644 2 +me_vmult_float_emulated.h 110 0x1644 3 +me_vmult_float_emulated.h 110 0x1644 4 +me_vmult_float_emulated.h 111 0x1644 5 +me_vmult_float_emulated.h 111 0x1644 6 +me_vmult_float_emulated.h 111 0x1644 7 +me_vmult_float_emulated.h 113 0x1644 8 +me_vmult_float_emulated.h 114 0x1644 9 +me_vmult_float_emulated.h 114 0x1644 10 +me_vmult_float_emulated.h 115 0x1644 11 +me_vmult_float_emulated.h 115 0x1644 12 +me_vmult_float_emulated.h 115 0x1644 13 +me_vmult_float_emulated.h 108 0x1648 +me_vmult_float_emulated.h 108 0x1648 1 +me_vmult_float_emulated.h 109 0x1648 2 +me_vmult_float_emulated.h 110 0x1648 3 +me_vmult_float_emulated.h 110 0x1648 4 +me_vmult_float_emulated.h 111 0x1648 5 +me_vmult_float_emulated.h 111 0x1648 6 +me_vmult_float_emulated.h 111 0x1648 7 +me_vmult_float_emulated.h 113 0x1648 8 x +me_vmult_float_emulated.h 115 0x1648 9 +me_vmult_float_emulated.h 115 0x1648 10 +me_vmult_float_emulated.h 115 0x1648 11 +me_vmult_float_emulated.h 108 0x1650 +me_vmult_float_emulated.h 108 0x1650 1 +me_vmult_float_emulated.h 109 0x1650 2 +me_vmult_float_emulated.h 110 0x1650 3 +me_vmult_float_emulated.h 110 0x1650 4 +me_vmult_float_emulated.h 111 0x1650 5 +me_vmult_float_emulated.h 111 0x1650 6 +me_vmult_float_emulated.h 111 0x1650 7 +me_vmult_float_emulated.h 113 0x165c +me_vmult_float_emulated.h 114 0x165c 1 x +me_vmult_float_emulated.h 114 0x165c 2 x +me_vmult_float_emulated.h 115 0x1662 x +me_vmult_float_emulated.h 115 0x1670 +me_vmult_float_emulated.h 115 0x1670 1 +me_vmult_float_emulated.h 115 0x1670 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1670 3 +add_reduce.hpp 322 0x1670 4 +add_reduce.hpp 322 0x1670 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1680 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 226 0x1680 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1280 0x168a +vector.hpp 1280 0x168e x +vector.hpp 1285 0x1692 x +vector.hpp 1285 0x1692 1 x +vector.hpp 1285 0x1698 +vector.hpp 1286 0x169c x +vector.hpp 1285 0x16a0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x16a0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16a6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 142 0x16aa x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16aa 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 142 0x16ae + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16ae 1 x +accum.hpp 199 0x16ba x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x16ba 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x16c2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 142 0x16c6 x +vector.hpp 243 0x16c6 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x16c6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16ce x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x16d2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16d6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x16d6 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16de +accum.hpp 151 0x16e2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 243 0x16e6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 151 0x16e6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x16ea x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16ee x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x16ee 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x16f6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x16fa +add_reduce.hpp 322 0x16fe x +add_reduce.hpp 324 0x1702 x +add_reduce.hpp 324 0x1702 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x170a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x170e x +add_reduce.hpp 324 0x170e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1716 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x171a x +add_reduce.hpp 322 0x171e x +add_reduce.hpp 324 0x171e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1726 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x172a x +add_reduce.hpp 322 0x172e x +add_reduce.hpp 324 0x172e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1736 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x173a x +add_reduce.hpp 322 0x173e x +add_reduce.hpp 324 0x1742 x +add_reduce.hpp 324 0x1742 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x174a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x174e x +add_reduce.hpp 324 0x174e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1756 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x175a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x175e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1762 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1766 x +vector.hpp 1288 0x1766 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x176c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1770 x +vector.hpp 1287 0x1770 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1770 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1776 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 853 0x177a x +vector.hpp 853 0x177e +vector.hpp 142 0x1782 x +vector.hpp 1413 0x1782 1 x +vector.hpp 142 0x1786 +vector.hpp 1413 0x1786 1 +vector.hpp 142 0x178a +vector.hpp 1413 0x178a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x178e x +blend.hpp 170 0x1792 +blend.hpp 170 0x1796 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x179a +me_vmult_float_emulated.h 108 0x179a 1 +me_vmult_float_emulated.h 108 0x179e +me_vmult_float_emulated.h 108 0x179e 1 +me_vmult_float_emulated.h 109 0x179e 2 +me_vmult_float_emulated.h 110 0x179e 3 +me_vmult_float_emulated.h 110 0x179e 4 +me_vmult_float_emulated.h 111 0x179e 5 +me_vmult_float_emulated.h 111 0x179e 6 +me_vmult_float_emulated.h 111 0x179e 7 +me_vmult_float_emulated.h 108 0x17a2 x +me_vmult_float_emulated.h 108 0x17a2 1 x +me_vmult_float_emulated.h 109 0x17a2 2 x +me_vmult_float_emulated.h 108 0x17aa +me_vmult_float_emulated.h 108 0x17aa 1 +me_vmult_float_emulated.h 109 0x17aa 2 +me_vmult_float_emulated.h 110 0x17aa 3 +me_vmult_float_emulated.h 110 0x17aa 4 +me_vmult_float_emulated.h 111 0x17aa 5 +me_vmult_float_emulated.h 111 0x17aa 6 +me_vmult_float_emulated.h 111 0x17aa 7 +me_vmult_float_emulated.h 109 0x17ae + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x17ae 1 +vector.hpp 1285 0x17ae 2 x +vector.hpp 1289 0x17ae 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 120 0x17b8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x17b8 1 +vector.hpp 1289 0x17b8 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x17c0 +me_vmult_float_emulated.h 108 0x17c0 1 +me_vmult_float_emulated.h 109 0x17c0 2 +me_vmult_float_emulated.h 110 0x17c0 3 +me_vmult_float_emulated.h 110 0x17c0 4 +me_vmult_float_emulated.h 111 0x17c0 5 +me_vmult_float_emulated.h 111 0x17c0 6 +me_vmult_float_emulated.h 111 0x17c0 7 +me_vmult_float_emulated.h 124 0x17c0 8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1285 0x17c0 9 x +vector.hpp 1289 0x17c0 10 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 125 0x17ca x +me_vmult_float_emulated.h 109 0x17d2 x +me_vmult_float_emulated.h 110 0x17d2 1 x +me_vmult_float_emulated.h 110 0x17d2 2 x +me_vmult_float_emulated.h 111 0x17d8 x +me_vmult_float_emulated.h 111 0x17e6 +me_vmult_float_emulated.h 111 0x17e6 1 +me_vmult_float_emulated.h 111 0x17e6 2 +me_vmult_float_emulated.h 117 0x17ec x +me_vmult_float_emulated.h 118 0x17f0 x +me_vmult_float_emulated.h 119 0x17fa x +me_vmult_float_emulated.h 117 0x17fe x +me_vmult_float_emulated.h 118 0x1802 x +me_vmult_float_emulated.h 118 0x1806 +me_vmult_float_emulated.h 122 0x1810 x +me_vmult_float_emulated.h 118 0x1814 x +me_vmult_float_emulated.h 119 0x1818 x +me_vmult_float_emulated.h 119 0x181c +me_vmult_float_emulated.h 121 0x1826 x +me_vmult_float_emulated.h 119 0x182a x +me_vmult_float_emulated.h 120 0x182e x +me_vmult_float_emulated.h 120 0x1832 +me_vmult_float_emulated.h 123 0x183c x +me_vmult_float_emulated.h 120 0x1840 x +me_vmult_float_emulated.h 121 0x1844 x +me_vmult_float_emulated.h 121 0x1848 +me_vmult_float_emulated.h 121 0x1854 +me_vmult_float_emulated.h 122 0x1858 x +me_vmult_float_emulated.h 122 0x185c +me_vmult_float_emulated.h 122 0x1868 +me_vmult_float_emulated.h 123 0x186c x +me_vmult_float_emulated.h 123 0x1870 +me_vmult_float_emulated.h 123 0x187c +me_vmult_float_emulated.h 124 0x1880 x +me_vmult_float_emulated.h 124 0x1884 +me_vmult_float_emulated.h 124 0x1890 +me_vmult_float_emulated.h 125 0x1894 x +me_vmult_float_emulated.h 125 0x1898 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1286 0x18a4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1108 0x18a4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1286 0x18aa +vector.hpp 1289 0x18ae x +vector.hpp 57 0x18b4 x +vector.hpp 1292 0x18b4 1 x +vector.hpp 57 0x18c0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x18c0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/reduce_base_c8.h: +reduce_base_c8.h 352 0x18f0 x +reduce_base_c8.h 352 0x18f4 +reduce_base_c8.h 352 0x18fe +reduce_base_c8.h 353 0x1902 x +reduce_base_c8.h 352 0x190e x +reduce_base_c8.h 352 0x1912 +reduce_base_c8.h 420 0x1920 +reduce_base_c8.h 353 0x1928 x +reduce_base_c8.h 420 0x192c x +reduce_base_c8.h 420 0x1938 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1950 +blend.hpp 170 0x195a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 184 0x1970 +reduce_mean_c8_impl.h 184 0x1974 x +reduce_mean_c8_impl.h 184 0x1978 +reduce_mean_c8_impl.h 184 0x1988 +reduce_mean_c8_impl.h 184 0x198c +reduce_mean_c8_impl.h 184 0x1990 +reduce_mean_c8_impl.h 200 0x1996 +reduce_mean_c8_impl.h 200 0x19b0 x +reduce_mean_c8_impl.h 202 0x19b0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x19ba + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 202 0x19ba 1 x +reduce_mean_c8_impl.h 202 0x19c0 +reduce_mean_c8_impl.h 200 0x19ce x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x19d2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x19d2 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 206 0x19d2 2 x +reduce_mean_c8_impl.h 206 0x19d2 3 +reduce_mean_c8_impl.h 209 0x19d2 4 +reduce_mean_c8_impl.h 206 0x19de +reduce_mean_c8_impl.h 206 0x19de 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x19ea x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x19ea 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 209 0x19ea 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x19f0 +accum.hpp 199 0x19f6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x19f6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 206 0x1a00 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1a10 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x1a10 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 209 0x1a10 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 150 0x1a50 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add.hpp: +add.hpp 28 0x1a60 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 199 0x1a70 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1a80 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1a80 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1a8a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1a8a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1a8a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1a94 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 150 0x1a9a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1aa0 +add_reduce.hpp 322 0x1aa4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1aa8 +me_vmult_float_emulated.h 108 0x1aa8 1 +me_vmult_float_emulated.h 109 0x1aa8 2 +me_vmult_float_emulated.h 110 0x1aa8 3 +me_vmult_float_emulated.h 110 0x1aa8 4 +me_vmult_float_emulated.h 111 0x1aa8 5 +me_vmult_float_emulated.h 111 0x1aa8 6 +me_vmult_float_emulated.h 111 0x1aa8 7 +me_vmult_float_emulated.h 112 0x1aa8 8 +me_vmult_float_emulated.h 112 0x1aa8 9 +me_vmult_float_emulated.h 113 0x1aa8 10 +me_vmult_float_emulated.h 114 0x1aa8 11 +me_vmult_float_emulated.h 114 0x1aa8 12 +me_vmult_float_emulated.h 115 0x1aa8 13 +me_vmult_float_emulated.h 115 0x1aa8 14 +me_vmult_float_emulated.h 115 0x1aa8 15 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1aa8 16 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1aa8 17 x +accum.hpp 1108 0x1aa8 18 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1aa8 19 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1ab2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 80 0x1ab6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 112 0x1aba +me_vmult_float_emulated.h 112 0x1aba 1 +me_vmult_float_emulated.h 113 0x1aba 2 +me_vmult_float_emulated.h 113 0x1ac0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1ac4 x +add_reduce.hpp 322 0x1ac8 x +add_reduce.hpp 324 0x1ac8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1ad0 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1ad4 +me_vmult_float_emulated.h 108 0x1ad4 1 +me_vmult_float_emulated.h 109 0x1ad4 2 +me_vmult_float_emulated.h 110 0x1ad4 3 +me_vmult_float_emulated.h 110 0x1ad4 4 +me_vmult_float_emulated.h 111 0x1ad4 5 +me_vmult_float_emulated.h 111 0x1ad4 6 +me_vmult_float_emulated.h 111 0x1ad4 7 +me_vmult_float_emulated.h 113 0x1ad4 8 +me_vmult_float_emulated.h 114 0x1ad4 9 +me_vmult_float_emulated.h 114 0x1ad4 10 +me_vmult_float_emulated.h 115 0x1ad4 11 +me_vmult_float_emulated.h 115 0x1ad4 12 +me_vmult_float_emulated.h 115 0x1ad4 13 +me_vmult_float_emulated.h 112 0x1ada x +me_vmult_float_emulated.h 112 0x1ada 1 x +me_vmult_float_emulated.h 113 0x1ae0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1ae0 1 x +add_reduce.hpp 322 0x1ae8 x +add_reduce.hpp 324 0x1ae8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1af0 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1af4 +me_vmult_float_emulated.h 108 0x1af4 1 +me_vmult_float_emulated.h 109 0x1af4 2 +me_vmult_float_emulated.h 110 0x1af4 3 +me_vmult_float_emulated.h 110 0x1af4 4 +me_vmult_float_emulated.h 111 0x1af4 5 +me_vmult_float_emulated.h 111 0x1af4 6 +me_vmult_float_emulated.h 111 0x1af4 7 +me_vmult_float_emulated.h 115 0x1af4 8 +me_vmult_float_emulated.h 115 0x1af4 9 +me_vmult_float_emulated.h 115 0x1af4 10 +me_vmult_float_emulated.h 113 0x1afc x +me_vmult_float_emulated.h 114 0x1afc 1 x +me_vmult_float_emulated.h 114 0x1afc 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1b00 x +add_reduce.hpp 322 0x1b04 x +add_reduce.hpp 324 0x1b04 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 198 0x1b0c x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1b10 +me_vmult_float_emulated.h 108 0x1b10 1 +me_vmult_float_emulated.h 109 0x1b10 2 +me_vmult_float_emulated.h 110 0x1b10 3 +me_vmult_float_emulated.h 110 0x1b10 4 +me_vmult_float_emulated.h 111 0x1b10 5 +me_vmult_float_emulated.h 111 0x1b10 6 +me_vmult_float_emulated.h 111 0x1b10 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 324 0x1b1a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 915 0x1b1e x +vector.hpp 856 0x1b24 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1b28 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1b2c +me_vmult_float_emulated.h 108 0x1b2c 1 +me_vmult_float_emulated.h 109 0x1b30 +me_vmult_float_emulated.h 110 0x1b30 1 +me_vmult_float_emulated.h 110 0x1b30 2 +me_vmult_float_emulated.h 111 0x1b30 3 +me_vmult_float_emulated.h 111 0x1b30 4 +me_vmult_float_emulated.h 111 0x1b30 5 +me_vmult_float_emulated.h 108 0x1b34 x +me_vmult_float_emulated.h 108 0x1b34 1 x +me_vmult_float_emulated.h 111 0x1b34 2 +me_vmult_float_emulated.h 111 0x1b34 3 +me_vmult_float_emulated.h 111 0x1b34 4 +me_vmult_float_emulated.h 109 0x1b3e x +me_vmult_float_emulated.h 124 0x1b42 x +me_vmult_float_emulated.h 109 0x1b4e x +me_vmult_float_emulated.h 110 0x1b4e 1 x +me_vmult_float_emulated.h 110 0x1b4e 2 x +me_vmult_float_emulated.h 115 0x1b52 x +me_vmult_float_emulated.h 111 0x1b56 x +me_vmult_float_emulated.h 115 0x1b62 x +me_vmult_float_emulated.h 115 0x1b62 1 x +me_vmult_float_emulated.h 115 0x1b62 2 x +me_vmult_float_emulated.h 111 0x1b66 x +me_vmult_float_emulated.h 111 0x1b66 1 x +me_vmult_float_emulated.h 111 0x1b66 2 x +me_vmult_float_emulated.h 117 0x1b6c x +me_vmult_float_emulated.h 118 0x1b70 x +me_vmult_float_emulated.h 119 0x1b7a x +me_vmult_float_emulated.h 117 0x1b7e x +me_vmult_float_emulated.h 118 0x1b82 x +me_vmult_float_emulated.h 118 0x1b86 +me_vmult_float_emulated.h 120 0x1b90 x +me_vmult_float_emulated.h 118 0x1b94 x +me_vmult_float_emulated.h 119 0x1b98 x +me_vmult_float_emulated.h 119 0x1b9c +me_vmult_float_emulated.h 121 0x1ba6 x +me_vmult_float_emulated.h 119 0x1baa x +me_vmult_float_emulated.h 120 0x1bae x +me_vmult_float_emulated.h 120 0x1bb2 +me_vmult_float_emulated.h 120 0x1bbe +me_vmult_float_emulated.h 121 0x1bc2 x +me_vmult_float_emulated.h 121 0x1bc6 +me_vmult_float_emulated.h 122 0x1bce x +me_vmult_float_emulated.h 121 0x1bd4 x +me_vmult_float_emulated.h 122 0x1bd8 x +me_vmult_float_emulated.h 122 0x1bdc +me_vmult_float_emulated.h 123 0x1be4 x +me_vmult_float_emulated.h 122 0x1bea x +me_vmult_float_emulated.h 123 0x1bee x +me_vmult_float_emulated.h 123 0x1bf2 +me_vmult_float_emulated.h 123 0x1bfe +me_vmult_float_emulated.h 124 0x1bfe 1 x +me_vmult_float_emulated.h 124 0x1c06 +me_vmult_float_emulated.h 125 0x1c06 1 x +me_vmult_float_emulated.h 125 0x1c14 +me_vmult_float_emulated.h 124 0x1c18 x +me_vmult_float_emulated.h 125 0x1c2a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1c30 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1108 0x1c30 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1c40 +blend.hpp 170 0x1c4a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 184 0x1c80 +reduce_mean_c8_impl.h 184 0x1c84 x +reduce_mean_c8_impl.h 184 0x1c88 +reduce_mean_c8_impl.h 184 0x1c9c +reduce_mean_c8_impl.h 184 0x1ca6 +reduce_mean_c8_impl.h 184 0x1caa +reduce_mean_c8_impl.h 184 0x1cba +reduce_mean_c8_impl.h 184 0x1cbe +reduce_mean_c8_impl.h 200 0x1cc4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1ce0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1cea + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1cea 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x1cea 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1cf0 +blend.hpp 170 0x1d06 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1d0c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x1d0c 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/me_vmult_float_emulated.h: +me_vmult_float_emulated.h 108 0x1d20 +me_vmult_float_emulated.h 108 0x1d20 1 +me_vmult_float_emulated.h 109 0x1d20 2 +me_vmult_float_emulated.h 110 0x1d20 3 +me_vmult_float_emulated.h 110 0x1d20 4 +me_vmult_float_emulated.h 111 0x1d20 5 +me_vmult_float_emulated.h 111 0x1d20 6 +me_vmult_float_emulated.h 111 0x1d20 7 +me_vmult_float_emulated.h 112 0x1d20 8 +me_vmult_float_emulated.h 112 0x1d20 9 +me_vmult_float_emulated.h 113 0x1d20 10 +me_vmult_float_emulated.h 114 0x1d20 11 +me_vmult_float_emulated.h 114 0x1d20 12 +me_vmult_float_emulated.h 115 0x1d20 13 +me_vmult_float_emulated.h 115 0x1d20 14 +me_vmult_float_emulated.h 115 0x1d20 15 +me_vmult_float_emulated.h 109 0x1d2a +me_vmult_float_emulated.h 111 0x1d2a 1 +me_vmult_float_emulated.h 113 0x1d2a 2 +me_vmult_float_emulated.h 115 0x1d2a 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/add_reduce.hpp: +add_reduce.hpp 322 0x1d2a 4 +add_reduce.hpp 322 0x1d2a 5 +add_reduce.hpp 322 0x1d2a 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 163 0x1d2a 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1d34 +vector.hpp 57 0x1d34 1 +vector.hpp 1139 0x1d34 2 +vector.hpp 1280 0x1d34 3 +vector.hpp 1287 0x1d34 4 +vector.hpp 1288 0x1d34 5 +vector.hpp 1292 0x1d34 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1d34 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 226 0x1d34 8 +reduce_mean_c8_impl.h 268 0x1d34 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1d3e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1d3e 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x1d3e 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/../aie2/blend.hpp: +blend.hpp 170 0x1d44 +blend.hpp 170 0x1d48 +blend.hpp 170 0x1d5a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 57 0x1d60 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/reduce_mean_c8_impl.h: +reduce_mean_c8_impl.h 268 0x1d60 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 472 0x1d70 +superkernels.cpp 472 0x1d70 1 x +superkernels.cpp 477 0x1d76 +superkernels.cpp 477 0x1d80 x +superkernels.cpp 474 0x1d8a x +superkernels.cpp 569 0x1d8a 1 +superkernels.cpp 474 0x1d94 +superkernels.cpp 477 0x1da4 x +superkernels.cpp 477 0x1da4 1 x +superkernels.cpp 474 0x1db6 +superkernels.cpp 474 0x1dbc x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1dc0 +io_buffer_main.h 218 0x1dc0 1 +io_buffer_main.h 324 0x1dc0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1dc0 3 +tile.hpp 74 0x1dc0 4 +tile.hpp 74 0x1dcc x +tile.hpp 86 0x1dcc 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 483 0x1dd6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1dd6 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 480 0x1ddc x +superkernels.cpp 480 0x1de2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1dec + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 481 0x1e00 +superkernels.cpp 487 0x1e00 1 +superkernels.cpp 481 0x1e0a +superkernels.cpp 481 0x1e0a 1 x +superkernels.cpp 481 0x1e14 +superkernels.cpp 481 0x1e14 1 +superkernels.cpp 481 0x1e1e +superkernels.cpp 482 0x1e1e 1 +superkernels.cpp 481 0x1e28 +superkernels.cpp 482 0x1e28 1 x +superkernels.cpp 481 0x1e32 x +superkernels.cpp 483 0x1e32 1 +superkernels.cpp 483 0x1e38 +superkernels.cpp 487 0x1e3c +superkernels.cpp 483 0x1e42 +superkernels.cpp 481 0x1e48 +superkernels.cpp 491 0x1e4c +superkernels.cpp 481 0x1e52 +superkernels.cpp 482 0x1e52 1 x +superkernels.cpp 481 0x1e5a x +superkernels.cpp 481 0x1e60 +superkernels.cpp 483 0x1e64 x +superkernels.cpp 487 0x1e68 x +superkernels.cpp 487 0x1e6c +superkernels.cpp 487 0x1e70 +superkernels.cpp 487 0x1e74 +superkernels.cpp 487 0x1e78 +superkernels.cpp 487 0x1e7c +superkernels.cpp 483 0x1e80 x +superkernels.cpp 487 0x1e84 x +superkernels.cpp 487 0x1e88 +superkernels.cpp 487 0x1e8c +superkernels.cpp 491 0x1e90 x +superkernels.cpp 491 0x1ea0 +superkernels.cpp 491 0x1ea4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1eaa +io_buffer_main.h 218 0x1eaa 1 +io_buffer_main.h 324 0x1eaa 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 491 0x1eb8 +superkernels.cpp 491 0x1ed6 +superkernels.cpp 491 0x1ef0 +superkernels.cpp 491 0x1f00 +superkernels.cpp 491 0x1f10 +superkernels.cpp 491 0x1f16 +superkernels.cpp 491 0x1f1a +superkernels.cpp 491 0x1f20 +superkernels.cpp 491 0x1f30 +superkernels.cpp 491 0x1f30 1 +superkernels.cpp 491 0x1f30 2 +superkernels.cpp 491 0x1f3a +superkernels.cpp 492 0x1f3a 1 +superkernels.cpp 492 0x1f3a 2 +superkernels.cpp 498 0x1f44 +superkernels.cpp 498 0x1f44 1 +superkernels.cpp 499 0x1f4e +superkernels.cpp 505 0x1f54 +superkernels.cpp 508 0x1f54 1 +superkernels.cpp 511 0x1f54 2 +superkernels.cpp 491 0x1f5c +superkernels.cpp 491 0x1f60 +superkernels.cpp 491 0x1f64 +superkernels.cpp 491 0x1f6a +superkernels.cpp 492 0x1f72 x +superkernels.cpp 494 0x1f82 x +superkernels.cpp 495 0x1f86 x +superkernels.cpp 496 0x1f8a x +superkernels.cpp 498 0x1f8e x +superkernels.cpp 498 0x1f9e +superkernels.cpp 499 0x1fa2 x +superkernels.cpp 499 0x1fb2 +superkernels.cpp 500 0x1fb6 x +superkernels.cpp 500 0x1fc2 +superkernels.cpp 500 0x1fd0 +superkernels.cpp 505 0x1fe0 +superkernels.cpp 508 0x1fe0 1 +superkernels.cpp 511 0x1fe0 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1fea +io_buffer_main.h 218 0x1fea 1 +io_buffer_main.h 324 0x1fea 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 505 0x1ff0 x +superkernels.cpp 505 0x1ff0 1 +superkernels.cpp 505 0x2002 +superkernels.cpp 505 0x2006 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x200c x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 505 0x2018 +superkernels.cpp 505 0x201e x +superkernels.cpp 505 0x201e 1 +superkernels.cpp 505 0x2028 +superkernels.cpp 505 0x2030 +superkernels.cpp 505 0x2036 +superkernels.cpp 505 0x203c +superkernels.cpp 505 0x2040 +superkernels.cpp 505 0x2040 1 +superkernels.cpp 505 0x2046 +superkernels.cpp 505 0x2050 +superkernels.cpp 505 0x2050 1 +superkernels.cpp 505 0x2056 +superkernels.cpp 505 0x205a +superkernels.cpp 505 0x205a 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x206a +io_buffer_main.h 395 0x206a 1 +io_buffer_main.h 218 0x2070 x +io_buffer_main.h 218 0x2074 +io_buffer_main.h 218 0x2078 +io_buffer_main.h 235 0x207e x +io_buffer_main.h 218 0x208a x +io_buffer_main.h 218 0x208a 1 x +io_buffer_main.h 218 0x208e +io_buffer_main.h 395 0x209a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 508 0x20a6 x +superkernels.cpp 508 0x20b0 +superkernels.cpp 522 0x20b0 1 +superkernels.cpp 558 0x20b0 2 +superkernels.cpp 508 0x20be +superkernels.cpp 508 0x20c2 +superkernels.cpp 508 0x20d2 +superkernels.cpp 508 0x20d8 +superkernels.cpp 508 0x20d8 1 +superkernels.cpp 508 0x20e2 +superkernels.cpp 508 0x20ea +superkernels.cpp 508 0x20f0 +superkernels.cpp 508 0x20f6 +superkernels.cpp 508 0x20fa +superkernels.cpp 508 0x20fa 1 +superkernels.cpp 508 0x2100 +superkernels.cpp 508 0x2110 +superkernels.cpp 508 0x2110 1 +superkernels.cpp 508 0x2116 +superkernels.cpp 508 0x211a +superkernels.cpp 508 0x211a 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x212a +io_buffer_main.h 395 0x212a 1 +io_buffer_main.h 218 0x2130 x +io_buffer_main.h 218 0x2134 +io_buffer_main.h 218 0x2138 +io_buffer_main.h 235 0x213e x +io_buffer_main.h 218 0x214a x +io_buffer_main.h 218 0x214a 1 x +io_buffer_main.h 218 0x214e +io_buffer_main.h 395 0x215a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 511 0x2166 x +superkernels.cpp 511 0x2166 1 +superkernels.cpp 511 0x217a +superkernels.cpp 511 0x217e +superkernels.cpp 511 0x2182 +superkernels.cpp 511 0x2188 +superkernels.cpp 511 0x2194 +superkernels.cpp 511 0x2198 +superkernels.cpp 511 0x2198 1 +superkernels.cpp 511 0x219e +superkernels.cpp 511 0x21a6 +superkernels.cpp 511 0x21b0 +superkernels.cpp 511 0x21b4 +superkernels.cpp 511 0x21b4 1 +superkernels.cpp 511 0x21ba +superkernels.cpp 511 0x21c0 +superkernels.cpp 511 0x21c0 1 +superkernels.cpp 511 0x21c6 +superkernels.cpp 511 0x21ca +superkernels.cpp 511 0x21ca 1 +superkernels.cpp 516 0x21da +superkernels.cpp 522 0x21da 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x21da 2 x +io_buffer_main.h 395 0x21da 3 +io_buffer_main.h 218 0x21e4 +io_buffer_main.h 218 0x21e8 +io_buffer_main.h 235 0x21ee x +io_buffer_main.h 218 0x21fa x +io_buffer_main.h 218 0x21fa 1 x +io_buffer_main.h 218 0x21fe +io_buffer_main.h 395 0x220e x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 516 0x2226 +superkernels.cpp 522 0x2226 1 +superkernels.cpp 516 0x2240 +superkernels.cpp 522 0x2240 1 +superkernels.cpp 516 0x2250 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2250 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 516 0x225a x +superkernels.cpp 522 0x225a 1 +superkernels.cpp 514 0x2264 +superkernels.cpp 522 0x2264 1 x +superkernels.cpp 514 0x226e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2278 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 516 0x227c x +superkernels.cpp 522 0x2280 x +superkernels.cpp 522 0x2284 +superkernels.cpp 514 0x228a x +superkernels.cpp 514 0x228e +superkernels.cpp 516 0x2294 x +superkernels.cpp 516 0x2298 +superkernels.cpp 522 0x2298 1 +superkernels.cpp 522 0x229e x +superkernels.cpp 522 0x22a2 +superkernels.cpp 522 0x22b2 +superkernels.cpp 522 0x22b6 +superkernels.cpp 523 0x22bc +superkernels.cpp 523 0x22ca x +superkernels.cpp 523 0x22ca 1 +superkernels.cpp 523 0x22d4 +superkernels.cpp 524 0x22d4 1 +superkernels.cpp 524 0x22de +superkernels.cpp 524 0x22de 1 x +superkernels.cpp 523 0x22ee x +superkernels.cpp 524 0x22f4 x +superkernels.cpp 524 0x22f4 1 x +superkernels.cpp 524 0x22fa +superkernels.cpp 524 0x22fe +superkernels.cpp 524 0x2302 +superkernels.cpp 524 0x2306 +superkernels.cpp 525 0x230a x +superkernels.cpp 526 0x230e x +superkernels.cpp 547 0x2312 x +superkernels.cpp 525 0x2318 +superkernels.cpp 525 0x231e x +superkernels.cpp 554 0x232e +superkernels.cpp 558 0x232e 1 +superkernels.cpp 552 0x2338 +superkernels.cpp 554 0x2338 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2338 2 +io_buffer_main.h 327 0x2338 3 +io_buffer_main.h 425 0x2338 4 +io_buffer_main.h 425 0x2338 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 554 0x2342 +superkernels.cpp 555 0x2342 1 +superkernels.cpp 558 0x2342 2 +superkernels.cpp 559 0x2342 3 +superkernels.cpp 562 0x2342 4 +superkernels.cpp 563 0x2342 5 +superkernels.cpp 567 0x2342 6 +superkernels.cpp 554 0x2356 +superkernels.cpp 558 0x2356 1 +superkernels.cpp 552 0x2360 +superkernels.cpp 554 0x2360 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2360 2 +io_buffer_main.h 327 0x2360 3 +io_buffer_main.h 425 0x2360 4 +io_buffer_main.h 425 0x2360 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 554 0x236a +superkernels.cpp 555 0x236a 1 +superkernels.cpp 558 0x236a 2 +superkernels.cpp 559 0x236a 3 +superkernels.cpp 562 0x236a 4 +superkernels.cpp 563 0x236a 5 +superkernels.cpp 567 0x236a 6 +superkernels.cpp 532 0x2380 +superkernels.cpp 533 0x2380 1 +superkernels.cpp 554 0x2380 2 +superkernels.cpp 555 0x2380 3 +superkernels.cpp 558 0x2380 4 +superkernels.cpp 559 0x2380 5 +superkernels.cpp 562 0x2380 6 +superkernels.cpp 563 0x2380 7 +superkernels.cpp 567 0x2380 8 +superkernels.cpp 532 0x238a x +superkernels.cpp 532 0x238a 1 +superkernels.cpp 552 0x238a 2 +superkernels.cpp 532 0x2394 +superkernels.cpp 533 0x2394 1 +superkernels.cpp 533 0x239e x +superkernels.cpp 554 0x239e 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x239e 2 +io_buffer_main.h 327 0x239e 3 +io_buffer_main.h 425 0x239e 4 +io_buffer_main.h 425 0x239e 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 532 0x23ae x +superkernels.cpp 533 0x23b4 x +superkernels.cpp 533 0x23b4 1 x +superkernels.cpp 533 0x23ba +superkernels.cpp 533 0x23be +superkernels.cpp 533 0x23c2 +superkernels.cpp 533 0x23c6 +superkernels.cpp 534 0x23ca x +superkernels.cpp 535 0x23ce x +superkernels.cpp 547 0x23d2 x +superkernels.cpp 534 0x23d8 +superkernels.cpp 534 0x23de x +superkernels.cpp 554 0x23e6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x23f0 +io_buffer_main.h 324 0x23f0 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 541 0x2410 +superkernels.cpp 541 0x2416 x +superkernels.cpp 541 0x2416 1 +superkernels.cpp 541 0x2420 +superkernels.cpp 542 0x2420 1 +superkernels.cpp 542 0x242a x +superkernels.cpp 541 0x2438 x +superkernels.cpp 542 0x243e x +superkernels.cpp 542 0x243e 1 x +superkernels.cpp 542 0x2444 +superkernels.cpp 542 0x2448 +superkernels.cpp 542 0x244c +superkernels.cpp 542 0x244c 1 +superkernels.cpp 542 0x2452 +superkernels.cpp 543 0x2456 x +superkernels.cpp 544 0x245a x +superkernels.cpp 547 0x245e x +superkernels.cpp 543 0x2464 +superkernels.cpp 543 0x246a x +superkernels.cpp 554 0x2480 +superkernels.cpp 558 0x2480 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2480 2 +io_buffer_main.h 125 0x2480 3 x +io_buffer_main.h 324 0x2480 4 +io_buffer_main.h 327 0x2480 5 +io_buffer_main.h 327 0x2480 6 +io_buffer_main.h 425 0x2480 7 +io_buffer_main.h 425 0x2480 8 +io_buffer_main.h 125 0x248c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 287 0x2494 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 552 0x249a +superkernels.cpp 554 0x249e +superkernels.cpp 555 0x249e 1 +superkernels.cpp 558 0x249e 2 +superkernels.cpp 559 0x249e 3 +superkernels.cpp 562 0x249e 4 +superkernels.cpp 563 0x249e 5 +superkernels.cpp 567 0x249e 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/pad_3d.h: +pad_3d.h 287 0x24a6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 552 0x24b0 +superkernels.cpp 552 0x24b0 1 +superkernels.cpp 554 0x24ba + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x24c0 x +io_buffer_main.h 324 0x24c0 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 552 0x24c4 x +superkernels.cpp 554 0x24e0 x +superkernels.cpp 554 0x24f0 +superkernels.cpp 554 0x24f4 +superkernels.cpp 554 0x2504 +superkernels.cpp 555 0x2504 1 +superkernels.cpp 554 0x250a +superkernels.cpp 554 0x250a 1 +superkernels.cpp 554 0x2514 +superkernels.cpp 554 0x251e +superkernels.cpp 554 0x2526 +superkernels.cpp 554 0x252a +superkernels.cpp 554 0x252a 1 +superkernels.cpp 554 0x2530 +superkernels.cpp 554 0x2530 1 +superkernels.cpp 554 0x2536 +superkernels.cpp 554 0x2540 +superkernels.cpp 554 0x2540 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2540 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 554 0x254a +superkernels.cpp 554 0x254e +superkernels.cpp 554 0x254e 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2554 +io_buffer_main.h 327 0x2554 1 +io_buffer_main.h 327 0x2554 2 +io_buffer_main.h 425 0x2554 3 +io_buffer_main.h 425 0x2554 4 +io_buffer_main.h 425 0x2554 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 555 0x2560 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2560 1 x +io_buffer_main.h 425 0x2572 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 558 0x2576 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2576 1 x +io_buffer_main.h 327 0x2590 +io_buffer_main.h 327 0x2594 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 558 0x25a0 +superkernels.cpp 558 0x25b0 x +superkernels.cpp 558 0x25c0 +superkernels.cpp 558 0x25ce +superkernels.cpp 558 0x25d2 +superkernels.cpp 558 0x25d8 +superkernels.cpp 559 0x25d8 1 +superkernels.cpp 558 0x25de +superkernels.cpp 558 0x25ea +superkernels.cpp 558 0x25ee +superkernels.cpp 558 0x25f8 +superkernels.cpp 558 0x2600 +superkernels.cpp 558 0x2604 +superkernels.cpp 558 0x2604 1 +superkernels.cpp 558 0x260a +superkernels.cpp 558 0x260a 1 +superkernels.cpp 558 0x2610 +superkernels.cpp 558 0x2620 +superkernels.cpp 558 0x2620 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2620 2 +io_buffer_main.h 324 0x2620 3 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 558 0x262a +superkernels.cpp 558 0x262e +superkernels.cpp 558 0x262e 1 +superkernels.cpp 562 0x2634 +superkernels.cpp 559 0x2642 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2642 1 x +io_buffer_main.h 425 0x2654 x +io_buffer_main.h 327 0x2658 x +io_buffer_main.h 327 0x2668 +io_buffer_main.h 327 0x266c +io_buffer_main.h 324 0x2676 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 562 0x2690 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2690 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 562 0x26a0 x +superkernels.cpp 562 0x26a0 1 +superkernels.cpp 562 0x26b2 +superkernels.cpp 562 0x26b6 +superkernels.cpp 562 0x26bc +superkernels.cpp 562 0x26ca +superkernels.cpp 562 0x26ca 1 +superkernels.cpp 562 0x26d4 +superkernels.cpp 562 0x26de +superkernels.cpp 562 0x26e6 +superkernels.cpp 562 0x26ea +superkernels.cpp 562 0x26ea 1 +superkernels.cpp 562 0x26f0 +superkernels.cpp 562 0x26f0 1 +superkernels.cpp 562 0x26f6 +superkernels.cpp 562 0x2700 +superkernels.cpp 562 0x2700 1 +superkernels.cpp 562 0x2706 +superkernels.cpp 562 0x270a +superkernels.cpp 562 0x270a 1 +superkernels.cpp 563 0x2710 +superkernels.cpp 563 0x271e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x271e 1 x +io_buffer_main.h 425 0x2730 x +io_buffer_main.h 327 0x2734 x +io_buffer_main.h 327 0x2744 +io_buffer_main.h 327 0x2748 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 566 0x2750 +superkernels.cpp 567 0x2750 1 +superkernels.cpp 566 0x2756 x +superkernels.cpp 566 0x2756 1 +superkernels.cpp 566 0x2760 +superkernels.cpp 566 0x2770 +superkernels.cpp 566 0x2774 +superkernels.cpp 567 0x278a x +superkernels.cpp 569 0x2790 +superkernels.cpp 569 0x279e x +superkernels.cpp 569 0x27a6 +superkernels.cpp 554 0x27c0 +superkernels.cpp 555 0x27c0 1 +superkernels.cpp 558 0x27c0 2 +superkernels.cpp 559 0x27c0 3 +superkernels.cpp 562 0x27c0 4 +superkernels.cpp 563 0x27c0 5 +superkernels.cpp 567 0x27c0 6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x27c0 7 +io_buffer_main.h 324 0x27c0 8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 554 0x27cc +superkernels.cpp 558 0x27cc 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x27cc 2 +io_buffer_main.h 327 0x27cc 3 +io_buffer_main.h 425 0x27cc 4 +io_buffer_main.h 425 0x27cc 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 552 0x27d2 +superkernels.cpp 554 0x27d8 +superkernels.cpp - 0x27d9 + + +superkernels.cpp: +File name Line number Starting address View Stmt + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 29 0x930 x +0_0_reloadable2.cc 31 0x930 1 x +0_0_reloadable2.cc 29 0x936 +0_0_reloadable2.cc 31 0x93c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x93c 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 17 0x944 +0_0_reloadable2.cc 31 0x944 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0x956 x +io_buffer_compiler.h 590 0x95a +io_buffer_compiler.h 590 0x95e +io_buffer_compiler.h 590 0x962 +io_buffer_compiler.h 590 0x966 +io_buffer_compiler.h 195 0x976 x +io_buffer_compiler.h 195 0x976 1 x +io_buffer_compiler.h 194 0x97a x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x97e +io_buffer_main.h 410 0x988 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 17 0x992 x +0_0_reloadable2.cc 18 0x996 x +0_0_reloadable2.cc 19 0x99a x +0_0_reloadable2.cc 16 0x99e x +0_0_reloadable2.cc 38 0x9b0 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0x9b4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 605 0x9c2 x +io_buffer_compiler.h 605 0x9c6 +io_buffer_compiler.h 606 0x9ca + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0x9ca 1 +io_buffer_main.h 440 0x9d8 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 41 0x9dc + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x9dc 1 +io_buffer_compiler.h 606 0x9e2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable2/src/0_0_reloadable2.cc: +0_0_reloadable2.cc 41 0x9f0 x +0_0_reloadable2.cc 41 0x9f8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x9fc x +io_buffer_compiler.h 606 0xa00 +io_buffer_compiler.h 606 0xa04 +io_buffer_compiler.h - 0xa05 + + +CU: me_div.c: +File name Line number Starting address View Stmt + +./me_div.c:[++] +me_div.c 108 0x27f0 +me_div.c 108 0x27f0 1 +me_div.c 115 0x27f0 2 x +me_div.c 108 0x27f6 +me_div.c 108 0x27fa +me_div.c 108 0x27fe +me_div.c 108 0x2802 +me_div.c 108 0x2806 +me_div.c 108 0x280a +me_div.c 108 0x280e +me_div.c 108 0x2812 +me_div.c 108 0x2816 +me_div.c 108 0x281a +me_div.c 108 0x281e +me_div.c 108 0x2822 +me_div.c 108 0x2826 +me_div.c 108 0x282a +me_div.c 108 0x282e +me_div.c 108 0x2832 +me_div.c 108 0x2836 +me_div.c 108 0x283a +me_div.c 108 0x283e +me_div.c 108 0x2842 +me_div.c 108 0x2846 +me_div.c 108 0x284a +me_div.c 108 0x284e +me_div.c 108 0x2852 +me_div.c 108 0x2856 +me_div.c 108 0x285a +me_div.c 108 0x285e +me_div.c 108 0x2862 +me_div.c 119 0x2866 x +me_div.c 108 0x286a x +me_div.c 108 0x286e +me_div.c 108 0x2872 +me_div.c 108 0x2876 +me_div.c - 0x2877 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: softfloat-specialize: +File name Line number Starting address View Stmt + +./softfloat-specialize:[++] +softfloat-specialize 78 0x2880 +softfloat-specialize 137 0x2880 1 +softfloat-specialize 139 0x2880 2 +softfloat-specialize 143 0x2880 3 x +softfloat-specialize 137 0x288a +softfloat-specialize 139 0x288a 1 +softfloat-specialize 140 0x288a 2 +softfloat-specialize 141 0x288a 3 +softfloat-specialize 78 0x2894 +softfloat-specialize 137 0x2894 1 +softfloat-specialize 139 0x2894 2 +softfloat-specialize 140 0x2894 3 x +softfloat-specialize 141 0x289e x +softfloat-specialize 137 0x28a2 x +softfloat-specialize 139 0x28a6 x +softfloat-specialize 139 0x28aa +softfloat-specialize 137 0x28ae x +softfloat-specialize 137 0x28b2 +softfloat-specialize 78 0x28b6 x +softfloat-specialize 78 0x28ba +softfloat-specialize 143 0x28be x +softfloat-specialize 137 0x28c2 +softfloat-specialize 139 0x28c2 1 +softfloat-specialize 139 0x28c8 x +softfloat-specialize 139 0x28cc +softfloat-specialize 137 0x28d0 x +softfloat-specialize 137 0x28d4 +softfloat-specialize 143 0x28d8 x +softfloat-specialize 137 0x28dc x +softfloat-specialize 139 0x28e0 x +softfloat-specialize 143 0x28e4 x +softfloat-specialize 139 0x28e8 x +softfloat-specialize 143 0x28ec x + +./softfloat.c:[++] +softfloat.c 154 0x28f0 x +softfloat.c 161 0x28f0 1 +softfloat.c 203 0x28f0 2 +softfloat.c 161 0x28fa x +softfloat.c 171 0x28fa 1 +softfloat.c 174 0x28fa 2 +softfloat.c 178 0x28fa 3 +softfloat.c 194 0x28fa 4 +softfloat.c 162 0x290c x +softfloat.c 164 0x290c 1 x +softfloat.c 182 0x2912 +softfloat.c 185 0x2912 1 +softfloat.c 202 0x2912 2 +softfloat.c 165 0x291e +softfloat.c 171 0x291e 1 +softfloat.c 171 0x291e 2 +softfloat.c 174 0x291e 3 +softfloat.c 174 0x291e 4 +softfloat.c 165 0x2928 +softfloat.c 171 0x2928 1 x +softfloat.c 171 0x292e +softfloat.c 174 0x2932 x +softfloat.c 170 0x2936 +softfloat.c 174 0x2936 1 +softfloat.c 170 0x293c x +softfloat.c 170 0x293c 1 x +softfloat.c 165 0x2940 x +softfloat.c 165 0x2944 +softfloat.c 179 0x2950 +softfloat.c 179 0x2950 1 x +softfloat.c 180 0x2950 2 +softfloat.c 181 0x2950 3 +softfloat.c 179 0x2956 +softfloat.c 179 0x295a +softfloat.c 178 0x2960 x + +./softfloat-macros:[++] +softfloat-macros 50 0x2964 + +./softfloat.c:[++] +softfloat.c 128 0x2964 1 +softfloat.c 128 0x2968 x +softfloat.c 181 0x2970 x +softfloat.c 182 0x2970 1 x +softfloat.c 182 0x2970 2 +softfloat.c 182 0x297a +softfloat.c 180 0x297e x +softfloat.c 182 0x2982 x +softfloat.c 181 0x2986 x +softfloat.c 180 0x298a x + +./softfloat-macros:[++] +softfloat-macros 50 0x2990 + +./softfloat.c:[++] +softfloat.c 187 0x2990 1 +softfloat.c 192 0x2990 2 +softfloat.c 204 0x2990 3 +softfloat.c 204 0x2990 4 +softfloat.c 187 0x299c x +softfloat.c 187 0x29a0 +softfloat.c 192 0x29b0 x + +./softfloat-macros:[++] +softfloat-macros 46 0x29b4 x +softfloat-macros 46 0x29b4 1 x +softfloat-macros 49 0x29c4 +softfloat-macros 50 0x29c4 1 x +softfloat-macros 50 0x29ca +softfloat-macros 50 0x29ce +softfloat-macros 50 0x29d2 +softfloat-macros 49 0x29d6 x +softfloat-macros 50 0x29da x +softfloat-macros 53 0x29de x +softfloat-macros 50 0x29e2 x +softfloat-macros 49 0x29e6 x + +./softfloat.c:[++] +softfloat.c 194 0x29f6 x +softfloat.c 204 0x29fa +softfloat.c 204 0x29fa 1 +softfloat.c 204 0x2a10 +softfloat.c 204 0x2a10 1 +softfloat.c 202 0x2a20 x +softfloat.c 202 0x2a20 1 +softfloat.c 203 0x2a20 2 x +softfloat.c 128 0x2a2a +softfloat.c 203 0x2a2a 1 +softfloat.c 203 0x2a2a 2 +softfloat.c 203 0x2a34 +softfloat.c 202 0x2a38 +softfloat.c 203 0x2a3c +softfloat.c 205 0x2a40 x +softfloat.c 203 0x2a44 x +softfloat.c 204 0x2a48 x +softfloat.c 204 0x2a48 1 x +softfloat.c 128 0x2a4c x +softfloat.c 128 0x2a50 +softfloat.c 128 0x2a54 +softfloat.c 185 0x2a60 x +softfloat.c 128 0x2a64 +softfloat.c 128 0x2a6a x +softfloat.c 185 0x2a6e x +softfloat.c 185 0x2a72 +softfloat.c 218 0x2a80 x +softfloat.c 224 0x2a80 1 x + +./softfloat-macros:[++] +softfloat-macros 552 0x2a86 x + +./softfloat.c:[++] +softfloat.c 223 0x2a8a x +softfloat.c 224 0x2a8e x +softfloat.c 224 0x2a92 +softfloat.c 477 0x2aa0 x +softfloat.c 481 0x2aa0 1 +softfloat.c 481 0x2aa0 2 x +softfloat.c 482 0x2ab0 +softfloat.c 482 0x2ab6 x +softfloat.c 482 0x2aba +softfloat.c 484 0x2aca +softfloat.c 484 0x2aca 1 x +softfloat.c 484 0x2ad4 +softfloat.c 484 0x2ad4 1 +softfloat.c 483 0x2ad8 +softfloat.c 483 0x2adc x +softfloat.c 481 0x2af0 x +softfloat.c 482 0x2b00 x +softfloat.c 70 0x2b20 +softfloat.c 81 0x2b20 1 +softfloat.c 734 0x2b20 2 x +softfloat.c 81 0x2b2a x +softfloat.c 81 0x2b2e +softfloat.c 81 0x2b32 +softfloat.c 81 0x2b36 + +./softfloat-macros:[++] +softfloat-macros 50 0x2b3a + +./softfloat.c:[++] +softfloat.c 744 0x2b3a 1 x +softfloat.c 747 0x2b3a 2 +softfloat.c 761 0x2b3a 3 +softfloat.c 772 0x2b3a 4 +softfloat.c 788 0x2b3a 5 +softfloat.c 747 0x2b40 x +softfloat.c 747 0x2b44 +softfloat.c 70 0x2b4a x +softfloat.c 70 0x2b4e +softfloat.c 745 0x2b4e 1 +softfloat.c 746 0x2b4e 2 +softfloat.c 745 0x2b54 x +softfloat.c 746 0x2b58 x +softfloat.c 748 0x2b58 1 +softfloat.c 762 0x2b58 2 + +./softfloat-macros:[++] +softfloat-macros 50 0x2b5e + +./softfloat.c:[++] +softfloat.c 128 0x2b5e 1 +softfloat.c 748 0x2b5e 2 x +softfloat.c 761 0x2b64 x +softfloat.c 761 0x2b68 +softfloat.c 128 0x2b6e x +softfloat.c 762 0x2b7a x +softfloat.c 762 0x2b7e +softfloat.c 793 0x2b8e +softfloat.c 787 0x2b92 +softfloat.c 767 0x2b96 x +softfloat.c 766 0x2b9a x +softfloat.c 772 0x2b9e x + +./softfloat-macros:[++] +softfloat-macros 46 0x2ba2 x +softfloat-macros 46 0x2ba2 1 x + +./softfloat.c:[++] +softfloat.c 770 0x2ba8 +softfloat.c 785 0x2ba8 1 +softfloat.c 770 0x2bae x +softfloat.c 766 0x2bb2 x + +./softfloat-macros:[++] +softfloat-macros 49 0x2bba +softfloat-macros 50 0x2bba 1 x +softfloat-macros 50 0x2bc0 +softfloat-macros 50 0x2bc4 +softfloat-macros 49 0x2bc8 x +softfloat-macros 50 0x2bd2 x +softfloat-macros 50 0x2bd6 +softfloat-macros 53 0x2bda x +softfloat-macros 50 0x2bde x +softfloat-macros 49 0x2be2 x + +./softfloat.c:[++] +softfloat.c 748 0x2bf0 x +softfloat.c 756 0x2bf6 +softfloat.c 785 0x2bf6 1 +softfloat.c 793 0x2c04 +softfloat.c 753 0x2c08 x +softfloat.c 787 0x2c08 1 +softfloat.c 752 0x2c0e +softfloat.c 752 0x2c0e 1 +softfloat.c 752 0x2c12 x +softfloat.c 752 0x2c12 1 x + +./softfloat-macros:[++] +softfloat-macros 46 0x2c16 x +softfloat-macros 46 0x2c16 1 x + +./softfloat.c:[++] +softfloat.c 756 0x2c1c x +softfloat.c 752 0x2c20 x +softfloat.c 752 0x2c20 1 x + +./softfloat-macros:[++] +softfloat-macros 49 0x2c2a +softfloat-macros 50 0x2c2a 1 x +softfloat-macros 50 0x2c30 +softfloat-macros 50 0x2c34 +softfloat-macros 50 0x2c38 +softfloat-macros 49 0x2c3c x +softfloat-macros 50 0x2c40 x +softfloat-macros 53 0x2c44 x +softfloat-macros 50 0x2c48 x +softfloat-macros 49 0x2c4c x + +./softfloat.c:[++] +softfloat.c 785 0x2c50 x +softfloat.c 786 0x2c50 1 +softfloat.c 787 0x2c50 2 x +softfloat.c 786 0x2c5a x +softfloat.c 790 0x2c5a 1 x +softfloat.c 786 0x2c60 +softfloat.c 788 0x2c64 x +softfloat.c 788 0x2c68 +softfloat.c 788 0x2c6c +softfloat.c 793 0x2c70 x +softfloat.c 763 0x2c80 x +softfloat.c 764 0x2c90 x +softfloat.c 128 0x2c94 +softfloat.c 128 0x2c9a x +softfloat.c 776 0x2cb0 x +softfloat.c 780 0x2cc0 x +softfloat.c 793 0x2cd0 +softfloat.c 781 0x2cda +softfloat.c 781 0x2ce0 x +softfloat.c 793 0x2ce0 1 +softfloat.c 781 0x2ce6 +softfloat.c 749 0x2cf0 x +softfloat.c 750 0x2d00 x +softfloat.c 763 0x2d10 x +softfloat.c 777 0x2d20 x +softfloat.c 777 0x2d24 +softfloat.c 778 0x2d34 x +softfloat.c 780 0x2d50 x +softfloat.c 780 0x2d50 1 x +softfloat.c 780 0x2d56 +softfloat.c 780 0x2d5a +softfloat.c 128 0x2d5e x +softfloat.c 749 0x2d70 x +softfloat.c 777 0x2d80 x +softfloat.c 70 0x2d90 +softfloat.c 81 0x2d90 1 +softfloat.c 805 0x2d90 2 x +softfloat.c 81 0x2d9a x +softfloat.c 81 0x2d9e +softfloat.c 70 0x2da2 x +softfloat.c 81 0x2da6 x +softfloat.c 81 0x2daa +softfloat.c 70 0x2dae x +softfloat.c 816 0x2dae 1 +softfloat.c 817 0x2dae 2 +softfloat.c 816 0x2db4 x + +./softfloat-macros:[++] +softfloat-macros 50 0x2db8 + +./softfloat.c:[++] +softfloat.c 815 0x2db8 1 x +softfloat.c 818 0x2db8 2 +softfloat.c 819 0x2db8 3 +softfloat.c 843 0x2db8 4 +softfloat.c 818 0x2dbe x +softfloat.c 818 0x2dc2 +softfloat.c 817 0x2dc8 x +softfloat.c 833 0x2dcc +softfloat.c 851 0x2dcc 1 +softfloat.c 859 0x2dcc 2 +softfloat.c 862 0x2dcc 3 +softfloat.c 851 0x2dd6 x +softfloat.c 862 0x2dda x +softfloat.c 859 0x2dde x +softfloat.c 819 0x2de2 x +softfloat.c 819 0x2de6 +softfloat.c 825 0x2dec +softfloat.c 835 0x2dec 1 +softfloat.c 835 0x2df0 x +softfloat.c 833 0x2dfa x +softfloat.c 833 0x2dfe +softfloat.c 868 0x2e0e +softfloat.c 838 0x2e12 x +softfloat.c 837 0x2e16 x +softfloat.c 843 0x2e1a x + +./softfloat-macros:[++] +softfloat-macros 46 0x2e1e x +softfloat-macros 46 0x2e1e 1 x + +./softfloat.c:[++] +softfloat.c 837 0x2e24 x + +./softfloat-macros:[++] +softfloat-macros 49 0x2e30 +softfloat-macros 50 0x2e30 1 x +softfloat-macros 50 0x2e30 2 +softfloat-macros 50 0x2e3a +softfloat-macros 50 0x2e3e +softfloat-macros 53 0x2e42 x +softfloat-macros 49 0x2e46 x +softfloat-macros 50 0x2e4a x +softfloat-macros 50 0x2e4e +softfloat-macros 50 0x2e52 +softfloat-macros 49 0x2e56 x + +./softfloat.c:[++] +softfloat.c 846 0x2e66 x +softfloat.c 851 0x2e80 x +softfloat.c 867 0x2e90 +softfloat.c 868 0x2e94 +softfloat.c 855 0x2e98 +softfloat.c 855 0x2e98 1 +softfloat.c 867 0x2e9c +softfloat.c 856 0x2ea0 x +softfloat.c 855 0x2ea4 x +softfloat.c 855 0x2ea4 1 x + +./softfloat-macros:[++] +softfloat-macros 46 0x2ea8 x +softfloat-macros 46 0x2ea8 1 x + +./softfloat.c:[++] +softfloat.c 855 0x2eae x +softfloat.c 855 0x2eae 1 x + +./softfloat-macros:[++] +softfloat-macros 49 0x2eba +softfloat-macros 50 0x2eba 1 x +softfloat-macros 50 0x2eba 2 +softfloat-macros 50 0x2ec4 +softfloat-macros 50 0x2ec8 +softfloat-macros 50 0x2ecc +softfloat-macros 49 0x2ed0 x +softfloat-macros 50 0x2ed4 x +softfloat-macros 53 0x2ed8 x +softfloat-macros 50 0x2edc x +softfloat-macros 49 0x2ee0 x + +./softfloat.c:[++] +softfloat.c 864 0x2ef0 x +softfloat.c 868 0x2f00 x +softfloat.c 867 0x2f06 x +softfloat.c 820 0x2f20 x +softfloat.c 829 0x2f30 x +softfloat.c 829 0x2f34 +softfloat.c 825 0x2f3a x +softfloat.c 825 0x2f3e +softfloat.c 825 0x2f42 +softfloat.c 830 0x2f4a x +softfloat.c 830 0x2f4e +softfloat.c 128 0x2f5e +softfloat.c 831 0x2f62 +softfloat.c 831 0x2f68 x +softfloat.c 831 0x2f70 +softfloat.c 831 0x2f74 +softfloat.c 831 0x2f7c +softfloat.c 128 0x2f80 x +softfloat.c 834 0x2f90 x +softfloat.c 128 0x2fa0 +softfloat.c 835 0x2fa4 x +softfloat.c 128 0x2fa8 x +softfloat.c 128 0x2fac +softfloat.c 128 0x2fb2 +softfloat.c 852 0x2fc0 x +softfloat.c 853 0x2fd0 x +softfloat.c 821 0x2fe0 x +softfloat.c 821 0x2fe4 +softfloat.c 823 0x2ff4 x +softfloat.c 868 0x3016 +softfloat.c 864 0x301a +softfloat.c 846 0x3036 +softfloat.c 867 0x303a +softfloat.c 868 0x303e +softfloat.c 834 0x3050 x +softfloat.c 852 0x3060 x +softfloat.c 821 0x3070 x +softfloat.c 92 0x3080 +softfloat.c 878 0x3080 1 x +softfloat.c 92 0x3084 x +softfloat.c 92 0x3088 +softfloat.c 884 0x308c x +softfloat.c 884 0x3090 +softfloat.c 888 0x30a0 x +softfloat.c 885 0x30b0 x +softfloat.c - 0x30b1 + + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/scripts/0_2_reloadable8.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/scripts/0_2_reloadable8.bcf new file mode 100644 index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/scripts/0_2_reloadable8.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x930 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x930 + +_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7ba80 0x40 //reserved for sync buffer +_stack DM_stack 0x7bac0 0x940 //stack for core +_reserved DMb 0x7c400 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c440 0x880//heap +_reserved DMb 0x40000 0x3b280 + +_reserved DMb 0x7ccc0 0x3340 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/scripts/0_2_reloadable8.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/scripts/0_2_reloadable8.prx new file mode 100644 index 0000000000000000000000000000000000000000..567f2a1e009a9a50a3924730ab7d2a674ab11298 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/scripts/0_2_reloadable8.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/src/0_2_reloadable8.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/src/0_2_reloadable8.cc new file mode 100644 index 0000000000000000000000000000000000000000..7b211124072bdc08c2e3d113228cd9b65f8857a3 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable8/src/0_2_reloadable8.cc @@ -0,0 +1,41 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void superkernel_reduce_mean_c8(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); + +// Declare Kernel objects and external arrays + + +void _b961_wrapper(void* args[]) +{ + superkernel_reduce_mean_c8( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[1] = { + _b961_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + idx += numAsyncIn; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + idx += numAsyncIn; +} diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.calltree new file mode 100644 index 0000000000000000000000000000000000000000..78faacd04df9723eae105409bd66ec99011b5021 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.calltree @@ -0,0 +1,88 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:01 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable3 ../Release/0_0_reloadable3.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable3.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3342 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z13_b896_wrapperPPv (referenced text) + _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + _Z13_b901_wrapperPPv (referenced text) + _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _Z13_b906_wrapperPPv (referenced text) + _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + _Z13_b881_wrapperPPv (referenced text) + _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + _Z13_b891_wrapperPPv (referenced text) + _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + _Z13_b919_wrapperPPv (referenced text) + _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 320 0 0 390 11754 _Z13kernelWrapperPPvjjjj + 0 192 1 1 36 4714 _Z13_b896_wrapperPPv + 64 192 1 2 568 4678 _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 0 0 3 4 270 270 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + 0 192 1 1 32 1252 _Z13_b901_wrapperPPv + 64 192 1 2 488 1220 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 128 2 3 62 304 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + 64 64 3 4 162 186 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + 128 128 2 3 114 428 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + 0 0 3 4 314 314 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 0 64 1 1 32 862 _Z13_b906_wrapperPPv + 64 64 1 2 488 830 _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 0 0 2 3 100 100 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + 0 0 2 3 242 242 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + 0 256 1 1 32 1394 _Z13_b881_wrapperPPv + 64 256 1 2 488 1362 _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 74 190 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + 64 192 2 3 150 684 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + 128 128 3 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 128 1 1 36 1092 _Z13_b891_wrapperPPv + 64 128 1 2 602 1056 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 192 1 1 36 2050 _Z13_b919_wrapperPPv + 128 192 1 2 478 2014 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 672 814 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 0 2 3 722 722 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + +Maximum call level : 5 +Maximum stack level: 4 +Maximum stack size : 320 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..f6eec0e2b8bd493ef0112849914646d03f76489e --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.cmic2 @@ -0,0 +1,17226 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:03 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable3 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable3.cc" 82 first +.src_ref 0 "0_0_reloadable3.cc" 84 60 first +.src_ref 0 "0_0_reloadable3.cc" 84 110 +.src_ref 0 "0_0_reloadable3.cc" 86 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.function_start + 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00100001" // /* MW 4 */ + 2355 "11010001" // /* MW 3 */ + 2356 "11000110" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 82 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ + 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "11010000" // /* MW 6 */ + 2367 "00101011" // /* MW 5 */ + 2368 "00000000" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "11110011" // /* MW 2 */ + 2371 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 84 110 +.src_ref 0 "0_0_reloadable3.cc" 86 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2373 "01110000" // /* MW 7 */ + 2374 "10010000" // /* MW 6 */ + 2375 "11101000" // /* MW 5 */ + 2376 "00000001" // /* MW 4 */ + 2377 "10110000" // /* MW 3 */ + 2378 "10000111" // /* MW 2 */ + 2379 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 84 110 first + 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2381 "11100000" // /* MW 5 */ + 2382 "11000001" // /* MW 4 */ + 2383 "10110111" // /* MW 3 */ + 2384 "00000110" // /* MW 2 */ + 2385 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2387 "00100000" // /* MW 3 */ + 2388 "10011000" // /* MW 2 */ + 2389 "00011110" // /* MW 1 */ + 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2391 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2393 "10000010" // /* MW 3 */ + 2394 "01101000" // /* MW 2 */ + 2395 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2397 "00110110" // /* MW 3 */ + 2398 "00011110" // /* MW 2 */ + 2399 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2401 "01110110" // /* MW 3 */ + 2402 "00111110" // /* MW 2 */ + 2403 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2405 "01010110" // /* MW 3 */ + 2406 "11101110" // /* MW 2 */ + 2407 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2409 "01110110" // /* MW 3 */ + 2410 "00000111" // /* MW 2 */ + 2411 "00000111" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ + 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2423 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2425 "00110010" // /* MW 3 */ + 2426 "01100011" // /* MW 2 */ + 2427 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2429 "00110001" // /* MW 3 */ + 2430 "11010110" // /* MW 2 */ + 2431 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 + 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2433 "11111101" // /* MW 3 */ + 2434 "11100010" // /* MW 2 */ + 2435 "00010111" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ + 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2441 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2443 "00011000" // /* MW 3 */ + 2444 "10010111" // /* MW 2 */ + 2445 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 60 +.src_ref 0 "0_0_reloadable3.cc" 90 7 + 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2447 "00001001" // /* MW 3 */ + 2448 "00100100" // /* MW 2 */ + 2449 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 60 first + 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00101101" // /* MW 3 */ + 2452 "00101001" // /* MW 2 */ + 2453 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 60 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "00100000" // /* MW 3 */ + 2456 "10001010" // /* MW 2 */ + 2457 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 60 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2459 "10001011" // /* MW 5 */ + 2460 "11011000" // /* MW 4 */ + 2461 "11011111" // /* MW 3 */ + 2462 "01001110" // /* MW 2 */ + 2463 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2465 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2467 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2469 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2471 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 110 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2473 "00000101" // /* MW 3 */ + 2474 "00100110" // /* MW 2 */ + 2475 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 110 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2477 "11111100" // /* MW 3 */ + 2478 "11110100" // /* MW 2 */ + 2479 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2481 "00000000" // /* MW 7 */ + 2482 "11000001" // /* MW 6 */ + 2483 "10110100" // /* MW 5 */ + 2484 "00000011" // /* MW 4 */ + 2485 "10110000" // /* MW 3 */ + 2486 "01101010" // /* MW 2 */ + 2487 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2489 "01110110" // /* MW 3 */ + 2490 "00011110" // /* MW 2 */ + 2491 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2493 "10110110" // /* MW 3 */ + 2494 "00111110" // /* MW 2 */ + 2495 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2497 "10010110" // /* MW 3 */ + 2498 "11101110" // /* MW 2 */ + 2499 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2501 "01110110" // /* MW 3 */ + 2502 "00000111" // /* MW 2 */ + 2503 "00000111" // /* MW 1 */ + 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2505 "00000000" // /* MW 1 */ + 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2507 "00000000" // /* MW 1 */ + 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2509 "00000000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2517 "01010010" // /* MW 3 */ + 2518 "11100111" // /* MW 2 */ + 2519 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "01110001" // /* MW 3 */ + 2522 "11010110" // /* MW 2 */ + 2523 "00001111" // /* MW 1 */ + 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2525 "00000000" // /* MW 1 */ + 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2527 "00000000" // /* MW 1 */ + 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2529 "00000000" // /* MW 1 */ + 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2531 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2533 "00011000" // /* MW 3 */ + 2534 "00010111" // /* MW 2 */ + 2535 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 7 first + 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2537 "00101101" // /* MW 3 */ + 2538 "00100011" // /* MW 2 */ + 2539 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 7 + 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "10100000" // /* MW 3 */ + 2542 "10001000" // /* MW 2 */ + 2543 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 7 + 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000000" // /* MW 5 */ + 2546 "11001001" // /* MW 4 */ + 2547 "11001110" // /* MW 3 */ + 2548 "00000111" // /* MW 2 */ + 2549 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 7 + 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2551 "00101011" // /* MW 5 */ + 2552 "11010100" // /* MW 4 */ + 2553 "11011111" // /* MW 3 */ + 2554 "00010011" // /* MW 2 */ + 2555 "11100000" // /* MW 1 */ + 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2557 "00000000" // /* MW 1 */ + 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2559 "00000000" // /* MW 1 */ + 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2561 "00000000" // /* MW 1 */ + 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2563 "00000000" // /* MW 1 */ + 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2565 "00000000" // /* MW 1 */ + 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2567 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 4 +.no_stack_arguments + 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2569 "01000000" // /* MW 3 */ + 2570 "00110000" // /* MW 2 */ + 2571 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 93 60 +.src_ref 0 "0_0_reloadable3.cc" 95 60 +.delay_slot + 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "11000000" // /* MW 3 */ + 2574 "01100000" // /* MW 2 */ + 2575 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2583 "01111110" // /* MW 9 */ + 2584 "10100101" // /* MW 8 */ + 2585 "00000001" // /* MW 7 */ + 2586 "00000000" // /* MW 6 */ + 2587 "00010000" // /* MW 5 */ + 2588 "00000000" // /* MW 4 */ + 2589 "11110000" // /* MW 3 */ + 2590 "00101100" // /* MW 2 */ + 2591 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 93 60 first +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.return_address + 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2593 "00001010" // /* MW 5 */ + 2594 "01000000" // /* MW 4 */ + 2595 "11010000" // /* MW 3 */ + 2596 "11000110" // /* MW 2 */ + 2597 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2599 "01010001" // /* MW 3 */ + 2600 "11101011" // /* MW 2 */ + 2601 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 95 60 + 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2603 "01000001" // /* MW 3 */ + 2604 "11101100" // /* MW 2 */ + 2605 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2607 "00101001" // /* MW 3 */ + 2608 "11110000" // /* MW 2 */ + 2609 "00000111" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ + 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2613 "00000000" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "10001000" // /* MW 3 */ + 2618 "01101000" // /* MW 2 */ + 2619 "00011001" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00110110" // /* MW 3 */ + 2622 "00000110" // /* MW 2 */ + 2623 "00000001" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ + 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2629 "00000000" // /* MW 1 */ + 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "00011100" // /* MW 3 */ + 2636 "10100000" // /* MW 2 */ + 2637 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "00001000" // /* MW 3 */ + 2640 "01010101" // /* MW 2 */ + 2641 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2643 "01000001" // /* MW 5 */ + 2644 "10101111" // /* MW 4 */ + 2645 "11011101" // /* MW 3 */ + 2646 "11000110" // /* MW 2 */ + 2647 "00111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 95 60 first + 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2649 "01010110" // /* MW 3 */ + 2650 "00000010" // /* MW 2 */ + 2651 "00000111" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ + 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2655 "00000000" // /* MW 1 */ + 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2657 "00000000" // /* MW 1 */ + 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2659 "00000000" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2663 "00010001" // /* MW 3 */ + 2664 "00100111" // /* MW 2 */ + 2665 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2667 "00010000" // /* MW 5 */ + 2668 "11010010" // /* MW 4 */ + 2669 "01000000" // /* MW 3 */ + 2670 "01100110" // /* MW 2 */ + 2671 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2673 "01100011" // /* MW 5 */ + 2674 "11101100" // /* MW 4 */ + 2675 "11010011" // /* MW 3 */ + 2676 "11000110" // /* MW 2 */ + 2677 "00000000" // /* MW 1 */ + 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2679 "00000000" // /* MW 1 */ + 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2681 "00000000" // /* MW 1 */ + 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2683 "00000000" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2691 "00001000" // /* MW 3 */ + 2692 "01010101" // /* MW 2 */ + 2693 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 98 + 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2695 "00111001" // /* MW 3 */ + 2696 "11111100" // /* MW 2 */ + 2697 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2699 "00110110" // /* MW 3 */ + 2700 "11110110" // /* MW 2 */ + 2701 "00000000" // /* MW 1 */ + 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "10011001" // /* MW 3 */ + 2704 "11110111" // /* MW 2 */ + 2705 "00000111" // /* MW 1 */ + 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11110001" // /* MW 3 */ + 2708 "11111001" // /* MW 2 */ + 2709 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 98 first + 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2711 "00000001" // /* MW 5 */ + 2712 "00000000" // /* MW 4 */ + 2713 "00000000" // /* MW 3 */ + 2714 "11111000" // /* MW 2 */ + 2715 "11111111" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ + 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2719 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 98 + 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2721 "00000000" // /* MW 3 */ + 2722 "00101000" // /* MW 2 */ + 2723 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "00011100" // /* MW 3 */ + 2726 "11100000" // /* MW 2 */ + 2727 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "00010001" // /* MW 3 */ + 2730 "00100001" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2733 "00000010" // /* MW 3 */ + 2734 "01100001" // /* MW 2 */ + 2735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2737 "00010001" // /* MW 3 */ + 2738 "11110110" // /* MW 2 */ + 2739 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2741 "00000000" // /* MW 1 */ +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 432 first +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.function_start + 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2753 "01111000" // /* MW 9 */ + 2754 "01100000" // /* MW 8 */ + 2755 "01001001" // /* MW 7 */ + 2756 "10001000" // /* MW 6 */ + 2757 "01000000" // /* MW 5 */ + 2758 "00000000" // /* MW 4 */ + 2759 "11010000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2763 "01001000" // /* MW 9 */ + 2764 "10000010" // /* MW 8 */ + 2765 "00110000" // /* MW 7 */ + 2766 "11101001" // /* MW 6 */ + 2767 "01010111" // /* MW 5 */ + 2768 "00111110" // /* MW 4 */ + 2769 "11010000" // /* MW 3 */ + 2770 "10000001" // /* MW 2 */ + 2771 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 432 +.src_ref 2 "conv2d_bf16_params.h" 444 52 + 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2773 "01110000" // /* MW 9 */ + 2774 "00000000" // /* MW 8 */ + 2775 "00000000" // /* MW 7 */ + 2776 "00000000" // /* MW 6 */ + 2777 "00000010" // /* MW 5 */ + 2778 "00000000" // /* MW 4 */ + 2779 "00000000" // /* MW 3 */ + 2780 "10000001" // /* MW 2 */ + 2781 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 30 +.src_ref 2 "conv2d_bf16_params.h" 458 30 + 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2783 "01011000" // /* MW 11 */ + 2784 "00010000" // /* MW 10 */ + 2785 "00000000" // /* MW 9 */ + 2786 "00101000" // /* MW 8 */ + 2787 "00000000" // /* MW 7 */ + 2788 "10000001" // /* MW 6 */ + 2789 "10110101" // /* MW 5 */ + 2790 "11111101" // /* MW 4 */ + 2791 "00000111" // /* MW 3 */ + 2792 "10000110" // /* MW 2 */ + 2793 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2795 "01011000" // /* MW 11 */ + 2796 "00001111" // /* MW 10 */ + 2797 "10001000" // /* MW 9 */ + 2798 "10101010" // /* MW 8 */ + 2799 "01010111" // /* MW 7 */ + 2800 "10111111" // /* MW 6 */ + 2801 "11010101" // /* MW 5 */ + 2802 "11111001" // /* MW 4 */ + 2803 "00000111" // /* MW 3 */ + 2804 "01100011" // /* MW 2 */ + 2805 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "00000010" // /* MW 5 */ + 2808 "01100000" // /* MW 4 */ + 2809 "10110000" // /* MW 3 */ + 2810 "10111110" // /* MW 2 */ + 2811 "11111110" // /* MW 1 */ + 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2815 "00101001" // /* MW 3 */ + 2816 "00011100" // /* MW 2 */ + 2817 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2819 "00001001" // /* MW 3 */ + 2820 "00011100" // /* MW 2 */ + 2821 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00101110" // /* MW 3 */ + 2824 "00011100" // /* MW 2 */ + 2825 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "00001110" // /* MW 3 */ + 2828 "00011100" // /* MW 2 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2841 "00101001" // /* MW 3 */ + 2842 "00011100" // /* MW 2 */ + 2843 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "00001001" // /* MW 3 */ + 2846 "00011100" // /* MW 2 */ + 2847 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00101110" // /* MW 3 */ + 2850 "00011100" // /* MW 2 */ + 2851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00001110" // /* MW 3 */ + 2854 "00011100" // /* MW 2 */ + 2855 "00000000" // /* MW 1 */ + 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2857 "00000000" // /* MW 1 */ + 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2859 "00000000" // /* MW 1 */ + 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2861 "00000000" // /* MW 1 */ + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2867 "00101001" // /* MW 3 */ + 2868 "00011100" // /* MW 2 */ + 2869 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001001" // /* MW 3 */ + 2872 "00011100" // /* MW 2 */ + 2873 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00001110" // /* MW 3 */ + 2876 "00000100" // /* MW 2 */ + 2877 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101110" // /* MW 3 */ + 2880 "00010100" // /* MW 2 */ + 2881 "00000000" // /* MW 1 */ + 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2883 "00000000" // /* MW 1 */ + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2893 "00001001" // /* MW 3 */ + 2894 "00000100" // /* MW 2 */ + 2895 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2897 "00101001" // /* MW 3 */ + 2898 "00010100" // /* MW 2 */ + 2899 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 40 first + 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "10101010" // /* MW 3 */ + 2902 "11011101" // /* MW 2 */ + 2903 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 447 34 first + 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2905 "00101010" // /* MW 3 */ + 2906 "00011110" // /* MW 2 */ + 2907 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 448 34 first + 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2909 "11001010" // /* MW 3 */ + 2910 "10111101" // /* MW 2 */ + 2911 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "11111010" // /* MW 3 */ + 2914 "11111101" // /* MW 2 */ + 2915 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first + 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "01101010" // /* MW 3 */ + 2918 "00001010" // /* MW 2 */ + 2919 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 20 first + 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2921 "11101010" // /* MW 3 */ + 2922 "10101100" // /* MW 2 */ + 2923 "00000010" // /* MW 1 */ + 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2925 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first + 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2927 "00011101" // /* MW 3 */ + 2928 "01000010" // /* MW 2 */ + 2929 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first +.src_ref 2 "conv2d_bf16_params.h" 462 7 first + 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2931 "00000001" // /* MW 5 */ + 2932 "00110001" // /* MW 4 */ + 2933 "11111001" // /* MW 3 */ + 2934 "00100000" // /* MW 2 */ + 2935 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "01011101" // /* MW 3 */ + 2938 "10100100" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2941 "01000111" // /* MW 3 */ + 2942 "11110110" // /* MW 2 */ + 2943 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2945 "00111001" // /* MW 5 */ + 2946 "10110111" // /* MW 4 */ + 2947 "01000000" // /* MW 3 */ + 2948 "01001010" // /* MW 2 */ + 2949 "11000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2951 "00100010" // /* MW 3 */ + 2952 "01111011" // /* MW 2 */ + 2953 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first + 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2955 "01100111" // /* MW 3 */ + 2956 "11001100" // /* MW 2 */ + 2957 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 + 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00000100" // /* MW 3 */ + 2960 "10110111" // /* MW 2 */ + 2961 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 first + 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2963 "01000001" // /* MW 5 */ + 2964 "10111011" // /* MW 4 */ + 2965 "10111100" // /* MW 3 */ + 2966 "11101011" // /* MW 2 */ + 2967 "01111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first + 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2969 "00000100" // /* MW 5 */ + 2970 "10011011" // /* MW 4 */ + 2971 "10110011" // /* MW 3 */ + 2972 "10111110" // /* MW 2 */ + 2973 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first + 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 2975 "00000001" // /* MW 5 */ + 2976 "01000000" // /* MW 4 */ + 2977 "11111000" // /* MW 3 */ + 2978 "00000101" // /* MW 2 */ + 2979 "11001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first +.delay_slot + 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2981 "01000111" // /* MW 3 */ + 2982 "10110110" // /* MW 2 */ + 2983 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first +.delay_slot + 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2985 "01000100" // /* MW 3 */ + 2986 "01110001" // /* MW 2 */ + 2987 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.delay_slot + 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "01011101" // /* MW 3 */ + 2990 "11111100" // /* MW 2 */ + 2991 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 first +.delay_slot + 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2993 "01001101" // /* MW 3 */ + 2994 "11101000" // /* MW 2 */ + 2995 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.delay_slot + 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2997 "00110010" // /* MW 3 */ + 2998 "10001100" // /* MW 2 */ + 2999 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 + 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 3001 "00000001" // /* MW 5 */ + 3002 "01000000" // /* MW 4 */ + 3003 "11111000" // /* MW 3 */ + 3004 "00000101" // /* MW 2 */ + 3005 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */ + 3017 "00100000" // /* MW 9 */ + 3018 "00000000" // /* MW 8 */ + 3019 "00000000" // /* MW 7 */ + 3020 "10000100" // /* MW 6 */ + 3021 "00000001" // /* MW 5 */ + 3022 "00000000" // /* MW 4 */ + 3023 "00000000" // /* MW 3 */ + 3024 "00101111" // /* MW 2 */ + 3025 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3027 "01011000" // /* MW 9 */ + 3028 "00001100" // /* MW 8 */ + 3029 "10001000" // /* MW 7 */ + 3030 "10101011" // /* MW 6 */ + 3031 "01010111" // /* MW 5 */ + 3032 "00111110" // /* MW 4 */ + 3033 "00000000" // /* MW 3 */ + 3034 "00011010" // /* MW 2 */ + 3035 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3037 "01000001" // /* MW 5 */ + 3038 "00100000" // /* MW 4 */ + 3039 "00100001" // /* MW 3 */ + 3040 "01000010" // /* MW 2 */ + 3041 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.delay_slot + 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "00001101" // /* MW 3 */ + 3044 "00011010" // /* MW 2 */ + 3045 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.delay_slot + 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3047 "00111101" // /* MW 3 */ + 3048 "00001110" // /* MW 2 */ + 3049 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3051 "11100010" // /* MW 5 */ + 3052 "10010001" // /* MW 4 */ + 3053 "11111111" // /* MW 3 */ + 3054 "00101100" // /* MW 2 */ + 3055 "00000000" // /* MW 1 */ +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3057 "01011000" // /* MW 11 */ + 3058 "11111100" // /* MW 10 */ + 3059 "10001111" // /* MW 9 */ + 3060 "10001000" // /* MW 8 */ + 3061 "01010000" // /* MW 7 */ + 3062 "00000001" // /* MW 6 */ + 3063 "00001011" // /* MW 5 */ + 3064 "10000010" // /* MW 4 */ + 3065 "10000001" // /* MW 3 */ + 3066 "00000010" // /* MW 2 */ + 3067 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3069 "01011000" // /* MW 9 */ + 3070 "00001100" // /* MW 8 */ + 3071 "10001000" // /* MW 7 */ + 3072 "00001011" // /* MW 6 */ + 3073 "10100000" // /* MW 5 */ + 3074 "00000001" // /* MW 4 */ + 3075 "11100000" // /* MW 3 */ + 3076 "00011000" // /* MW 2 */ + 3077 "00100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3079 "01011000" // /* MW 9 */ + 3080 "00000001" // /* MW 8 */ + 3081 "11101000" // /* MW 7 */ + 3082 "10101001" // /* MW 6 */ + 3083 "01010111" // /* MW 5 */ + 3084 "00111110" // /* MW 4 */ + 3085 "00000000" // /* MW 3 */ + 3086 "00000010" // /* MW 2 */ + 3087 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 87 + 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3089 "00000000" // /* MW 15 */ + 3090 "00000000" // /* MW 14 */ + 3091 "01011000" // /* MW 13 */ + 3092 "00000011" // /* MW 12 */ + 3093 "10101000" // /* MW 11 */ + 3094 "11101001" // /* MW 10 */ + 3095 "01110001" // /* MW 9 */ + 3096 "00000000" // /* MW 8 */ + 3097 "01011011" // /* MW 7 */ + 3098 "00000001" // /* MW 6 */ + 3099 "00100000" // /* MW 5 */ + 3100 "00000000" // /* MW 4 */ + 3101 "11110000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.src_ref 2 "conv2d_bf16_params.h" 495 68 first +.src_ref 2 "conv2d_bf16_params.h" 495 112 +.src_ref 2 "conv2d_bf16_params.h" 682 38 + 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00111100" // /* MW 8 */ + 3107 "00000000" // /* MW 7 */ + 3108 "00111100" // /* MW 6 */ + 3109 "10110011" // /* MW 5 */ + 3110 "00011011" // /* MW 4 */ + 3111 "01010000" // /* MW 3 */ + 3112 "11000101" // /* MW 2 */ + 3113 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 first +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 495 112 + 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01011000" // /* MW 9 */ + 3116 "11001101" // /* MW 8 */ + 3117 "10000111" // /* MW 7 */ + 3118 "00010010" // /* MW 6 */ + 3119 "00101101" // /* MW 5 */ + 3120 "00000011" // /* MW 4 */ + 3121 "01010000" // /* MW 3 */ + 3122 "00000101" // /* MW 2 */ + 3123 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 first +.src_ref 2 "conv2d_bf16_params.h" 496 68 +.src_ref 2 "conv2d_bf16_params.h" 504 35 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 578 47 + 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3125 "01011000" // /* MW 9 */ + 3126 "00110111" // /* MW 8 */ + 3127 "10000000" // /* MW 7 */ + 3128 "10010001" // /* MW 6 */ + 3129 "11011010" // /* MW 5 */ + 3130 "00111011" // /* MW 4 */ + 3131 "00000000" // /* MW 3 */ + 3132 "01010111" // /* MW 2 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.src_ref 2 "conv2d_bf16_params.h" 504 45 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 +.src_ref 2 "conv2d_bf16_params.h" 519 42 +.src_ref 2 "conv2d_bf16_params.h" 700 34 + 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3135 "01011000" // /* MW 9 */ + 3136 "10111100" // /* MW 8 */ + 3137 "00000111" // /* MW 7 */ + 3138 "00111101" // /* MW 6 */ + 3139 "10110000" // /* MW 5 */ + 3140 "00101011" // /* MW 4 */ + 3141 "00000000" // /* MW 3 */ + 3142 "00000011" // /* MW 2 */ + 3143 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 492 25 first +.src_ref 2 "conv2d_bf16_params.h" 497 46 +.src_ref 2 "conv2d_bf16_params.h" 509 50 + 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3145 "01011000" // /* MW 9 */ + 3146 "01110000" // /* MW 8 */ + 3147 "10000000" // /* MW 7 */ + 3148 "01101100" // /* MW 6 */ + 3149 "01101100" // /* MW 5 */ + 3150 "00011111" // /* MW 4 */ + 3151 "00000000" // /* MW 3 */ + 3152 "00010000" // /* MW 2 */ + 3153 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 520 34 first + 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3155 "01011101" // /* MW 5 */ + 3156 "00011110" // /* MW 4 */ + 3157 "00001000" // /* MW 3 */ + 3158 "10010010" // /* MW 2 */ + 3159 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 +.src_ref 2 "conv2d_bf16_params.h" 520 48 + 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3161 "01011001" // /* MW 9 */ + 3162 "00110001" // /* MW 8 */ + 3163 "10000000" // /* MW 7 */ + 3164 "01101111" // /* MW 6 */ + 3165 "01100001" // /* MW 5 */ + 3166 "00101101" // /* MW 4 */ + 3167 "10110000" // /* MW 3 */ + 3168 "01011010" // /* MW 2 */ + 3169 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 +.src_ref 2 "conv2d_bf16_params.h" 507 42 first + 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3171 "00000101" // /* MW 5 */ + 3172 "00011111" // /* MW 4 */ + 3173 "00111100" // /* MW 3 */ + 3174 "10111010" // /* MW 2 */ + 3175 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 99 first + 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3177 "00010001" // /* MW 3 */ + 3178 "11000010" // /* MW 2 */ + 3179 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 610 64 +.src_ref 2 "conv2d_bf16_params.h" 709 96 + 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3181 "00011101" // /* MW 5 */ + 3182 "10100000" // /* MW 4 */ + 3183 "11110000" // /* MW 3 */ + 3184 "11000011" // /* MW 2 */ + 3185 "10001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first + 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3187 "00100001" // /* MW 3 */ + 3188 "10100011" // /* MW 2 */ + 3189 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 first + 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00011101" // /* MW 3 */ + 3192 "11111110" // /* MW 2 */ + 3193 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 506 48 +.src_ref 2 "conv2d_bf16_params.h" 519 42 first + 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3195 "01011001" // /* MW 9 */ + 3196 "01010111" // /* MW 8 */ + 3197 "10000000" // /* MW 7 */ + 3198 "11101110" // /* MW 6 */ + 3199 "11110001" // /* MW 5 */ + 3200 "00111011" // /* MW 4 */ + 3201 "00110000" // /* MW 3 */ + 3202 "01111110" // /* MW 2 */ + 3203 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 68 first +.src_ref 2 "conv2d_bf16_params.h" 504 35 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 + 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3205 "01011000" // /* MW 9 */ + 3206 "10110010" // /* MW 8 */ + 3207 "10000111" // /* MW 7 */ + 3208 "00111101" // /* MW 6 */ + 3209 "00110000" // /* MW 5 */ + 3210 "00101111" // /* MW 4 */ + 3211 "01010000" // /* MW 3 */ + 3212 "01010101" // /* MW 2 */ + 3213 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3215 "01111011" // /* MW 5 */ + 3216 "11001100" // /* MW 4 */ + 3217 "10111001" // /* MW 3 */ + 3218 "01001110" // /* MW 2 */ + 3219 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 first +.src_ref 2 "conv2d_bf16_params.h" 520 19 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3221 "01011000" // /* MW 9 */ + 3222 "11110110" // /* MW 8 */ + 3223 "00000000" // /* MW 7 */ + 3224 "00101101" // /* MW 6 */ + 3225 "01101011" // /* MW 5 */ + 3226 "00111111" // /* MW 4 */ + 3227 "11100000" // /* MW 3 */ + 3228 "01010100" // /* MW 2 */ + 3229 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 46 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3231 "01011000" // /* MW 9 */ + 3232 "01010000" // /* MW 8 */ + 3233 "10000111" // /* MW 7 */ + 3234 "00010000" // /* MW 6 */ + 3235 "00111000" // /* MW 5 */ + 3236 "00100111" // /* MW 4 */ + 3237 "01010000" // /* MW 3 */ + 3238 "01000011" // /* MW 2 */ + 3239 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3241 "01100111" // /* MW 3 */ + 3242 "11111110" // /* MW 2 */ + 3243 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "01100111" // /* MW 3 */ + 3246 "11100000" // /* MW 2 */ + 3247 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "00000101" // /* MW 3 */ + 3250 "11110111" // /* MW 2 */ + 3251 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3253 "01010100" // /* MW 3 */ + 3254 "11101011" // /* MW 2 */ + 3255 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3257 "01100001" // /* MW 5 */ + 3258 "10100000" // /* MW 4 */ + 3259 "11011000" // /* MW 3 */ + 3260 "10100011" // /* MW 2 */ + 3261 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 first +.src_ref 2 "conv2d_bf16_params.h" 507 34 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3263 "01001001" // /* MW 9 */ + 3264 "10000000" // /* MW 8 */ + 3265 "11001111" // /* MW 7 */ + 3266 "01101111" // /* MW 6 */ + 3267 "00101001" // /* MW 5 */ + 3268 "00011111" // /* MW 4 */ + 3269 "10110000" // /* MW 3 */ + 3270 "01000010" // /* MW 2 */ + 3271 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 first + 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3273 "00111011" // /* MW 5 */ + 3274 "01000110" // /* MW 4 */ + 3275 "00111111" // /* MW 3 */ + 3276 "11101010" // /* MW 2 */ + 3277 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3279 "01010000" // /* MW 7 */ + 3280 "10101000" // /* MW 6 */ + 3281 "00000000" // /* MW 5 */ + 3282 "00000010" // /* MW 4 */ + 3283 "00110000" // /* MW 3 */ + 3284 "01101010" // /* MW 2 */ + 3285 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 first +.src_ref 2 "conv2d_bf16_params.h" 509 19 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3287 "01111000" // /* MW 11 */ + 3288 "11001110" // /* MW 10 */ + 3289 "00001101" // /* MW 9 */ + 3290 "00101100" // /* MW 8 */ + 3291 "10110000" // /* MW 7 */ + 3292 "10100111" // /* MW 6 */ + 3293 "11110101" // /* MW 5 */ + 3294 "11100111" // /* MW 4 */ + 3295 "01010111" // /* MW 3 */ + 3296 "01001001" // /* MW 2 */ + 3297 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 19 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3299 "00010101" // /* MW 3 */ + 3300 "11100011" // /* MW 2 */ + 3301 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3303 "10000001" // /* MW 3 */ + 3304 "10110111" // /* MW 2 */ + 3305 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 47 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3307 "10010000" // /* MW 3 */ + 3308 "10110000" // /* MW 2 */ + 3309 "00010100" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 57 first + 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3317 "00100001" // /* MW 3 */ + 3318 "11100101" // /* MW 2 */ + 3319 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 + 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3321 "01010001" // /* MW 3 */ + 3322 "11001010" // /* MW 2 */ + 3323 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 48 first + 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3325 "01001010" // /* MW 3 */ + 3326 "10101010" // /* MW 2 */ + 3327 "00000010" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 62 + 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3341 "11100001" // /* MW 3 */ + 3342 "10100100" // /* MW 2 */ + 3343 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 + 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3345 "10111110" // /* MW 3 */ + 3346 "10100101" // /* MW 2 */ + 3347 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 first + 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3349 "00101101" // /* MW 3 */ + 3350 "10100100" // /* MW 2 */ + 3351 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3353 "00000000" // /* MW 5 */ + 3354 "10100000" // /* MW 4 */ + 3355 "00001101" // /* MW 3 */ + 3356 "00000001" // /* MW 2 */ + 3357 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3359 "00100000" // /* MW 3 */ + 3360 "11100101" // /* MW 2 */ + 3361 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3363 "00000000" // /* MW 5 */ + 3364 "10100000" // /* MW 4 */ + 3365 "00001101" // /* MW 3 */ + 3366 "11111111" // /* MW 2 */ + 3367 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 642 99 + 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3369 "11000001" // /* MW 5 */ + 3370 "00111111" // /* MW 4 */ + 3371 "10011001" // /* MW 3 */ + 3372 "11100100" // /* MW 2 */ + 3373 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 19 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "11100001" // /* MW 5 */ + 3376 "10111111" // /* MW 4 */ + 3377 "10111000" // /* MW 3 */ + 3378 "11100010" // /* MW 2 */ + 3379 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 512 64 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 first + 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3381 "00111011" // /* MW 5 */ + 3382 "11001110" // /* MW 4 */ + 3383 "00111001" // /* MW 3 */ + 3384 "11101110" // /* MW 2 */ + 3385 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3387 "00110001" // /* MW 3 */ + 3388 "10110101" // /* MW 2 */ + 3389 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3391 "10101101" // /* MW 3 */ + 3392 "00101001" // /* MW 2 */ + 3393 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 520 19 first + 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3395 "01100101" // /* MW 3 */ + 3396 "10110101" // /* MW 2 */ + 3397 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 36 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 first + 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3399 "00100000" // /* MW 5 */ + 3400 "01101001" // /* MW 4 */ + 3401 "00111111" // /* MW 3 */ + 3402 "01101010" // /* MW 2 */ + 3403 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 65 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 first +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3405 "10101000" // /* MW 9 */ + 3406 "10101000" // /* MW 8 */ + 3407 "11001110" // /* MW 7 */ + 3408 "01101111" // /* MW 6 */ + 3409 "01001001" // /* MW 5 */ + 3410 "00110111" // /* MW 4 */ + 3411 "01010000" // /* MW 3 */ + 3412 "01100101" // /* MW 2 */ + 3413 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3415 "11111001" // /* MW 5 */ + 3416 "10100011" // /* MW 4 */ + 3417 "10111000" // /* MW 3 */ + 3418 "10100011" // /* MW 2 */ + 3419 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 45 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3421 "00011111" // /* MW 5 */ + 3422 "01101011" // /* MW 4 */ + 3423 "11101101" // /* MW 3 */ + 3424 "01100100" // /* MW 2 */ + 3425 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3427 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3429 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 48 first +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3435 "11001010" // /* MW 5 */ + 3436 "10110101" // /* MW 4 */ + 3437 "10111101" // /* MW 3 */ + 3438 "01011111" // /* MW 2 */ + 3439 "10000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3441 "00000001" // /* MW 5 */ + 3442 "01000000" // /* MW 4 */ + 3443 "11111000" // /* MW 3 */ + 3444 "00000110" // /* MW 2 */ + 3445 "11111000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 76 first +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3447 "11110010" // /* MW 5 */ + 3448 "10111011" // /* MW 4 */ + 3449 "11101101" // /* MW 3 */ + 3450 "01000001" // /* MW 2 */ + 3451 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3453 "01011101" // /* MW 3 */ + 3454 "11101011" // /* MW 2 */ + 3455 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 first +.delay_slot + 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3457 "00010100" // /* MW 3 */ + 3458 "01100011" // /* MW 2 */ + 3459 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.src_ref 2 "conv2d_bf16_params.h" 539 139 first +.src_ref 2 "conv2d_bf16_params.h" 555 59 +.src_ref 2 "conv2d_bf16_params.h" 559 59 +.src_ref 2 "conv2d_bf16_params.h" 700 17 +.delay_slot + 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3461 "01011001" // /* MW 9 */ + 3462 "00000001" // /* MW 8 */ + 3463 "00101000" // /* MW 7 */ + 3464 "00111110" // /* MW 6 */ + 3465 "10111110" // /* MW 5 */ + 3466 "00001101" // /* MW 4 */ + 3467 "00110000" // /* MW 3 */ + 3468 "01000110" // /* MW 2 */ + 3469 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3471 "10011100" // /* MW 3 */ + 3472 "10011011" // /* MW 2 */ + 3473 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3475 "10010001" // /* MW 3 */ + 3476 "11100011" // /* MW 2 */ + 3477 "00000111" // /* MW 1 */ + 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3479 "00000000" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3491 "00000001" // /* MW 5 */ + 3492 "01000000" // /* MW 4 */ + 3493 "11111000" // /* MW 3 */ + 3494 "00000110" // /* MW 2 */ + 3495 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3507 "01011000" // /* MW 9 */ + 3508 "01000000" // /* MW 8 */ + 3509 "00101000" // /* MW 7 */ + 3510 "10001011" // /* MW 6 */ + 3511 "00010000" // /* MW 5 */ + 3512 "00000001" // /* MW 4 */ + 3513 "00000000" // /* MW 3 */ + 3514 "10111100" // /* MW 2 */ + 3515 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "11010010" // /* MW 3 */ + 3518 "01111110" // /* MW 2 */ + 3519 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3521 "01100111" // /* MW 3 */ + 3522 "01110110" // /* MW 2 */ + 3523 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3525 "00000001" // /* MW 5 */ + 3526 "10100000" // /* MW 4 */ + 3527 "01001111" // /* MW 3 */ + 3528 "00111000" // /* MW 2 */ + 3529 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 46 + 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3531 "01010000" // /* MW 3 */ + 3532 "00110010" // /* MW 2 */ + 3533 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 44 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3535 "11101111" // /* MW 3 */ + 3536 "01111101" // /* MW 2 */ + 3537 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3539 "00111001" // /* MW 5 */ + 3540 "11000100" // /* MW 4 */ + 3541 "01011101" // /* MW 3 */ + 3542 "11100011" // /* MW 2 */ + 3543 "11001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3545 "10000010" // /* MW 3 */ + 3546 "11100011" // /* MW 2 */ + 3547 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 79 + 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3549 "11101111" // /* MW 3 */ + 3550 "01100011" // /* MW 2 */ + 3551 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "11000001" // /* MW 3 */ + 3554 "11111001" // /* MW 2 */ + 3555 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3557 "11001110" // /* MW 3 */ + 3558 "01100011" // /* MW 2 */ + 3559 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 55 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3561 "00011100" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00000000" // /* MW 5 */ + 3564 "10000001" // /* MW 4 */ + 3565 "00010100" // /* MW 3 */ + 3566 "00100011" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01111000" // /* MW 9 */ + 3570 "00001110" // /* MW 8 */ + 3571 "01110000" // /* MW 7 */ + 3572 "11101011" // /* MW 6 */ + 3573 "11000111" // /* MW 5 */ + 3574 "00111111" // /* MW 4 */ + 3575 "00000000" // /* MW 3 */ + 3576 "00011001" // /* MW 2 */ + 3577 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3579 "11000010" // /* MW 3 */ + 3580 "01111111" // /* MW 2 */ + 3581 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 34 first +.src_ref 2 "conv2d_bf16_params.h" 641 32 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3583 "10101000" // /* MW 9 */ + 3584 "01110100" // /* MW 8 */ + 3585 "01001111" // /* MW 7 */ + 3586 "10000011" // /* MW 6 */ + 3587 "00000100" // /* MW 5 */ + 3588 "00100001" // /* MW 4 */ + 3589 "00100000" // /* MW 3 */ + 3590 "01101110" // /* MW 2 */ + 3591 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 61 first +.src_ref 2 "conv2d_bf16_params.h" 640 16 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3593 "01011000" // /* MW 9 */ + 3594 "00001001" // /* MW 8 */ + 3595 "10101000" // /* MW 7 */ + 3596 "10000011" // /* MW 6 */ + 3597 "01000100" // /* MW 5 */ + 3598 "00101001" // /* MW 4 */ + 3599 "00000000" // /* MW 3 */ + 3600 "00011110" // /* MW 2 */ + 3601 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3603 "11100010" // /* MW 3 */ + 3604 "01110011" // /* MW 2 */ + 3605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 47 first + 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3607 "10001000" // /* MW 3 */ + 3608 "11111001" // /* MW 2 */ + 3609 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 640 16 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00111101" // /* MW 3 */ + 3612 "01111011" // /* MW 2 */ + 3613 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3615 "00010000" // /* MW 9 */ + 3616 "00000100" // /* MW 8 */ + 3617 "00001010" // /* MW 7 */ + 3618 "00000011" // /* MW 6 */ + 3619 "00000000" // /* MW 5 */ + 3620 "00000000" // /* MW 4 */ + 3621 "00100000" // /* MW 3 */ + 3622 "11011110" // /* MW 2 */ + 3623 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 641 44 first +.src_ref 2 "conv2d_bf16_params.h" 642 45 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "11111111" // /* MW 5 */ + 3626 "00111010" // /* MW 4 */ + 3627 "10111111" // /* MW 3 */ + 3628 "11100111" // /* MW 2 */ + 3629 "11001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "11100110" // /* MW 3 */ + 3632 "11001111" // /* MW 2 */ + 3633 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 55 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3635 "00101001" // /* MW 5 */ + 3636 "10101000" // /* MW 4 */ + 3637 "00001011" // /* MW 3 */ + 3638 "11010010" // /* MW 2 */ + 3639 "10110100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3641 "00000001" // /* MW 5 */ + 3642 "00100001" // /* MW 4 */ + 3643 "01001101" // /* MW 3 */ + 3644 "10110000" // /* MW 2 */ + 3645 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3647 "00111001" // /* MW 5 */ + 3648 "11000010" // /* MW 4 */ + 3649 "00011101" // /* MW 3 */ + 3650 "10110101" // /* MW 2 */ + 3651 "00110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 99 first + 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3653 "00100100" // /* MW 3 */ + 3654 "11001111" // /* MW 2 */ + 3655 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3657 "01000001" // /* MW 5 */ + 3658 "10100110" // /* MW 4 */ + 3659 "01001101" // /* MW 3 */ + 3660 "11011110" // /* MW 2 */ + 3661 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3663 "01111101" // /* MW 5 */ + 3664 "00100000" // /* MW 4 */ + 3665 "01001001" // /* MW 3 */ + 3666 "00001000" // /* MW 2 */ + 3667 "00101001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 first + 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3669 "00100100" // /* MW 3 */ + 3670 "11101111" // /* MW 2 */ + 3671 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 15 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3673 "01011000" // /* MW 9 */ + 3674 "01110000" // /* MW 8 */ + 3675 "01001111" // /* MW 7 */ + 3676 "01101110" // /* MW 6 */ + 3677 "01000010" // /* MW 5 */ + 3678 "00100000" // /* MW 4 */ + 3679 "00000000" // /* MW 3 */ + 3680 "00011110" // /* MW 2 */ + 3681 "11011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3683 "00100010" // /* MW 3 */ + 3684 "10111101" // /* MW 2 */ + 3685 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 85 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3687 "01011000" // /* MW 9 */ + 3688 "00100000" // /* MW 8 */ + 3689 "00001001" // /* MW 7 */ + 3690 "11111110" // /* MW 6 */ + 3691 "10101001" // /* MW 5 */ + 3692 "00101111" // /* MW 4 */ + 3693 "00000000" // /* MW 3 */ + 3694 "00000101" // /* MW 2 */ + 3695 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3697 "01010010" // /* MW 3 */ + 3698 "00100000" // /* MW 2 */ + 3699 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 559 59 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3701 "11110010" // /* MW 5 */ + 3702 "10111101" // /* MW 4 */ + 3703 "11111101" // /* MW 3 */ + 3704 "00001001" // /* MW 2 */ + 3705 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 41 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3707 "00111001" // /* MW 5 */ + 3708 "11000100" // /* MW 4 */ + 3709 "10111101" // /* MW 3 */ + 3710 "00111111" // /* MW 2 */ + 3711 "10000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 117 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3713 "01011111" // /* MW 5 */ + 3714 "01101011" // /* MW 4 */ + 3715 "10110111" // /* MW 3 */ + 3716 "11101110" // /* MW 2 */ + 3717 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 700 34 first + 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3719 "00110010" // /* MW 3 */ + 3720 "10000100" // /* MW 2 */ + 3721 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 52 first + 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3723 "00001100" // /* MW 3 */ + 3724 "01111110" // /* MW 2 */ + 3725 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 92 first + 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3727 "10001111" // /* MW 3 */ + 3728 "00110001" // /* MW 2 */ + 3729 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 36 first + 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3731 "11000101" // /* MW 3 */ + 3732 "11110111" // /* MW 2 */ + 3733 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 64 first +.src_ref 2 "conv2d_bf16_params.h" 611 47 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 629 82 + 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3735 "01011000" // /* MW 11 */ + 3736 "00000000" // /* MW 10 */ + 3737 "10001001" // /* MW 9 */ + 3738 "11101110" // /* MW 8 */ + 3739 "11000000" // /* MW 7 */ + 3740 "10110111" // /* MW 6 */ + 3741 "10010101" // /* MW 5 */ + 3742 "11101110" // /* MW 4 */ + 3743 "00000111" // /* MW 3 */ + 3744 "00000011" // /* MW 2 */ + 3745 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 649 41 + 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3747 "00111001" // /* MW 5 */ + 3748 "10110111" // /* MW 4 */ + 3749 "01000000" // /* MW 3 */ + 3750 "00101000" // /* MW 2 */ + 3751 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3753 "00001100" // /* MW 5 */ + 3754 "10101100" // /* MW 4 */ + 3755 "00001111" // /* MW 3 */ + 3756 "00000000" // /* MW 2 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 60 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first + 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3759 "11001001" // /* MW 9 */ + 3760 "00111111" // /* MW 8 */ + 3761 "10001001" // /* MW 7 */ + 3762 "00111100" // /* MW 6 */ + 3763 "10110000" // /* MW 5 */ + 3764 "00011111" // /* MW 4 */ + 3765 "10110000" // /* MW 3 */ + 3766 "00010010" // /* MW 2 */ + 3767 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 53 +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 555 59 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3769 "11001000" // /* MW 11 */ + 3770 "01111111" // /* MW 10 */ + 3771 "11001100" // /* MW 9 */ + 3772 "10010010" // /* MW 8 */ + 3773 "11111111" // /* MW 7 */ + 3774 "10101101" // /* MW 6 */ + 3775 "10010001" // /* MW 5 */ + 3776 "00011100" // /* MW 4 */ + 3777 "10000010" // /* MW 3 */ + 3778 "10001100" // /* MW 2 */ + 3779 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 240 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3781 "01111001" // /* MW 9 */ + 3782 "10001110" // /* MW 8 */ + 3783 "01110000" // /* MW 7 */ + 3784 "11101111" // /* MW 6 */ + 3785 "01010111" // /* MW 5 */ + 3786 "00101011" // /* MW 4 */ + 3787 "00110000" // /* MW 3 */ + 3788 "01011010" // /* MW 2 */ + 3789 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 53 first +.src_ref 2 "conv2d_bf16_params.h" 559 53 +.src_ref 2 "conv2d_bf16_params.h" 621 140 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3791 "01011000" // /* MW 11 */ + 3792 "01011000" // /* MW 10 */ + 3793 "00000000" // /* MW 9 */ + 3794 "00001110" // /* MW 8 */ + 3795 "01001110" // /* MW 7 */ + 3796 "10101001" // /* MW 6 */ + 3797 "01010001" // /* MW 5 */ + 3798 "00011111" // /* MW 4 */ + 3799 "00000010" // /* MW 3 */ + 3800 "11011001" // /* MW 2 */ + 3801 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 53 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3803 "00100100" // /* MW 5 */ + 3804 "11100011" // /* MW 4 */ + 3805 "00111111" // /* MW 3 */ + 3806 "01100010" // /* MW 2 */ + 3807 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 47 first +.src_ref 2 "conv2d_bf16_params.h" 621 222 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3809 "01111000" // /* MW 11 */ + 3810 "10010000" // /* MW 10 */ + 3811 "01101001" // /* MW 9 */ + 3812 "00001111" // /* MW 8 */ + 3813 "11001110" // /* MW 7 */ + 3814 "10101011" // /* MW 6 */ + 3815 "10010001" // /* MW 5 */ + 3816 "11101111" // /* MW 4 */ + 3817 "00100010" // /* MW 3 */ + 3818 "01101110" // /* MW 2 */ + 3819 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 661 61 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3821 "11001000" // /* MW 9 */ + 3822 "11111111" // /* MW 8 */ + 3823 "10001100" // /* MW 7 */ + 3824 "00010010" // /* MW 6 */ + 3825 "11001110" // /* MW 5 */ + 3826 "00101001" // /* MW 4 */ + 3827 "00000000" // /* MW 3 */ + 3828 "11110011" // /* MW 2 */ + 3829 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 710 60 +.src_ref 2 "conv2d_bf16_params.h" 710 65 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3831 "01111000" // /* MW 9 */ + 3832 "10001110" // /* MW 8 */ + 3833 "01110000" // /* MW 7 */ + 3834 "01110011" // /* MW 6 */ + 3835 "11101010" // /* MW 5 */ + 3836 "00111011" // /* MW 4 */ + 3837 "00000000" // /* MW 3 */ + 3838 "00011101" // /* MW 2 */ + 3839 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000100" // /* MW 5 */ + 3842 "11001010" // /* MW 4 */ + 3843 "00101110" // /* MW 3 */ + 3844 "11101110" // /* MW 2 */ + 3845 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 649 41 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111000" // /* MW 9 */ + 3848 "10010000" // /* MW 8 */ + 3849 "01101001" // /* MW 7 */ + 3850 "10010011" // /* MW 6 */ + 3851 "00111001" // /* MW 5 */ + 3852 "00111111" // /* MW 4 */ + 3853 "00000000" // /* MW 3 */ + 3854 "00011111" // /* MW 2 */ + 3855 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00100010" // /* MW 3 */ + 3858 "11000100" // /* MW 2 */ + 3859 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 82 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3861 "01010001" // /* MW 3 */ + 3862 "11101011" // /* MW 2 */ + 3863 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 611 47 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3865 "01011001" // /* MW 9 */ + 3866 "11000000" // /* MW 8 */ + 3867 "01101111" // /* MW 7 */ + 3868 "10010000" // /* MW 6 */ + 3869 "00100111" // /* MW 5 */ + 3870 "00000100" // /* MW 4 */ + 3871 "00110000" // /* MW 3 */ + 3872 "10001110" // /* MW 2 */ + 3873 "01000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3875 "00110010" // /* MW 3 */ + 3876 "00111000" // /* MW 2 */ + 3877 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 643 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3879 "01111111" // /* MW 3 */ + 3880 "11111110" // /* MW 2 */ + 3881 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3883 "01100100" // /* MW 5 */ + 3884 "00001100" // /* MW 4 */ + 3885 "00101110" // /* MW 3 */ + 3886 "11000110" // /* MW 2 */ + 3887 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 47 first +.src_ref 2 "conv2d_bf16_params.h" 629 45 +.src_ref 2 "conv2d_bf16_params.h" 684 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3889 "01011001" // /* MW 9 */ + 3890 "00101000" // /* MW 8 */ + 3891 "10000000" // /* MW 7 */ + 3892 "01111100" // /* MW 6 */ + 3893 "00101001" // /* MW 5 */ + 3894 "00110101" // /* MW 4 */ + 3895 "00110000" // /* MW 3 */ + 3896 "10001110" // /* MW 2 */ + 3897 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 45 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3899 "11100100" // /* MW 5 */ + 3900 "00001101" // /* MW 4 */ + 3901 "00110001" // /* MW 3 */ + 3902 "01010110" // /* MW 2 */ + 3903 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 644 22 +.src_ref 2 "conv2d_bf16_params.h" 700 17 first +.src_ref 2 "conv2d_bf16_params.h" 705 50 +.src_ref 2 "conv2d_bf16_params.h" 705 61 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "10101000" // /* MW 9 */ + 3906 "11111100" // /* MW 8 */ + 3907 "10101001" // /* MW 7 */ + 3908 "11111110" // /* MW 6 */ + 3909 "00111000" // /* MW 5 */ + 3910 "00000110" // /* MW 4 */ + 3911 "00100000" // /* MW 3 */ + 3912 "00000010" // /* MW 2 */ + 3913 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 38 first +.src_ref 2 "conv2d_bf16_params.h" 700 111 +.src_ref 2 "conv2d_bf16_params.h" 700 149 +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "00000110" // /* MW 9 */ + 3916 "00000110" // /* MW 8 */ + 3917 "00000101" // /* MW 7 */ + 3918 "10000000" // /* MW 6 */ + 3919 "00010001" // /* MW 5 */ + 3920 "00011111" // /* MW 4 */ + 3921 "00100010" // /* MW 3 */ + 3922 "11000110" // /* MW 2 */ + 3923 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 +.src_ref 2 "conv2d_bf16_params.h" 649 38 first +.src_ref 2 "conv2d_bf16_params.h" 674 24 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3925 "00010001" // /* MW 9 */ + 3926 "11111000" // /* MW 8 */ + 3927 "01101111" // /* MW 7 */ + 3928 "00111110" // /* MW 6 */ + 3929 "00000000" // /* MW 5 */ + 3930 "00000000" // /* MW 4 */ + 3931 "00110000" // /* MW 3 */ + 3932 "11001110" // /* MW 2 */ + 3933 "01001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 first +.src_ref 2 "conv2d_bf16_params.h" 662 61 +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3935 "11001001" // /* MW 9 */ + 3936 "10111111" // /* MW 8 */ + 3937 "01001011" // /* MW 7 */ + 3938 "10100100" // /* MW 6 */ + 3939 "01001001" // /* MW 5 */ + 3940 "00111111" // /* MW 4 */ + 3941 "00110000" // /* MW 3 */ + 3942 "11010010" // /* MW 2 */ + 3943 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 663 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3945 "10011100" // /* MW 5 */ + 3946 "01010110" // /* MW 4 */ + 3947 "00110001" // /* MW 3 */ + 3948 "11000110" // /* MW 2 */ + 3949 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first + 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3951 "10000001" // /* MW 5 */ + 3952 "01111010" // /* MW 4 */ + 3953 "00111111" // /* MW 3 */ + 3954 "10001010" // /* MW 2 */ + 3955 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3957 "11100011" // /* MW 5 */ + 3958 "01110011" // /* MW 4 */ + 3959 "00111000" // /* MW 3 */ + 3960 "11111010" // /* MW 2 */ + 3961 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3963 "01011001" // /* MW 9 */ + 3964 "00000000" // /* MW 8 */ + 3965 "01100000" // /* MW 7 */ + 3966 "00110000" // /* MW 6 */ + 3967 "11111000" // /* MW 5 */ + 3968 "00101101" // /* MW 4 */ + 3969 "00110000" // /* MW 3 */ + 3970 "11010110" // /* MW 2 */ + 3971 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3973 "11001001" // /* MW 9 */ + 3974 "01111111" // /* MW 8 */ + 3975 "00101100" // /* MW 7 */ + 3976 "01111110" // /* MW 6 */ + 3977 "00100000" // /* MW 5 */ + 3978 "00111110" // /* MW 4 */ + 3979 "00110000" // /* MW 3 */ + 3980 "10001100" // /* MW 2 */ + 3981 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 705 50 first +.src_ref 2 "conv2d_bf16_params.h" 705 61 first + 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3983 "00001100" // /* MW 5 */ + 3984 "10111000" // /* MW 4 */ + 3985 "00111000" // /* MW 3 */ + 3986 "10001100" // /* MW 2 */ + 3987 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 +.src_ref 2 "conv2d_bf16_params.h" 674 24 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 682 38 +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.src_ref 2 "conv2d_bf16_params.h" 720 50 + 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3989 "01011001" // /* MW 9 */ + 3990 "00000000" // /* MW 8 */ + 3991 "01001000" // /* MW 7 */ + 3992 "00100100" // /* MW 6 */ + 3993 "00000001" // /* MW 5 */ + 3994 "00100111" // /* MW 4 */ + 3995 "00110000" // /* MW 3 */ + 3996 "11011010" // /* MW 2 */ + 3997 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3999 "01111001" // /* MW 9 */ + 4000 "00001110" // /* MW 8 */ + 4001 "01110000" // /* MW 7 */ + 4002 "10001111" // /* MW 6 */ + 4003 "00011111" // /* MW 5 */ + 4004 "00000101" // /* MW 4 */ + 4005 "00110000" // /* MW 3 */ + 4006 "11110010" // /* MW 2 */ + 4007 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 707 61 first + 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4009 "11011111" // /* MW 5 */ + 4010 "10111001" // /* MW 4 */ + 4011 "00111011" // /* MW 3 */ + 4012 "10010010" // /* MW 2 */ + 4013 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 674 22 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4015 "01011001" // /* MW 9 */ + 4016 "00000110" // /* MW 8 */ + 4017 "00001000" // /* MW 7 */ + 4018 "10001100" // /* MW 6 */ + 4019 "00001111" // /* MW 5 */ + 4020 "00100001" // /* MW 4 */ + 4021 "00110000" // /* MW 3 */ + 4022 "11000110" // /* MW 2 */ + 4023 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4025 "01111000" // /* MW 11 */ + 4026 "10010000" // /* MW 10 */ + 4027 "01101001" // /* MW 9 */ + 4028 "00010011" // /* MW 8 */ + 4029 "00000000" // /* MW 7 */ + 4030 "10011011" // /* MW 6 */ + 4031 "00010001" // /* MW 5 */ + 4032 "00011110" // /* MW 4 */ + 4033 "00000010" // /* MW 3 */ + 4034 "00000000" // /* MW 2 */ + 4035 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4037 "10100100" // /* MW 5 */ + 4038 "00010100" // /* MW 4 */ + 4039 "00100000" // /* MW 3 */ + 4040 "00010110" // /* MW 2 */ + 4041 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 691 56 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4043 "10101111" // /* MW 3 */ + 4044 "01100011" // /* MW 2 */ + 4045 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 709 71 first + 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4047 "01011001" // /* MW 9 */ + 4048 "11001000" // /* MW 8 */ + 4049 "00000111" // /* MW 7 */ + 4050 "01101101" // /* MW 6 */ + 4051 "00001000" // /* MW 5 */ + 4052 "00000111" // /* MW 4 */ + 4053 "00110000" // /* MW 3 */ + 4054 "10001100" // /* MW 2 */ + 4055 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 706 23 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4057 "11001000" // /* MW 11 */ + 4058 "11000001" // /* MW 10 */ + 4059 "10101000" // /* MW 9 */ + 4060 "11101101" // /* MW 8 */ + 4061 "11110111" // /* MW 7 */ + 4062 "10100000" // /* MW 6 */ + 4063 "01100001" // /* MW 5 */ + 4064 "01001000" // /* MW 4 */ + 4065 "00000010" // /* MW 3 */ + 4066 "01100011" // /* MW 2 */ + 4067 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 682 38 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4069 "01111011" // /* MW 5 */ + 4070 "11000000" // /* MW 4 */ + 4071 "00110110" // /* MW 3 */ + 4072 "00001010" // /* MW 2 */ + 4073 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 first + 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4075 "01000001" // /* MW 5 */ + 4076 "10001110" // /* MW 4 */ + 4077 "00111000" // /* MW 3 */ + 4078 "11011010" // /* MW 2 */ + 4079 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 + 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4081 "10011100" // /* MW 5 */ + 4082 "11001000" // /* MW 4 */ + 4083 "00111000" // /* MW 3 */ + 4084 "11001010" // /* MW 2 */ + 4085 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4087 "11011011" // /* MW 5 */ + 4088 "10010100" // /* MW 4 */ + 4089 "00110010" // /* MW 3 */ + 4090 "10010010" // /* MW 2 */ + 4091 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 706 28 first + 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4093 "01011001" // /* MW 9 */ + 4094 "11111101" // /* MW 8 */ + 4095 "00001111" // /* MW 7 */ + 4096 "00000100" // /* MW 6 */ + 4097 "00111000" // /* MW 5 */ + 4098 "00011010" // /* MW 4 */ + 4099 "00110000" // /* MW 3 */ + 4100 "10001110" // /* MW 2 */ + 4101 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4103 "00001110" // /* MW 3 */ + 4104 "11000000" // /* MW 2 */ + 4105 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 707 66 first + 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4107 "00011111" // /* MW 5 */ + 4108 "00010000" // /* MW 4 */ + 4109 "00110111" // /* MW 3 */ + 4110 "11001010" // /* MW 2 */ + 4111 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 709 96 first + 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4113 "00111011" // /* MW 5 */ + 4114 "00001100" // /* MW 4 */ + 4115 "00110000" // /* MW 3 */ + 4116 "10001100" // /* MW 2 */ + 4117 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 709 90 + 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4119 "00110001" // /* MW 9 */ + 4120 "11000110" // /* MW 8 */ + 4121 "00000011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "01100001" // /* MW 5 */ + 4124 "00011100" // /* MW 4 */ + 4125 "00100010" // /* MW 3 */ + 4126 "10110110" // /* MW 2 */ + 4127 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 707 50 first +.src_ref 2 "conv2d_bf16_params.h" 708 59 +.src_ref 2 "conv2d_bf16_params.h" 710 60 first +.src_ref 2 "conv2d_bf16_params.h" 710 65 first + 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4129 "11001000" // /* MW 11 */ + 4130 "00111111" // /* MW 10 */ + 4131 "00101000" // /* MW 9 */ + 4132 "00110000" // /* MW 8 */ + 4133 "01110000" // /* MW 7 */ + 4134 "10111010" // /* MW 6 */ + 4135 "10010001" // /* MW 5 */ + 4136 "00011100" // /* MW 4 */ + 4137 "00100010" // /* MW 3 */ + 4138 "00111010" // /* MW 2 */ + 4139 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 708 48 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4141 "10101111" // /* MW 9 */ + 4142 "01000001" // /* MW 8 */ + 4143 "00000001" // /* MW 7 */ + 4144 "10000000" // /* MW 6 */ + 4145 "00110001" // /* MW 5 */ + 4146 "00011100" // /* MW 4 */ + 4147 "00100010" // /* MW 3 */ + 4148 "10111110" // /* MW 2 */ + 4149 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 709 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first + 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4151 "00000000" // /* MW 5 */ + 4152 "01010000" // /* MW 4 */ + 4153 "00110000" // /* MW 3 */ + 4154 "10001110" // /* MW 2 */ + 4155 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 710 50 first +.delay_slot + 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4157 "11110001" // /* MW 3 */ + 4158 "01011100" // /* MW 2 */ + 4159 "00001010" // /* MW 1 */ +.delay_slot + 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4161 "00010001" // /* MW 3 */ + 4162 "00011100" // /* MW 2 */ + 4163 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 first +.delay_slot + 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4165 "01010001" // /* MW 3 */ + 4166 "00011100" // /* MW 2 */ + 4167 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.delay_slot + 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "01010001" // /* MW 3 */ + 4170 "00000100" // /* MW 2 */ + 4171 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 720 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first +.delay_slot + 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4173 "01110001" // /* MW 9 */ + 4174 "00000000" // /* MW 8 */ + 4175 "00000000" // /* MW 7 */ + 4176 "00000000" // /* MW 6 */ + 4177 "11111110" // /* MW 5 */ + 4178 "00111111" // /* MW 4 */ + 4179 "00110000" // /* MW 3 */ + 4180 "10001010" // /* MW 2 */ +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + 4181 "01000010" // /* MW 1 */ +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 689 first +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 704 12 +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.function_start + 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4193 "01111000" // /* MW 11 */ + 4194 "01100000" // /* MW 10 */ + 4195 "00001010" // /* MW 9 */ + 4196 "00001000" // /* MW 8 */ + 4197 "10000000" // /* MW 7 */ + 4198 "00000001" // /* MW 6 */ + 4199 "10001011" // /* MW 5 */ + 4200 "10000100" // /* MW 4 */ + 4201 "10000010" // /* MW 3 */ + 4202 "00000011" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 526 11 +.src_ref 2 "conv2d_bf16.h" 698 28 first +.src_ref 2 "conv2d_bf16.h" 704 12 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 + 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4205 "01100000" // /* MW 13 */ + 4206 "00001001" // /* MW 12 */ + 4207 "00100000" // /* MW 11 */ + 4208 "00100001" // /* MW 10 */ + 4209 "00000000" // /* MW 9 */ + 4210 "00110110" // /* MW 8 */ + 4211 "00000001" // /* MW 7 */ + 4212 "00110100" // /* MW 6 */ + 4213 "00101000" // /* MW 5 */ + 4214 "00101000" // /* MW 4 */ + 4215 "10001000" // /* MW 3 */ + 4216 "00000110" // /* MW 2 */ + 4217 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 702 37 + 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4219 "00010000" // /* MW 9 */ + 4220 "00110100" // /* MW 8 */ + 4221 "00110010" // /* MW 7 */ + 4222 "11110010" // /* MW 6 */ + 4223 "00000001" // /* MW 5 */ + 4224 "00000000" // /* MW 4 */ + 4225 "11010000" // /* MW 3 */ + 4226 "10010100" // /* MW 2 */ + 4227 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 43 +.src_ref 2 "conv2d_bf16.h" 702 4 first + 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4229 "00010000" // /* MW 9 */ + 4230 "01111000" // /* MW 8 */ + 4231 "01111000" // /* MW 7 */ + 4232 "00000100" // /* MW 6 */ + 4233 "00000000" // /* MW 5 */ + 4234 "00000000" // /* MW 4 */ + 4235 "11010000" // /* MW 3 */ + 4236 "10010000" // /* MW 2 */ + 4237 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 699 43 first +.src_ref 2 "conv2d_bf16.h" 702 4 + 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4239 "00010000" // /* MW 9 */ + 4240 "10010000" // /* MW 8 */ + 4241 "10111000" // /* MW 7 */ + 4242 "00000101" // /* MW 6 */ + 4243 "00000000" // /* MW 5 */ + 4244 "00000000" // /* MW 4 */ + 4245 "11010000" // /* MW 3 */ + 4246 "10000000" // /* MW 2 */ + 4247 "01100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 702 37 first + 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4249 "00000001" // /* MW 5 */ + 4250 "00000000" // /* MW 4 */ + 4251 "11010001" // /* MW 3 */ + 4252 "10000010" // /* MW 2 */ + 4253 "01111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 37 + 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4255 "00100010" // /* MW 3 */ + 4256 "00000100" // /* MW 2 */ + 4257 "00000100" // /* MW 1 */ + 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4259 "00000000" // /* MW 1 */ + 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4261 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 705 66 first + 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4263 "00000001" // /* MW 5 */ + 4264 "10000101" // /* MW 4 */ + 4265 "10000000" // /* MW 3 */ + 4266 "00001010" // /* MW 2 */ + 4267 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first + 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4269 "00010100" // /* MW 3 */ + 4270 "00110000" // /* MW 2 */ + 4271 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "00010100" // /* MW 3 */ + 4274 "00010100" // /* MW 2 */ + 4275 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 4 first +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4277 "11111101" // /* MW 5 */ + 4278 "11100000" // /* MW 4 */ + 4279 "10001010" // /* MW 3 */ + 4280 "00001010" // /* MW 2 */ + 4281 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4283 "00000000" // /* MW 5 */ + 4284 "11110101" // /* MW 4 */ + 4285 "10000000" // /* MW 3 */ + 4286 "00000010" // /* MW 2 */ + 4287 "11000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4289 "00010100" // /* MW 3 */ + 4290 "00010100" // /* MW 2 */ + 4291 "00111100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4293 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4295 "01111110" // /* MW 9 */ + 4296 "10100101" // /* MW 8 */ + 4297 "00000001" // /* MW 7 */ + 4298 "00000000" // /* MW 6 */ + 4299 "01010100" // /* MW 5 */ + 4300 "00000000" // /* MW 4 */ + 4301 "11110000" // /* MW 3 */ + 4302 "00101100" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "11000101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00101000" // /* MW 5 */ + 4316 "01100000" // /* MW 4 */ + 4317 "11111100" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "11000101" // /* MW 12 */ + 4325 "01000000" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.src_ref 2 "conv2d_bf16.h" 704 12 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00101000" // /* MW 5 */ + 4348 "00101000" // /* MW 4 */ + 4349 "11111000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "00000011" // /* MW 7 */ + 4362 "10000000" // /* MW 6 */ + 4363 "10101101" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "11000101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "00000011" // /* MW 7 */ + 4378 "00000000" // /* MW 6 */ + 4379 "00101001" // /* MW 5 */ + 4380 "01100000" // /* MW 4 */ + 4381 "11111100" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 3 "utils.h" 531 4 first +.src_ref 2 "conv2d_bf16.h" 706 18 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "11000101" // /* MW 12 */ + 4389 "01000000" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "00000011" // /* MW 7 */ + 4394 "00000000" // /* MW 6 */ + 4395 "00100011" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4401 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4403 "00000011" // /* MW 3 */ + 4404 "10000000" // /* MW 2 */ + 4405 "00001101" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4407 "01110000" // /* MW 7 */ + 4408 "11000101" // /* MW 6 */ + 4409 "00000001" // /* MW 5 */ + 4410 "00000000" // /* MW 4 */ + 4411 "01100000" // /* MW 3 */ + 4412 "00000000" // /* MW 2 */ + 4413 "00100000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4415 "10001010" // /* MW 3 */ + 4416 "10000001" // /* MW 2 */ + 4417 "00011000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4419 "00000011" // /* MW 3 */ + 4420 "00000000" // /* MW 2 */ + 4421 "00001011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first + 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4423 "01110000" // /* MW 7 */ + 4424 "11000101" // /* MW 6 */ + 4425 "00000001" // /* MW 5 */ + 4426 "00000000" // /* MW 4 */ + 4427 "01100000" // /* MW 3 */ + 4428 "00000000" // /* MW 2 */ + 4429 "10110000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.src_ref 2 "conv2d_bf16.h" 708 12 first + 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4431 "01110000" // /* MW 7 */ + 4432 "11000101" // /* MW 6 */ + 4433 "01000000" // /* MW 5 */ + 4434 "00000000" // /* MW 4 */ + 4435 "01100000" // /* MW 3 */ + 4436 "00000000" // /* MW 2 */ + 4437 "00100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4439 "00000011" // /* MW 3 */ + 4440 "00000000" // /* MW 2 */ + 4441 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.src_ref 2 "conv2d_bf16.h" 723 first + 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4443 "00000000" // /* MW 5 */ + 4444 "01010000" // /* MW 4 */ + 4445 "01100000" // /* MW 3 */ + 4446 "00000000" // /* MW 2 */ + 4447 "10110000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 708 12 first +.delay_slot + 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "00000011" // /* MW 3 */ + 4450 "00000000" // /* MW 2 */ + 4451 "00001001" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first +.delay_slot + 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4453 "00000011" // /* MW 3 */ + 4454 "00000000" // /* MW 2 */ + 4455 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + 4461 "00000000" // /* MW 1 */ +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.function_start + 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4465 "01100000" // /* MW 13 */ + 4466 "00010001" // /* MW 12 */ + 4467 "10010001" // /* MW 11 */ + 4468 "00001110" // /* MW 10 */ + 4469 "00000000" // /* MW 9 */ + 4470 "00000000" // /* MW 8 */ + 4471 "10000000" // /* MW 7 */ + 4472 "00000000" // /* MW 6 */ + 4473 "00100000" // /* MW 5 */ + 4474 "00111111" // /* MW 4 */ + 4475 "10000110" // /* MW 3 */ + 4476 "11100000" // /* MW 2 */ + 4477 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 241 95 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4479 "01111000" // /* MW 11 */ + 4480 "01100000" // /* MW 10 */ + 4481 "00101011" // /* MW 9 */ + 4482 "00001010" // /* MW 8 */ + 4483 "11000101" // /* MW 7 */ + 4484 "10111111" // /* MW 6 */ + 4485 "10010101" // /* MW 5 */ + 4486 "11110001" // /* MW 4 */ + 4487 "00000111" // /* MW 3 */ + 4488 "01110011" // /* MW 2 */ + 4489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 94 +.src_ref 2 "conv2d_bf16_params.h" 242 100 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 245 28 +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4491 "00001000" // /* MW 11 */ + 4492 "01000111" // /* MW 10 */ + 4493 "00110100" // /* MW 9 */ + 4494 "00101001" // /* MW 8 */ + 4495 "00010000" // /* MW 7 */ + 4496 "10000001" // /* MW 6 */ + 4497 "00110101" // /* MW 5 */ + 4498 "11011010" // /* MW 4 */ + 4499 "00000111" // /* MW 3 */ + 4500 "00011001" // /* MW 2 */ + 4501 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 240 68 first + 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4503 "00010000" // /* MW 11 */ + 4504 "00000000" // /* MW 10 */ + 4505 "10101000" // /* MW 9 */ + 4506 "00000011" // /* MW 8 */ + 4507 "01000000" // /* MW 7 */ + 4508 "10000000" // /* MW 6 */ + 4509 "00110101" // /* MW 5 */ + 4510 "11110101" // /* MW 4 */ + 4511 "11010111" // /* MW 3 */ + 4512 "11001010" // /* MW 2 */ + 4513 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.src_ref 2 "conv2d_bf16_params.h" 245 20 + 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4515 "10010000" // /* MW 11 */ + 4516 "11111111" // /* MW 10 */ + 4517 "11101111" // /* MW 9 */ + 4518 "11111111" // /* MW 8 */ + 4519 "01111111" // /* MW 7 */ + 4520 "10000000" // /* MW 6 */ + 4521 "11010101" // /* MW 5 */ + 4522 "11111101" // /* MW 4 */ + 4523 "10000111" // /* MW 3 */ + 4524 "00011000" // /* MW 2 */ + 4525 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4527 "01011000" // /* MW 11 */ + 4528 "11101100" // /* MW 10 */ + 4529 "00000111" // /* MW 9 */ + 4530 "00001010" // /* MW 8 */ + 4531 "01100001" // /* MW 7 */ + 4532 "10000001" // /* MW 6 */ + 4533 "10110101" // /* MW 5 */ + 4534 "11100001" // /* MW 4 */ + 4535 "00000111" // /* MW 3 */ + 4536 "10110100" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 39 +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.src_ref 2 "conv2d_bf16_params.h" 250 71 + 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4539 "01011000" // /* MW 11 */ + 4540 "11000100" // /* MW 10 */ + 4541 "10000111" // /* MW 9 */ + 4542 "11001010" // /* MW 8 */ + 4543 "01110111" // /* MW 7 */ + 4544 "10000111" // /* MW 6 */ + 4545 "11110101" // /* MW 5 */ + 4546 "11101101" // /* MW 4 */ + 4547 "00000111" // /* MW 3 */ + 4548 "10010101" // /* MW 2 */ + 4549 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 + 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4551 "01010000" // /* MW 7 */ + 4552 "01000000" // /* MW 6 */ + 4553 "10000000" // /* MW 5 */ + 4554 "00000011" // /* MW 4 */ + 4555 "10110000" // /* MW 3 */ + 4556 "01110011" // /* MW 2 */ + 4557 "11111111" // /* MW 1 */ + 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4559 "00111101" // /* MW 3 */ + 4560 "11100100" // /* MW 2 */ + 4561 "00001111" // /* MW 1 */ + 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4563 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 + 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "00100000" // /* MW 3 */ + 4566 "01011001" // /* MW 2 */ + 4567 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 241 95 first + 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "10011011" // /* MW 5 */ + 4570 "01110111" // /* MW 4 */ + 4571 "00110110" // /* MW 3 */ + 4572 "00110010" // /* MW 2 */ + 4573 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 first +.src_ref 2 "conv2d_bf16_params.h" 242 94 first + 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4575 "00101111" // /* MW 5 */ + 4576 "11110010" // /* MW 4 */ + 4577 "01011110" // /* MW 3 */ + 4578 "11111001" // /* MW 2 */ + 4579 "01011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 20 first + 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4581 "00101010" // /* MW 3 */ + 4582 "11001001" // /* MW 2 */ + 4583 "00000010" // /* MW 1 */ + 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4585 "00000000" // /* MW 1 */ + 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4587 "00000000" // /* MW 1 */ + 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4589 "00000000" // /* MW 1 */ + 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4591 "00000000" // /* MW 1 */ + 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4593 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 174 first + 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4595 "11101100" // /* MW 3 */ + 4596 "01110111" // /* MW 2 */ + 4597 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 + 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4599 "00110010" // /* MW 3 */ + 4600 "01011101" // /* MW 2 */ + 4601 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4603 "11001100" // /* MW 3 */ + 4604 "11110110" // /* MW 2 */ + 4605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 241 95 first +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4607 "11001111" // /* MW 5 */ + 4608 "10110111" // /* MW 4 */ + 4609 "11101110" // /* MW 3 */ + 4610 "01110000" // /* MW 2 */ + 4611 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 100 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4613 "00011101" // /* MW 3 */ + 4614 "01111111" // /* MW 2 */ + 4615 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4617 "11100010" // /* MW 3 */ + 4618 "01011000" // /* MW 2 */ + 4619 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 98 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4621 "11000101" // /* MW 3 */ + 4622 "11111001" // /* MW 2 */ + 4623 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 151 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "01100010" // /* MW 5 */ + 4626 "00111100" // /* MW 4 */ + 4627 "10011110" // /* MW 3 */ + 4628 "11111101" // /* MW 2 */ + 4629 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "11000010" // /* MW 3 */ + 4632 "01111001" // /* MW 2 */ + 4633 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 80 first + 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11001100" // /* MW 3 */ + 4636 "01111111" // /* MW 2 */ + 4637 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 117 first +.src_ref 2 "conv2d_bf16_params.h" 243 39 + 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4639 "11010001" // /* MW 5 */ + 4640 "11110111" // /* MW 4 */ + 4641 "00111110" // /* MW 3 */ + 4642 "01111110" // /* MW 2 */ + 4643 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 first +.src_ref 2 "conv2d_bf16_params.h" 245 28 first + 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4645 "00110001" // /* MW 5 */ + 4646 "10110010" // /* MW 4 */ + 4647 "01010100" // /* MW 3 */ + 4648 "01111001" // /* MW 2 */ + 4649 "01011101" // /* MW 1 */ + 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4651 "00000000" // /* MW 1 */ + 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4653 "00000000" // /* MW 1 */ + 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4655 "00000000" // /* MW 1 */ + 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4657 "00000000" // /* MW 1 */ + 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4659 "00000000" // /* MW 1 */ + 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4661 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 87 + 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4663 "11111100" // /* MW 5 */ + 4664 "10111110" // /* MW 4 */ + 4665 "00011111" // /* MW 3 */ + 4666 "10101101" // /* MW 2 */ + 4667 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4669 "00000001" // /* MW 5 */ + 4670 "01000000" // /* MW 4 */ + 4671 "01000000" // /* MW 3 */ + 4672 "00001001" // /* MW 2 */ + 4673 "01100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.delay_slot + 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4675 "01001000" // /* MW 3 */ + 4676 "10010011" // /* MW 2 */ + 4677 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4679 "10010000" // /* MW 3 */ + 4680 "11111110" // /* MW 2 */ + 4681 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4683 "01100100" // /* MW 3 */ + 4684 "01101101" // /* MW 2 */ + 4685 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4687 "01111100" // /* MW 3 */ + 4688 "11101111" // /* MW 2 */ + 4689 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 132 +.delay_slot + 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4691 "01100100" // /* MW 3 */ + 4692 "11100001" // /* MW 2 */ + 4693 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4695 "00000001" // /* MW 5 */ + 4696 "01000000" // /* MW 4 */ + 4697 "01000000" // /* MW 3 */ + 4698 "00001001" // /* MW 2 */ + 4699 "11101000" // /* MW 1 */ +.delay_slot + 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4701 "00011101" // /* MW 3 */ + 4702 "11101011" // /* MW 2 */ + 4703 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4711 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 + 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */ + 4713 "00100000" // /* MW 9 */ + 4714 "00000000" // /* MW 8 */ + 4715 "00000000" // /* MW 7 */ + 4716 "01010110" // /* MW 6 */ + 4717 "00000010" // /* MW 5 */ + 4718 "00000000" // /* MW 4 */ + 4719 "00000000" // /* MW 3 */ + 4720 "00111011" // /* MW 2 */ + 4721 "00000000" // /* MW 1 */ +.delay_slot + 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4723 "10011100" // /* MW 3 */ + 4724 "00011001" // /* MW 2 */ + 4725 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1849 12 +.delay_slot + 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4727 "00000101" // /* MW 3 */ + 4728 "00100110" // /* MW 2 */ + 4729 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4735 "00000000" // /* MW 1 */ +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 2 "conv2d_bf16_params.h" 250 71 first +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4737 "01111000" // /* MW 11 */ + 4738 "11001110" // /* MW 10 */ + 4739 "00001100" // /* MW 9 */ + 4740 "00111100" // /* MW 8 */ + 4741 "10111111" // /* MW 7 */ + 4742 "10101011" // /* MW 6 */ + 4743 "00011101" // /* MW 5 */ + 4744 "11101011" // /* MW 4 */ + 4745 "00000111" // /* MW 3 */ + 4746 "10010101" // /* MW 2 */ + 4747 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4749 "01011101" // /* MW 3 */ + 4750 "10101011" // /* MW 2 */ + 4751 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 + 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4753 "10010010" // /* MW 3 */ + 4754 "01101011" // /* MW 2 */ + 4755 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4757 "11100111" // /* MW 3 */ + 4758 "11110111" // /* MW 2 */ + 4759 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4761 "01000001" // /* MW 5 */ + 4762 "10110000" // /* MW 4 */ + 4763 "01001101" // /* MW 3 */ + 4764 "11110010" // /* MW 2 */ + 4765 "10101100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4767 "00110010" // /* MW 3 */ + 4768 "01100111" // /* MW 2 */ + 4769 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 first + 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4771 "01000100" // /* MW 3 */ + 4772 "00101001" // /* MW 2 */ + 4773 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4775 "11110000" // /* MW 3 */ + 4776 "00110110" // /* MW 2 */ + 4777 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 152 first + 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4779 "10001011" // /* MW 5 */ + 4780 "11001111" // /* MW 4 */ + 4781 "11111001" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 +.src_ref 2 "conv2d_bf16_params.h" 258 8 first + 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4785 "01100000" // /* MW 11 */ + 4786 "00000000" // /* MW 10 */ + 4787 "00010000" // /* MW 9 */ + 4788 "01011100" // /* MW 8 */ + 4789 "00000010" // /* MW 7 */ + 4790 "10111010" // /* MW 6 */ + 4791 "01110001" // /* MW 5 */ + 4792 "01101111" // /* MW 4 */ + 4793 "10000010" // /* MW 3 */ + 4794 "10010000" // /* MW 2 */ + 4795 "00000001" // /* MW 1 */ +.delay_slot + 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100111" // /* MW 3 */ + 4798 "10001010" // /* MW 2 */ + 4799 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4807 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 + 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4809 "11111110" // /* MW 5 */ + 4810 "00111111" // /* MW 4 */ + 4811 "11111010" // /* MW 3 */ + 4812 "11111111" // /* MW 2 */ + 4813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 first + 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "01000100" // /* MW 3 */ + 4816 "10100101" // /* MW 2 */ + 4817 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4819 "00011100" // /* MW 13 */ + 4820 "00000000" // /* MW 12 */ + 4821 "00000000" // /* MW 11 */ + 4822 "01010111" // /* MW 10 */ + 4823 "00011010" // /* MW 9 */ + 4824 "01000000" // /* MW 8 */ + 4825 "00000000" // /* MW 7 */ + 4826 "00000000" // /* MW 6 */ + 4827 "10100011" // /* MW 5 */ + 4828 "11101100" // /* MW 4 */ + 4829 "11110110" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.src_ref 2 "conv2d_bf16.h" 1841 65 first +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16.h" 1849 12 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4833 "01011000" // /* MW 9 */ + 4834 "11111101" // /* MW 8 */ + 4835 "11001111" // /* MW 7 */ + 4836 "10000010" // /* MW 6 */ + 4837 "01000100" // /* MW 5 */ + 4838 "00100111" // /* MW 4 */ + 4839 "11010000" // /* MW 3 */ + 4840 "11010010" // /* MW 2 */ + 4841 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1841 34 +.src_ref 2 "conv2d_bf16.h" 1842 36 +.src_ref 2 "conv2d_bf16.h" 1842 67 +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4843 "01011000" // /* MW 9 */ + 4844 "00100100" // /* MW 8 */ + 4845 "00000000" // /* MW 7 */ + 4846 "11111010" // /* MW 6 */ + 4847 "01011111" // /* MW 5 */ + 4848 "00101001" // /* MW 4 */ + 4849 "00000000" // /* MW 3 */ + 4850 "01010010" // /* MW 2 */ + 4851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 67 first +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4853 "01011000" // /* MW 11 */ + 4854 "11001100" // /* MW 10 */ + 4855 "00000111" // /* MW 9 */ + 4856 "00100110" // /* MW 8 */ + 4857 "01101011" // /* MW 7 */ + 4858 "10101011" // /* MW 6 */ + 4859 "00101101" // /* MW 5 */ + 4860 "11010000" // /* MW 4 */ + 4861 "11010111" // /* MW 3 */ + 4862 "01011010" // /* MW 2 */ + 4863 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1845 80 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4865 "01011000" // /* MW 11 */ + 4866 "11000100" // /* MW 10 */ + 4867 "00000000" // /* MW 9 */ + 4868 "11101010" // /* MW 8 */ + 4869 "00110111" // /* MW 7 */ + 4870 "10111111" // /* MW 6 */ + 4871 "11010101" // /* MW 5 */ + 4872 "11011110" // /* MW 4 */ + 4873 "11010111" // /* MW 3 */ + 4874 "01011110" // /* MW 2 */ + 4875 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 63 first + 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4877 "10110110" // /* MW 3 */ + 4878 "11111111" // /* MW 2 */ + 4879 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 first + 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4881 "11110110" // /* MW 3 */ + 4882 "10001011" // /* MW 2 */ + 4883 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4885 "10110110" // /* MW 3 */ + 4886 "00000110" // /* MW 2 */ + 4887 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1841 34 first + 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4889 "01011011" // /* MW 5 */ + 4890 "00100110" // /* MW 4 */ + 4891 "11011010" // /* MW 3 */ + 4892 "11010010" // /* MW 2 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4895 "11010110" // /* MW 3 */ + 4896 "00000111" // /* MW 2 */ + 4897 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first + 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4899 "00101101" // /* MW 3 */ + 4900 "10101101" // /* MW 2 */ + 4901 "00010101" // /* MW 1 */ + 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4903 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 80 first + 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00111110" // /* MW 3 */ + 4906 "01100111" // /* MW 2 */ + 4907 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 first + 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4909 "00011000" // /* MW 3 */ + 4910 "11100011" // /* MW 2 */ + 4911 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 12 + 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */ + 4913 "00000001" // /* MW 5 */ + 4914 "01000000" // /* MW 4 */ + 4915 "11010000" // /* MW 3 */ + 4916 "00001001" // /* MW 2 */ + 4917 "10001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first +.src_ref 2 "conv2d_bf16.h" 1842 75 first +.delay_slot + 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4919 "10110010" // /* MW 5 */ + 4920 "10110101" // /* MW 4 */ + 4921 "10111010" // /* MW 3 */ + 4922 "10100101" // /* MW 2 */ + 4923 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4925 "10110010" // /* MW 5 */ + 4926 "10010101" // /* MW 4 */ + 4927 "10110000" // /* MW 3 */ + 4928 "01100101" // /* MW 2 */ + 4929 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1841 34 first +.delay_slot + 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4931 "10100000" // /* MW 7 */ + 4932 "01101000" // /* MW 6 */ + 4933 "11001010" // /* MW 5 */ + 4934 "00000001" // /* MW 4 */ + 4935 "10110000" // /* MW 3 */ + 4936 "10000100" // /* MW 2 */ + 4937 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4939 "10000000" // /* MW 3 */ + 4940 "11010000" // /* MW 2 */ + 4941 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4943 "11111001" // /* MW 3 */ + 4944 "01101010" // /* MW 2 */ + 4945 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4947 "11010000" // /* MW 5 */ + 4948 "11001000" // /* MW 4 */ + 4949 "11001110" // /* MW 3 */ + 4950 "00000111" // /* MW 2 */ + 4951 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 first + 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4953 "10000000" // /* MW 5 */ + 4954 "10110100" // /* MW 4 */ + 4955 "01010000" // /* MW 3 */ + 4956 "11000100" // /* MW 2 */ + 4957 "11100000" // /* MW 1 */ + 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4961 "00000000" // /* MW 5 */ + 4962 "00100000" // /* MW 4 */ + 4963 "00001010" // /* MW 3 */ + 4964 "01111111" // /* MW 2 */ + 4965 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4967 "10010001" // /* MW 3 */ + 4968 "00000010" // /* MW 2 */ + 4969 "00011000" // /* MW 1 */ + 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4971 "11100000" // /* MW 3 */ + 4972 "00010101" // /* MW 2 */ + 4973 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4975 "01011111" // /* MW 3 */ + 4976 "01101010" // /* MW 2 */ + 4977 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4979 "00100101" // /* MW 5 */ + 4980 "00000001" // /* MW 4 */ + 4981 "11100000" // /* MW 3 */ + 4982 "11000110" // /* MW 2 */ + 4983 "11100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4985 "10000000" // /* MW 3 */ + 4986 "01111010" // /* MW 2 */ + 4987 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4989 "00010110" // /* MW 3 */ + 4990 "01000000" // /* MW 2 */ + 4991 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4993 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4995 "00000001" // /* MW 3 */ + 4996 "01000001" // /* MW 2 */ + 4997 "00011100" // /* MW 1 */ + 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4999 "00000000" // /* MW 1 */ + 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5001 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5003 "00110010" // /* MW 3 */ + 5004 "00000110" // /* MW 2 */ + 5005 "00000111" // /* MW 1 */ + 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5007 "00000000" // /* MW 1 */ + 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5009 "00000000" // /* MW 1 */ + 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5011 "00000000" // /* MW 1 */ + 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5013 "00000000" // /* MW 1 */ + 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5015 "00000000" // /* MW 1 */ + 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5017 "00000000" // /* MW 1 */ + 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "01101011" // /* MW 5 */ + 5020 "10100100" // /* MW 4 */ + 5021 "11111111" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.src_ref 2 "conv2d_bf16.h" 881 76 +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5025 "00010000" // /* MW 11 */ + 5026 "00110100" // /* MW 10 */ + 5027 "10110010" // /* MW 9 */ + 5028 "11110001" // /* MW 8 */ + 5029 "00000001" // /* MW 7 */ + 5030 "00000000" // /* MW 6 */ + 5031 "00001011" // /* MW 5 */ + 5032 "10001110" // /* MW 4 */ + 5033 "10000001" // /* MW 3 */ + 5034 "10010000" // /* MW 2 */ + 5035 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.src_ref 2 "conv2d_bf16.h" 876 51 first +.src_ref 2 "conv2d_bf16.h" 881 76 first +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5037 "01011000" // /* MW 11 */ + 5038 "00001011" // /* MW 10 */ + 5039 "01101000" // /* MW 9 */ + 5040 "10010010" // /* MW 8 */ + 5041 "00011001" // /* MW 7 */ + 5042 "00110011" // /* MW 6 */ + 5043 "10001011" // /* MW 5 */ + 5044 "10000100" // /* MW 4 */ + 5045 "01010000" // /* MW 3 */ + 5046 "01000101" // /* MW 2 */ + 5047 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5049 "01111000" // /* MW 9 */ + 5050 "01100000" // /* MW 8 */ + 5051 "10101010" // /* MW 7 */ + 5052 "01100101" // /* MW 6 */ + 5053 "10111001" // /* MW 5 */ + 5054 "00111001" // /* MW 4 */ + 5055 "00000000" // /* MW 3 */ + 5056 "10010110" // /* MW 2 */ + 5057 "01100001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 883 4 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5059 "01100111" // /* MW 3 */ + 5060 "00000110" // /* MW 2 */ + 5061 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5063 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 884 4 first +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5065 "00000001" // /* MW 5 */ + 5066 "00000000" // /* MW 4 */ + 5067 "00110000" // /* MW 3 */ + 5068 "00001000" // /* MW 2 */ + 5069 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5071 "00101101" // /* MW 3 */ + 5072 "01101011" // /* MW 2 */ + 5073 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.delay_slot + 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5075 "11111001" // /* MW 3 */ + 5076 "01101010" // /* MW 2 */ + 5077 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 first +.delay_slot + 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5079 "00010001" // /* MW 3 */ + 5080 "01100011" // /* MW 2 */ + 5081 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.delay_slot + 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5083 "00110101" // /* MW 5 */ + 5084 "00101100" // /* MW 4 */ + 5085 "10111010" // /* MW 3 */ + 5086 "01100101" // /* MW 2 */ + 5087 "10001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.delay_slot + 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5089 "00000000" // /* MW 15 */ + 5090 "00000000" // /* MW 14 */ + 5091 "10101000" // /* MW 13 */ + 5092 "11100010" // /* MW 12 */ + 5093 "10001011" // /* MW 11 */ + 5094 "00010001" // /* MW 10 */ + 5095 "10011010" // /* MW 9 */ + 5096 "00101100" // /* MW 8 */ + 5097 "01011011" // /* MW 7 */ + 5098 "00000001" // /* MW 6 */ + 5099 "00100000" // /* MW 5 */ + 5100 "00000000" // /* MW 4 */ + 5101 "11110000" // /* MW 3 */ + 5102 "00101100" // /* MW 2 */ + 5103 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.return_address + 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5105 "10011001" // /* MW 3 */ + 5106 "11010100" // /* MW 2 */ + 5107 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 first +.no_stack_arguments + 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5109 "00000001" // /* MW 5 */ + 5110 "00000000" // /* MW 4 */ + 5111 "00110000" // /* MW 3 */ + 5112 "00001000" // /* MW 2 */ + 5113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5117 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.delay_slot + 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5119 "10010000" // /* MW 3 */ + 5120 "01010110" // /* MW 2 */ + 5121 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5123 "10100000" // /* MW 3 */ + 5124 "01100110" // /* MW 2 */ + 5125 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5127 "00000000" // /* MW 9 */ + 5128 "00000000" // /* MW 8 */ + 5129 "00000000" // /* MW 7 */ + 5130 "00000000" // /* MW 6 */ + 5131 "00001011" // /* MW 5 */ + 5132 "10001111" // /* MW 4 */ + 5133 "11110000" // /* MW 3 */ + 5134 "00101100" // /* MW 2 */ + 5135 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.return_address + 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5137 "00001000" // /* MW 9 */ + 5138 "01100011" // /* MW 8 */ + 5139 "00110011" // /* MW 7 */ + 5140 "11101010" // /* MW 6 */ + 5141 "00110111" // /* MW 5 */ + 5142 "00000001" // /* MW 4 */ + 5143 "10000000" // /* MW 3 */ + 5144 "10011010" // /* MW 2 */ + 5145 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 886 4 +.src_ref 2 "conv2d_bf16.h" 896 23 first +.src_ref 2 "conv2d_bf16.h" 1123 71 + 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5147 "01100010" // /* MW 5 */ + 5148 "00110100" // /* MW 4 */ + 5149 "11010000" // /* MW 3 */ + 5150 "10000100" // /* MW 2 */ + 5151 "10000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "01000110" // /* MW 3 */ + 5154 "00011100" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5157 "00100110" // /* MW 3 */ + 5158 "00011110" // /* MW 2 */ + 5159 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "01000110" // /* MW 3 */ + 5162 "00011110" // /* MW 2 */ + 5163 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5165 "00000110" // /* MW 3 */ + 5166 "00011100" // /* MW 2 */ + 5167 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5169 "01100110" // /* MW 3 */ + 5170 "00011100" // /* MW 2 */ + 5171 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5173 "01100110" // /* MW 3 */ + 5174 "00011110" // /* MW 2 */ + 5175 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 first + 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5177 "11010110" // /* MW 3 */ + 5178 "00011110" // /* MW 2 */ + 5179 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5181 "00110110" // /* MW 3 */ + 5182 "00011110" // /* MW 2 */ + 5183 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5185 "10010110" // /* MW 3 */ + 5186 "00011111" // /* MW 2 */ + 5187 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "10110110" // /* MW 3 */ + 5190 "00011110" // /* MW 2 */ + 5191 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "11110110" // /* MW 3 */ + 5194 "00011110" // /* MW 2 */ + 5195 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "10011110" // /* MW 3 */ + 5198 "00011101" // /* MW 2 */ + 5199 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5201 "00100110" // /* MW 3 */ + 5202 "00011101" // /* MW 2 */ + 5203 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5205 "10100110" // /* MW 3 */ + 5206 "00011100" // /* MW 2 */ + 5207 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5209 "11000110" // /* MW 3 */ + 5210 "00011100" // /* MW 2 */ + 5211 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5213 "10100110" // /* MW 3 */ + 5214 "00011110" // /* MW 2 */ + 5215 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5217 "11010110" // /* MW 3 */ + 5218 "00011111" // /* MW 2 */ + 5219 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5221 "10110110" // /* MW 3 */ + 5222 "00011111" // /* MW 2 */ + 5223 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5225 "11100110" // /* MW 3 */ + 5226 "00011100" // /* MW 2 */ + 5227 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5229 "01001010" // /* MW 3 */ + 5230 "11000010" // /* MW 2 */ + 5231 "00000100" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 + 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5233 "10010001" // /* MW 3 */ + 5234 "11010010" // /* MW 2 */ + 5235 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5237 "01010110" // /* MW 3 */ + 5238 "00000100" // /* MW 2 */ + 5239 "00000100" // /* MW 1 */ + 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5241 "00000000" // /* MW 1 */ + 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5243 "00000000" // /* MW 1 */ + 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5245 "00000000" // /* MW 1 */ + 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5247 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5249 "00101100" // /* MW 3 */ + 5250 "11100111" // /* MW 2 */ + 5251 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 12 + 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */ + 5253 "00000001" // /* MW 5 */ + 5254 "01000000" // /* MW 4 */ + 5255 "00010000" // /* MW 3 */ + 5256 "00001100" // /* MW 2 */ + 5257 "10011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 +.delay_slot + 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5259 "11010000" // /* MW 5 */ + 5260 "11001000" // /* MW 4 */ + 5261 "11000100" // /* MW 3 */ + 5262 "00000111" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 first +.delay_slot + 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "10100111" // /* MW 3 */ + 5266 "00000101" // /* MW 2 */ + 5267 "00000010" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5269 "01110010" // /* MW 3 */ + 5270 "11010001" // /* MW 2 */ + 5271 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 71 first + 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5277 "01011000" // /* MW 9 */ + 5278 "10000100" // /* MW 8 */ + 5279 "10000000" // /* MW 7 */ + 5280 "00111111" // /* MW 6 */ + 5281 "10111001" // /* MW 5 */ + 5282 "00011011" // /* MW 4 */ + 5283 "00100000" // /* MW 3 */ + 5284 "01000011" // /* MW 2 */ + 5285 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 +.src_ref 2 "conv2d_bf16.h" 1154 80 + 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5287 "01011000" // /* MW 9 */ + 5288 "00111100" // /* MW 8 */ + 5289 "00000000" // /* MW 7 */ + 5290 "00001010" // /* MW 6 */ + 5291 "00100000" // /* MW 5 */ + 5292 "00111101" // /* MW 4 */ + 5293 "00000000" // /* MW 3 */ + 5294 "00010011" // /* MW 2 */ + 5295 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5297 "01111000" // /* MW 9 */ + 5298 "11010000" // /* MW 8 */ + 5299 "11100100" // /* MW 7 */ + 5300 "00001011" // /* MW 6 */ + 5301 "10100000" // /* MW 5 */ + 5302 "00000001" // /* MW 4 */ + 5303 "10000000" // /* MW 3 */ + 5304 "00010100" // /* MW 2 */ + 5305 "11111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 746 83 + 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5307 "01111000" // /* MW 11 */ + 5308 "11000000" // /* MW 10 */ + 5309 "10100111" // /* MW 9 */ + 5310 "00000001" // /* MW 8 */ + 5311 "11010100" // /* MW 7 */ + 5312 "00011011" // /* MW 6 */ + 5313 "01001011" // /* MW 5 */ + 5314 "00011100" // /* MW 4 */ + 5315 "10000010" // /* MW 3 */ + 5316 "10011000" // /* MW 2 */ + 5317 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.src_ref 2 "conv2d_bf16.h" 1199 26 +.src_ref 2 "conv2d_bf16.h" 1200 26 +.src_ref 2 "conv2d_bf16.h" 1201 26 +.src_ref 2 "conv2d_bf16.h" 1202 26 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5319 "01011000" // /* MW 11 */ + 5320 "00000111" // /* MW 10 */ + 5321 "11101000" // /* MW 9 */ + 5322 "10001001" // /* MW 8 */ + 5323 "11110111" // /* MW 7 */ + 5324 "00000001" // /* MW 6 */ + 5325 "01001011" // /* MW 5 */ + 5326 "00011100" // /* MW 4 */ + 5327 "00100110" // /* MW 3 */ + 5328 "10010110" // /* MW 2 */ + 5329 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 12 +.src_ref 2 "conv2d_bf16.h" 1218 20 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5331 "00010000" // /* MW 9 */ + 5332 "10100000" // /* MW 8 */ + 5333 "00110010" // /* MW 7 */ + 5334 "00000101" // /* MW 6 */ + 5335 "00000000" // /* MW 5 */ + 5336 "00000000" // /* MW 4 */ + 5337 "00100000" // /* MW 3 */ + 5338 "11001010" // /* MW 2 */ + 5339 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 749 26 +.src_ref 2 "conv2d_bf16.h" 750 26 +.src_ref 2 "conv2d_bf16.h" 751 26 +.src_ref 2 "conv2d_bf16.h" 752 26 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5341 "01011000" // /* MW 9 */ + 5342 "00001100" // /* MW 8 */ + 5343 "10001011" // /* MW 7 */ + 5344 "00010010" // /* MW 6 */ + 5345 "01101001" // /* MW 5 */ + 5346 "00110100" // /* MW 4 */ + 5347 "00100000" // /* MW 3 */ + 5348 "00110110" // /* MW 2 */ + 5349 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1873 + 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5351 "01011000" // /* MW 11 */ + 5352 "00000000" // /* MW 10 */ + 5353 "00001000" // /* MW 9 */ + 5354 "00001011" // /* MW 8 */ + 5355 "10010000" // /* MW 7 */ + 5356 "00000001" // /* MW 6 */ + 5357 "00100000" // /* MW 5 */ + 5358 "11010111" // /* MW 4 */ + 5359 "00101001" // /* MW 3 */ + 5360 "10000111" // /* MW 2 */ + 5361 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5363 "00010110" // /* MW 3 */ + 5364 "10001000" // /* MW 2 */ + 5365 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5367 "00100110" // /* MW 3 */ + 5368 "10101011" // /* MW 2 */ + 5369 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "01110110" // /* MW 3 */ + 5372 "00101111" // /* MW 2 */ + 5373 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 80 first + 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "10000110" // /* MW 3 */ + 5376 "00011110" // /* MW 2 */ + 5377 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 80 first + 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "11000110" // /* MW 3 */ + 5380 "10001010" // /* MW 2 */ + 5381 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 87 first + 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5383 "00000110" // /* MW 3 */ + 5384 "10011110" // /* MW 2 */ + 5385 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 83 first + 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5387 "00110110" // /* MW 3 */ + 5388 "00011100" // /* MW 2 */ + 5389 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 83 first +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5391 "00000010" // /* MW 5 */ + 5392 "00000110" // /* MW 4 */ + 5393 "11011101" // /* MW 3 */ + 5394 "00000010" // /* MW 2 */ + 5395 "10011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 66 first + 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "01110110" // /* MW 3 */ + 5398 "00010100" // /* MW 2 */ + 5399 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1206 63 first + 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5401 "10010110" // /* MW 3 */ + 5402 "00000100" // /* MW 2 */ + 5403 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 + 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5405 "00000000" // /* MW 3 */ + 5406 "11011010" // /* MW 2 */ + 5407 "00011001" // /* MW 1 */ + 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "10011001" // /* MW 3 */ + 5410 "10000011" // /* MW 2 */ + 5411 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 + 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5413 "00000000" // /* MW 3 */ + 5414 "00011011" // /* MW 2 */ + 5415 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5417 "10011001" // /* MW 3 */ + 5418 "00001101" // /* MW 2 */ + 5419 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 first + 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "11100000" // /* MW 3 */ + 5422 "00000011" // /* MW 2 */ + 5423 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 first + 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5425 "11000000" // /* MW 5 */ + 5426 "00010000" // /* MW 4 */ + 5427 "11101110" // /* MW 3 */ + 5428 "11111111" // /* MW 2 */ + 5429 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5431 "01111110" // /* MW 9 */ + 5432 "10000000" // /* MW 8 */ + 5433 "10000010" // /* MW 7 */ + 5434 "00000000" // /* MW 6 */ + 5435 "00010000" // /* MW 5 */ + 5436 "00000000" // /* MW 4 */ + 5437 "11110000" // /* MW 3 */ + 5438 "00101100" // /* MW 2 */ + 5439 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1147 31 first +.src_ref 2 "conv2d_bf16.h" 1187 40 first +.loop_nesting 1 + 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5441 "01111000" // /* MW 11 */ + 5442 "10010000" // /* MW 10 */ + 5443 "00110011" // /* MW 9 */ + 5444 "11101100" // /* MW 8 */ + 5445 "11100111" // /* MW 7 */ + 5446 "00000100" // /* MW 6 */ + 5447 "00001011" // /* MW 5 */ + 5448 "10000101" // /* MW 4 */ + 5449 "01110001" // /* MW 3 */ + 5450 "10000101" // /* MW 2 */ + 5451 "11000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1188 50 first + 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5453 "10100000" // /* MW 11 */ + 5454 "10011000" // /* MW 10 */ + 5455 "00110011" // /* MW 9 */ + 5456 "00000010" // /* MW 8 */ + 5457 "01001011" // /* MW 7 */ + 5458 "00001110" // /* MW 6 */ + 5459 "00101011" // /* MW 5 */ + 5460 "00101000" // /* MW 4 */ + 5461 "01111000" // /* MW 3 */ + 5462 "10000001" // /* MW 2 */ + 5463 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first + 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5465 "01110000" // /* MW 11 */ + 5466 "10000000" // /* MW 10 */ + 5467 "11000110" // /* MW 9 */ + 5468 "00000011" // /* MW 8 */ + 5469 "01001011" // /* MW 7 */ + 5470 "01011010" // /* MW 6 */ + 5471 "00101111" // /* MW 5 */ + 5472 "00101000" // /* MW 4 */ + 5473 "01111000" // /* MW 3 */ + 5474 "00111001" // /* MW 2 */ + 5475 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1149 31 first + 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5477 "01110000" // /* MW 11 */ + 5478 "00000000" // /* MW 10 */ + 5479 "10000010" // /* MW 9 */ + 5480 "00000001" // /* MW 8 */ + 5481 "00001011" // /* MW 7 */ + 5482 "01010011" // /* MW 6 */ + 5483 "00101011" // /* MW 5 */ + 5484 "00000011" // /* MW 4 */ + 5485 "01110100" // /* MW 3 */ + 5486 "00001101" // /* MW 2 */ + 5487 "11011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 743 30 first + 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5489 "01011110" // /* MW 9 */ + 5490 "00000000" // /* MW 8 */ + 5491 "11000000" // /* MW 7 */ + 5492 "00000001" // /* MW 6 */ + 5493 "11010100" // /* MW 5 */ + 5494 "00010010" // /* MW 4 */ + 5495 "01110100" // /* MW 3 */ + 5496 "01000001" // /* MW 2 */ + 5497 "01110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1152 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first + 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5499 "00010000" // /* MW 11 */ + 5500 "01000000" // /* MW 10 */ + 5501 "10111011" // /* MW 9 */ + 5502 "00000101" // /* MW 8 */ + 5503 "00000000" // /* MW 7 */ + 5504 "00000000" // /* MW 6 */ + 5505 "00101000" // /* MW 5 */ + 5506 "00101000" // /* MW 4 */ + 5507 "01111000" // /* MW 3 */ + 5508 "10010101" // /* MW 2 */ + 5509 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 1154 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 + 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5511 "00010000" // /* MW 11 */ + 5512 "00101000" // /* MW 10 */ + 5513 "01111011" // /* MW 9 */ + 5514 "00000100" // /* MW 8 */ + 5515 "00000000" // /* MW 7 */ + 5516 "00000000" // /* MW 6 */ + 5517 "00101000" // /* MW 5 */ + 5518 "00101000" // /* MW 4 */ + 5519 "01111000" // /* MW 3 */ + 5520 "00011101" // /* MW 2 */ + 5521 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first + 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5523 "00101000" // /* MW 5 */ + 5524 "00000001" // /* MW 4 */ + 5525 "01110100" // /* MW 3 */ + 5526 "10110101" // /* MW 2 */ + 5527 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1157 31 first + 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5529 "00101000" // /* MW 5 */ + 5530 "00100010" // /* MW 4 */ + 5531 "01111000" // /* MW 3 */ + 5532 "10100101" // /* MW 2 */ + 5533 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1159 31 first + 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5535 "00101000" // /* MW 5 */ + 5536 "00101000" // /* MW 4 */ + 5537 "01111000" // /* MW 3 */ + 5538 "00101101" // /* MW 2 */ + 5539 "11011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5541 "00101000" // /* MW 5 */ + 5542 "00101000" // /* MW 4 */ + 5543 "01111000" // /* MW 3 */ + 5544 "10000001" // /* MW 2 */ + 5545 "00100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1192 29 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5547 "00101000" // /* MW 5 */ + 5548 "00000001" // /* MW 4 */ + 5549 "01110100" // /* MW 3 */ + 5550 "10111101" // /* MW 2 */ + 5551 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "11101110" // /* MW 9 */ + 5554 "11000011" // /* MW 8 */ + 5555 "10011010" // /* MW 7 */ + 5556 "00000010" // /* MW 6 */ + 5557 "00010100" // /* MW 5 */ + 5558 "00010001" // /* MW 4 */ + 5559 "01110100" // /* MW 3 */ + 5560 "11001101" // /* MW 2 */ + 5561 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1162 81 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5563 "11100000" // /* MW 11 */ + 5564 "11000001" // /* MW 10 */ + 5565 "10011010" // /* MW 9 */ + 5566 "00000001" // /* MW 8 */ + 5567 "10001011" // /* MW 7 */ + 5568 "10011000" // /* MW 6 */ + 5569 "00101100" // /* MW 5 */ + 5570 "00101000" // /* MW 4 */ + 5571 "01111000" // /* MW 3 */ + 5572 "11000101" // /* MW 2 */ + 5573 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5575 "11101001" // /* MW 9 */ + 5576 "00010100" // /* MW 8 */ + 5577 "01001000" // /* MW 7 */ + 5578 "00011101" // /* MW 6 */ + 5579 "01010100" // /* MW 5 */ + 5580 "00000000" // /* MW 4 */ + 5581 "01110011" // /* MW 3 */ + 5582 "10000001" // /* MW 2 */ + 5583 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5585 "11101001" // /* MW 13 */ + 5586 "00101100" // /* MW 12 */ + 5587 "01001001" // /* MW 11 */ + 5588 "00000111" // /* MW 10 */ + 5589 "01011000" // /* MW 9 */ + 5590 "01011100" // /* MW 8 */ + 5591 "00000000" // /* MW 7 */ + 5592 "00000000" // /* MW 6 */ + 5593 "10010110" // /* MW 5 */ + 5594 "10010100" // /* MW 4 */ + 5595 "01110110" // /* MW 3 */ + 5596 "00110101" // /* MW 2 */ + 5597 "11001111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1162 81 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5599 "00001001" // /* MW 13 */ + 5600 "01010101" // /* MW 12 */ + 5601 "01001010" // /* MW 11 */ + 5602 "00111110" // /* MW 10 */ + 5603 "10010000" // /* MW 9 */ + 5604 "01001100" // /* MW 8 */ + 5605 "00000000" // /* MW 7 */ + 5606 "00000000" // /* MW 6 */ + 5607 "10010110" // /* MW 5 */ + 5608 "00111000" // /* MW 4 */ + 5609 "01111010" // /* MW 3 */ + 5610 "10111101" // /* MW 2 */ + 5611 "10000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1199 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5613 "00111101" // /* MW 13 */ + 5614 "01100000" // /* MW 12 */ + 5615 "11111000" // /* MW 11 */ + 5616 "00011110" // /* MW 10 */ + 5617 "10010000" // /* MW 9 */ + 5618 "01010100" // /* MW 8 */ + 5619 "00000000" // /* MW 7 */ + 5620 "00000000" // /* MW 6 */ + 5621 "10010110" // /* MW 5 */ + 5622 "10011000" // /* MW 4 */ + 5623 "01110100" // /* MW 3 */ + 5624 "00000001" // /* MW 2 */ + 5625 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1200 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5627 "00111101" // /* MW 7 */ + 5628 "01100100" // /* MW 6 */ + 5629 "11111001" // /* MW 5 */ + 5630 "00000100" // /* MW 4 */ + 5631 "01110000" // /* MW 3 */ + 5632 "10000001" // /* MW 2 */ + 5633 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1201 26 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5635 "00111101" // /* MW 7 */ + 5636 "10001000" // /* MW 6 */ + 5637 "11111010" // /* MW 5 */ + 5638 "00000100" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00001001" // /* MW 2 */ + 5641 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5643 "00001001" // /* MW 7 */ + 5644 "01101101" // /* MW 6 */ + 5645 "01001011" // /* MW 5 */ + 5646 "00000100" // /* MW 4 */ + 5647 "01110000" // /* MW 3 */ + 5648 "00000001" // /* MW 2 */ + 5649 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5651 "00101000" // /* MW 5 */ + 5652 "00000001" // /* MW 4 */ + 5653 "01110100" // /* MW 3 */ + 5654 "10000001" // /* MW 2 */ + 5655 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "00010100" // /* MW 3 */ + 5658 "00010001" // /* MW 2 */ + 5659 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1202 26 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5661 "00111101" // /* MW 11 */ + 5662 "10001100" // /* MW 10 */ + 5663 "11111011" // /* MW 9 */ + 5664 "10000010" // /* MW 8 */ + 5665 "01111101" // /* MW 7 */ + 5666 "01110010" // /* MW 6 */ + 5667 "00101101" // /* MW 5 */ + 5668 "00101000" // /* MW 4 */ + 5669 "01111000" // /* MW 3 */ + 5670 "00001001" // /* MW 2 */ + 5671 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00101001" // /* MW 9 */ + 5674 "00000110" // /* MW 8 */ + 5675 "10100000" // /* MW 7 */ + 5676 "00011101" // /* MW 6 */ + 5677 "00010100" // /* MW 5 */ + 5678 "00010100" // /* MW 4 */ + 5679 "01110100" // /* MW 3 */ + 5680 "00000001" // /* MW 2 */ + 5681 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5683 "00001001" // /* MW 13 */ + 5684 "01000110" // /* MW 12 */ + 5685 "10100010" // /* MW 11 */ + 5686 "00001111" // /* MW 10 */ + 5687 "10101010" // /* MW 9 */ + 5688 "01011000" // /* MW 8 */ + 5689 "00000000" // /* MW 7 */ + 5690 "00000000" // /* MW 6 */ + 5691 "00101000" // /* MW 5 */ + 5692 "00000001" // /* MW 4 */ + 5693 "01110100" // /* MW 3 */ + 5694 "10000001" // /* MW 2 */ + 5695 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5697 "01010001" // /* MW 15 */ + 5698 "00001001" // /* MW 14 */ + 5699 "11101101" // /* MW 13 */ + 5700 "00000011" // /* MW 12 */ + 5701 "11001001" // /* MW 11 */ + 5702 "00000000" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "00000000" // /* MW 8 */ + 5705 "01011011" // /* MW 7 */ + 5706 "00000001" // /* MW 6 */ + 5707 "00101000" // /* MW 5 */ + 5708 "00100010" // /* MW 4 */ + 5709 "11111000" // /* MW 3 */ + 5710 "00101100" // /* MW 2 */ + 5711 "00000000" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "01010000" // /* MW 15 */ + 5714 "00011011" // /* MW 14 */ + 5715 "11101101" // /* MW 13 */ + 5716 "00000001" // /* MW 12 */ + 5717 "01001001" // /* MW 11 */ + 5718 "00000001" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "00101000" // /* MW 5 */ + 5724 "00101000" // /* MW 4 */ + 5725 "01111000" // /* MW 3 */ + 5726 "00001001" // /* MW 2 */ + 5727 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00110001" // /* MW 15 */ + 5730 "00000000" // /* MW 14 */ + 5731 "01111101" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "01011011" // /* MW 7 */ + 5738 "00000001" // /* MW 6 */ + 5739 "00101000" // /* MW 5 */ + 5740 "00101000" // /* MW 4 */ + 5741 "01111000" // /* MW 3 */ + 5742 "00000001" // /* MW 2 */ + 5743 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00110000" // /* MW 15 */ + 5746 "00010010" // /* MW 14 */ + 5747 "01111101" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "01011011" // /* MW 7 */ + 5754 "00000001" // /* MW 6 */ + 5755 "00101000" // /* MW 5 */ + 5756 "00000001" // /* MW 4 */ + 5757 "01110100" // /* MW 3 */ + 5758 "10000001" // /* MW 2 */ + 5759 "00100010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "01010001" // /* MW 15 */ + 5762 "00001001" // /* MW 14 */ + 5763 "11101101" // /* MW 13 */ + 5764 "00000011" // /* MW 12 */ + 5765 "11001001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "01011011" // /* MW 7 */ + 5770 "00000001" // /* MW 6 */ + 5771 "00101000" // /* MW 5 */ + 5772 "00100010" // /* MW 4 */ + 5773 "11111000" // /* MW 3 */ + 5774 "00101100" // /* MW 2 */ + 5775 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5777 "00001001" // /* MW 13 */ + 5778 "01101010" // /* MW 12 */ + 5779 "10100011" // /* MW 11 */ + 5780 "00011110" // /* MW 10 */ + 5781 "10010000" // /* MW 9 */ + 5782 "01010100" // /* MW 8 */ + 5783 "00000000" // /* MW 7 */ + 5784 "00000000" // /* MW 6 */ + 5785 "10010110" // /* MW 5 */ + 5786 "10111100" // /* MW 4 */ + 5787 "01111100" // /* MW 3 */ + 5788 "00001001" // /* MW 2 */ + 5789 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5791 "00101001" // /* MW 13 */ + 5792 "00000110" // /* MW 12 */ + 5793 "10100000" // /* MW 11 */ + 5794 "00000111" // /* MW 10 */ + 5795 "00111000" // /* MW 9 */ + 5796 "01111100" // /* MW 8 */ + 5797 "00000000" // /* MW 7 */ + 5798 "00000000" // /* MW 6 */ + 5799 "10010110" // /* MW 5 */ + 5800 "00011100" // /* MW 4 */ + 5801 "01111110" // /* MW 3 */ + 5802 "00000001" // /* MW 2 */ + 5803 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5805 "00001001" // /* MW 9 */ + 5806 "01000110" // /* MW 8 */ + 5807 "10100010" // /* MW 7 */ + 5808 "11100100" // /* MW 6 */ + 5809 "00000000" // /* MW 5 */ + 5810 "01010101" // /* MW 4 */ + 5811 "01100001" // /* MW 3 */ + 5812 "10010001" // /* MW 2 */ + 5813 "01100001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5815 "00101001" // /* MW 9 */ + 5816 "00101010" // /* MW 8 */ + 5817 "10100001" // /* MW 7 */ + 5818 "11000100" // /* MW 6 */ + 5819 "00000111" // /* MW 5 */ + 5820 "10010010" // /* MW 4 */ + 5821 "01100001" // /* MW 3 */ + 5822 "11000001" // /* MW 2 */ + 5823 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5825 "00001001" // /* MW 9 */ + 5826 "01101010" // /* MW 8 */ + 5827 "10100011" // /* MW 7 */ + 5828 "11000100" // /* MW 6 */ + 5829 "00000011" // /* MW 5 */ + 5830 "10010010" // /* MW 4 */ + 5831 "01100010" // /* MW 3 */ + 5832 "10000001" // /* MW 2 */ + 5833 "11101011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1285 32 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5835 "00101001" // /* MW 11 */ + 5836 "00000110" // /* MW 10 */ + 5837 "10100000" // /* MW 9 */ + 5838 "11100110" // /* MW 8 */ + 5839 "00000000" // /* MW 7 */ + 5840 "10001111" // /* MW 6 */ + 5841 "00100010" // /* MW 5 */ + 5842 "01010111" // /* MW 4 */ + 5843 "01101111" // /* MW 3 */ + 5844 "10010001" // /* MW 2 */ + 5845 "10110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5847 "00001001" // /* MW 9 */ + 5848 "01000110" // /* MW 8 */ + 5849 "10100010" // /* MW 7 */ + 5850 "11100100" // /* MW 6 */ + 5851 "00000000" // /* MW 5 */ + 5852 "00000110" // /* MW 4 */ + 5853 "01100010" // /* MW 3 */ + 5854 "10010001" // /* MW 2 */ + 5855 "10010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5857 "00101001" // /* MW 7 */ + 5858 "00101010" // /* MW 6 */ + 5859 "10100001" // /* MW 5 */ + 5860 "11000110" // /* MW 4 */ + 5861 "00000011" // /* MW 3 */ + 5862 "10010010" // /* MW 2 */ + 5863 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5865 "00001001" // /* MW 7 */ + 5866 "01101010" // /* MW 6 */ + 5867 "10100011" // /* MW 5 */ + 5868 "11000110" // /* MW 4 */ + 5869 "00000111" // /* MW 3 */ + 5870 "10010010" // /* MW 2 */ + 5871 "00000001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 + 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5873 "00000000" // /* MW 3 */ + 5874 "10001011" // /* MW 2 */ + 5875 "00011111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 750 26 first + 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5877 "00101001" // /* MW 7 */ + 5878 "00101010" // /* MW 6 */ + 5879 "10100001" // /* MW 5 */ + 5880 "11100110" // /* MW 4 */ + 5881 "10100000" // /* MW 3 */ + 5882 "00001011" // /* MW 2 */ + 5883 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 749 26 first + 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5885 "00101001" // /* MW 7 */ + 5886 "00000110" // /* MW 6 */ + 5887 "10100000" // /* MW 5 */ + 5888 "11100110" // /* MW 4 */ + 5889 "10100000" // /* MW 3 */ + 5890 "10001000" // /* MW 2 */ + 5891 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 + 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5893 "00001001" // /* MW 9 */ + 5894 "01101010" // /* MW 8 */ + 5895 "10100011" // /* MW 7 */ + 5896 "11100110" // /* MW 6 */ + 5897 "00000000" // /* MW 5 */ + 5898 "00000101" // /* MW 4 */ + 5899 "00100011" // /* MW 3 */ + 5900 "11110111" // /* MW 2 */ + 5901 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 first + 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5903 "00001001" // /* MW 11 */ + 5904 "01000110" // /* MW 10 */ + 5905 "10100010" // /* MW 9 */ + 5906 "11100110" // /* MW 8 */ + 5907 "10100000" // /* MW 7 */ + 5908 "10000010" // /* MW 6 */ + 5909 "00100101" // /* MW 5 */ + 5910 "11010111" // /* MW 4 */ + 5911 "01101110" // /* MW 3 */ + 5912 "10001001" // /* MW 2 */ + 5913 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 + 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5915 "01110000" // /* MW 7 */ + 5916 "10000000" // /* MW 6 */ + 5917 "11000101" // /* MW 5 */ + 5918 "00000011" // /* MW 4 */ + 5919 "01100000" // /* MW 3 */ + 5920 "10001001" // /* MW 2 */ + 5921 "01100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5923 "01110000" // /* MW 7 */ + 5924 "00000000" // /* MW 6 */ + 5925 "10000001" // /* MW 5 */ + 5926 "00000001" // /* MW 4 */ + 5927 "01100000" // /* MW 3 */ + 5928 "01000001" // /* MW 2 */ + 5929 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5931 "01110000" // /* MW 7 */ + 5932 "01010000" // /* MW 6 */ + 5933 "10000111" // /* MW 5 */ + 5934 "00000000" // /* MW 4 */ + 5935 "11000000" // /* MW 3 */ + 5936 "00010010" // /* MW 2 */ + 5937 "10110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5939 "01110000" // /* MW 7 */ + 5940 "10010000" // /* MW 6 */ + 5941 "11000111" // /* MW 5 */ + 5942 "00000010" // /* MW 4 */ + 5943 "11000000" // /* MW 3 */ + 5944 "00000010" // /* MW 2 */ + 5945 "10100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "01110110" // /* MW 9 */ + 5948 "01100000" // /* MW 8 */ + 5949 "11001000" // /* MW 7 */ + 5950 "00000001" // /* MW 6 */ + 5951 "10010000" // /* MW 5 */ + 5952 "00111011" // /* MW 4 */ + 5953 "01100001" // /* MW 3 */ + 5954 "10010001" // /* MW 2 */ + 5955 "00010011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5957 "01110000" // /* MW 7 */ + 5958 "00000000" // /* MW 6 */ + 5959 "10000011" // /* MW 5 */ + 5960 "00000000" // /* MW 4 */ + 5961 "11000000" // /* MW 3 */ + 5962 "00001010" // /* MW 2 */ + 5963 "01100010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1218 20 first +.src_ref 2 "conv2d_bf16.h" 1287 37 first + 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */ + 5965 "01100000" // /* MW 11 */ + 5966 "00000000" // /* MW 10 */ + 5967 "00000000" // /* MW 9 */ + 5968 "11111010" // /* MW 8 */ + 5969 "00000010" // /* MW 7 */ + 5970 "00100100" // /* MW 6 */ + 5971 "00100000" // /* MW 5 */ + 5972 "01010111" // /* MW 4 */ + 5973 "11000000" // /* MW 3 */ + 5974 "00100010" // /* MW 2 */ + 5975 "01010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 738 8 +.delay_slot + 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5977 "01110000" // /* MW 7 */ + 5978 "01100000" // /* MW 6 */ + 5979 "10101001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "11000000" // /* MW 3 */ + 5982 "00011010" // /* MW 2 */ + 5983 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5985 "01110000" // /* MW 7 */ + 5986 "11000000" // /* MW 6 */ + 5987 "10100111" // /* MW 5 */ + 5988 "00000011" // /* MW 4 */ + 5989 "11000000" // /* MW 3 */ + 5990 "00110010" // /* MW 2 */ + 5991 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5993 "01110110" // /* MW 9 */ + 5994 "01100000" // /* MW 8 */ + 5995 "10110101" // /* MW 7 */ + 5996 "00000000" // /* MW 6 */ + 5997 "10010000" // /* MW 5 */ + 5998 "00101011" // /* MW 4 */ + 5999 "11000101" // /* MW 3 */ + 6000 "00111010" // /* MW 2 */ + 6001 "00010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.delay_slot + 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6003 "01110000" // /* MW 7 */ + 6004 "10000000" // /* MW 6 */ + 6005 "11000010" // /* MW 5 */ + 6006 "00000010" // /* MW 4 */ + 6007 "11000000" // /* MW 3 */ + 6008 "00101010" // /* MW 2 */ + 6009 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.delay_slot + 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6011 "01110000" // /* MW 7 */ + 6012 "11000000" // /* MW 6 */ + 6013 "01001101" // /* MW 5 */ + 6014 "00000000" // /* MW 4 */ + 6015 "01100000" // /* MW 3 */ + 6016 "10001001" // /* MW 2 */ + 6017 "11100001" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "11101100" // /* MW 3 */ + 6020 "11011100" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "11101100" // /* MW 3 */ + 6024 "10111100" // /* MW 2 */ + 6025 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6027 "01110000" // /* MW 7 */ + 6028 "01110110" // /* MW 6 */ + 6029 "10101010" // /* MW 5 */ + 6030 "00000010" // /* MW 4 */ + 6031 "01100000" // /* MW 3 */ + 6032 "01011010" // /* MW 2 */ + 6033 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6035 "01110000" // /* MW 7 */ + 6036 "01110110" // /* MW 6 */ + 6037 "11011010" // /* MW 5 */ + 6038 "00000001" // /* MW 4 */ + 6039 "01100000" // /* MW 3 */ + 6040 "10111010" // /* MW 2 */ + 6041 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */ + 6043 "00100001" // /* MW 9 */ + 6044 "00000000" // /* MW 8 */ + 6045 "00000000" // /* MW 7 */ + 6046 "11111110" // /* MW 6 */ + 6047 "00000010" // /* MW 5 */ + 6048 "00000000" // /* MW 4 */ + 6049 "01100000" // /* MW 3 */ + 6050 "11010010" // /* MW 2 */ + 6051 "00100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6053 "01110000" // /* MW 7 */ + 6054 "01110110" // /* MW 6 */ + 6055 "10100010" // /* MW 5 */ + 6056 "00000010" // /* MW 4 */ + 6057 "01100000" // /* MW 3 */ + 6058 "10111010" // /* MW 2 */ + 6059 "00100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "11101100" // /* MW 3 */ + 6062 "10001100" // /* MW 2 */ + 6063 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6065 "01110000" // /* MW 7 */ + 6066 "01110110" // /* MW 6 */ + 6067 "10010110" // /* MW 5 */ + 6068 "00000010" // /* MW 4 */ + 6069 "01100000" // /* MW 3 */ + 6070 "11010010" // /* MW 2 */ + 6071 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "01110000" // /* MW 7 */ + 6074 "01110110" // /* MW 6 */ + 6075 "10001010" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "01100000" // /* MW 3 */ + 6078 "10111010" // /* MW 2 */ + 6079 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6081 "00000000" // /* MW 15 */ + 6082 "00000000" // /* MW 14 */ + 6083 "01111000" // /* MW 13 */ + 6084 "10100101" // /* MW 12 */ + 6085 "00000001" // /* MW 11 */ + 6086 "00000000" // /* MW 10 */ + 6087 "00000000" // /* MW 9 */ + 6088 "00000000" // /* MW 8 */ + 6089 "10010011" // /* MW 7 */ + 6090 "10100010" // /* MW 6 */ + 6091 "00100100" // /* MW 5 */ + 6092 "00000000" // /* MW 4 */ + 6093 "11110000" // /* MW 3 */ + 6094 "00101100" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 +.src_ref 4 "vector.hpp" 1152 43 + 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6097 "10100011" // /* MW 3 */ + 6098 "11100000" // /* MW 2 */ + 6099 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6101 "11100011" // /* MW 3 */ + 6102 "00010100" // /* MW 2 */ + 6103 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "00100011" // /* MW 3 */ + 6106 "00000100" // /* MW 2 */ + 6107 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6109 "01100011" // /* MW 3 */ + 6110 "00010100" // /* MW 2 */ + 6111 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6113 "00010011" // /* MW 3 */ + 6114 "00000110" // /* MW 2 */ + 6115 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6117 "11100011" // /* MW 3 */ + 6118 "00010101" // /* MW 2 */ + 6119 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6121 "01110000" // /* MW 7 */ + 6122 "10100101" // /* MW 6 */ + 6123 "00000001" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "01100000" // /* MW 3 */ + 6126 "00100100" // /* MW 2 */ + 6127 "10010100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1143 12 first + 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6129 "01011000" // /* MW 11 */ + 6130 "00000000" // /* MW 10 */ + 6131 "01000000" // /* MW 9 */ + 6132 "00000001" // /* MW 8 */ + 6133 "00110101" // /* MW 7 */ + 6134 "00000110" // /* MW 6 */ + 6135 "00100000" // /* MW 5 */ + 6136 "01010111" // /* MW 4 */ + 6137 "01101111" // /* MW 3 */ + 6138 "10010010" // /* MW 2 */ + 6139 "11100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.delay_slot + 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6141 "10000000" // /* MW 3 */ + 6142 "01000100" // /* MW 2 */ + 6143 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.delay_slot + 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6145 "10100000" // /* MW 3 */ + 6146 "01001001" // /* MW 2 */ + 6147 "00011010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6149 "00000001" // /* MW 5 */ + 6150 "00011110" // /* MW 4 */ + 6151 "00000101" // /* MW 3 */ + 6152 "01110010" // /* MW 2 */ + 6153 "11101011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.delay_slot + 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "10000000" // /* MW 3 */ + 6156 "01001110" // /* MW 2 */ + 6157 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */ + 6161 "00000000" // /* MW 5 */ + 6162 "00000000" // /* MW 4 */ + 6163 "01011000" // /* MW 3 */ + 6164 "00001101" // /* MW 2 */ + 6165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 1364 80 +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6177 "01111000" // /* MW 11 */ + 6178 "10010000" // /* MW 10 */ + 6179 "10110011" // /* MW 9 */ + 6180 "00001000" // /* MW 8 */ + 6181 "11100001" // /* MW 7 */ + 6182 "00000100" // /* MW 6 */ + 6183 "10001011" // /* MW 5 */ + 6184 "00001100" // /* MW 4 */ + 6185 "00100010" // /* MW 3 */ + 6186 "01111110" // /* MW 2 */ + 6187 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1369 80 + 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6189 "01111000" // /* MW 11 */ + 6190 "01000000" // /* MW 10 */ + 6191 "01100010" // /* MW 9 */ + 6192 "00000011" // /* MW 8 */ + 6193 "11010100" // /* MW 7 */ + 6194 "00011011" // /* MW 6 */ + 6195 "00001011" // /* MW 5 */ + 6196 "01010110" // /* MW 4 */ + 6197 "10000010" // /* MW 3 */ + 6198 "10010000" // /* MW 2 */ + 6199 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 807 26 +.src_ref 2 "conv2d_bf16.h" 808 26 +.src_ref 2 "conv2d_bf16.h" 809 26 +.src_ref 2 "conv2d_bf16.h" 810 26 +.src_ref 2 "conv2d_bf16.h" 1436 26 +.src_ref 2 "conv2d_bf16.h" 1437 26 +.src_ref 2 "conv2d_bf16.h" 1438 26 +.src_ref 2 "conv2d_bf16.h" 1439 26 + 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111000" // /* MW 9 */ + 6202 "11010000" // /* MW 8 */ + 6203 "00000101" // /* MW 7 */ + 6204 "10001001" // /* MW 6 */ + 6205 "00110001" // /* MW 5 */ + 6206 "00011001" // /* MW 4 */ + 6207 "00000000" // /* MW 3 */ + 6208 "10010100" // /* MW 2 */ + 6209 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 802 83 +.src_ref 2 "conv2d_bf16.h" 1428 39 + 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6211 "01111000" // /* MW 11 */ + 6212 "10010000" // /* MW 10 */ + 6213 "11000111" // /* MW 9 */ + 6214 "11001010" // /* MW 8 */ + 6215 "00100000" // /* MW 7 */ + 6216 "00000001" // /* MW 6 */ + 6217 "00001011" // /* MW 5 */ + 6218 "01011100" // /* MW 4 */ + 6219 "10000110" // /* MW 3 */ + 6220 "10011000" // /* MW 2 */ + 6221 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 794 8 + 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6223 "01111000" // /* MW 11 */ + 6224 "01010000" // /* MW 10 */ + 6225 "10000111" // /* MW 9 */ + 6226 "00001000" // /* MW 8 */ + 6227 "10010000" // /* MW 7 */ + 6228 "00000001" // /* MW 6 */ + 6229 "00001011" // /* MW 5 */ + 6230 "00000010" // /* MW 4 */ + 6231 "00100101" // /* MW 3 */ + 6232 "10000011" // /* MW 2 */ + 6233 "11111010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 794 8 +.src_ref 2 "conv2d_bf16.h" 1455 20 + 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6235 "01111000" // /* MW 9 */ + 6236 "01010000" // /* MW 8 */ + 6237 "01000101" // /* MW 7 */ + 6238 "00001011" // /* MW 6 */ + 6239 "10000000" // /* MW 5 */ + 6240 "00000001" // /* MW 4 */ + 6241 "00100000" // /* MW 3 */ + 6242 "11010110" // /* MW 2 */ + 6243 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 12 + 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6245 "00010000" // /* MW 9 */ + 6246 "01011000" // /* MW 8 */ + 6247 "00110100" // /* MW 7 */ + 6248 "00000101" // /* MW 6 */ + 6249 "00000000" // /* MW 5 */ + 6250 "00000000" // /* MW 4 */ + 6251 "00100000" // /* MW 3 */ + 6252 "00110110" // /* MW 2 */ + 6253 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 first +.src_ref 2 "conv2d_bf16.h" 1873 + 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6255 "01110010" // /* MW 5 */ + 6256 "11011111" // /* MW 4 */ + 6257 "00100110" // /* MW 3 */ + 6258 "10000111" // /* MW 2 */ + 6259 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "11000110" // /* MW 3 */ + 6262 "00011101" // /* MW 2 */ + 6263 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 80 first + 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00000110" // /* MW 3 */ + 6266 "10001010" // /* MW 2 */ + 6267 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 799 87 first + 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "10000110" // /* MW 3 */ + 6270 "10011110" // /* MW 2 */ + 6271 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 83 first + 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "11010110" // /* MW 3 */ + 6274 "00011110" // /* MW 2 */ + 6275 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 83 first + 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11110110" // /* MW 3 */ + 6278 "11001010" // /* MW 2 */ + 6279 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 66 first + 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6281 "10110110" // /* MW 3 */ + 6282 "00010111" // /* MW 2 */ + 6283 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1443 71 first + 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6285 "10010110" // /* MW 3 */ + 6286 "00000111" // /* MW 2 */ + 6287 "00000011" // /* MW 1 */ + 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6289 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1369 89 + 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6291 "00000000" // /* MW 3 */ + 6292 "10011000" // /* MW 2 */ + 6293 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 +.src_ref 2 "conv2d_bf16.h" 1518 37 + 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6295 "00000000" // /* MW 3 */ + 6296 "00000111" // /* MW 2 */ + 6297 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 + 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6299 "00000000" // /* MW 3 */ + 6300 "11011100" // /* MW 2 */ + 6301 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 first + 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6303 "11100000" // /* MW 3 */ + 6304 "00001111" // /* MW 2 */ + 6305 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 89 first + 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6307 "11000000" // /* MW 5 */ + 6308 "00011110" // /* MW 4 */ + 6309 "11101110" // /* MW 3 */ + 6310 "01111111" // /* MW 2 */ + 6311 "11101111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 + 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6313 "01110000" // /* MW 7 */ + 6314 "10010000" // /* MW 6 */ + 6315 "11000111" // /* MW 5 */ + 6316 "00000011" // /* MW 4 */ + 6317 "01100000" // /* MW 3 */ + 6318 "00101011" // /* MW 2 */ + 6319 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1362 31 first +.src_ref 2 "conv2d_bf16.h" 1429 50 +.src_ref 2 "conv2d_bf16.h" 1443 16 first +.loop_nesting 1 + 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6321 "01100000" // /* MW 13 */ + 6322 "10000001" // /* MW 12 */ + 6323 "01110001" // /* MW 11 */ + 6324 "00000010" // /* MW 10 */ + 6325 "10010110" // /* MW 9 */ + 6326 "10001111" // /* MW 8 */ + 6327 "00000000" // /* MW 7 */ + 6328 "00000000" // /* MW 6 */ + 6329 "00101000" // /* MW 5 */ + 6330 "00101000" // /* MW 4 */ + 6331 "01111010" // /* MW 3 */ + 6332 "10000101" // /* MW 2 */ + 6333 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1364 31 first +.src_ref 2 "conv2d_bf16.h" 1443 16 + 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6335 "00010000" // /* MW 11 */ + 6336 "11001000" // /* MW 10 */ + 6337 "10111100" // /* MW 9 */ + 6338 "00000101" // /* MW 8 */ + 6339 "00000000" // /* MW 7 */ + 6340 "00000000" // /* MW 6 */ + 6341 "00101000" // /* MW 5 */ + 6342 "00101000" // /* MW 4 */ + 6343 "01111010" // /* MW 3 */ + 6344 "00001101" // /* MW 2 */ + 6345 "11001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1428 39 first +.src_ref 2 "conv2d_bf16.h" 1443 16 first + 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6347 "01001000" // /* MW 11 */ + 6348 "00111111" // /* MW 10 */ + 6349 "10111111" // /* MW 9 */ + 6350 "01101110" // /* MW 8 */ + 6351 "11101001" // /* MW 7 */ + 6352 "00000101" // /* MW 6 */ + 6353 "00101000" // /* MW 5 */ + 6354 "00000101" // /* MW 4 */ + 6355 "01110110" // /* MW 3 */ + 6356 "10000001" // /* MW 2 */ + 6357 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6359 "01111110" // /* MW 9 */ + 6360 "10010000" // /* MW 8 */ + 6361 "01000111" // /* MW 7 */ + 6362 "00000001" // /* MW 6 */ + 6363 "00010100" // /* MW 5 */ + 6364 "00000001" // /* MW 4 */ + 6365 "01110011" // /* MW 3 */ + 6366 "01011001" // /* MW 2 */ + 6367 "01010101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1367 31 first + 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "00101000" // /* MW 5 */ + 6370 "00000001" // /* MW 4 */ + 6371 "01110110" // /* MW 3 */ + 6372 "10010101" // /* MW 2 */ + 6373 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1369 31 first + 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6375 "10101000" // /* MW 5 */ + 6376 "00100001" // /* MW 4 */ + 6377 "01111010" // /* MW 3 */ + 6378 "00011101" // /* MW 2 */ + 6379 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1372 31 first + 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6381 "00101000" // /* MW 5 */ + 6382 "00101000" // /* MW 4 */ + 6383 "01111010" // /* MW 3 */ + 6384 "10100101" // /* MW 2 */ + 6385 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1374 31 first + 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6387 "00101000" // /* MW 5 */ + 6388 "00101000" // /* MW 4 */ + 6389 "01111010" // /* MW 3 */ + 6390 "00101101" // /* MW 2 */ + 6391 "11001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1377 31 first + 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6393 "10101000" // /* MW 5 */ + 6394 "00000000" // /* MW 4 */ + 6395 "01110110" // /* MW 3 */ + 6396 "10110101" // /* MW 2 */ + 6397 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1379 31 first + 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6399 "00101000" // /* MW 5 */ + 6400 "00000011" // /* MW 4 */ + 6401 "01110110" // /* MW 3 */ + 6402 "00111101" // /* MW 2 */ + 6403 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 first + 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6405 "10101000" // /* MW 5 */ + 6406 "00000011" // /* MW 4 */ + 6407 "01110110" // /* MW 3 */ + 6408 "01000101" // /* MW 2 */ + 6409 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6411 "11101110" // /* MW 9 */ + 6412 "00101101" // /* MW 8 */ + 6413 "01101001" // /* MW 7 */ + 6414 "00000001" // /* MW 6 */ + 6415 "00010100" // /* MW 5 */ + 6416 "00010010" // /* MW 4 */ + 6417 "01110101" // /* MW 3 */ + 6418 "01001101" // /* MW 2 */ + 6419 "01101000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6421 "11101110" // /* MW 9 */ + 6422 "00101111" // /* MW 8 */ + 6423 "10101001" // /* MW 7 */ + 6424 "00000010" // /* MW 6 */ + 6425 "00010100" // /* MW 5 */ + 6426 "00010100" // /* MW 4 */ + 6427 "01110101" // /* MW 3 */ + 6428 "10000001" // /* MW 2 */ + 6429 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6431 "01101001" // /* MW 11 */ + 6432 "00001011" // /* MW 10 */ + 6433 "01001000" // /* MW 9 */ + 6434 "11000010" // /* MW 8 */ + 6435 "11011011" // /* MW 7 */ + 6436 "00010001" // /* MW 6 */ + 6437 "00101010" // /* MW 5 */ + 6438 "00101000" // /* MW 4 */ + 6439 "01111010" // /* MW 3 */ + 6440 "00000001" // /* MW 2 */ + 6441 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6443 "01101001" // /* MW 9 */ + 6444 "00110101" // /* MW 8 */ + 6445 "01001001" // /* MW 7 */ + 6446 "11000010" // /* MW 6 */ + 6447 "11011111" // /* MW 5 */ + 6448 "00010001" // /* MW 4 */ + 6449 "01110101" // /* MW 3 */ + 6450 "10000001" // /* MW 2 */ + 6451 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6453 "01101001" // /* MW 3 */ + 6454 "01001001" // /* MW 2 */ + 6455 "01001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6457 "01101001" // /* MW 3 */ + 6458 "01110101" // /* MW 2 */ + 6459 "01001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.src_ref 2 "conv2d_bf16.h" 1437 26 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6461 "00111101" // /* MW 9 */ + 6462 "10000100" // /* MW 8 */ + 6463 "10100001" // /* MW 7 */ + 6464 "11000110" // /* MW 6 */ + 6465 "01011111" // /* MW 5 */ + 6466 "10001011" // /* MW 4 */ + 6467 "10101010" // /* MW 3 */ + 6468 "00000000" // /* MW 2 */ + 6469 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1436 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6471 "00111101" // /* MW 7 */ + 6472 "10000000" // /* MW 6 */ + 6473 "10100000" // /* MW 5 */ + 6474 "00000000" // /* MW 4 */ + 6475 "10010100" // /* MW 3 */ + 6476 "00000001" // /* MW 2 */ + 6477 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1438 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6479 "00111101" // /* MW 7 */ + 6480 "10001000" // /* MW 6 */ + 6481 "10100010" // /* MW 5 */ + 6482 "00000000" // /* MW 4 */ + 6483 "11010100" // /* MW 3 */ + 6484 "00000001" // /* MW 2 */ + 6485 "00000011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1439 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6487 "00111101" // /* MW 9 */ + 6488 "10001100" // /* MW 8 */ + 6489 "10100011" // /* MW 7 */ + 6490 "00011101" // /* MW 6 */ + 6491 "00010100" // /* MW 5 */ + 6492 "00010010" // /* MW 4 */ + 6493 "01110101" // /* MW 3 */ + 6494 "00000001" // /* MW 2 */ + 6495 "01010101" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6497 "10110111" // /* MW 5 */ + 6498 "00010110" // /* MW 4 */ + 6499 "10000010" // /* MW 3 */ + 6500 "10000010" // /* MW 2 */ + 6501 "10100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6503 "00001001" // /* MW 9 */ + 6504 "00101010" // /* MW 8 */ + 6505 "10011001" // /* MW 7 */ + 6506 "11000110" // /* MW 6 */ + 6507 "01011111" // /* MW 5 */ + 6508 "00111100" // /* MW 4 */ + 6509 "00101010" // /* MW 3 */ + 6510 "00101000" // /* MW 2 */ + 6511 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6513 "00001001" // /* MW 9 */ + 6514 "00000100" // /* MW 8 */ + 6515 "10011000" // /* MW 7 */ + 6516 "11000110" // /* MW 6 */ + 6517 "01011011" // /* MW 5 */ + 6518 "10111100" // /* MW 4 */ + 6519 "10101001" // /* MW 3 */ + 6520 "00000000" // /* MW 2 */ + 6521 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6523 "00001001" // /* MW 7 */ + 6524 "01101000" // /* MW 6 */ + 6525 "10011011" // /* MW 5 */ + 6526 "00000000" // /* MW 4 */ + 6527 "10010100" // /* MW 3 */ + 6528 "00000001" // /* MW 2 */ + 6529 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6531 "00001001" // /* MW 13 */ + 6532 "01000110" // /* MW 12 */ + 6533 "10011010" // /* MW 11 */ + 6534 "01101100" // /* MW 10 */ + 6535 "00000101" // /* MW 9 */ + 6536 "00000000" // /* MW 8 */ + 6537 "00000000" // /* MW 7 */ + 6538 "00000000" // /* MW 6 */ + 6539 "10101000" // /* MW 5 */ + 6540 "00000011" // /* MW 4 */ + 6541 "01110110" // /* MW 3 */ + 6542 "10000001" // /* MW 2 */ + 6543 "00000010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6545 "00000000" // /* MW 15 */ + 6546 "00000000" // /* MW 14 */ + 6547 "11101000" // /* MW 13 */ + 6548 "10101111" // /* MW 12 */ + 6549 "01000101" // /* MW 11 */ + 6550 "00000001" // /* MW 10 */ + 6551 "00000000" // /* MW 9 */ + 6552 "00000000" // /* MW 8 */ + 6553 "01011011" // /* MW 7 */ + 6554 "00000001" // /* MW 6 */ + 6555 "00101000" // /* MW 5 */ + 6556 "00100100" // /* MW 4 */ + 6557 "01111010" // /* MW 3 */ + 6558 "00000001" // /* MW 2 */ + 6559 "01010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 801 30 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6561 "11100000" // /* MW 11 */ + 6562 "10101101" // /* MW 10 */ + 6563 "10000101" // /* MW 9 */ + 6564 "00000000" // /* MW 8 */ + 6565 "10001011" // /* MW 7 */ + 6566 "10011100" // /* MW 6 */ + 6567 "00100101" // /* MW 5 */ + 6568 "10010111" // /* MW 4 */ + 6569 "11111111" // /* MW 3 */ + 6570 "00001100" // /* MW 2 */ + 6571 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.src_ref 2 "conv2d_bf16.h" 1517 32 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6573 "00001001" // /* MW 11 */ + 6574 "00101010" // /* MW 10 */ + 6575 "10011001" // /* MW 9 */ + 6576 "11000110" // /* MW 8 */ + 6577 "01011111" // /* MW 7 */ + 6578 "00111100" // /* MW 6 */ + 6579 "00100010" // /* MW 5 */ + 6580 "00010111" // /* MW 4 */ + 6581 "01101111" // /* MW 3 */ + 6582 "10010001" // /* MW 2 */ + 6583 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.src_ref 2 "conv2d_bf16.h" 1518 37 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6585 "00001001" // /* MW 11 */ + 6586 "00000100" // /* MW 10 */ + 6587 "10011000" // /* MW 9 */ + 6588 "11000110" // /* MW 8 */ + 6589 "01011011" // /* MW 7 */ + 6590 "10111100" // /* MW 6 */ + 6591 "00100001" // /* MW 5 */ + 6592 "10010111" // /* MW 4 */ + 6593 "01101111" // /* MW 3 */ + 6594 "10010001" // /* MW 2 */ + 6595 "01110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6597 "00001001" // /* MW 7 */ + 6598 "01101000" // /* MW 6 */ + 6599 "10011011" // /* MW 5 */ + 6600 "11100110" // /* MW 4 */ + 6601 "10100000" // /* MW 3 */ + 6602 "10001000" // /* MW 2 */ + 6603 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.src_ref 2 "conv2d_bf16.h" 1428 39 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6605 "00001001" // /* MW 9 */ + 6606 "01000110" // /* MW 8 */ + 6607 "10011010" // /* MW 7 */ + 6608 "11100110" // /* MW 6 */ + 6609 "10000000" // /* MW 5 */ + 6610 "10011011" // /* MW 4 */ + 6611 "00100000" // /* MW 3 */ + 6612 "10110111" // /* MW 2 */ + 6613 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 30 first + 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6615 "01011011" // /* MW 3 */ + 6616 "00001011" // /* MW 2 */ + 6617 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6619 "01011111" // /* MW 3 */ + 6620 "10001011" // /* MW 2 */ + 6621 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6623 "00001001" // /* MW 7 */ + 6624 "00000100" // /* MW 6 */ + 6625 "10011000" // /* MW 5 */ + 6626 "11000110" // /* MW 4 */ + 6627 "01011011" // /* MW 3 */ + 6628 "10111100" // /* MW 2 */ + 6629 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6631 "00001001" // /* MW 7 */ + 6632 "00101010" // /* MW 6 */ + 6633 "10011001" // /* MW 5 */ + 6634 "11000110" // /* MW 4 */ + 6635 "01011111" // /* MW 3 */ + 6636 "00111100" // /* MW 2 */ + 6637 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6639 "00001001" // /* MW 3 */ + 6640 "01000110" // /* MW 2 */ + 6641 "10011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first + 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6643 "00001001" // /* MW 3 */ + 6644 "01101000" // /* MW 2 */ + 6645 "10011011" // /* MW 1 */ + 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6647 "00000000" // /* MW 1 */ + 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6649 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6651 "00010110" // /* MW 3 */ + 6652 "00010000" // /* MW 2 */ + 6653 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6655 "10010110" // /* MW 3 */ + 6656 "10010000" // /* MW 2 */ + 6657 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1455 20 first + 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */ + 6659 "01100001" // /* MW 9 */ + 6660 "00000000" // /* MW 8 */ + 6661 "00000000" // /* MW 7 */ + 6662 "01001110" // /* MW 6 */ + 6663 "00000011" // /* MW 5 */ + 6664 "00101010" // /* MW 4 */ + 6665 "11000000" // /* MW 3 */ + 6666 "00011010" // /* MW 2 */ + 6667 "00010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.delay_slot + 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6669 "01010110" // /* MW 3 */ + 6670 "00010000" // /* MW 2 */ + 6671 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6673 "10010110" // /* MW 3 */ + 6674 "00010001" // /* MW 2 */ + 6675 "00001001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6677 "11010110" // /* MW 3 */ + 6678 "10010001" // /* MW 2 */ + 6679 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6681 "00010110" // /* MW 3 */ + 6682 "10010001" // /* MW 2 */ + 6683 "00001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6685 "01010110" // /* MW 3 */ + 6686 "00010001" // /* MW 2 */ + 6687 "00001100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6689 "11101100" // /* MW 3 */ + 6690 "11011100" // /* MW 2 */ + 6691 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6693 "11101100" // /* MW 3 */ + 6694 "10001100" // /* MW 2 */ + 6695 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6697 "01110000" // /* MW 7 */ + 6698 "01110110" // /* MW 6 */ + 6699 "10101010" // /* MW 5 */ + 6700 "00000010" // /* MW 4 */ + 6701 "01100000" // /* MW 3 */ + 6702 "01011010" // /* MW 2 */ + 6703 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6705 "01110000" // /* MW 7 */ + 6706 "01110110" // /* MW 6 */ + 6707 "01011010" // /* MW 5 */ + 6708 "00000000" // /* MW 4 */ + 6709 "01100000" // /* MW 3 */ + 6710 "10001010" // /* MW 2 */ + 6711 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */ + 6713 "00100001" // /* MW 9 */ + 6714 "00000000" // /* MW 8 */ + 6715 "00000000" // /* MW 7 */ + 6716 "01010010" // /* MW 6 */ + 6717 "00000011" // /* MW 5 */ + 6718 "00000000" // /* MW 4 */ + 6719 "01100000" // /* MW 3 */ + 6720 "11010010" // /* MW 2 */ + 6721 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6723 "01110000" // /* MW 7 */ + 6724 "01110110" // /* MW 6 */ + 6725 "10001010" // /* MW 5 */ + 6726 "00000010" // /* MW 4 */ + 6727 "01100000" // /* MW 3 */ + 6728 "10001010" // /* MW 2 */ + 6729 "10100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6731 "11101100" // /* MW 3 */ + 6732 "10111100" // /* MW 2 */ + 6733 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6735 "01110000" // /* MW 7 */ + 6736 "01110110" // /* MW 6 */ + 6737 "10010110" // /* MW 5 */ + 6738 "00000010" // /* MW 4 */ + 6739 "01100000" // /* MW 3 */ + 6740 "01010010" // /* MW 2 */ + 6741 "01101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6743 "01110010" // /* MW 9 */ + 6744 "01110110" // /* MW 8 */ + 6745 "00100010" // /* MW 7 */ + 6746 "00000010" // /* MW 6 */ + 6747 "01010011" // /* MW 5 */ + 6748 "00010100" // /* MW 4 */ + 6749 "11110111" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "00000000" // /* MW 15 */ + 6754 "00000000" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "10010011" // /* MW 7 */ + 6762 "11100010" // /* MW 6 */ + 6763 "00100100" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 +.src_ref 4 "vector.hpp" 1152 43 + 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "10100011" // /* MW 3 */ + 6770 "01100000" // /* MW 2 */ + 6771 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6773 "11100011" // /* MW 3 */ + 6774 "00010100" // /* MW 2 */ + 6775 "00001100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6777 "00100011" // /* MW 3 */ + 6778 "00000100" // /* MW 2 */ + 6779 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6781 "01100011" // /* MW 3 */ + 6782 "00010100" // /* MW 2 */ + 6783 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6785 "10100011" // /* MW 3 */ + 6786 "01100001" // /* MW 2 */ + 6787 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6789 "11100011" // /* MW 3 */ + 6790 "00010101" // /* MW 2 */ + 6791 "00001111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6793 "01110000" // /* MW 7 */ + 6794 "10100101" // /* MW 6 */ + 6795 "00000001" // /* MW 5 */ + 6796 "00000000" // /* MW 4 */ + 6797 "01100000" // /* MW 3 */ + 6798 "00100100" // /* MW 2 */ + 6799 "10011100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1337 12 first + 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6801 "01000000" // /* MW 5 */ + 6802 "11110101" // /* MW 4 */ + 6803 "01101110" // /* MW 3 */ + 6804 "11000010" // /* MW 2 */ + 6805 "01100010" // /* MW 1 */ +.delay_slot + 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "10010000" // /* MW 3 */ + 6808 "10001011" // /* MW 2 */ + 6809 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6811 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6813 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6817 "00000000" // /* MW 15 */ + 6818 "00000000" // /* MW 14 */ + 6819 "01111000" // /* MW 13 */ + 6820 "10100101" // /* MW 12 */ + 6821 "00000001" // /* MW 11 */ + 6822 "00000000" // /* MW 10 */ + 6823 "00000000" // /* MW 9 */ + 6824 "00000000" // /* MW 8 */ + 6825 "01011011" // /* MW 7 */ + 6826 "00000001" // /* MW 6 */ + 6827 "00100000" // /* MW 5 */ + 6828 "00000000" // /* MW 4 */ + 6829 "11110000" // /* MW 3 */ + 6830 "00101100" // /* MW 2 */ + 6831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "11110001" // /* MW 3 */ + 6834 "11101101" // /* MW 2 */ + 6835 "00000111" // /* MW 1 */ + 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "10010001" // /* MW 3 */ + 6838 "11110001" // /* MW 2 */ + 6839 "00000111" // /* MW 1 */ + 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6841 "00110001" // /* MW 3 */ + 6842 "11110101" // /* MW 2 */ + 6843 "00000111" // /* MW 1 */ + 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6845 "00011001" // /* MW 3 */ + 6846 "11101011" // /* MW 2 */ + 6847 "00000111" // /* MW 1 */ + 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "10011001" // /* MW 3 */ + 6850 "11111011" // /* MW 2 */ + 6851 "00000111" // /* MW 1 */ + 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "11010001" // /* MW 3 */ + 6854 "11111101" // /* MW 2 */ + 6855 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 first + 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6857 "00000000" // /* MW 3 */ + 6858 "00101000" // /* MW 2 */ + 6859 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 +.delay_slot + 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6861 "00000001" // /* MW 5 */ + 6862 "00000000" // /* MW 4 */ + 6863 "00000000" // /* MW 3 */ + 6864 "11110000" // /* MW 2 */ + 6865 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6871 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + 6873 "00000000" // /* MW 1 */ +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 74 first +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 81 4 +.function_start + 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6881 "00010000" // /* MW 9 */ + 6882 "00100000" // /* MW 8 */ + 6883 "00110010" // /* MW 7 */ + 6884 "11110010" // /* MW 6 */ + 6885 "00000001" // /* MW 5 */ + 6886 "00000000" // /* MW 4 */ + 6887 "00000000" // /* MW 3 */ + 6888 "00100000" // /* MW 2 */ + 6889 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 first +.src_ref 7 "superkernels.cpp" 81 4 + 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6891 "01111000" // /* MW 9 */ + 6892 "11010000" // /* MW 8 */ + 6893 "01001011" // /* MW 7 */ + 6894 "00001000" // /* MW 6 */ + 6895 "00010000" // /* MW 5 */ + 6896 "00000000" // /* MW 4 */ + 6897 "11010000" // /* MW 3 */ + 6898 "11000010" // /* MW 2 */ + 6899 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 74 + 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6901 "00000001" // /* MW 5 */ + 6902 "00000000" // /* MW 4 */ + 6903 "00000000" // /* MW 3 */ + 6904 "00001000" // /* MW 2 */ + 6905 "00000000" // /* MW 1 */ + 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6907 "01010101" // /* MW 3 */ + 6908 "11110000" // /* MW 2 */ + 6909 "00001111" // /* MW 1 */ + 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6911 "00000000" // /* MW 1 */ + 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6913 "00000000" // /* MW 1 */ + 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6915 "00000000" // /* MW 1 */ + 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 79 16 + 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 6919 "00000001" // /* MW 5 */ + 6920 "01000000" // /* MW 4 */ + 6921 "11011000" // /* MW 3 */ + 6922 "00001101" // /* MW 2 */ + 6923 "10000000" // /* MW 1 */ +.delay_slot + 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10011101" // /* MW 3 */ + 6926 "11111011" // /* MW 2 */ + 6927 "00001111" // /* MW 1 */ +.delay_slot + 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "00011101" // /* MW 3 */ + 6930 "11111111" // /* MW 2 */ + 6931 "00001111" // /* MW 1 */ +.delay_slot + 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "10011101" // /* MW 3 */ + 6934 "11101101" // /* MW 2 */ + 6935 "00001111" // /* MW 1 */ +.delay_slot + 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6937 "00111101" // /* MW 3 */ + 6938 "11110100" // /* MW 2 */ + 6939 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6940 "01000100" // MOVXM r15, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6941 "10000000" // /* MW 5 */ + 6942 "10101011" // /* MW 4 */ + 6943 "11000111" // /* MW 3 */ + 6944 "00000111" // /* MW 2 */ + 6945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6947 "00010001" // /* MW 9 */ + 6948 "00110100" // /* MW 8 */ + 6949 "10110010" // /* MW 7 */ + 6950 "11110011" // /* MW 6 */ + 6951 "00000001" // /* MW 5 */ + 6952 "00000000" // /* MW 4 */ + 6953 "01100000" // /* MW 3 */ + 6954 "10010001" // /* MW 2 */ + 6955 "11010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6957 "00010000" // /* MW 11 */ + 6958 "00110010" // /* MW 10 */ + 6959 "10110010" // /* MW 9 */ + 6960 "11110011" // /* MW 8 */ + 6961 "00000001" // /* MW 7 */ + 6962 "00000000" // /* MW 6 */ + 6963 "00001011" // /* MW 5 */ + 6964 "10001111" // /* MW 4 */ + 6965 "11100001" // /* MW 3 */ + 6966 "11000000" // /* MW 2 */ + 6967 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6969 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 6973 "00000001" // /* MW 5 */ + 6974 "00000000" // /* MW 4 */ + 6975 "01100000" // /* MW 3 */ + 6976 "00000101" // /* MW 2 */ + 6977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6981 "00110001" // /* MW 3 */ + 6982 "00100000" // /* MW 2 */ + 6983 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6985 "00000101" // /* MW 3 */ + 6986 "00100000" // /* MW 2 */ + 6987 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6989 "01110000" // /* MW 7 */ + 6990 "01100000" // /* MW 6 */ + 6991 "10110000" // /* MW 5 */ + 6992 "00000011" // /* MW 4 */ + 6993 "00110000" // /* MW 3 */ + 6994 "11000010" // /* MW 2 */ + 6995 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6997 "01110000" // /* MW 11 */ + 6998 "01100000" // /* MW 10 */ + 6999 "00110010" // /* MW 9 */ + 7000 "00000000" // /* MW 8 */ + 7001 "01011011" // /* MW 7 */ + 7002 "00000001" // /* MW 6 */ + 7003 "00100000" // /* MW 5 */ + 7004 "00000000" // /* MW 4 */ + 7005 "11110000" // /* MW 3 */ + 7006 "00101100" // /* MW 2 */ + 7007 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.return_address + 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7009 "10000101" // /* MW 3 */ + 7010 "01100111" // /* MW 2 */ + 7011 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 +.src_ref 7 "superkernels.cpp" 87 35 first + 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7013 "00010000" // /* MW 9 */ + 7014 "00100010" // /* MW 8 */ + 7015 "10110010" // /* MW 7 */ + 7016 "11110000" // /* MW 6 */ + 7017 "00000001" // /* MW 5 */ + 7018 "00000000" // /* MW 4 */ + 7019 "01010000" // /* MW 3 */ + 7020 "11000001" // /* MW 2 */ + 7021 "01001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 37 first +.src_ref 7 "superkernels.cpp" 89 13 + 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7023 "00010000" // /* MW 9 */ + 7024 "00110000" // /* MW 8 */ + 7025 "00110010" // /* MW 7 */ + 7026 "11110000" // /* MW 6 */ + 7027 "00000001" // /* MW 5 */ + 7028 "00000000" // /* MW 4 */ + 7029 "01010000" // /* MW 3 */ + 7030 "11001111" // /* MW 2 */ + 7031 "01000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 73 + 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7033 "00111010" // /* MW 3 */ + 7034 "00000110" // /* MW 2 */ + 7035 "00000010" // /* MW 1 */ + 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7037 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 110 + 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "01011010" // /* MW 3 */ + 7040 "00010110" // /* MW 2 */ + 7041 "00000010" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 first +.src_ref 7 "superkernels.cpp" 113 2 + 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7047 "01110000" // /* MW 7 */ + 7048 "01100000" // /* MW 6 */ + 7049 "10110110" // /* MW 5 */ + 7050 "00000000" // /* MW 4 */ + 7051 "00110000" // /* MW 3 */ + 7052 "11000010" // /* MW 2 */ + 7053 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 57 first + 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7055 "00001111" // /* MW 3 */ + 7056 "11100001" // /* MW 2 */ + 7057 "00010100" // /* MW 1 */ + 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7059 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 94 + 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001111" // /* MW 3 */ + 7062 "01100001" // /* MW 2 */ + 7063 "00010100" // /* MW 1 */ + 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 28 first + 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7067 "00001111" // /* MW 3 */ + 7068 "10100001" // /* MW 2 */ + 7069 "00010100" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 13 +.src_ref 7 "superkernels.cpp" 113 2 + 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7073 "00000000" // /* MW 15 */ + 7074 "00000000" // /* MW 14 */ + 7075 "01111000" // /* MW 13 */ + 7076 "01100000" // /* MW 12 */ + 7077 "00110111" // /* MW 11 */ + 7078 "00000000" // /* MW 10 */ + 7079 "00000000" // /* MW 9 */ + 7080 "10000000" // /* MW 8 */ + 7081 "00010001" // /* MW 7 */ + 7082 "00000110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 106 12 +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 7 "superkernels.cpp" 117 6 +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00100100" // /* MW 8 */ + 7091 "00110010" // /* MW 7 */ + 7092 "11110011" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "00100000" // /* MW 3 */ + 7096 "10111110" // /* MW 2 */ + 7097 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.src_ref 7 "superkernels.cpp" 108 13 + 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "00010000" // /* MW 9 */ + 7100 "00100110" // /* MW 8 */ + 7101 "00110010" // /* MW 7 */ + 7102 "11110001" // /* MW 6 */ + 7103 "00000001" // /* MW 5 */ + 7104 "00000000" // /* MW 4 */ + 7105 "11010000" // /* MW 3 */ + 7106 "11000010" // /* MW 2 */ + 7107 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 +.src_ref 7 "superkernels.cpp" 108 13 first +.src_ref 7 "superkernels.cpp" 139 6 +.src_ref 7 "superkernels.cpp" 140 14 + 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "00010000" // /* MW 9 */ + 7110 "00100000" // /* MW 8 */ + 7111 "10110010" // /* MW 7 */ + 7112 "11110011" // /* MW 6 */ + 7113 "00000001" // /* MW 5 */ + 7114 "00000000" // /* MW 4 */ + 7115 "11010000" // /* MW 3 */ + 7116 "11000110" // /* MW 2 */ + 7117 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first + 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "01010110" // /* MW 3 */ + 7120 "00000110" // /* MW 2 */ + 7121 "00000111" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ + 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7125 "00000000" // /* MW 1 */ + 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7127 "00000000" // /* MW 1 */ + 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7129 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 110 6 first +.src_ref 7 "superkernels.cpp" 110 17 first + 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */ + 7131 "00000001" // /* MW 5 */ + 7132 "01000000" // /* MW 4 */ + 7133 "00011000" // /* MW 3 */ + 7134 "00001110" // /* MW 2 */ + 7135 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 108 13 first +.delay_slot + 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "00000111" // /* MW 3 */ + 7138 "01100010" // /* MW 2 */ + 7139 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.src_ref 7 "superkernels.cpp" 108 13 +.delay_slot + 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7141 "00001110" // /* MW 5 */ + 7142 "01000100" // /* MW 4 */ + 7143 "00111001" // /* MW 3 */ + 7144 "11000110" // /* MW 2 */ + 7145 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.delay_slot + 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00000111" // /* MW 3 */ + 7148 "00100110" // /* MW 2 */ + 7149 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 +.delay_slot + 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "01110001" // /* MW 3 */ + 7152 "00000110" // /* MW 2 */ + 7153 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.delay_slot + 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "00110001" // /* MW 3 */ + 7156 "00000110" // /* MW 2 */ + 7157 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7159 "10000110" // /* MW 3 */ + 7160 "01100111" // /* MW 2 */ + 7161 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7163 "01110110" // /* MW 3 */ + 7164 "11111111" // /* MW 2 */ + 7165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7167 "00010110" // /* MW 3 */ + 7168 "11111110" // /* MW 2 */ + 7169 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7171 "00110110" // /* MW 3 */ + 7172 "11111110" // /* MW 2 */ + 7173 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "00010110" // /* MW 3 */ + 7178 "01000110" // /* MW 2 */ + 7179 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7181 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00000010" // /* MW 3 */ + 7190 "01100001" // /* MW 2 */ + 7191 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010001" // /* MW 3 */ + 7194 "00000110" // /* MW 2 */ + 7195 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7197 "11111101" // /* MW 3 */ + 7198 "11100010" // /* MW 2 */ + 7199 "00010111" // /* MW 1 */ + 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7201 "00000000" // /* MW 1 */ + 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7203 "00000000" // /* MW 1 */ + 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7207 "00011000" // /* MW 9 */ + 7208 "00010011" // /* MW 8 */ + 7209 "00000100" // /* MW 7 */ + 7210 "00000000" // /* MW 6 */ + 7211 "01011011" // /* MW 5 */ + 7212 "00000001" // /* MW 4 */ + 7213 "11110000" // /* MW 3 */ + 7214 "00101100" // /* MW 2 */ + 7215 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.src_ref 7 "superkernels.cpp" 113 2 first +.no_stack_arguments + 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 7217 "00000001" // /* MW 5 */ + 7218 "00000000" // /* MW 4 */ + 7219 "10111000" // /* MW 3 */ + 7220 "00001000" // /* MW 2 */ + 7221 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7222 "01000100" // MOVXM p3, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7223 "10000000" // /* MW 5 */ + 7224 "11001011" // /* MW 4 */ + 7225 "11000110" // /* MW 3 */ + 7226 "00000111" // /* MW 2 */ + 7227 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7233 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7235 "00011100" // /* MW 13 */ + 7236 "00000000" // /* MW 12 */ + 7237 "00000000" // /* MW 11 */ + 7238 "00000111" // /* MW 10 */ + 7239 "00111101" // /* MW 9 */ + 7240 "01010011" // /* MW 8 */ + 7241 "00000000" // /* MW 7 */ + 7242 "00000000" // /* MW 6 */ + 7243 "10110110" // /* MW 5 */ + 7244 "00000010" // /* MW 4 */ + 7245 "11110000" // /* MW 3 */ + 7246 "00101100" // /* MW 2 */ + 7247 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 first +.src_ref 7 "superkernels.cpp" 117 20 +.return_address + 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7249 "00010000" // /* MW 9 */ + 7250 "00100010" // /* MW 8 */ + 7251 "10110010" // /* MW 7 */ + 7252 "11110000" // /* MW 6 */ + 7253 "00000001" // /* MW 5 */ + 7254 "00000000" // /* MW 4 */ + 7255 "11010000" // /* MW 3 */ + 7256 "11000010" // /* MW 2 */ + 7257 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 20 + 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7259 "00110110" // /* MW 3 */ + 7260 "00000110" // /* MW 2 */ + 7261 "00000001" // /* MW 1 */ + 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7263 "00010001" // /* MW 3 */ + 7264 "11110000" // /* MW 2 */ + 7265 "00000111" // /* MW 1 */ + 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7267 "00000000" // /* MW 1 */ + 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7269 "00000000" // /* MW 1 */ + 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7271 "00000000" // /* MW 1 */ + 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7273 "00000000" // /* MW 1 */ + 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7275 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 17 + 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7277 "00001000" // /* MW 3 */ + 7278 "01100001" // /* MW 2 */ + 7279 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 + 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */ + 7281 "00000001" // /* MW 5 */ + 7282 "01000000" // /* MW 4 */ + 7283 "01100000" // /* MW 3 */ + 7284 "00001110" // /* MW 2 */ + 7285 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 7 "superkernels.cpp" 140 14 +.delay_slot + 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7287 "00000001" // /* MW 3 */ + 7288 "00110000" // /* MW 2 */ + 7289 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7299 "00010100" // /* MW 5 */ + 7300 "11001111" // /* MW 4 */ + 7301 "10100010" // /* MW 3 */ + 7302 "00000000" // /* MW 2 */ + 7303 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00110110" // /* MW 3 */ + 7306 "00000110" // /* MW 2 */ + 7307 "00000001" // /* MW 1 */ + 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7309 "00000000" // /* MW 1 */ + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ + 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7313 "00000000" // /* MW 1 */ + 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7315 "00000000" // /* MW 1 */ + 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7317 "00000000" // /* MW 1 */ + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "00001000" // /* MW 3 */ + 7322 "01010001" // /* MW 2 */ + 7323 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 first +.src_ref 1 "io_buffer_main.h" 327 40 first + 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7325 "00100011" // /* MW 5 */ + 7326 "00001110" // /* MW 4 */ + 7327 "11011100" // /* MW 3 */ + 7328 "11000110" // /* MW 2 */ + 7329 "00111100" // /* MW 1 */ + 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7331 "00000000" // /* MW 1 */ + 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7333 "00000000" // /* MW 1 */ + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ + 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7337 "00000000" // /* MW 1 */ + 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7339 "00000000" // /* MW 1 */ + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7343 "00010001" // /* MW 3 */ + 7344 "00100001" // /* MW 2 */ + 7345 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7347 "00011100" // /* MW 13 */ + 7348 "00000000" // /* MW 12 */ + 7349 "00000000" // /* MW 11 */ + 7350 "01010111" // /* MW 10 */ + 7351 "00011010" // /* MW 9 */ + 7352 "01000000" // /* MW 8 */ + 7353 "00000000" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "00100011" // /* MW 5 */ + 7356 "11001100" // /* MW 4 */ + 7357 "11110011" // /* MW 3 */ + 7358 "00101100" // /* MW 2 */ + 7359 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 +.src_ref 7 "superkernels.cpp" 139 6 first +.src_ref 7 "superkernels.cpp" 139 19 + 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7361 "00010000" // /* MW 9 */ + 7362 "00110000" // /* MW 8 */ + 7363 "00110010" // /* MW 7 */ + 7364 "11110011" // /* MW 6 */ + 7365 "00000001" // /* MW 5 */ + 7366 "00000000" // /* MW 4 */ + 7367 "11010000" // /* MW 3 */ + 7368 "11000010" // /* MW 2 */ + 7369 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 19 + 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7371 "00110110" // /* MW 3 */ + 7372 "00000110" // /* MW 2 */ + 7373 "00000110" // /* MW 1 */ + 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7375 "10011001" // /* MW 3 */ + 7376 "11111000" // /* MW 2 */ + 7377 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 + 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7379 "00111001" // /* MW 3 */ + 7380 "11110100" // /* MW 2 */ + 7381 "00000111" // /* MW 1 */ + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 16 + 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7391 "00001000" // /* MW 3 */ + 7392 "01100001" // /* MW 2 */ + 7393 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 6 + 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7395 "00000001" // /* MW 5 */ + 7396 "01000000" // /* MW 4 */ + 7397 "10000000" // /* MW 3 */ + 7398 "00001110" // /* MW 2 */ + 7399 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7407 "00000000" // /* MW 1 */ +.delay_slot + 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00100000" // /* MW 3 */ + 7410 "11010000" // /* MW 2 */ + 7411 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 140 14 first + 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7413 "11000001" // /* MW 11 */ + 7414 "10001000" // /* MW 10 */ + 7415 "10000011" // /* MW 9 */ + 7416 "00000011" // /* MW 8 */ + 7417 "00000000" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00100000" // /* MW 5 */ + 7420 "00000000" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7425 "00011001" // /* MW 3 */ + 7426 "11111111" // /* MW 2 */ + 7427 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 first + 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7429 "00000000" // /* MW 3 */ + 7430 "00101000" // /* MW 2 */ + 7431 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 +.delay_slot + 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7433 "00000001" // /* MW 5 */ + 7434 "00000000" // /* MW 4 */ + 7435 "00000000" // /* MW 3 */ + 7436 "11111000" // /* MW 2 */ + 7437 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot + 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "10001011" // /* MW 3 */ + 7446 "10000100" // /* MW 2 */ +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 7447 "00001111" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7457 "00000001" // /* MW 5 */ + 7458 "00100001" // /* MW 4 */ + 7459 "00000000" // /* MW 3 */ + 7460 "00000000" // /* MW 2 */ + 7461 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000000" // /* MW 3 */ + 7464 "01010000" // /* MW 2 */ + 7465 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "10010000" // /* MW 3 */ + 7468 "01100000" // /* MW 2 */ + 7469 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7471 "00010001" // /* MW 3 */ + 7472 "00000100" // /* MW 2 */ + 7473 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7475 "00010001" // /* MW 3 */ + 7476 "00010100" // /* MW 2 */ + 7477 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7479 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "00101110" // /* MW 3 */ + 7490 "00011100" // /* MW 2 */ + 7491 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7493 "00000001" // /* MW 5 */ + 7494 "00000000" // /* MW 4 */ + 7495 "00000000" // /* MW 3 */ + 7496 "00001000" // /* MW 2 */ + 7497 "00000000" // /* MW 1 */ + 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7499 "00111101" // /* MW 3 */ + 7500 "11111000" // /* MW 2 */ + 7501 "00001111" // /* MW 1 */ + 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7503 "11110101" // /* MW 3 */ + 7504 "11111101" // /* MW 2 */ + 7505 "00001111" // /* MW 1 */ + 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7507 "00000000" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ + 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7511 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7513 "00101001" // /* MW 3 */ + 7514 "00011100" // /* MW 2 */ + 7515 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7517 "00101110" // /* MW 3 */ + 7518 "00011100" // /* MW 2 */ + 7519 "00000001" // /* MW 1 */ + 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7521 "00000000" // /* MW 1 */ + 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7523 "00000000" // /* MW 1 */ + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7527 "00000000" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7533 "00101001" // /* MW 3 */ + 7534 "00011100" // /* MW 2 */ + 7535 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7537 "00101110" // /* MW 3 */ + 7538 "00000100" // /* MW 2 */ + 7539 "00000001" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ + 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7551 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7553 "00101001" // /* MW 3 */ + 7554 "00011100" // /* MW 2 */ + 7555 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7557 "00101110" // /* MW 3 */ + 7558 "00010100" // /* MW 2 */ + 7559 "00000001" // /* MW 1 */ + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7565 "00000001" // /* MW 5 */ + 7566 "00000000" // /* MW 4 */ + 7567 "10010000" // /* MW 3 */ + 7568 "00001110" // /* MW 2 */ + 7569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7575 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00101001" // /* MW 3 */ + 7578 "11011100" // /* MW 2 */ + 7579 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.delay_slot + 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7581 "11000000" // /* MW 3 */ + 7582 "11010000" // /* MW 2 */ + 7583 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7585 "00001000" // /* MW 9 */ + 7586 "11000100" // /* MW 8 */ + 7587 "00110011" // /* MW 7 */ + 7588 "01101000" // /* MW 6 */ + 7589 "00000000" // /* MW 5 */ + 7590 "00000001" // /* MW 4 */ + 7591 "00100000" // /* MW 3 */ + 7592 "00000111" // /* MW 2 */ + 7593 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7595 "01011000" // /* MW 9 */ + 7596 "11111101" // /* MW 8 */ + 7597 "00000111" // /* MW 7 */ + 7598 "00001000" // /* MW 6 */ + 7599 "10000000" // /* MW 5 */ + 7600 "00000001" // /* MW 4 */ + 7601 "10000000" // /* MW 3 */ + 7602 "11100010" // /* MW 2 */ + 7603 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7605 "00000001" // /* MW 9 */ + 7606 "10100000" // /* MW 8 */ + 7607 "00000111" // /* MW 7 */ + 7608 "10000000" // /* MW 6 */ + 7609 "00010001" // /* MW 5 */ + 7610 "00001010" // /* MW 4 */ + 7611 "00100000" // /* MW 3 */ + 7612 "10111110" // /* MW 2 */ + 7613 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7615 "01001010" // /* MW 3 */ + 7616 "00000110" // /* MW 2 */ + 7617 "00000000" // /* MW 1 */ + 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7619 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7621 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "00010111" // /* MW 3 */ + 7624 "00000010" // /* MW 2 */ + 7625 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7627 "00000000" // /* MW 3 */ + 7628 "00101000" // /* MW 2 */ + 7629 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "00000101" // /* MW 3 */ + 7632 "00100010" // /* MW 2 */ + 7633 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7635 "00000001" // /* MW 5 */ + 7636 "00000000" // /* MW 4 */ + 7637 "00000000" // /* MW 3 */ + 7638 "11111000" // /* MW 2 */ + 7639 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7641 "00100111" // /* MW 3 */ + 7642 "01110111" // /* MW 2 */ + 7643 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7645 "10000010" // /* MW 3 */ + 7646 "00100001" // /* MW 2 */ + 7647 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7649 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 81 first +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 +.function_start + 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7665 "01111000" // /* MW 9 */ + 7666 "01100000" // /* MW 8 */ + 7667 "00001000" // /* MW 7 */ + 7668 "11001000" // /* MW 6 */ + 7669 "00010000" // /* MW 5 */ + 7670 "00000000" // /* MW 4 */ + 7671 "10000000" // /* MW 3 */ + 7672 "10000000" // /* MW 2 */ + 7673 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 + 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7675 "00001100" // /* MW 5 */ + 7676 "11000000" // /* MW 4 */ + 7677 "10100000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first + 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7681 "01001010" // /* MW 3 */ + 7682 "00001000" // /* MW 2 */ + 7683 "00000000" // /* MW 1 */ + 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7685 "00000000" // /* MW 1 */ + 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7687 "00000000" // /* MW 1 */ + 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7689 "00000000" // /* MW 1 */ + 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7691 "00000000" // /* MW 1 */ + 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7693 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first + 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7695 "00000000" // /* MW 3 */ + 7696 "00101000" // /* MW 2 */ + 7697 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.delay_slot + 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7699 "00001000" // /* MW 3 */ + 7700 "10000000" // /* MW 2 */ + 7701 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first +.delay_slot + 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7703 "00011101" // /* MW 3 */ + 7704 "00000000" // /* MW 2 */ + 7705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.src_ref 3 "elementwise_binary_broadcasting.h" 83 23 +.delay_slot + 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7707 "11100000" // /* MW 5 */ + 7708 "00001101" // /* MW 4 */ + 7709 "00110001" // /* MW 3 */ + 7710 "10000010" // /* MW 2 */ + 7711 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.delay_slot + 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00011101" // /* MW 3 */ + 7714 "11000100" // /* MW 2 */ + 7715 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 23 +.delay_slot + 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01010001" // /* MW 3 */ + 7718 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7719 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_broadcasting.h" 76 +.src_ref 3 "elementwise_binary_broadcasting.h" 76 first +.function_start + 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7729 "00000001" // /* MW 5 */ + 7730 "00000000" // /* MW 4 */ + 7731 "00000000" // /* MW 3 */ + 7732 "00001000" // /* MW 2 */ + 7733 "00000000" // /* MW 1 */ + 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7735 "00111101" // /* MW 3 */ + 7736 "11111100" // /* MW 2 */ + 7737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first +.no_stack_arguments + 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */ + 7739 "00000001" // /* MW 5 */ + 7740 "00000000" // /* MW 4 */ + 7741 "10100000" // /* MW 3 */ + 7742 "00001110" // /* MW 2 */ + 7743 "00000000" // /* MW 1 */ +.delay_slot + 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7745 "10011101" // /* MW 3 */ + 7746 "11111011" // /* MW 2 */ + 7747 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot + 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7749 "11000000" // /* MW 3 */ + 7750 "01100000" // /* MW 2 */ + 7751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7757 "01100111" // /* MW 3 */ + 7758 "00000001" // /* MW 2 */ + 7759 "00000000" // /* MW 1 */ +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7761 "10011001" // /* MW 3 */ + 7762 "11111011" // /* MW 2 */ + 7763 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7765 "00111001" // /* MW 3 */ + 7766 "11111100" // /* MW 2 */ + 7767 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */ + 7769 "00000000" // /* MW 5 */ + 7770 "00000000" // /* MW 4 */ + 7771 "11111000" // /* MW 3 */ + 7772 "00001110" // /* MW 2 */ + 7773 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7775 "11000000" // /* MW 3 */ + 7776 "01101110" // /* MW 2 */ + 7777 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first +.delay_slot + 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7779 "00000001" // /* MW 5 */ + 7780 "00000000" // /* MW 4 */ + 7781 "00000000" // /* MW 3 */ + 7782 "11111000" // /* MW 2 */ + 7783 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7789 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 89 first +.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 +.function_start + 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7793 "01010001" // /* MW 5 */ + 7794 "00000000" // /* MW 4 */ + 7795 "11010000" // /* MW 3 */ + 7796 "10000010" // /* MW 2 */ + 7797 "01100111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7799 "10000001" // /* MW 5 */ + 7800 "11001101" // /* MW 4 */ + 7801 "01011000" // /* MW 3 */ + 7802 "00000101" // /* MW 2 */ + 7803 "01100001" // /* MW 1 */ + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ + 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7807 "00000000" // /* MW 1 */ + 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7809 "00000000" // /* MW 1 */ + 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7811 "00000000" // /* MW 1 */ + 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7813 "00000000" // /* MW 1 */ + 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7815 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 12 +.src_ref 3 "elementwise_binary_broadcasting.h" 102 35 + 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */ + 7817 "00000001" // /* MW 5 */ + 7818 "01000000" // /* MW 4 */ + 7819 "01100000" // /* MW 3 */ + 7820 "00001111" // /* MW 2 */ + 7821 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 +.delay_slot + 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7823 "11101001" // /* MW 3 */ + 7824 "11000100" // /* MW 2 */ + 7825 "00010111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first +.delay_slot + 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7827 "00101101" // /* MW 3 */ + 7828 "00000000" // /* MW 2 */ + 7829 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first + 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "00110010" // /* MW 3 */ + 7838 "00000100" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ + 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7841 "00000000" // /* MW 1 */ + 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7843 "00000000" // /* MW 1 */ + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ + 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */ + 7847 "00000000" // /* MW 5 */ + 7848 "00000000" // /* MW 4 */ + 7849 "01110000" // /* MW 3 */ + 7850 "00001111" // /* MW 2 */ + 7851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7857 "01110010" // /* MW 3 */ + 7858 "00000101" // /* MW 2 */ + 7859 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7863 "00000000" // /* MW 9 */ + 7864 "00000000" // /* MW 8 */ + 7865 "00000000" // /* MW 7 */ + 7866 "00000000" // /* MW 6 */ + 7867 "00010011" // /* MW 5 */ + 7868 "00000100" // /* MW 4 */ + 7869 "11110000" // /* MW 3 */ + 7870 "00101100" // /* MW 2 */ + 7871 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 +.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first + 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00110010" // /* MW 3 */ + 7874 "00000100" // /* MW 2 */ + 7875 "00000001" // /* MW 1 */ + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ + 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7881 "00000000" // /* MW 1 */ + 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7883 "00000000" // /* MW 1 */ + 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7885 "00000000" // /* MW 1 */ + 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7887 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7889 "01110010" // /* MW 3 */ + 7890 "00000101" // /* MW 2 */ + 7891 "00011000" // /* MW 1 */ + 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7895 "00000000" // /* MW 9 */ + 7896 "00000000" // /* MW 8 */ + 7897 "00000000" // /* MW 7 */ + 7898 "00000000" // /* MW 6 */ + 7899 "00010011" // /* MW 5 */ + 7900 "00000100" // /* MW 4 */ + 7901 "11110001" // /* MW 3 */ + 7902 "00101100" // /* MW 2 */ + 7903 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first + 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7905 "01001000" // /* MW 9 */ + 7906 "00111111" // /* MW 8 */ + 7907 "10111000" // /* MW 7 */ + 7908 "10001010" // /* MW 6 */ + 7909 "00000111" // /* MW 5 */ + 7910 "00000000" // /* MW 4 */ + 7911 "11010000" // /* MW 3 */ + 7912 "10000000" // /* MW 2 */ + 7913 "10001010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7915 "00010000" // /* MW 9 */ + 7916 "10101000" // /* MW 8 */ + 7917 "01111111" // /* MW 7 */ + 7918 "00000100" // /* MW 6 */ + 7919 "00000000" // /* MW 5 */ + 7920 "00000000" // /* MW 4 */ + 7921 "11010000" // /* MW 3 */ + 7922 "10010000" // /* MW 2 */ + 7923 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7925 "11100000" // /* MW 5 */ + 7926 "11111110" // /* MW 4 */ + 7927 "00010110" // /* MW 3 */ + 7928 "00000000" // /* MW 2 */ + 7929 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7931 "11010000" // /* MW 5 */ + 7932 "11001000" // /* MW 4 */ + 7933 "11001000" // /* MW 3 */ + 7934 "00000111" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7937 "00100010" // /* MW 3 */ + 7938 "00000100" // /* MW 2 */ + 7939 "00000100" // /* MW 1 */ + 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7941 "00000000" // /* MW 1 */ + 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first + 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7945 "10101011" // /* MW 3 */ + 7946 "00001000" // /* MW 2 */ + 7947 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 189 20 first + 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7949 "00101011" // /* MW 3 */ + 7950 "00101001" // /* MW 2 */ + 7951 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first + 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7953 "00101011" // /* MW 3 */ + 7954 "00001000" // /* MW 2 */ + 7955 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7957 "00101011" // /* MW 3 */ + 7958 "00101010" // /* MW 2 */ + 7959 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7961 "00000000" // /* MW 5 */ + 7962 "11110101" // /* MW 4 */ + 7963 "01110000" // /* MW 3 */ + 7964 "00010101" // /* MW 2 */ + 7965 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7967 "00111101" // /* MW 7 */ + 7968 "00101000" // /* MW 6 */ + 7969 "00000011" // /* MW 5 */ + 7970 "00000100" // /* MW 4 */ + 7971 "01110000" // /* MW 3 */ + 7972 "00100101" // /* MW 2 */ + 7973 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "00101011" // /* MW 3 */ + 7976 "00001000" // /* MW 2 */ + 7977 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7979 "00111101" // /* MW 7 */ + 7980 "00010000" // /* MW 6 */ + 7981 "00000100" // /* MW 5 */ + 7982 "00000100" // /* MW 4 */ + 7983 "01110000" // /* MW 3 */ + 7984 "01000101" // /* MW 2 */ + 7985 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "10101011" // /* MW 3 */ + 7988 "00001000" // /* MW 2 */ + 7989 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7991 "00111101" // /* MW 7 */ + 7992 "00101000" // /* MW 6 */ + 7993 "00000011" // /* MW 5 */ + 7994 "00000100" // /* MW 4 */ + 7995 "01110000" // /* MW 3 */ + 7996 "00100101" // /* MW 2 */ + 7997 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7999 "00101011" // /* MW 3 */ + 8000 "00001000" // /* MW 2 */ + 8001 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8003 "00111101" // /* MW 13 */ + 8004 "00010000" // /* MW 12 */ + 8005 "00000100" // /* MW 11 */ + 8006 "01010111" // /* MW 10 */ + 8007 "00011010" // /* MW 9 */ + 8008 "01000000" // /* MW 8 */ + 8009 "00000000" // /* MW 7 */ + 8010 "00000000" // /* MW 6 */ + 8011 "01000110" // /* MW 5 */ + 8012 "00111011" // /* MW 4 */ + 8013 "01110100" // /* MW 3 */ + 8014 "01000101" // /* MW 2 */ + 8015 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "10101011" // /* MW 3 */ + 8018 "00001000" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8021 "00111101" // /* MW 11 */ + 8022 "00101000" // /* MW 10 */ + 8023 "00000011" // /* MW 9 */ + 8024 "10001110" // /* MW 8 */ + 8025 "00010001" // /* MW 7 */ + 8026 "00001111" // /* MW 6 */ + 8027 "00100001" // /* MW 5 */ + 8028 "00000000" // /* MW 4 */ + 8029 "01110000" // /* MW 3 */ + 8030 "00100101" // /* MW 2 */ + 8031 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8033 "00000000" // /* MW 15 */ + 8034 "00000000" // /* MW 14 */ + 8035 "01111000" // /* MW 13 */ + 8036 "10100101" // /* MW 12 */ + 8037 "00000001" // /* MW 11 */ + 8038 "00000000" // /* MW 10 */ + 8039 "00000000" // /* MW 9 */ + 8040 "00000000" // /* MW 8 */ + 8041 "01011011" // /* MW 7 */ + 8042 "00000001" // /* MW 6 */ + 8043 "00100000" // /* MW 5 */ + 8044 "00000000" // /* MW 4 */ + 8045 "01110000" // /* MW 3 */ + 8046 "00000101" // /* MW 2 */ + 8047 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8049 "10000001" // /* MW 15 */ + 8050 "00100000" // /* MW 14 */ + 8051 "01111000" // /* MW 13 */ + 8052 "10100101" // /* MW 12 */ + 8053 "00000001" // /* MW 11 */ + 8054 "00000000" // /* MW 10 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "10100011" // /* MW 7 */ + 8058 "00011101" // /* MW 6 */ + 8059 "00100010" // /* MW 5 */ + 8060 "00000000" // /* MW 4 */ + 8061 "01110000" // /* MW 3 */ + 8062 "01000101" // /* MW 2 */ + 8063 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8067 "00111101" // /* MW 7 */ + 8068 "00101000" // /* MW 6 */ + 8069 "00000011" // /* MW 5 */ + 8070 "00000010" // /* MW 4 */ + 8071 "01100000" // /* MW 3 */ + 8072 "11000100" // /* MW 2 */ + 8073 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8075 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8077 "00111101" // /* MW 7 */ + 8078 "00010000" // /* MW 6 */ + 8079 "00000100" // /* MW 5 */ + 8080 "00000010" // /* MW 4 */ + 8081 "01100000" // /* MW 3 */ + 8082 "10110100" // /* MW 2 */ + 8083 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8087 "00000000" // /* MW 5 */ + 8088 "01010000" // /* MW 4 */ + 8089 "01100000" // /* MW 3 */ + 8090 "11000100" // /* MW 2 */ + 8091 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "10100011" // /* MW 3 */ + 8096 "00011101" // /* MW 2 */ + 8097 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8099 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8101 "00100011" // /* MW 3 */ + 8102 "00011110" // /* MW 2 */ + 8103 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8105 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first +.function_start + 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8113 "00000001" // /* MW 5 */ + 8114 "00000000" // /* MW 4 */ + 8115 "00000000" // /* MW 3 */ + 8116 "00010000" // /* MW 2 */ + 8117 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 + 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8119 "01110000" // /* MW 7 */ + 8120 "01100000" // /* MW 6 */ + 8121 "00001010" // /* MW 5 */ + 8122 "00000010" // /* MW 4 */ + 8123 "10110000" // /* MW 3 */ + 8124 "10000111" // /* MW 2 */ + 8125 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 + 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8127 "00000000" // /* MW 7 */ + 8128 "00000011" // /* MW 6 */ + 8129 "10110100" // /* MW 5 */ + 8130 "00000001" // /* MW 4 */ + 8131 "01100000" // /* MW 3 */ + 8132 "10010001" // /* MW 2 */ + 8133 "01010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 + 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8135 "10000001" // /* MW 5 */ + 8136 "00100001" // /* MW 4 */ + 8137 "01011000" // /* MW 3 */ + 8138 "11101101" // /* MW 2 */ + 8139 "01100101" // /* MW 1 */ + 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8141 "11000001" // /* MW 5 */ + 8142 "10101011" // /* MW 4 */ + 8143 "01011000" // /* MW 3 */ + 8144 "11001010" // /* MW 2 */ + 8145 "01110011" // /* MW 1 */ + 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8147 "11000000" // /* MW 3 */ + 8148 "01101000" // /* MW 2 */ + 8149 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8151 "00101011" // /* MW 3 */ + 8152 "00000111" // /* MW 2 */ + 8153 "00001000" // /* MW 1 */ + 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8155 "01010111" // /* MW 3 */ + 8156 "00000110" // /* MW 2 */ + 8157 "00000000" // /* MW 1 */ + 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8159 "00000000" // /* MW 1 */ + 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8161 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first +.no_stack_arguments + 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */ + 8163 "00000001" // /* MW 5 */ + 8164 "00000000" // /* MW 4 */ + 8165 "00111000" // /* MW 3 */ + 8166 "00001111" // /* MW 2 */ + 8167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.delay_slot + 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8169 "11000000" // /* MW 3 */ + 8170 "01010000" // /* MW 2 */ + 8171 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first +.delay_slot + 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8175 "00010010" // /* MW 3 */ + 8176 "00100101" // /* MW 2 */ + 8177 "00010100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8179 "01000001" // /* MW 5 */ + 8180 "11010010" // /* MW 4 */ + 8181 "01000010" // /* MW 3 */ + 8182 "00100000" // /* MW 2 */ + 8183 "10001100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8185 "01110000" // /* MW 7 */ + 8186 "00010000" // /* MW 6 */ + 8187 "00110100" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "01100000" // /* MW 3 */ + 8190 "00101011" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.return_address + 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8193 "00111001" // /* MW 3 */ + 8194 "11111100" // /* MW 2 */ + 8195 "00000111" // /* MW 1 */ + 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8197 "00000000" // /* MW 1 */ + 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8199 "00000000" // /* MW 1 */ + 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8201 "00000000" // /* MW 1 */ + 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8203 "00000000" // /* MW 1 */ + 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8205 "00000000" // /* MW 1 */ + 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8207 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first + 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8209 "00000000" // /* MW 3 */ + 8210 "00101000" // /* MW 2 */ + 8211 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.delay_slot + 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8213 "00000001" // /* MW 5 */ + 8214 "00000000" // /* MW 4 */ + 8215 "00000000" // /* MW 3 */ + 8216 "11110000" // /* MW 2 */ + 8217 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8221 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8223 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8225 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 147 first +.src_ref 7 "superkernels.cpp" 152 6 +.function_start + 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8241 "10000000" // /* MW 5 */ + 8242 "11001000" // /* MW 4 */ + 8243 "11000110" // /* MW 3 */ + 8244 "00000111" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 first + 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8247 "11000001" // /* MW 5 */ + 8248 "10110101" // /* MW 4 */ + 8249 "11011000" // /* MW 3 */ + 8250 "11000010" // /* MW 2 */ + 8251 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 147 + 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8253 "00000001" // /* MW 5 */ + 8254 "00000000" // /* MW 4 */ + 8255 "00000000" // /* MW 3 */ + 8256 "00001000" // /* MW 2 */ + 8257 "00000000" // /* MW 1 */ + 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8259 "01110000" // /* MW 7 */ + 8260 "11010000" // /* MW 6 */ + 8261 "00001011" // /* MW 5 */ + 8262 "00000000" // /* MW 4 */ + 8263 "10110000" // /* MW 3 */ + 8264 "01100011" // /* MW 2 */ + 8265 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 + 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8267 "00010001" // /* MW 9 */ + 8268 "00101000" // /* MW 8 */ + 8269 "00110010" // /* MW 7 */ + 8270 "11110011" // /* MW 6 */ + 8271 "00000001" // /* MW 5 */ + 8272 "00000000" // /* MW 4 */ + 8273 "10110000" // /* MW 3 */ + 8274 "10000010" // /* MW 2 */ + 8275 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "11000000" // /* MW 3 */ + 8278 "11010100" // /* MW 2 */ + 8279 "00011011" // /* MW 1 */ + 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8281 "00000000" // /* MW 1 */ + 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8283 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 +.src_ref 7 "superkernels.cpp" 152 16 + 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */ + 8285 "00000001" // /* MW 5 */ + 8286 "01000000" // /* MW 4 */ + 8287 "10000000" // /* MW 3 */ + 8288 "00010000" // /* MW 2 */ + 8289 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 22 first +.delay_slot + 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8291 "10010000" // /* MW 3 */ + 8292 "01100010" // /* MW 2 */ + 8293 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 30 +.delay_slot + 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8295 "11111011" // /* MW 3 */ + 8296 "01100011" // /* MW 2 */ + 8297 "00010100" // /* MW 1 */ +.delay_slot + 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8299 "00111101" // /* MW 3 */ + 8300 "11110100" // /* MW 2 */ + 8301 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8303 "01110000" // /* MW 7 */ + 8304 "01100000" // /* MW 6 */ + 8305 "00110000" // /* MW 5 */ + 8306 "00000011" // /* MW 4 */ + 8307 "00110000" // /* MW 3 */ + 8308 "11000110" // /* MW 2 */ + 8309 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 +.src_ref 7 "superkernels.cpp" 166 2 +.delay_slot + 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8311 "10000000" // /* MW 5 */ + 8312 "11001001" // /* MW 4 */ + 8313 "11000000" // /* MW 3 */ + 8314 "00000111" // /* MW 2 */ + 8315 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8317 "11010000" // /* MW 5 */ + 8318 "11001000" // /* MW 4 */ + 8319 "11000100" // /* MW 3 */ + 8320 "00000111" // /* MW 2 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8323 "00010000" // /* MW 9 */ + 8324 "00110010" // /* MW 8 */ + 8325 "00110010" // /* MW 7 */ + 8326 "11110001" // /* MW 6 */ + 8327 "00000001" // /* MW 5 */ + 8328 "00000000" // /* MW 4 */ + 8329 "11100000" // /* MW 3 */ + 8330 "11000000" // /* MW 2 */ + 8331 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8333 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */ + 8335 "00000001" // /* MW 5 */ + 8336 "00000000" // /* MW 4 */ + 8337 "00011000" // /* MW 3 */ + 8338 "00001111" // /* MW 2 */ + 8339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "00110001" // /* MW 3 */ + 8346 "00100000" // /* MW 2 */ + 8347 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "00000101" // /* MW 3 */ + 8350 "00100000" // /* MW 2 */ + 8351 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8353 "00000000" // /* MW 15 */ + 8354 "00000000" // /* MW 14 */ + 8355 "01111000" // /* MW 13 */ + 8356 "10100101" // /* MW 12 */ + 8357 "00000001" // /* MW 11 */ + 8358 "00000000" // /* MW 10 */ + 8359 "00000000" // /* MW 9 */ + 8360 "10000000" // /* MW 8 */ + 8361 "00010001" // /* MW 7 */ + 8362 "00000110" // /* MW 6 */ + 8363 "00100010" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11110000" // /* MW 3 */ + 8366 "00101100" // /* MW 2 */ + 8367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 +.return_address + 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8369 "10100000" // /* MW 5 */ + 8370 "11001000" // /* MW 4 */ + 8371 "11000100" // /* MW 3 */ + 8372 "00000111" // /* MW 2 */ + 8373 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 first +.src_ref 7 "superkernels.cpp" 159 65 + 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8375 "00010000" // /* MW 9 */ + 8376 "01100000" // /* MW 8 */ + 8377 "00110010" // /* MW 7 */ + 8378 "11110001" // /* MW 6 */ + 8379 "00000001" // /* MW 5 */ + 8380 "00000000" // /* MW 4 */ + 8381 "11010000" // /* MW 3 */ + 8382 "11000010" // /* MW 2 */ + 8383 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 +.src_ref 7 "superkernels.cpp" 159 65 +.src_ref 7 "superkernels.cpp" 166 2 + 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8385 "00010000" // /* MW 9 */ + 8386 "01100000" // /* MW 8 */ + 8387 "00110010" // /* MW 7 */ + 8388 "11110001" // /* MW 6 */ + 8389 "00000001" // /* MW 5 */ + 8390 "00000000" // /* MW 4 */ + 8391 "11010000" // /* MW 3 */ + 8392 "11000110" // /* MW 2 */ + 8393 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 first +.src_ref 7 "superkernels.cpp" 159 16 +.src_ref 7 "superkernels.cpp" 164 47 + 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8395 "00010000" // /* MW 9 */ + 8396 "00101010" // /* MW 8 */ + 8397 "10110010" // /* MW 7 */ + 8398 "11110000" // /* MW 6 */ + 8399 "00000001" // /* MW 5 */ + 8400 "00000000" // /* MW 4 */ + 8401 "01010000" // /* MW 3 */ + 8402 "11001011" // /* MW 2 */ + 8403 "01001010" // /* MW 1 */ + 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8405 "00000000" // /* MW 1 */ + 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8407 "00000000" // /* MW 1 */ + 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */ + 8409 "00000000" // /* MW 5 */ + 8410 "00000000" // /* MW 4 */ + 8411 "10001000" // /* MW 3 */ + 8412 "00010000" // /* MW 2 */ + 8413 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 +.delay_slot + 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8415 "11000000" // /* MW 5 */ + 8416 "11001000" // /* MW 4 */ + 8417 "11000000" // /* MW 3 */ + 8418 "00000111" // /* MW 2 */ + 8419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8421 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 27 first +.delay_slot + 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001111" // /* MW 3 */ + 8424 "01100001" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 first +.delay_slot + 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8427 "10100011" // /* MW 5 */ + 8428 "00001100" // /* MW 4 */ + 8429 "11110000" // /* MW 3 */ + 8430 "00101100" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 16 first +.delay_slot + 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8433 "00000000" // /* MW 15 */ + 8434 "00000000" // /* MW 14 */ + 8435 "01111000" // /* MW 13 */ + 8436 "10100101" // /* MW 12 */ + 8437 "00000001" // /* MW 11 */ + 8438 "00000000" // /* MW 10 */ + 8439 "00000000" // /* MW 9 */ + 8440 "10000000" // /* MW 8 */ + 8441 "00010001" // /* MW 7 */ + 8442 "00000110" // /* MW 6 */ + 8443 "00100001" // /* MW 5 */ + 8444 "00000000" // /* MW 4 */ + 8445 "11110000" // /* MW 3 */ + 8446 "00101100" // /* MW 2 */ + 8447 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 164 47 +.src_ref 7 "superkernels.cpp" 166 2 + 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "00010000" // /* MW 13 */ + 8452 "00101010" // /* MW 12 */ + 8453 "10110010" // /* MW 11 */ + 8454 "11110000" // /* MW 10 */ + 8455 "00000001" // /* MW 9 */ + 8456 "00000000" // /* MW 8 */ + 8457 "10001011" // /* MW 7 */ + 8458 "10000000" // /* MW 6 */ + 8459 "00100010" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8465 "00000000" // /* MW 7 */ + 8466 "11000011" // /* MW 6 */ + 8467 "10110011" // /* MW 5 */ + 8468 "00000011" // /* MW 4 */ + 8469 "01100000" // /* MW 3 */ + 8470 "10010001" // /* MW 2 */ + 8471 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8473 "00010000" // /* MW 9 */ + 8474 "00100000" // /* MW 8 */ + 8475 "00110010" // /* MW 7 */ + 8476 "11110000" // /* MW 6 */ + 8477 "00000001" // /* MW 5 */ + 8478 "00000000" // /* MW 4 */ + 8479 "11010000" // /* MW 3 */ + 8480 "11101110" // /* MW 2 */ + 8481 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8483 "00010110" // /* MW 3 */ + 8484 "11111110" // /* MW 2 */ + 8485 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8487 "00110110" // /* MW 3 */ + 8488 "11111110" // /* MW 2 */ + 8489 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8491 "01010110" // /* MW 3 */ + 8492 "01000110" // /* MW 2 */ + 8493 "00000111" // /* MW 1 */ + 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8495 "00000000" // /* MW 1 */ + 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8497 "00000000" // /* MW 1 */ + 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8499 "00000000" // /* MW 1 */ + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ + 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8503 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "00000010" // /* MW 3 */ + 8506 "01100001" // /* MW 2 */ + 8507 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8509 "00010001" // /* MW 3 */ + 8510 "00000110" // /* MW 2 */ + 8511 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8513 "11111101" // /* MW 3 */ + 8514 "11100000" // /* MW 2 */ + 8515 "00010111" // /* MW 1 */ + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001000" // /* MW 3 */ + 8524 "10010011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 + 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8527 "10000001" // /* MW 5 */ + 8528 "10101101" // /* MW 4 */ + 8529 "10100111" // /* MW 3 */ + 8530 "00000000" // /* MW 2 */ + 8531 "00000100" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first + 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8537 "00110110" // /* MW 3 */ + 8538 "00000110" // /* MW 2 */ + 8539 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8541 "10000001" // /* MW 5 */ + 8542 "11011101" // /* MW 4 */ + 8543 "11011100" // /* MW 3 */ + 8544 "11001010" // /* MW 2 */ + 8545 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 47 first + 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8547 "01110110" // /* MW 3 */ + 8548 "00000110" // /* MW 2 */ + 8549 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8551 "10011110" // /* MW 3 */ + 8552 "01011100" // /* MW 2 */ + 8553 "00000111" // /* MW 1 */ + 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 166 2 first +.no_stack_arguments + 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */ + 8557 "00000001" // /* MW 5 */ + 8558 "00000000" // /* MW 4 */ + 8559 "11011000" // /* MW 3 */ + 8560 "00001111" // /* MW 2 */ + 8561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first +.delay_slot + 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8565 "00000111" // /* MW 3 */ + 8566 "01100010" // /* MW 2 */ + 8567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.delay_slot + 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8569 "00110001" // /* MW 3 */ + 8570 "00000110" // /* MW 2 */ + 8571 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 first +.delay_slot + 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8573 "00001101" // /* MW 3 */ + 8574 "11100001" // /* MW 2 */ + 8575 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 +.delay_slot + 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8577 "00000000" // /* MW 15 */ + 8578 "00000000" // /* MW 14 */ + 8579 "10101000" // /* MW 13 */ + 8580 "10100000" // /* MW 12 */ + 8581 "00110100" // /* MW 11 */ + 8582 "00000000" // /* MW 10 */ + 8583 "00000000" // /* MW 9 */ + 8584 "00000000" // /* MW 8 */ + 8585 "01011011" // /* MW 7 */ + 8586 "00000001" // /* MW 6 */ + 8587 "00100000" // /* MW 5 */ + 8588 "00000000" // /* MW 4 */ + 8589 "11110000" // /* MW 3 */ + 8590 "00101100" // /* MW 2 */ + 8591 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 +.src_ref 7 "superkernels.cpp" 169 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8593 "00010000" // /* MW 9 */ + 8594 "00100000" // /* MW 8 */ + 8595 "00110010" // /* MW 7 */ + 8596 "11110011" // /* MW 6 */ + 8597 "00000001" // /* MW 5 */ + 8598 "00000000" // /* MW 4 */ + 8599 "11010000" // /* MW 3 */ + 8600 "11000110" // /* MW 2 */ + 8601 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8603 "00000101" // /* MW 3 */ + 8604 "00100000" // /* MW 2 */ + 8605 "00010000" // /* MW 1 */ + 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8607 "00000000" // /* MW 1 */ + 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ + 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8617 "00001000" // /* MW 3 */ + 8618 "01010001" // /* MW 2 */ + 8619 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8621 "00010000" // /* MW 9 */ + 8622 "00110000" // /* MW 8 */ + 8623 "00110010" // /* MW 7 */ + 8624 "11110001" // /* MW 6 */ + 8625 "00000001" // /* MW 5 */ + 8626 "00000000" // /* MW 4 */ + 8627 "11010000" // /* MW 3 */ + 8628 "11001110" // /* MW 2 */ + 8629 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 first + 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "00110110" // /* MW 3 */ + 8632 "00000110" // /* MW 2 */ + 8633 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 + 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8635 "01010110" // /* MW 3 */ + 8636 "00000110" // /* MW 2 */ + 8637 "00000010" // /* MW 1 */ + 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8639 "00000000" // /* MW 1 */ + 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8641 "00000000" // /* MW 1 */ + 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8643 "00000000" // /* MW 1 */ + 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8645 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00110001" // /* MW 3 */ + 8648 "00100001" // /* MW 2 */ + 8649 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8651 "00010001" // /* MW 3 */ + 8652 "11100110" // /* MW 2 */ + 8653 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 16 first + 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8655 "00101000" // /* MW 3 */ + 8656 "01100001" // /* MW 2 */ + 8657 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 + 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */ + 8659 "00000001" // /* MW 5 */ + 8660 "01000000" // /* MW 4 */ + 8661 "11111000" // /* MW 3 */ + 8662 "00010000" // /* MW 2 */ + 8663 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8673 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 + 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8675 "00000001" // /* MW 3 */ + 8676 "00100000" // /* MW 2 */ + 8677 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 first + 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8679 "00000000" // /* MW 9 */ + 8680 "00000000" // /* MW 8 */ + 8681 "00000000" // /* MW 7 */ + 8682 "10000000" // /* MW 6 */ + 8683 "00010001" // /* MW 5 */ + 8684 "00000110" // /* MW 4 */ + 8685 "11110110" // /* MW 3 */ + 8686 "00101100" // /* MW 2 */ + 8687 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 171 + 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8689 "00111001" // /* MW 3 */ + 8690 "11110100" // /* MW 2 */ + 8691 "00000111" // /* MW 1 */ + 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8693 "00011001" // /* MW 3 */ + 8694 "11111011" // /* MW 2 */ + 8695 "00000111" // /* MW 1 */ + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ + 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8699 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "11110001" // /* MW 3 */ + 8704 "11111101" // /* MW 2 */ + 8705 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8709 "00000000" // /* MW 3 */ + 8710 "00101000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8713 "10100000" // /* MW 3 */ + 8714 "01100111" // /* MW 2 */ + 8715 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 +.delay_slot + 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8717 "00000001" // /* MW 5 */ + 8718 "00000000" // /* MW 4 */ + 8719 "00000000" // /* MW 3 */ + 8720 "11111000" // /* MW 2 */ + 8721 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 8727 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 3 "elementwise_unary.h" 124 first +.src_ref 3 "elementwise_unary.h" 126 24 first +.function_start + 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8737 "00101110" // /* MW 3 */ + 8738 "00011100" // /* MW 2 */ + 8739 "00000001" // /* MW 1 */ + 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8741 "00000000" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8745 "00000000" // /* MW 1 */ + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 126 22 first + 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8753 "00101001" // /* MW 3 */ + 8754 "00011100" // /* MW 2 */ + 8755 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 24 first + 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8757 "00101110" // /* MW 3 */ + 8758 "00011100" // /* MW 2 */ + 8759 "00000001" // /* MW 1 */ + 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8761 "00000000" // /* MW 1 */ + 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8763 "00000000" // /* MW 1 */ + 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8765 "00000000" // /* MW 1 */ + 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8767 "00000000" // /* MW 1 */ + 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8769 "00000000" // /* MW 1 */ + 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8771 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 22 + 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8773 "00101001" // /* MW 3 */ + 8774 "00011100" // /* MW 2 */ + 8775 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 24 first + 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8777 "00101110" // /* MW 3 */ + 8778 "01101100" // /* MW 2 */ + 8779 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8781 "00010010" // /* MW 3 */ + 8782 "00000100" // /* MW 2 */ + 8783 "00000001" // /* MW 1 */ + 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8785 "00000000" // /* MW 1 */ + 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8787 "00000000" // /* MW 1 */ + 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8789 "00000000" // /* MW 1 */ + 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8791 "00000000" // /* MW 1 */ + 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8793 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 22 first + 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8795 "00101001" // /* MW 3 */ + 8796 "01101100" // /* MW 2 */ + 8797 "00001000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010111" // /* MW 3 */ + 8800 "00000100" // /* MW 2 */ + 8801 "00000000" // /* MW 1 */ + 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8803 "00000000" // /* MW 1 */ + 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8805 "00000000" // /* MW 1 */ + 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8807 "00000000" // /* MW 1 */ + 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8809 "00000000" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 first + 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "00010010" // /* MW 3 */ + 8816 "00100100" // /* MW 2 */ + 8817 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 + 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8819 "00010111" // /* MW 3 */ + 8820 "00010100" // /* MW 2 */ + 8821 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 130 4 first + 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8823 "00000000" // /* MW 3 */ + 8824 "00101000" // /* MW 2 */ + 8825 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8829 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 8835 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 136 first +.src_ref 3 "elementwise_unary.h" 142 37 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 171 19 +.function_start + 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8849 "00010000" // /* MW 11 */ + 8850 "10001000" // /* MW 10 */ + 8851 "01111001" // /* MW 9 */ + 8852 "00001000" // /* MW 8 */ + 8853 "00000000" // /* MW 7 */ + 8854 "00000000" // /* MW 6 */ + 8855 "01101000" // /* MW 5 */ + 8856 "00111010" // /* MW 4 */ + 8857 "10000000" // /* MW 3 */ + 8858 "11000010" // /* MW 2 */ + 8859 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 142 78 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 190 19 first + 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8861 "00010000" // /* MW 11 */ + 8862 "10100000" // /* MW 10 */ + 8863 "10111001" // /* MW 9 */ + 8864 "00001001" // /* MW 8 */ + 8865 "00000000" // /* MW 7 */ + 8866 "00000000" // /* MW 6 */ + 8867 "01101000" // /* MW 5 */ + 8868 "00111001" // /* MW 4 */ + 8869 "00000000" // /* MW 3 */ + 8870 "01010001" // /* MW 2 */ + 8871 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 + 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8873 "11000000" // /* MW 3 */ + 8874 "00010100" // /* MW 2 */ + 8875 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 first + 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8877 "00010000" // /* MW 3 */ + 8878 "01100000" // /* MW 2 */ + 8879 "00011010" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 103 16 first + 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8881 "01010010" // /* MW 3 */ + 8882 "00011100" // /* MW 2 */ + 8883 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 142 37 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8885 "00010110" // /* MW 3 */ + 8886 "00000000" // /* MW 2 */ + 8887 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 8 "clip_impl.h" 104 16 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8889 "01101000" // /* MW 5 */ + 8890 "00111010" // /* MW 4 */ + 8891 "01010000" // /* MW 3 */ + 8892 "10000110" // /* MW 2 */ + 8893 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8895 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8901 "10110100" // /* MW 3 */ + 8902 "00011100" // /* MW 2 */ + 8903 "00111000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8905 "01110010" // /* MW 3 */ + 8906 "00001001" // /* MW 2 */ + 8907 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 142 78 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8909 "01111000" // /* MW 9 */ + 8910 "00110110" // /* MW 8 */ + 8911 "01010000" // /* MW 7 */ + 8912 "11101101" // /* MW 6 */ + 8913 "00011000" // /* MW 5 */ + 8914 "00000001" // /* MW 4 */ + 8915 "01101000" // /* MW 3 */ + 8916 "00111010" // /* MW 2 */ + 8917 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 154 8 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8919 "11111110" // /* MW 3 */ + 8920 "01111000" // /* MW 2 */ + 8921 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8923 "01110010" // /* MW 3 */ + 8924 "10000101" // /* MW 2 */ + 8925 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8927 "10101100" // /* MW 3 */ + 8928 "10101000" // /* MW 2 */ + 8929 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8931 "01100000" // /* MW 13 */ + 8932 "00101011" // /* MW 12 */ + 8933 "00000000" // /* MW 11 */ + 8934 "11001111" // /* MW 10 */ + 8935 "00000110" // /* MW 9 */ + 8936 "00110001" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "01101000" // /* MW 5 */ + 8940 "00111001" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8945 "00000000" // /* MW 15 */ + 8946 "00000000" // /* MW 14 */ + 8947 "01111000" // /* MW 13 */ + 8948 "01010110" // /* MW 12 */ + 8949 "11011000" // /* MW 11 */ + 8950 "00000001" // /* MW 10 */ + 8951 "00000000" // /* MW 9 */ + 8952 "00000000" // /* MW 8 */ + 8953 "11010011" // /* MW 7 */ + 8954 "00011100" // /* MW 6 */ + 8955 "00100001" // /* MW 5 */ + 8956 "00000000" // /* MW 4 */ + 8957 "11110000" // /* MW 3 */ + 8958 "00101100" // /* MW 2 */ + 8959 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8961 "00000000" // /* MW 15 */ + 8962 "00000000" // /* MW 14 */ + 8963 "01111000" // /* MW 13 */ + 8964 "00110110" // /* MW 12 */ + 8965 "01010000" // /* MW 11 */ + 8966 "00000001" // /* MW 10 */ + 8967 "00000000" // /* MW 9 */ + 8968 "00000000" // /* MW 8 */ + 8969 "01011011" // /* MW 7 */ + 8970 "00000001" // /* MW 6 */ + 8971 "00100000" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "11110000" // /* MW 3 */ + 8974 "00101100" // /* MW 2 */ + 8975 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8977 "00000000" // /* MW 15 */ + 8978 "00000000" // /* MW 14 */ + 8979 "01111000" // /* MW 13 */ + 8980 "01010110" // /* MW 12 */ + 8981 "11010100" // /* MW 11 */ + 8982 "00000000" // /* MW 10 */ + 8983 "00000000" // /* MW 9 */ + 8984 "00000000" // /* MW 8 */ + 8985 "11010011" // /* MW 7 */ + 8986 "00011101" // /* MW 6 */ + 8987 "01101001" // /* MW 5 */ + 8988 "00111010" // /* MW 4 */ + 8989 "11110000" // /* MW 3 */ + 8990 "00101100" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8993 "00000000" // /* MW 15 */ + 8994 "00000000" // /* MW 14 */ + 8995 "01111000" // /* MW 13 */ + 8996 "00110110" // /* MW 12 */ + 8997 "10001000" // /* MW 11 */ + 8998 "00000001" // /* MW 10 */ + 8999 "00000000" // /* MW 9 */ + 9000 "00000000" // /* MW 8 */ + 9001 "01011011" // /* MW 7 */ + 9002 "00000001" // /* MW 6 */ + 9003 "01101000" // /* MW 5 */ + 9004 "00111001" // /* MW 4 */ + 9005 "11110000" // /* MW 3 */ + 9006 "00101100" // /* MW 2 */ + 9007 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9009 "00000000" // /* MW 15 */ + 9010 "00000000" // /* MW 14 */ + 9011 "01111000" // /* MW 13 */ + 9012 "01010110" // /* MW 12 */ + 9013 "11011000" // /* MW 11 */ + 9014 "00000001" // /* MW 10 */ + 9015 "00000000" // /* MW 9 */ + 9016 "00000000" // /* MW 8 */ + 9017 "11010011" // /* MW 7 */ + 9018 "00011100" // /* MW 6 */ + 9019 "00100001" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "11110000" // /* MW 3 */ + 9022 "00101100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.src_ref 4 "max_min.hpp" 20 104 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9025 "00000000" // /* MW 15 */ + 9026 "00000000" // /* MW 14 */ + 9027 "01111000" // /* MW 13 */ + 9028 "00110110" // /* MW 12 */ + 9029 "01010000" // /* MW 11 */ + 9030 "00000001" // /* MW 10 */ + 9031 "00000000" // /* MW 9 */ + 9032 "00000000" // /* MW 8 */ + 9033 "01011011" // /* MW 7 */ + 9034 "00000001" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9041 "01110000" // /* MW 7 */ + 9042 "01010110" // /* MW 6 */ + 9043 "11010100" // /* MW 5 */ + 9044 "00000000" // /* MW 4 */ + 9045 "01100000" // /* MW 3 */ + 9046 "10111010" // /* MW 2 */ + 9047 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9049 "01101100" // /* MW 3 */ + 9050 "00010000" // /* MW 2 */ + 9051 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first + 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9053 "01110000" // /* MW 7 */ + 9054 "01010110" // /* MW 6 */ + 9055 "11011000" // /* MW 5 */ + 9056 "00000001" // /* MW 4 */ + 9057 "01100000" // /* MW 3 */ + 9058 "10011010" // /* MW 2 */ + 9059 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 158 4 first + 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9061 "11011001" // /* MW 5 */ + 9062 "01000000" // /* MW 4 */ + 9063 "00000101" // /* MW 3 */ + 9064 "00000000" // /* MW 2 */ + 9065 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9067 "01110000" // /* MW 7 */ + 9068 "01010110" // /* MW 6 */ + 9069 "11010100" // /* MW 5 */ + 9070 "00000000" // /* MW 4 */ + 9071 "01100000" // /* MW 3 */ + 9072 "10111010" // /* MW 2 */ + 9073 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9075 "01101100" // /* MW 3 */ + 9076 "00010000" // /* MW 2 */ + 9077 "00011011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.delay_slot + 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9079 "10101100" // /* MW 3 */ + 9080 "10110000" // /* MW 2 */ + 9081 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.delay_slot + 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9083 "11010011" // /* MW 3 */ + 9084 "00011100" // /* MW 2 */ + 9085 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9087 "11010011" // /* MW 3 */ + 9088 "00011101" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 9089 "00001001" // /* MW 1 */ +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 176 first +.src_ref 7 "superkernels.cpp" 181 6 +.function_start + 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9105 "10000000" // /* MW 5 */ + 9106 "11001000" // /* MW 4 */ + 9107 "11000110" // /* MW 3 */ + 9108 "00000111" // /* MW 2 */ + 9109 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 first + 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9111 "11000001" // /* MW 5 */ + 9112 "10110101" // /* MW 4 */ + 9113 "11011000" // /* MW 3 */ + 9114 "11000010" // /* MW 2 */ + 9115 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 176 + 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9117 "00000001" // /* MW 5 */ + 9118 "00000000" // /* MW 4 */ + 9119 "00000000" // /* MW 3 */ + 9120 "00001000" // /* MW 2 */ + 9121 "00000000" // /* MW 1 */ + 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9123 "01110000" // /* MW 7 */ + 9124 "11010000" // /* MW 6 */ + 9125 "00001011" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "10110000" // /* MW 3 */ + 9128 "01100011" // /* MW 2 */ + 9129 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 + 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9131 "00010001" // /* MW 9 */ + 9132 "00101000" // /* MW 8 */ + 9133 "00110010" // /* MW 7 */ + 9134 "11110011" // /* MW 6 */ + 9135 "00000001" // /* MW 5 */ + 9136 "00000000" // /* MW 4 */ + 9137 "10110000" // /* MW 3 */ + 9138 "10000010" // /* MW 2 */ + 9139 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9141 "11000000" // /* MW 3 */ + 9142 "11010100" // /* MW 2 */ + 9143 "00011011" // /* MW 1 */ + 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9145 "00000000" // /* MW 1 */ + 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9147 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 +.src_ref 7 "superkernels.cpp" 181 16 + 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */ + 9149 "00000001" // /* MW 5 */ + 9150 "01000000" // /* MW 4 */ + 9151 "00110000" // /* MW 3 */ + 9152 "00010010" // /* MW 2 */ + 9153 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 22 first +.delay_slot + 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "10010000" // /* MW 3 */ + 9156 "01100010" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 30 +.delay_slot + 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "11111011" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010100" // /* MW 1 */ +.delay_slot + 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00111101" // /* MW 3 */ + 9164 "11110100" // /* MW 2 */ + 9165 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9167 "01110000" // /* MW 7 */ + 9168 "01100000" // /* MW 6 */ + 9169 "00110000" // /* MW 5 */ + 9170 "00000011" // /* MW 4 */ + 9171 "00110000" // /* MW 3 */ + 9172 "11000110" // /* MW 2 */ + 9173 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 +.src_ref 7 "superkernels.cpp" 195 2 +.delay_slot + 9174 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9175 "00000000" // /* MW 5 */ + 9176 "11001011" // /* MW 4 */ + 9177 "11000000" // /* MW 3 */ + 9178 "00000111" // /* MW 2 */ + 9179 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9181 "11010000" // /* MW 5 */ + 9182 "11001000" // /* MW 4 */ + 9183 "11000100" // /* MW 3 */ + 9184 "00000111" // /* MW 2 */ + 9185 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9187 "00010000" // /* MW 9 */ + 9188 "00110010" // /* MW 8 */ + 9189 "00110010" // /* MW 7 */ + 9190 "11110001" // /* MW 6 */ + 9191 "00000001" // /* MW 5 */ + 9192 "00000000" // /* MW 4 */ + 9193 "11100000" // /* MW 3 */ + 9194 "11000000" // /* MW 2 */ + 9195 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */ + 9199 "00000001" // /* MW 5 */ + 9200 "00000000" // /* MW 4 */ + 9201 "00010000" // /* MW 3 */ + 9202 "00010001" // /* MW 2 */ + 9203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9205 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9207 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00110001" // /* MW 3 */ + 9210 "00100000" // /* MW 2 */ + 9211 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00000101" // /* MW 3 */ + 9214 "00100000" // /* MW 2 */ + 9215 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9217 "00000000" // /* MW 15 */ + 9218 "00000000" // /* MW 14 */ + 9219 "01111000" // /* MW 13 */ + 9220 "10100101" // /* MW 12 */ + 9221 "00000001" // /* MW 11 */ + 9222 "00000000" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "10000000" // /* MW 8 */ + 9225 "00010001" // /* MW 7 */ + 9226 "00000110" // /* MW 6 */ + 9227 "00100010" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 +.return_address + 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10100000" // /* MW 5 */ + 9234 "11001000" // /* MW 4 */ + 9235 "11000100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 first +.src_ref 7 "superkernels.cpp" 188 43 + 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "11000000" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110001" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11000010" // /* MW 2 */ + 9247 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 +.src_ref 7 "superkernels.cpp" 188 43 +.src_ref 7 "superkernels.cpp" 195 2 + 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "11000000" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110001" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000110" // /* MW 2 */ + 9257 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 first +.src_ref 7 "superkernels.cpp" 188 16 +.src_ref 7 "superkernels.cpp" 193 47 + 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9259 "00010000" // /* MW 9 */ + 9260 "00101010" // /* MW 8 */ + 9261 "10110010" // /* MW 7 */ + 9262 "11110000" // /* MW 6 */ + 9263 "00000001" // /* MW 5 */ + 9264 "00000000" // /* MW 4 */ + 9265 "01010000" // /* MW 3 */ + 9266 "11001011" // /* MW 2 */ + 9267 "01001000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ + 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */ + 9273 "00000000" // /* MW 5 */ + 9274 "00000000" // /* MW 4 */ + 9275 "00111000" // /* MW 3 */ + 9276 "00010010" // /* MW 2 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 +.delay_slot + 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "11000000" // /* MW 5 */ + 9280 "11001000" // /* MW 4 */ + 9281 "11000000" // /* MW 3 */ + 9282 "00000111" // /* MW 2 */ + 9283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 27 first +.delay_slot + 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9287 "00001111" // /* MW 3 */ + 9288 "01100001" // /* MW 2 */ + 9289 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 first +.delay_slot + 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9291 "10100011" // /* MW 5 */ + 9292 "00001100" // /* MW 4 */ + 9293 "11110000" // /* MW 3 */ + 9294 "00101100" // /* MW 2 */ + 9295 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 16 first +.delay_slot + 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9297 "00000000" // /* MW 15 */ + 9298 "00000000" // /* MW 14 */ + 9299 "01111000" // /* MW 13 */ + 9300 "10100101" // /* MW 12 */ + 9301 "00000001" // /* MW 11 */ + 9302 "00000000" // /* MW 10 */ + 9303 "00000000" // /* MW 9 */ + 9304 "10000000" // /* MW 8 */ + 9305 "00010001" // /* MW 7 */ + 9306 "00000110" // /* MW 6 */ + 9307 "00100001" // /* MW 5 */ + 9308 "00000000" // /* MW 4 */ + 9309 "11110000" // /* MW 3 */ + 9310 "00101100" // /* MW 2 */ + 9311 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 193 47 +.src_ref 7 "superkernels.cpp" 195 2 + 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9313 "00000000" // /* MW 15 */ + 9314 "00000000" // /* MW 14 */ + 9315 "00010000" // /* MW 13 */ + 9316 "00101010" // /* MW 12 */ + 9317 "10110010" // /* MW 11 */ + 9318 "11110000" // /* MW 10 */ + 9319 "00000001" // /* MW 9 */ + 9320 "00000000" // /* MW 8 */ + 9321 "10001011" // /* MW 7 */ + 9322 "10000000" // /* MW 6 */ + 9323 "00100010" // /* MW 5 */ + 9324 "00000000" // /* MW 4 */ + 9325 "11110000" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "00000000" // /* MW 7 */ + 9330 "11000011" // /* MW 6 */ + 9331 "10110011" // /* MW 5 */ + 9332 "00000011" // /* MW 4 */ + 9333 "01100000" // /* MW 3 */ + 9334 "10010001" // /* MW 2 */ + 9335 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9337 "00010000" // /* MW 9 */ + 9338 "00100000" // /* MW 8 */ + 9339 "00110010" // /* MW 7 */ + 9340 "11110000" // /* MW 6 */ + 9341 "00000001" // /* MW 5 */ + 9342 "00000000" // /* MW 4 */ + 9343 "11010000" // /* MW 3 */ + 9344 "11101110" // /* MW 2 */ + 9345 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9347 "00010110" // /* MW 3 */ + 9348 "11111110" // /* MW 2 */ + 9349 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9351 "00110110" // /* MW 3 */ + 9352 "11111110" // /* MW 2 */ + 9353 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9355 "01010110" // /* MW 3 */ + 9356 "01000110" // /* MW 2 */ + 9357 "00000111" // /* MW 1 */ + 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9359 "00000000" // /* MW 1 */ + 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9361 "00000000" // /* MW 1 */ + 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9363 "00000000" // /* MW 1 */ + 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9365 "00000000" // /* MW 1 */ + 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9367 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9369 "00000010" // /* MW 3 */ + 9370 "01100001" // /* MW 2 */ + 9371 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9373 "00010001" // /* MW 3 */ + 9374 "00000110" // /* MW 2 */ + 9375 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9377 "11111101" // /* MW 3 */ + 9378 "11100000" // /* MW 2 */ + 9379 "00010111" // /* MW 1 */ + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ + 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9383 "00000000" // /* MW 1 */ + 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9385 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9387 "00001000" // /* MW 3 */ + 9388 "10010011" // /* MW 2 */ + 9389 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 + 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9391 "10000001" // /* MW 5 */ + 9392 "10101101" // /* MW 4 */ + 9393 "10100111" // /* MW 3 */ + 9394 "00000000" // /* MW 2 */ + 9395 "00000100" // /* MW 1 */ + 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9397 "00000000" // /* MW 1 */ + 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first + 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "00110110" // /* MW 3 */ + 9402 "00000110" // /* MW 2 */ + 9403 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9405 "10000001" // /* MW 5 */ + 9406 "11011101" // /* MW 4 */ + 9407 "11011100" // /* MW 3 */ + 9408 "11001010" // /* MW 2 */ + 9409 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 47 first + 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9411 "01110110" // /* MW 3 */ + 9412 "00000110" // /* MW 2 */ + 9413 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9415 "10011110" // /* MW 3 */ + 9416 "01011100" // /* MW 2 */ + 9417 "00000111" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 195 2 first +.no_stack_arguments + 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */ + 9421 "00000001" // /* MW 5 */ + 9422 "00000000" // /* MW 4 */ + 9423 "01001000" // /* MW 3 */ + 9424 "00010001" // /* MW 2 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9427 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first +.delay_slot + 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9429 "00000111" // /* MW 3 */ + 9430 "01100010" // /* MW 2 */ + 9431 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.delay_slot + 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9433 "00110001" // /* MW 3 */ + 9434 "00000110" // /* MW 2 */ + 9435 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 first +.delay_slot + 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9437 "00001101" // /* MW 3 */ + 9438 "11100001" // /* MW 2 */ + 9439 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 +.delay_slot + 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9441 "00000000" // /* MW 15 */ + 9442 "00000000" // /* MW 14 */ + 9443 "10101000" // /* MW 13 */ + 9444 "10100000" // /* MW 12 */ + 9445 "00110100" // /* MW 11 */ + 9446 "00000000" // /* MW 10 */ + 9447 "00000000" // /* MW 9 */ + 9448 "00000000" // /* MW 8 */ + 9449 "01011011" // /* MW 7 */ + 9450 "00000001" // /* MW 6 */ + 9451 "00100000" // /* MW 5 */ + 9452 "00000000" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 +.src_ref 7 "superkernels.cpp" 198 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9457 "00010000" // /* MW 9 */ + 9458 "00100000" // /* MW 8 */ + 9459 "00110010" // /* MW 7 */ + 9460 "11110011" // /* MW 6 */ + 9461 "00000001" // /* MW 5 */ + 9462 "00000000" // /* MW 4 */ + 9463 "11010000" // /* MW 3 */ + 9464 "11000110" // /* MW 2 */ + 9465 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9467 "00000101" // /* MW 3 */ + 9468 "00100000" // /* MW 2 */ + 9469 "00010000" // /* MW 1 */ + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9481 "00001000" // /* MW 3 */ + 9482 "01010001" // /* MW 2 */ + 9483 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9485 "00010000" // /* MW 9 */ + 9486 "00110000" // /* MW 8 */ + 9487 "00110010" // /* MW 7 */ + 9488 "11110001" // /* MW 6 */ + 9489 "00000001" // /* MW 5 */ + 9490 "00000000" // /* MW 4 */ + 9491 "11010000" // /* MW 3 */ + 9492 "11001110" // /* MW 2 */ + 9493 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 first + 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9495 "00110110" // /* MW 3 */ + 9496 "00000110" // /* MW 2 */ + 9497 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 + 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9499 "01010110" // /* MW 3 */ + 9500 "00000110" // /* MW 2 */ + 9501 "00000010" // /* MW 1 */ + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ + 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9507 "00000000" // /* MW 1 */ + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00110001" // /* MW 3 */ + 9512 "00100001" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9515 "00010001" // /* MW 3 */ + 9516 "11100110" // /* MW 2 */ + 9517 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 16 first + 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9519 "00101000" // /* MW 3 */ + 9520 "01100001" // /* MW 2 */ + 9521 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 + 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */ + 9523 "00000001" // /* MW 5 */ + 9524 "01000000" // /* MW 4 */ + 9525 "10101000" // /* MW 3 */ + 9526 "00010010" // /* MW 2 */ + 9527 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9537 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 + 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9539 "00000001" // /* MW 3 */ + 9540 "00100000" // /* MW 2 */ + 9541 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 first + 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "00000000" // /* MW 7 */ + 9546 "10000000" // /* MW 6 */ + 9547 "00010001" // /* MW 5 */ + 9548 "00000110" // /* MW 4 */ + 9549 "11110110" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 200 + 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9553 "00111001" // /* MW 3 */ + 9554 "11110100" // /* MW 2 */ + 9555 "00000111" // /* MW 1 */ + 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00011001" // /* MW 3 */ + 9558 "11111011" // /* MW 2 */ + 9559 "00000111" // /* MW 1 */ + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9567 "11110001" // /* MW 3 */ + 9568 "11111101" // /* MW 2 */ + 9569 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9571 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9573 "00000000" // /* MW 3 */ + 9574 "00101000" // /* MW 2 */ + 9575 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9577 "10100000" // /* MW 3 */ + 9578 "01100111" // /* MW 2 */ + 9579 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 +.delay_slot + 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9581 "00000001" // /* MW 5 */ + 9582 "00000000" // /* MW 4 */ + 9583 "00000000" // /* MW 3 */ + 9584 "11111000" // /* MW 2 */ + 9585 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9587 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9591 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9601 "01011000" // /* MW 9 */ + 9602 "00000000" // /* MW 8 */ + 9603 "00001000" // /* MW 7 */ + 9604 "00001011" // /* MW 6 */ + 9605 "00100000" // /* MW 5 */ + 9606 "00001000" // /* MW 4 */ + 9607 "11010000" // /* MW 3 */ + 9608 "10000101" // /* MW 2 */ + 9609 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9611 "00000001" // /* MW 3 */ + 9612 "10000000" // /* MW 2 */ + 9613 "00010111" // /* MW 1 */ + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ + 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9617 "00000000" // /* MW 1 */ + 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9619 "00000000" // /* MW 1 */ + 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9621 "00000000" // /* MW 1 */ + 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9623 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9625 "00101001" // /* MW 3 */ + 9626 "00011100" // /* MW 2 */ + 9627 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9629 "00101110" // /* MW 3 */ + 9630 "00011100" // /* MW 2 */ + 9631 "00000001" // /* MW 1 */ + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ + 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9637 "00000000" // /* MW 1 */ + 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9639 "00000000" // /* MW 1 */ + 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9641 "00000000" // /* MW 1 */ + 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9643 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9645 "00101001" // /* MW 3 */ + 9646 "00011100" // /* MW 2 */ + 9647 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9649 "00101110" // /* MW 3 */ + 9650 "00000100" // /* MW 2 */ + 9651 "00000001" // /* MW 1 */ + 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9653 "00000000" // /* MW 1 */ + 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9655 "00000000" // /* MW 1 */ + 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9657 "00000000" // /* MW 1 */ + 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9659 "00000000" // /* MW 1 */ + 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9661 "00000000" // /* MW 1 */ + 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9663 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00101001" // /* MW 3 */ + 9666 "00011100" // /* MW 2 */ + 9667 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9669 "01110110" // /* MW 3 */ + 9670 "00010100" // /* MW 2 */ + 9671 "00000001" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ + 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9679 "00000000" // /* MW 1 */ + 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9681 "00000000" // /* MW 1 */ + 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9683 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9685 "01110001" // /* MW 3 */ + 9686 "01001100" // /* MW 2 */ + 9687 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9689 "00010111" // /* MW 3 */ + 9690 "00000100" // /* MW 2 */ + 9691 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9693 "00000000" // /* MW 3 */ + 9694 "00101000" // /* MW 2 */ + 9695 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9697 "00000000" // /* MW 5 */ + 9698 "10111110" // /* MW 4 */ + 9699 "11110000" // /* MW 3 */ + 9700 "00000000" // /* MW 2 */ + 9701 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9703 "00010100" // /* MW 3 */ + 9704 "11000010" // /* MW 2 */ + 9705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00100111" // /* MW 3 */ + 9708 "01110110" // /* MW 2 */ + 9709 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "10000010" // /* MW 3 */ + 9712 "00000001" // /* MW 2 */ + 9713 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9715 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9729 "00000001" // /* MW 5 */ + 9730 "00000000" // /* MW 4 */ + 9731 "00000000" // /* MW 3 */ + 9732 "00001000" // /* MW 2 */ + 9733 "00000000" // /* MW 1 */ + 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9735 "00111101" // /* MW 3 */ + 9736 "11111000" // /* MW 2 */ + 9737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */ + 9739 "00000001" // /* MW 5 */ + 9740 "00000000" // /* MW 4 */ + 9741 "11000000" // /* MW 3 */ + 9742 "00010010" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.delay_slot + 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "10011101" // /* MW 3 */ + 9746 "11111111" // /* MW 2 */ + 9747 "00001111" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot + 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9749 "11000000" // /* MW 3 */ + 9750 "01100000" // /* MW 2 */ + 9751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9757 "01100111" // /* MW 3 */ + 9758 "00000001" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.return_address + 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9761 "00111001" // /* MW 3 */ + 9762 "11111000" // /* MW 2 */ + 9763 "00000111" // /* MW 1 */ + 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9765 "00000000" // /* MW 1 */ + 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9767 "00000000" // /* MW 1 */ + 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9769 "00000000" // /* MW 1 */ + 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9771 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9773 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9775 "10011001" // /* MW 3 */ + 9776 "11111111" // /* MW 2 */ + 9777 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9779 "00000000" // /* MW 3 */ + 9780 "00101000" // /* MW 2 */ + 9781 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9789 "00001001" // /* MW 3 */ + 9790 "00100000" // /* MW 2 */ + 9791 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "mul_impl.h" 193 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9793 "01110001" // /* MW 9 */ + 9794 "00000000" // /* MW 8 */ + 9795 "00000000" // /* MW 7 */ + 9796 "00000000" // /* MW 6 */ + 9797 "11111110" // /* MW 5 */ + 9798 "00111111" // /* MW 4 */ + 9799 "00110000" // /* MW 3 */ + 9800 "11000010" // /* MW 2 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9801 "11101000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 3 "elementwise_binary_shared.h" 107 first +.src_ref 3 "elementwise_binary_shared.h" 119 37 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.function_start + 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9809 "11000000" // /* MW 3 */ + 9810 "00010110" // /* MW 2 */ + 9811 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first + 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "00000111" // /* MW 3 */ + 9814 "01100000" // /* MW 2 */ + 9815 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 122 22 first + 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "01010010" // /* MW 3 */ + 9818 "00011100" // /* MW 2 */ + 9819 "00000011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 15 first + 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9821 "10010110" // /* MW 3 */ + 9822 "00000100" // /* MW 2 */ + 9823 "00000011" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00001001" // /* MW 3 */ + 9834 "00000110" // /* MW 2 */ + 9835 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 107 + 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9837 "00000001" // /* MW 5 */ + 9838 "00000000" // /* MW 4 */ + 9839 "00000000" // /* MW 3 */ + 9840 "00010000" // /* MW 2 */ + 9841 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9843 "01001100" // /* MW 3 */ + 9844 "11000110" // /* MW 2 */ + 9845 "00010000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 +.src_ref 3 "elementwise_binary_shared.h" 124 8 + 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */ + 9847 "01100000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "00010000" // /* MW 7 */ + 9850 "11100010" // /* MW 6 */ + 9851 "00000100" // /* MW 5 */ + 9852 "00000110" // /* MW 4 */ + 9853 "00000000" // /* MW 3 */ + 9854 "00000001" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 first +.delay_slot + 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9857 "01110010" // /* MW 3 */ + 9858 "00000101" // /* MW 2 */ + 9859 "00011000" // /* MW 1 */ +.delay_slot + 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9861 "11000000" // /* MW 3 */ + 9862 "01011110" // /* MW 2 */ + 9863 "00011000" // /* MW 1 */ +.delay_slot + 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9865 "11100000" // /* MW 3 */ + 9866 "01100101" // /* MW 2 */ + 9867 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9869 "10000001" // /* MW 5 */ + 9870 "11011101" // /* MW 4 */ + 9871 "00001010" // /* MW 3 */ + 9872 "11110010" // /* MW 2 */ + 9873 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first +.delay_slot + 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9875 "00010011" // /* MW 3 */ + 9876 "00000100" // /* MW 2 */ + 9877 "00001111" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.src_ref 3 "elementwise_binary_shared.h" 131 19 + 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9879 "01110010" // /* MW 9 */ + 9880 "10111001" // /* MW 8 */ + 9881 "00000100" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "00001011" // /* MW 5 */ + 9884 "10000000" // /* MW 4 */ + 9885 "10000100" // /* MW 3 */ + 9886 "10000010" // /* MW 2 */ + 9887 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 126 34 first +.src_ref 3 "elementwise_binary_shared.h" 131 19 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000001" // /* MW 5 */ + 9890 "00000001" // /* MW 4 */ + 9891 "01010100" // /* MW 3 */ + 9892 "00000001" // /* MW 2 */ + 9893 "10000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 131 12 +.src_ref 3 "elementwise_binary_shared.h" 131 35 + 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */ + 9907 "00000001" // /* MW 5 */ + 9908 "01000000" // /* MW 4 */ + 9909 "01110000" // /* MW 3 */ + 9910 "00010011" // /* MW 2 */ + 9911 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9913 "00000000" // /* MW 3 */ + 9914 "00000000" // /* MW 2 */ + 9915 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11010000" // /* MW 5 */ + 9918 "11001000" // /* MW 4 */ + 9919 "11001000" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 9929 "00100000" // /* MW 9 */ + 9930 "00000000" // /* MW 8 */ + 9931 "00000000" // /* MW 7 */ + 9932 "11011110" // /* MW 6 */ + 9933 "00000100" // /* MW 5 */ + 9934 "00000000" // /* MW 4 */ + 9935 "10000000" // /* MW 3 */ + 9936 "00000100" // /* MW 2 */ + 9937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9947 "00100110" // /* MW 5 */ + 9948 "00001000" // /* MW 4 */ + 9949 "11110000" // /* MW 3 */ + 9950 "00101100" // /* MW 2 */ + 9951 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "10000000" // /* MW 3 */ + 9954 "00000000" // /* MW 2 */ + 9955 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01010000" // /* MW 11 */ + 9958 "00000000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "00000001" // /* MW 8 */ + 9961 "00010011" // /* MW 7 */ + 9962 "00000100" // /* MW 6 */ + 9963 "00100001" // /* MW 5 */ + 9964 "00000000" // /* MW 4 */ + 9965 "11110000" // /* MW 3 */ + 9966 "00101100" // /* MW 2 */ + 9967 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */ + 9969 "00000000" // /* MW 5 */ + 9970 "00000000" // /* MW 4 */ + 9971 "11001000" // /* MW 3 */ + 9972 "00010011" // /* MW 2 */ + 9973 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9975 "01110000" // /* MW 7 */ + 9976 "01100000" // /* MW 6 */ + 9977 "10110000" // /* MW 5 */ + 9978 "00000011" // /* MW 4 */ + 9979 "01100000" // /* MW 3 */ + 9980 "10010001" // /* MW 2 */ + 9981 "00010011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9989 "10000001" // /* MW 11 */ + 9990 "10101101" // /* MW 10 */ + 9991 "00000000" // /* MW 9 */ + 9992 "00000000" // /* MW 8 */ + 9993 "00000000" // /* MW 7 */ + 9994 "00000000" // /* MW 6 */ + 9995 "00100000" // /* MW 5 */ + 9996 "00000000" // /* MW 4 */ + 9997 "11110000" // /* MW 3 */ + 9998 "00101100" // /* MW 2 */ + 9999 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 +.src_ref 3 "elementwise_binary_shared.h" 150 97 + 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10001 "00001101" // /* MW 3 */ + 10002 "00000100" // /* MW 2 */ + 10003 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 97 first + 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10005 "01000111" // /* MW 3 */ + 10006 "10000100" // /* MW 2 */ + 10007 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */ + 10009 "00000001" // /* MW 5 */ + 10010 "01000000" // /* MW 4 */ + 10011 "10100000" // /* MW 3 */ + 10012 "00010011" // /* MW 2 */ + 10013 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "00000000" // /* MW 5 */ + 10016 "00100000" // /* MW 4 */ + 10017 "00000000" // /* MW 3 */ + 10018 "10000000" // /* MW 2 */ + 10019 "00111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10021 "11010000" // /* MW 5 */ + 10022 "11001000" // /* MW 4 */ + 10023 "11001000" // /* MW 3 */ + 10024 "00000111" // /* MW 2 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10033 "00000000" // /* MW 15 */ + 10034 "00000000" // /* MW 14 */ + 10035 "00010000" // /* MW 13 */ + 10036 "00000000" // /* MW 12 */ + 10037 "00001000" // /* MW 11 */ + 10038 "00000000" // /* MW 10 */ + 10039 "11100000" // /* MW 9 */ + 10040 "00101111" // /* MW 8 */ + 10041 "01011011" // /* MW 7 */ + 10042 "00000001" // /* MW 6 */ + 10043 "00100000" // /* MW 5 */ + 10044 "00000000" // /* MW 4 */ + 10045 "11110000" // /* MW 3 */ + 10046 "00101100" // /* MW 2 */ + 10047 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10049 "01011000" // /* MW 9 */ + 10050 "10111110" // /* MW 8 */ + 10051 "01000111" // /* MW 7 */ + 10052 "00000000" // /* MW 6 */ + 10053 "11010010" // /* MW 5 */ + 10054 "00000010" // /* MW 4 */ + 10055 "01010000" // /* MW 3 */ + 10056 "10000000" // /* MW 2 */ + 10057 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10059 "10000000" // /* MW 3 */ + 10060 "00000000" // /* MW 2 */ + 10061 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10063 "00000000" // /* MW 3 */ + 10064 "00000000" // /* MW 2 */ + 10065 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 171 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10067 "10000000" // /* MW 3 */ + 10068 "00000000" // /* MW 2 */ + 10069 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10073 "00010001" // /* MW 3 */ + 10074 "00000000" // /* MW 2 */ + 10075 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10077 "00100101" // /* MW 5 */ + 10078 "00000001" // /* MW 4 */ + 10079 "11100010" // /* MW 3 */ + 10080 "00000010" // /* MW 2 */ + 10081 "10100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10083 "10000000" // /* MW 3 */ + 10084 "00111010" // /* MW 2 */ + 10085 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10087 "10010110" // /* MW 3 */ + 10088 "01000000" // /* MW 2 */ + 10089 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10093 "00000001" // /* MW 3 */ + 10094 "00000001" // /* MW 2 */ + 10095 "00011000" // /* MW 1 */ + 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10097 "00000000" // /* MW 1 */ + 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10101 "00010010" // /* MW 3 */ + 10102 "00000000" // /* MW 2 */ + 10103 "00000101" // /* MW 1 */ + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10113 "00000000" // /* MW 1 */ + 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10115 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10117 "01110010" // /* MW 3 */ + 10118 "00000001" // /* MW 2 */ + 10119 "00011000" // /* MW 1 */ + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100110" // /* MW 5 */ + 10124 "11111000" // /* MW 4 */ + 10125 "11111111" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 166 4 first +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first + 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10129 "00010000" // /* MW 11 */ + 10130 "00000000" // /* MW 10 */ + 10131 "01111100" // /* MW 9 */ + 10132 "00001000" // /* MW 8 */ + 10133 "00000000" // /* MW 7 */ + 10134 "00000000" // /* MW 6 */ + 10135 "11101000" // /* MW 5 */ + 10136 "01010000" // /* MW 4 */ + 10137 "11011110" // /* MW 3 */ + 10138 "10001010" // /* MW 2 */ + 10139 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10141 "00010000" // /* MW 11 */ + 10142 "00011000" // /* MW 10 */ + 10143 "10111100" // /* MW 9 */ + 10144 "00001001" // /* MW 8 */ + 10145 "00000000" // /* MW 7 */ + 10146 "00000000" // /* MW 6 */ + 10147 "01101000" // /* MW 5 */ + 10148 "10010000" // /* MW 4 */ + 10149 "00000010" // /* MW 3 */ + 10150 "01100011" // /* MW 2 */ + 10151 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 177 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10153 "11110001" // /* MW 7 */ + 10154 "00000000" // /* MW 6 */ + 10155 "11101000" // /* MW 5 */ + 10156 "01010000" // /* MW 4 */ + 10157 "01111110" // /* MW 3 */ + 10158 "00000101" // /* MW 2 */ + 10159 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10161 "01101000" // /* MW 5 */ + 10162 "10010000" // /* MW 4 */ + 10163 "01010010" // /* MW 3 */ + 10164 "10010000" // /* MW 2 */ + 10165 "10000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10167 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10169 "00101011" // /* MW 3 */ + 10170 "00001000" // /* MW 2 */ + 10171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10175 "00111101" // /* MW 3 */ + 10176 "10000100" // /* MW 2 */ + 10177 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10179 "00000001" // /* MW 7 */ + 10180 "00000010" // /* MW 6 */ + 10181 "00000001" // /* MW 5 */ + 10182 "10000110" // /* MW 4 */ + 10183 "01111110" // /* MW 3 */ + 10184 "01110001" // /* MW 2 */ + 10185 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10187 "11101000" // /* MW 5 */ + 10188 "01010000" // /* MW 4 */ + 10189 "01111110" // /* MW 3 */ + 10190 "00000011" // /* MW 2 */ + 10191 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "11010100" // /* MW 9 */ + 10200 "00001001" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "00100000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "01110000" // /* MW 3 */ + 10206 "00000101" // /* MW 2 */ + 10207 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "00100000" // /* MW 5 */ + 10220 "00000000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00010000" // /* MW 15 */ + 10226 "00001000" // /* MW 14 */ + 10227 "01111000" // /* MW 13 */ + 10228 "10100101" // /* MW 12 */ + 10229 "00000001" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "01111000" // /* MW 13 */ + 10244 "10100101" // /* MW 12 */ + 10245 "00000001" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "01011011" // /* MW 7 */ + 10250 "00000001" // /* MW 6 */ + 10251 "11101000" // /* MW 5 */ + 10252 "01010000" // /* MW 4 */ + 10253 "01111110" // /* MW 3 */ + 10254 "00000011" // /* MW 2 */ + 10255 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "00000000" // /* MW 8 */ + 10265 "10100011" // /* MW 7 */ + 10266 "00011100" // /* MW 6 */ + 10267 "00100010" // /* MW 5 */ + 10268 "00000000" // /* MW 4 */ + 10269 "01110000" // /* MW 3 */ + 10270 "00000101" // /* MW 2 */ + 10271 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "01111000" // /* MW 13 */ + 10276 "10100101" // /* MW 12 */ + 10277 "00000001" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "00000000" // /* MW 8 */ + 10281 "01011011" // /* MW 7 */ + 10282 "00000001" // /* MW 6 */ + 10283 "00100000" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10289 "00010000" // /* MW 15 */ + 10290 "00001000" // /* MW 14 */ + 10291 "01111000" // /* MW 13 */ + 10292 "10100101" // /* MW 12 */ + 10293 "00000001" // /* MW 11 */ + 10294 "00000000" // /* MW 10 */ + 10295 "00000000" // /* MW 9 */ + 10296 "00000000" // /* MW 8 */ + 10297 "01011011" // /* MW 7 */ + 10298 "00000001" // /* MW 6 */ + 10299 "00100000" // /* MW 5 */ + 10300 "00000000" // /* MW 4 */ + 10301 "11110000" // /* MW 3 */ + 10302 "00101100" // /* MW 2 */ + 10303 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10305 "00000001" // /* MW 5 */ + 10306 "00000000" // /* MW 4 */ + 10307 "00000000" // /* MW 3 */ + 10308 "11110000" // /* MW 2 */ + 10309 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "10100011" // /* MW 3 */ + 10312 "00011100" // /* MW 2 */ + 10313 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10315 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10317 "00000001" // /* MW 3 */ + 10318 "00000010" // /* MW 2 */ + 10319 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10321 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10323 "00000000" // /* MW 3 */ + 10324 "00101000" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "10100011" // /* MW 3 */ + 10328 "00011100" // /* MW 2 */ + 10329 "00001010" // /* MW 1 */ +.delay_slot + 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "10100000" // /* MW 3 */ + 10332 "01100000" // /* MW 2 */ + 10333 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10335 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.delay_slot + 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10337 "10100011" // /* MW 3 */ + 10338 "00011100" // /* MW 2 */ + 10339 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 10341 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 4 "vector.hpp" 538 13 +.src_ref 3 "elementwise_binary_shared.h" 237 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.function_start + 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10353 "01110010" // /* MW 9 */ + 10354 "11110000" // /* MW 8 */ + 10355 "01100000" // /* MW 7 */ + 10356 "00000000" // /* MW 6 */ + 10357 "10001011" // /* MW 5 */ + 10358 "10001000" // /* MW 4 */ + 10359 "10000011" // /* MW 3 */ + 10360 "10000010" // /* MW 2 */ + 10361 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 first +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 + 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10363 "10000001" // /* MW 5 */ + 10364 "11000101" // /* MW 4 */ + 10365 "01010100" // /* MW 3 */ + 10366 "00000001" // /* MW 2 */ + 10367 "01000000" // /* MW 1 */ + 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10369 "00000000" // /* MW 1 */ + 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10371 "00000000" // /* MW 1 */ + 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10373 "00000000" // /* MW 1 */ + 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10375 "00000000" // /* MW 1 */ + 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10377 "00000000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 244 12 +.src_ref 3 "elementwise_binary_shared.h" 244 35 + 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */ + 10381 "00000001" // /* MW 5 */ + 10382 "00000000" // /* MW 4 */ + 10383 "01101000" // /* MW 3 */ + 10384 "00010100" // /* MW 2 */ + 10385 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 237 +.delay_slot + 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10387 "00000001" // /* MW 5 */ + 10388 "00000000" // /* MW 4 */ + 10389 "00000000" // /* MW 3 */ + 10390 "00001000" // /* MW 2 */ + 10391 "00000000" // /* MW 1 */ +.delay_slot + 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10393 "11100000" // /* MW 3 */ + 10394 "01010101" // /* MW 2 */ + 10395 "00011000" // /* MW 1 */ +.delay_slot + 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10397 "11100000" // /* MW 3 */ + 10398 "01100000" // /* MW 2 */ + 10399 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10401 "00101011" // /* MW 3 */ + 10402 "00000111" // /* MW 2 */ + 10403 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10405 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 247 12 first +.no_stack_arguments + 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10407 "00000001" // /* MW 5 */ + 10408 "00000000" // /* MW 4 */ + 10409 "00101000" // /* MW 3 */ + 10410 "00010011" // /* MW 2 */ + 10411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10421 "10000001" // /* MW 11 */ + 10422 "10101101" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "00000000" // /* MW 7 */ + 10426 "00000000" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.return_address + 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10433 "00000000" // /* MW 5 */ + 10434 "00000000" // /* MW 4 */ + 10435 "01111000" // /* MW 3 */ + 10436 "00010100" // /* MW 2 */ + 10437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10447 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 3 "elementwise_binary_shared.h" 245 12 first +.no_stack_arguments + 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10449 "00000001" // /* MW 5 */ + 10450 "00000000" // /* MW 4 */ + 10451 "00101000" // /* MW 3 */ + 10452 "00010011" // /* MW 2 */ + 10453 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.delay_slot + 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10455 "01110000" // /* MW 7 */ + 10456 "01100000" // /* MW 6 */ + 10457 "10110000" // /* MW 5 */ + 10458 "00000000" // /* MW 4 */ + 10459 "01100000" // /* MW 3 */ + 10460 "10010001" // /* MW 2 */ + 10461 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10469 "10000001" // /* MW 11 */ + 10470 "10101101" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00000000" // /* MW 7 */ + 10474 "00000000" // /* MW 6 */ + 10475 "00100000" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.return_address + 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10481 "10000000" // /* MW 3 */ + 10482 "01110001" // /* MW 2 */ + 10483 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 first + 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10485 "00000000" // /* MW 3 */ + 10486 "00101000" // /* MW 2 */ + 10487 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.delay_slot + 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10489 "00000001" // /* MW 5 */ + 10490 "00000000" // /* MW 4 */ + 10491 "00000000" // /* MW 3 */ + 10492 "11111000" // /* MW 2 */ + 10493 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 10501 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 205 first +.src_ref 7 "superkernels.cpp" 210 6 +.function_start + 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10513 "10000000" // /* MW 5 */ + 10514 "11001000" // /* MW 4 */ + 10515 "11000110" // /* MW 3 */ + 10516 "00000111" // /* MW 2 */ + 10517 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 first + 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10519 "11000001" // /* MW 5 */ + 10520 "10110101" // /* MW 4 */ + 10521 "11011000" // /* MW 3 */ + 10522 "11000010" // /* MW 2 */ + 10523 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 205 + 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10525 "00000001" // /* MW 5 */ + 10526 "00000000" // /* MW 4 */ + 10527 "00000000" // /* MW 3 */ + 10528 "00001000" // /* MW 2 */ + 10529 "00000000" // /* MW 1 */ + 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10531 "01110000" // /* MW 7 */ + 10532 "11010000" // /* MW 6 */ + 10533 "00001011" // /* MW 5 */ + 10534 "00000000" // /* MW 4 */ + 10535 "10110000" // /* MW 3 */ + 10536 "01100011" // /* MW 2 */ + 10537 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 + 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10539 "00010001" // /* MW 9 */ + 10540 "00101000" // /* MW 8 */ + 10541 "00110010" // /* MW 7 */ + 10542 "11110011" // /* MW 6 */ + 10543 "00000001" // /* MW 5 */ + 10544 "00000000" // /* MW 4 */ + 10545 "10110000" // /* MW 3 */ + 10546 "10000010" // /* MW 2 */ + 10547 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10549 "11000000" // /* MW 3 */ + 10550 "11010100" // /* MW 2 */ + 10551 "00011011" // /* MW 1 */ + 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10553 "00000000" // /* MW 1 */ + 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 +.src_ref 7 "superkernels.cpp" 210 16 + 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */ + 10557 "00000001" // /* MW 5 */ + 10558 "01000000" // /* MW 4 */ + 10559 "11110000" // /* MW 3 */ + 10560 "00010100" // /* MW 2 */ + 10561 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 22 first +.delay_slot + 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10563 "10010000" // /* MW 3 */ + 10564 "01100010" // /* MW 2 */ + 10565 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 30 +.delay_slot + 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10567 "11111011" // /* MW 3 */ + 10568 "01100011" // /* MW 2 */ + 10569 "00010100" // /* MW 1 */ +.delay_slot + 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10571 "00111101" // /* MW 3 */ + 10572 "11110100" // /* MW 2 */ + 10573 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10575 "01110000" // /* MW 7 */ + 10576 "01100000" // /* MW 6 */ + 10577 "00110000" // /* MW 5 */ + 10578 "00000011" // /* MW 4 */ + 10579 "00110000" // /* MW 3 */ + 10580 "11000110" // /* MW 2 */ + 10581 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 +.src_ref 7 "superkernels.cpp" 224 2 +.delay_slot + 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10583 "00000000" // /* MW 5 */ + 10584 "11001010" // /* MW 4 */ + 10585 "11000000" // /* MW 3 */ + 10586 "00000111" // /* MW 2 */ + 10587 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10589 "11010000" // /* MW 5 */ + 10590 "11001000" // /* MW 4 */ + 10591 "11000100" // /* MW 3 */ + 10592 "00000111" // /* MW 2 */ + 10593 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10595 "00010000" // /* MW 9 */ + 10596 "00110010" // /* MW 8 */ + 10597 "00110010" // /* MW 7 */ + 10598 "11110001" // /* MW 6 */ + 10599 "00000001" // /* MW 5 */ + 10600 "00000000" // /* MW 4 */ + 10601 "11100000" // /* MW 3 */ + 10602 "11000000" // /* MW 2 */ + 10603 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "00000000" // /* MW 3 */ + 10610 "00010011" // /* MW 2 */ + 10611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10615 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10617 "00110001" // /* MW 3 */ + 10618 "00100000" // /* MW 2 */ + 10619 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10621 "00000101" // /* MW 3 */ + 10622 "00100000" // /* MW 2 */ + 10623 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10625 "00000000" // /* MW 15 */ + 10626 "00000000" // /* MW 14 */ + 10627 "01111000" // /* MW 13 */ + 10628 "10100101" // /* MW 12 */ + 10629 "00000001" // /* MW 11 */ + 10630 "00000000" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "10000000" // /* MW 8 */ + 10633 "00010001" // /* MW 7 */ + 10634 "00000110" // /* MW 6 */ + 10635 "00100010" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 +.return_address + 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10641 "10100000" // /* MW 5 */ + 10642 "11001000" // /* MW 4 */ + 10643 "11000100" // /* MW 3 */ + 10644 "00000111" // /* MW 2 */ + 10645 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 first +.src_ref 7 "superkernels.cpp" 217 65 + 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10647 "00010000" // /* MW 9 */ + 10648 "10000000" // /* MW 8 */ + 10649 "00110010" // /* MW 7 */ + 10650 "11110001" // /* MW 6 */ + 10651 "00000001" // /* MW 5 */ + 10652 "00000000" // /* MW 4 */ + 10653 "11010000" // /* MW 3 */ + 10654 "11000010" // /* MW 2 */ + 10655 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 +.src_ref 7 "superkernels.cpp" 217 65 +.src_ref 7 "superkernels.cpp" 224 2 + 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10657 "00010000" // /* MW 9 */ + 10658 "10000000" // /* MW 8 */ + 10659 "00110010" // /* MW 7 */ + 10660 "11110001" // /* MW 6 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "11010000" // /* MW 3 */ + 10664 "11000110" // /* MW 2 */ + 10665 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 first +.src_ref 7 "superkernels.cpp" 217 16 +.src_ref 7 "superkernels.cpp" 222 47 + 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10667 "00010000" // /* MW 9 */ + 10668 "00101010" // /* MW 8 */ + 10669 "10110010" // /* MW 7 */ + 10670 "11110000" // /* MW 6 */ + 10671 "00000001" // /* MW 5 */ + 10672 "00000000" // /* MW 4 */ + 10673 "01010000" // /* MW 3 */ + 10674 "11001011" // /* MW 2 */ + 10675 "01001010" // /* MW 1 */ + 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10677 "00000000" // /* MW 1 */ + 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10679 "00000000" // /* MW 1 */ + 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */ + 10681 "00000000" // /* MW 5 */ + 10682 "00000000" // /* MW 4 */ + 10683 "11111000" // /* MW 3 */ + 10684 "00010100" // /* MW 2 */ + 10685 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 +.delay_slot + 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10687 "11000000" // /* MW 5 */ + 10688 "11001000" // /* MW 4 */ + 10689 "11000000" // /* MW 3 */ + 10690 "00000111" // /* MW 2 */ + 10691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 27 first +.delay_slot + 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10695 "00001111" // /* MW 3 */ + 10696 "01100001" // /* MW 2 */ + 10697 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 first +.delay_slot + 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "10100011" // /* MW 5 */ + 10700 "00001100" // /* MW 4 */ + 10701 "11110000" // /* MW 3 */ + 10702 "00101100" // /* MW 2 */ + 10703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 16 first +.delay_slot + 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10705 "00000000" // /* MW 15 */ + 10706 "00000000" // /* MW 14 */ + 10707 "01111000" // /* MW 13 */ + 10708 "10100101" // /* MW 12 */ + 10709 "00000001" // /* MW 11 */ + 10710 "00000000" // /* MW 10 */ + 10711 "00000000" // /* MW 9 */ + 10712 "10000000" // /* MW 8 */ + 10713 "00010001" // /* MW 7 */ + 10714 "00000110" // /* MW 6 */ + 10715 "00100001" // /* MW 5 */ + 10716 "00000000" // /* MW 4 */ + 10717 "11110000" // /* MW 3 */ + 10718 "00101100" // /* MW 2 */ + 10719 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 222 47 +.src_ref 7 "superkernels.cpp" 224 2 + 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10721 "00000000" // /* MW 15 */ + 10722 "00000000" // /* MW 14 */ + 10723 "00010000" // /* MW 13 */ + 10724 "00101010" // /* MW 12 */ + 10725 "10110010" // /* MW 11 */ + 10726 "11110000" // /* MW 10 */ + 10727 "00000001" // /* MW 9 */ + 10728 "00000000" // /* MW 8 */ + 10729 "10001011" // /* MW 7 */ + 10730 "10000000" // /* MW 6 */ + 10731 "00100010" // /* MW 5 */ + 10732 "00000000" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10737 "00000000" // /* MW 7 */ + 10738 "11000011" // /* MW 6 */ + 10739 "10110011" // /* MW 5 */ + 10740 "00000011" // /* MW 4 */ + 10741 "01100000" // /* MW 3 */ + 10742 "10010001" // /* MW 2 */ + 10743 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10745 "00010000" // /* MW 9 */ + 10746 "00100000" // /* MW 8 */ + 10747 "00110010" // /* MW 7 */ + 10748 "11110000" // /* MW 6 */ + 10749 "00000001" // /* MW 5 */ + 10750 "00000000" // /* MW 4 */ + 10751 "11010000" // /* MW 3 */ + 10752 "11101110" // /* MW 2 */ + 10753 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10755 "00010110" // /* MW 3 */ + 10756 "11111110" // /* MW 2 */ + 10757 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10759 "00110110" // /* MW 3 */ + 10760 "11111110" // /* MW 2 */ + 10761 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10763 "01010110" // /* MW 3 */ + 10764 "01000110" // /* MW 2 */ + 10765 "00000111" // /* MW 1 */ + 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10767 "00000000" // /* MW 1 */ + 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10769 "00000000" // /* MW 1 */ + 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10771 "00000000" // /* MW 1 */ + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00000010" // /* MW 3 */ + 10778 "01100001" // /* MW 2 */ + 10779 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00010001" // /* MW 3 */ + 10782 "00000110" // /* MW 2 */ + 10783 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10785 "11111101" // /* MW 3 */ + 10786 "11100000" // /* MW 2 */ + 10787 "00010111" // /* MW 1 */ + 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10789 "00000000" // /* MW 1 */ + 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10791 "00000000" // /* MW 1 */ + 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10793 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "00001000" // /* MW 3 */ + 10796 "10010011" // /* MW 2 */ + 10797 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 + 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10799 "10000001" // /* MW 5 */ + 10800 "10101101" // /* MW 4 */ + 10801 "10100111" // /* MW 3 */ + 10802 "00000000" // /* MW 2 */ + 10803 "00000100" // /* MW 1 */ + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first + 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00110110" // /* MW 3 */ + 10810 "00000110" // /* MW 2 */ + 10811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10813 "10000001" // /* MW 5 */ + 10814 "11011101" // /* MW 4 */ + 10815 "11011100" // /* MW 3 */ + 10816 "11001010" // /* MW 2 */ + 10817 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 47 first + 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10819 "01110110" // /* MW 3 */ + 10820 "00000110" // /* MW 2 */ + 10821 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10823 "10011110" // /* MW 3 */ + 10824 "01011100" // /* MW 2 */ + 10825 "00000111" // /* MW 1 */ + 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10827 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 224 2 first +.no_stack_arguments + 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */ + 10829 "00000001" // /* MW 5 */ + 10830 "00000000" // /* MW 4 */ + 10831 "00111000" // /* MW 3 */ + 10832 "00010100" // /* MW 2 */ + 10833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10835 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first +.delay_slot + 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00000111" // /* MW 3 */ + 10838 "01100010" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.delay_slot + 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00110001" // /* MW 3 */ + 10842 "00000110" // /* MW 2 */ + 10843 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 first +.delay_slot + 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10845 "00001101" // /* MW 3 */ + 10846 "11100001" // /* MW 2 */ + 10847 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 +.delay_slot + 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10849 "00000000" // /* MW 15 */ + 10850 "00000000" // /* MW 14 */ + 10851 "10101000" // /* MW 13 */ + 10852 "10100000" // /* MW 12 */ + 10853 "00110100" // /* MW 11 */ + 10854 "00000000" // /* MW 10 */ + 10855 "00000000" // /* MW 9 */ + 10856 "00000000" // /* MW 8 */ + 10857 "01011011" // /* MW 7 */ + 10858 "00000001" // /* MW 6 */ + 10859 "00100000" // /* MW 5 */ + 10860 "00000000" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 +.src_ref 7 "superkernels.cpp" 227 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10865 "00010000" // /* MW 9 */ + 10866 "00100000" // /* MW 8 */ + 10867 "00110010" // /* MW 7 */ + 10868 "11110011" // /* MW 6 */ + 10869 "00000001" // /* MW 5 */ + 10870 "00000000" // /* MW 4 */ + 10871 "11010000" // /* MW 3 */ + 10872 "11000110" // /* MW 2 */ + 10873 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10875 "00000101" // /* MW 3 */ + 10876 "00100000" // /* MW 2 */ + 10877 "00010000" // /* MW 1 */ + 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10879 "00000000" // /* MW 1 */ + 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10881 "00000000" // /* MW 1 */ + 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10883 "00000000" // /* MW 1 */ + 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10885 "00000000" // /* MW 1 */ + 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10887 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10889 "00001000" // /* MW 3 */ + 10890 "01010001" // /* MW 2 */ + 10891 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10893 "00010000" // /* MW 9 */ + 10894 "00110000" // /* MW 8 */ + 10895 "00110010" // /* MW 7 */ + 10896 "11110001" // /* MW 6 */ + 10897 "00000001" // /* MW 5 */ + 10898 "00000000" // /* MW 4 */ + 10899 "11010000" // /* MW 3 */ + 10900 "11001110" // /* MW 2 */ + 10901 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 first + 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10903 "00110110" // /* MW 3 */ + 10904 "00000110" // /* MW 2 */ + 10905 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 + 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10907 "01010110" // /* MW 3 */ + 10908 "00000110" // /* MW 2 */ + 10909 "00000010" // /* MW 1 */ + 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10911 "00000000" // /* MW 1 */ + 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10913 "00000000" // /* MW 1 */ + 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10915 "00000000" // /* MW 1 */ + 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10917 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00110001" // /* MW 3 */ + 10920 "00100001" // /* MW 2 */ + 10921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "00010001" // /* MW 3 */ + 10924 "11100110" // /* MW 2 */ + 10925 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 16 first + 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10927 "00101000" // /* MW 3 */ + 10928 "01100001" // /* MW 2 */ + 10929 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 + 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */ + 10931 "00000001" // /* MW 5 */ + 10932 "01000000" // /* MW 4 */ + 10933 "01101000" // /* MW 3 */ + 10934 "00010101" // /* MW 2 */ + 10935 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 + 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10947 "00000001" // /* MW 3 */ + 10948 "00100000" // /* MW 2 */ + 10949 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 first + 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10951 "00000000" // /* MW 9 */ + 10952 "00000000" // /* MW 8 */ + 10953 "00000000" // /* MW 7 */ + 10954 "10000000" // /* MW 6 */ + 10955 "00010001" // /* MW 5 */ + 10956 "00000110" // /* MW 4 */ + 10957 "11110110" // /* MW 3 */ + 10958 "00101100" // /* MW 2 */ + 10959 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 229 + 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10961 "00111001" // /* MW 3 */ + 10962 "11110100" // /* MW 2 */ + 10963 "00000111" // /* MW 1 */ + 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00011001" // /* MW 3 */ + 10966 "11111011" // /* MW 2 */ + 10967 "00000111" // /* MW 1 */ + 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10969 "00000000" // /* MW 1 */ + 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10971 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10973 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10975 "11110001" // /* MW 3 */ + 10976 "11111101" // /* MW 2 */ + 10977 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10981 "00000000" // /* MW 3 */ + 10982 "00101000" // /* MW 2 */ + 10983 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10985 "10100000" // /* MW 3 */ + 10986 "01100111" // /* MW 2 */ + 10987 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 +.delay_slot + 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10989 "00000001" // /* MW 5 */ + 10990 "00000000" // /* MW 4 */ + 10991 "00000000" // /* MW 3 */ + 10992 "11111000" // /* MW 2 */ + 10993 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10999 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 11008 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11009 "00000001" // /* MW 5 */ + 11010 "00100001" // /* MW 4 */ + 11011 "00000000" // /* MW 3 */ + 11012 "00000000" // /* MW 2 */ + 11013 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11014 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11015 "11000000" // /* MW 3 */ + 11016 "01010000" // /* MW 2 */ + 11017 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11018 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11019 "10010000" // /* MW 3 */ + 11020 "01100000" // /* MW 2 */ + 11021 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 11022 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11023 "00010001" // /* MW 3 */ + 11024 "00000100" // /* MW 2 */ + 11025 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 11026 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11027 "00010001" // /* MW 3 */ + 11028 "00010100" // /* MW 2 */ + 11029 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 11031 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 11040 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11041 "00101110" // /* MW 3 */ + 11042 "00011100" // /* MW 2 */ + 11043 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 11044 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11045 "00000001" // /* MW 5 */ + 11046 "00000000" // /* MW 4 */ + 11047 "00000000" // /* MW 3 */ + 11048 "00001000" // /* MW 2 */ + 11049 "00000000" // /* MW 1 */ + 11050 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11051 "00111101" // /* MW 3 */ + 11052 "11111100" // /* MW 2 */ + 11053 "00001111" // /* MW 1 */ + 11054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11055 "00000000" // /* MW 1 */ + 11056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11057 "00000000" // /* MW 1 */ + 11058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11059 "00000000" // /* MW 1 */ + 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11061 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 11062 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11063 "00101001" // /* MW 3 */ + 11064 "00011100" // /* MW 2 */ + 11065 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 11066 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11067 "00101110" // /* MW 3 */ + 11068 "00011100" // /* MW 2 */ + 11069 "00000001" // /* MW 1 */ + 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11071 "00000000" // /* MW 1 */ + 11072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11073 "00000000" // /* MW 1 */ + 11074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11075 "00000000" // /* MW 1 */ + 11076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11077 "00000000" // /* MW 1 */ + 11078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11079 "00000000" // /* MW 1 */ + 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11081 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 11082 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11083 "00101001" // /* MW 3 */ + 11084 "00011100" // /* MW 2 */ + 11085 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 11086 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11087 "00101110" // /* MW 3 */ + 11088 "00000100" // /* MW 2 */ + 11089 "00000001" // /* MW 1 */ + 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11091 "00000000" // /* MW 1 */ + 11092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11093 "00000000" // /* MW 1 */ + 11094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11095 "00000000" // /* MW 1 */ + 11096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11097 "00000000" // /* MW 1 */ + 11098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11099 "00000000" // /* MW 1 */ + 11100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11101 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 11102 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11103 "00101001" // /* MW 3 */ + 11104 "00011100" // /* MW 2 */ + 11105 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 11106 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11107 "00101110" // /* MW 3 */ + 11108 "00010100" // /* MW 2 */ + 11109 "00000001" // /* MW 1 */ + 11110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11111 "00000000" // /* MW 1 */ + 11112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11113 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 11114 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */ + 11115 "00000001" // /* MW 5 */ + 11116 "00000000" // /* MW 4 */ + 11117 "10000000" // /* MW 3 */ + 11118 "00010101" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ +.delay_slot + 11120 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11121 "10011101" // /* MW 3 */ + 11122 "11111011" // /* MW 2 */ + 11123 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11127 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 11128 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11129 "00101001" // /* MW 3 */ + 11130 "11011100" // /* MW 2 */ + 11131 "00001000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot + 11132 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11133 "11000000" // /* MW 3 */ + 11134 "01100000" // /* MW 2 */ + 11135 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.return_address + 11136 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11137 "00111001" // /* MW 3 */ + 11138 "11111100" // /* MW 2 */ + 11139 "00000111" // /* MW 1 */ + 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11141 "00000000" // /* MW 1 */ + 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11143 "00000000" // /* MW 1 */ + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ + 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11147 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11149 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11150 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11151 "10011001" // /* MW 3 */ + 11152 "11111011" // /* MW 2 */ + 11153 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11154 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11155 "00000000" // /* MW 3 */ + 11156 "00101000" // /* MW 2 */ + 11157 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11161 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11163 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11164 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11165 "00000001" // /* MW 3 */ + 11166 "00100000" // /* MW 2 */ + 11167 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "mul_impl.h" 134 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11168 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11169 "01110001" // /* MW 9 */ + 11170 "00000000" // /* MW 8 */ + 11171 "00000000" // /* MW 7 */ + 11172 "00000000" // /* MW 6 */ + 11173 "11111110" // /* MW 5 */ + 11174 "00111111" // /* MW 4 */ + 11175 "00110000" // /* MW 3 */ + 11176 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 11177 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 149 first +.src_ref 3 "elementwise_binary.h" 156 37 +.src_ref 3 "elementwise_binary.h" 168 8 first +.function_start + 11184 "10111010" // MOVA m0, #32; MOVXM ls, #11360 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11185 "00010000" // /* MW 9 */ + 11186 "00110000" // /* MW 8 */ + 11187 "01111110" // /* MW 7 */ + 11188 "00001000" // /* MW 6 */ + 11189 "00000000" // /* MW 5 */ + 11190 "00000000" // /* MW 4 */ + 11191 "10000000" // /* MW 3 */ + 11192 "00000000" // /* MW 2 */ + 11193 "00000100" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 37 first +.src_ref 3 "elementwise_binary.h" 168 8 first + 11194 "10111010" // LDA r3, [p3], m0; MOVXM le, #11376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11195 "00010000" // /* MW 9 */ + 11196 "00111000" // /* MW 8 */ + 11197 "10111110" // /* MW 7 */ + 11198 "00001001" // /* MW 6 */ + 11199 "00000000" // /* MW 5 */ + 11200 "00000000" // /* MW 4 */ + 11201 "11010000" // /* MW 3 */ + 11202 "00001110" // /* MW 2 */ + 11203 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11204 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11205 "01011000" // /* MW 9 */ + 11206 "00111100" // /* MW 8 */ + 11207 "00001011" // /* MW 7 */ + 11208 "01001000" // /* MW 6 */ + 11209 "00010111" // /* MW 5 */ + 11210 "00111110" // /* MW 4 */ + 11211 "11010000" // /* MW 3 */ + 11212 "10010000" // /* MW 2 */ + 11213 "01100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11214 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11215 "00010000" // /* MW 9 */ + 11216 "00110100" // /* MW 8 */ + 11217 "00110010" // /* MW 7 */ + 11218 "11110010" // /* MW 6 */ + 11219 "00000001" // /* MW 5 */ + 11220 "00000000" // /* MW 4 */ + 11221 "11010000" // /* MW 3 */ + 11222 "10000000" // /* MW 2 */ + 11223 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11224 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11225 "01000010" // /* MW 3 */ + 11226 "00000100" // /* MW 2 */ + 11227 "00000100" // /* MW 1 */ + 11228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11229 "00000000" // /* MW 1 */ + 11230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11231 "00000000" // /* MW 1 */ + 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11233 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11234 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11235 "00011101" // /* MW 3 */ + 11236 "11000010" // /* MW 2 */ + 11237 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 168 8 +.src_ref 3 "elementwise_binary.h" 187 20 first + 11238 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11239 "11111001" // /* MW 5 */ + 11240 "11100001" // /* MW 4 */ + 11241 "10001010" // /* MW 3 */ + 11242 "00001110" // /* MW 2 */ + 11243 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11244 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11245 "01101000" // /* MW 5 */ + 11246 "01010000" // /* MW 4 */ + 11247 "01110000" // /* MW 3 */ + 11248 "00010011" // /* MW 2 */ + 11249 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11250 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11251 "10000000" // /* MW 7 */ + 11252 "10111010" // /* MW 6 */ + 11253 "11101000" // /* MW 5 */ + 11254 "01010000" // /* MW 4 */ + 11255 "01110000" // /* MW 3 */ + 11256 "00011011" // /* MW 2 */ + 11257 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11258 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11259 "01101000" // /* MW 5 */ + 11260 "01010000" // /* MW 4 */ + 11261 "01110000" // /* MW 3 */ + 11262 "00010011" // /* MW 2 */ + 11263 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11264 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11265 "11101000" // /* MW 5 */ + 11266 "01010000" // /* MW 4 */ + 11267 "01110000" // /* MW 3 */ + 11268 "00011011" // /* MW 2 */ + 11269 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11270 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11271 "10011011" // /* MW 3 */ + 11272 "00001000" // /* MW 2 */ + 11273 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11274 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11275 "01101000" // /* MW 5 */ + 11276 "01010000" // /* MW 4 */ + 11277 "01110000" // /* MW 3 */ + 11278 "00011011" // /* MW 2 */ + 11279 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11280 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11281 "11101000" // /* MW 5 */ + 11282 "01010000" // /* MW 4 */ + 11283 "01110000" // /* MW 3 */ + 11284 "00010011" // /* MW 2 */ + 11285 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11286 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11287 "01000001" // /* MW 9 */ + 11288 "11100010" // /* MW 8 */ + 11289 "00000000" // /* MW 7 */ + 11290 "00011101" // /* MW 6 */ + 11291 "00110100" // /* MW 5 */ + 11292 "00101000" // /* MW 4 */ + 11293 "01110000" // /* MW 3 */ + 11294 "00011011" // /* MW 2 */ + 11295 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11296 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11297 "01100001" // /* MW 9 */ + 11298 "11100000" // /* MW 8 */ + 11299 "00000001" // /* MW 7 */ + 11300 "00011101" // /* MW 6 */ + 11301 "01110100" // /* MW 5 */ + 11302 "00101000" // /* MW 4 */ + 11303 "01110000" // /* MW 3 */ + 11304 "00010011" // /* MW 2 */ + 11305 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11306 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11307 "01000001" // /* MW 9 */ + 11308 "11100010" // /* MW 8 */ + 11309 "00000000" // /* MW 7 */ + 11310 "00011101" // /* MW 6 */ + 11311 "00110100" // /* MW 5 */ + 11312 "00101000" // /* MW 4 */ + 11313 "01110000" // /* MW 3 */ + 11314 "00011011" // /* MW 2 */ + 11315 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11316 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11317 "01100001" // /* MW 9 */ + 11318 "11100000" // /* MW 8 */ + 11319 "00000001" // /* MW 7 */ + 11320 "00011101" // /* MW 6 */ + 11321 "01110100" // /* MW 5 */ + 11322 "00101000" // /* MW 4 */ + 11323 "01110000" // /* MW 3 */ + 11324 "00010011" // /* MW 2 */ + 11325 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11326 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11327 "01000001" // /* MW 9 */ + 11328 "11100010" // /* MW 8 */ + 11329 "00000000" // /* MW 7 */ + 11330 "00011101" // /* MW 6 */ + 11331 "00110100" // /* MW 5 */ + 11332 "00101000" // /* MW 4 */ + 11333 "01110000" // /* MW 3 */ + 11334 "00011011" // /* MW 2 */ + 11335 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11336 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11337 "01100001" // /* MW 9 */ + 11338 "11100000" // /* MW 8 */ + 11339 "00000001" // /* MW 7 */ + 11340 "00011101" // /* MW 6 */ + 11341 "01110100" // /* MW 5 */ + 11342 "00101000" // /* MW 4 */ + 11343 "01110000" // /* MW 3 */ + 11344 "00010011" // /* MW 2 */ + 11345 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11346 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11347 "01000001" // /* MW 13 */ + 11348 "11100010" // /* MW 12 */ + 11349 "00000000" // /* MW 11 */ + 11350 "10001100" // /* MW 10 */ + 11351 "01110000" // /* MW 9 */ + 11352 "00001000" // /* MW 8 */ + 11353 "00000000" // /* MW 7 */ + 11354 "00000000" // /* MW 6 */ + 11355 "01101000" // /* MW 5 */ + 11356 "01010000" // /* MW 4 */ + 11357 "01110000" // /* MW 3 */ + 11358 "00011011" // /* MW 2 */ + 11359 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11360 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11361 "00000011" // /* MW 15 */ + 11362 "00001111" // /* MW 14 */ + 11363 "01111000" // /* MW 13 */ + 11364 "10100101" // /* MW 12 */ + 11365 "00000001" // /* MW 11 */ + 11366 "00000000" // /* MW 10 */ + 11367 "00000000" // /* MW 9 */ + 11368 "00000000" // /* MW 8 */ + 11369 "10100011" // /* MW 7 */ + 11370 "00011100" // /* MW 6 */ + 11371 "11101010" // /* MW 5 */ + 11372 "01010000" // /* MW 4 */ + 11373 "01110000" // /* MW 3 */ + 11374 "00010011" // /* MW 2 */ + 11375 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11376 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11377 "00010010" // /* MW 15 */ + 11378 "00000111" // /* MW 14 */ + 11379 "01111000" // /* MW 13 */ + 11380 "10100101" // /* MW 12 */ + 11381 "00000001" // /* MW 11 */ + 11382 "00000000" // /* MW 10 */ + 11383 "00000000" // /* MW 9 */ + 11384 "00000000" // /* MW 8 */ + 11385 "00100011" // /* MW 7 */ + 11386 "00011100" // /* MW 6 */ + 11387 "01101010" // /* MW 5 */ + 11388 "01010000" // /* MW 4 */ + 11389 "01110000" // /* MW 3 */ + 11390 "00011011" // /* MW 2 */ + 11391 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 11392 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11393 "01100001" // /* MW 7 */ + 11394 "11100000" // /* MW 6 */ + 11395 "00000001" // /* MW 5 */ + 11396 "00000010" // /* MW 4 */ + 11397 "01100000" // /* MW 3 */ + 11398 "10010100" // /* MW 2 */ + 11399 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11400 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11401 "01000001" // /* MW 7 */ + 11402 "11100010" // /* MW 6 */ + 11403 "00000000" // /* MW 5 */ + 11404 "00000010" // /* MW 4 */ + 11405 "01100000" // /* MW 3 */ + 11406 "10000100" // /* MW 2 */ + 11407 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11408 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11409 "01100001" // /* MW 7 */ + 11410 "11100000" // /* MW 6 */ + 11411 "00000001" // /* MW 5 */ + 11412 "00000010" // /* MW 4 */ + 11413 "01100000" // /* MW 3 */ + 11414 "10010100" // /* MW 2 */ + 11415 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11416 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11417 "01000001" // /* MW 7 */ + 11418 "11100010" // /* MW 6 */ + 11419 "00000000" // /* MW 5 */ + 11420 "00000010" // /* MW 4 */ + 11421 "01100000" // /* MW 3 */ + 11422 "10000100" // /* MW 2 */ + 11423 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11424 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11425 "01100001" // /* MW 7 */ + 11426 "11100000" // /* MW 6 */ + 11427 "00000001" // /* MW 5 */ + 11428 "00000010" // /* MW 4 */ + 11429 "01100000" // /* MW 3 */ + 11430 "10010100" // /* MW 2 */ + 11431 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11432 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11433 "01000001" // /* MW 7 */ + 11434 "11100010" // /* MW 6 */ + 11435 "00000000" // /* MW 5 */ + 11436 "00000010" // /* MW 4 */ + 11437 "01100000" // /* MW 3 */ + 11438 "10000100" // /* MW 2 */ + 11439 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11440 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11441 "01100001" // /* MW 7 */ + 11442 "11100000" // /* MW 6 */ + 11443 "00000001" // /* MW 5 */ + 11444 "00000010" // /* MW 4 */ + 11445 "01100000" // /* MW 3 */ + 11446 "10010100" // /* MW 2 */ + 11447 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11448 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11449 "00100011" // /* MW 3 */ + 11450 "00011100" // /* MW 2 */ + 11451 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 172 4 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11452 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11453 "00000000" // /* MW 5 */ + 11454 "01010000" // /* MW 4 */ + 11455 "01100000" // /* MW 3 */ + 11456 "10010100" // /* MW 2 */ + 11457 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11458 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11459 "00100011" // /* MW 3 */ + 11460 "00011100" // /* MW 2 */ + 11461 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11462 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11463 "10100011" // /* MW 3 */ + 11464 "00011100" // /* MW 2 */ + 11465 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 11466 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11467 "00100011" // /* MW 3 */ + 11468 "00011100" // /* MW 2 */ + 11469 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 11470 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11471 "10100011" // /* MW 3 */ + 11472 "00011100" // /* MW 2 */ + 11473 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 11475 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 369 first +.src_ref 7 "superkernels.cpp" 374 6 +.function_start + 11488 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11489 "10000000" // /* MW 5 */ + 11490 "11001000" // /* MW 4 */ + 11491 "11001000" // /* MW 3 */ + 11492 "00000111" // /* MW 2 */ + 11493 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first + 11494 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11495 "11000001" // /* MW 5 */ + 11496 "10110101" // /* MW 4 */ + 11497 "11011000" // /* MW 3 */ + 11498 "11000010" // /* MW 2 */ + 11499 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 369 + 11500 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11501 "00000001" // /* MW 5 */ + 11502 "00000000" // /* MW 4 */ + 11503 "00000000" // /* MW 3 */ + 11504 "00001000" // /* MW 2 */ + 11505 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 11506 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11507 "01111001" // /* MW 9 */ + 11508 "01100000" // /* MW 8 */ + 11509 "11001010" // /* MW 7 */ + 11510 "10000001" // /* MW 6 */ + 11511 "00010100" // /* MW 5 */ + 11512 "00100011" // /* MW 4 */ + 11513 "10110000" // /* MW 3 */ + 11514 "00111010" // /* MW 2 */ + 11515 "11111111" // /* MW 1 */ + 11516 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11517 "01110000" // /* MW 7 */ + 11518 "11010000" // /* MW 6 */ + 11519 "00001011" // /* MW 5 */ + 11520 "00000000" // /* MW 4 */ + 11521 "10110000" // /* MW 3 */ + 11522 "10000011" // /* MW 2 */ + 11523 "11111101" // /* MW 1 */ + 11524 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "00010101" // /* MW 3 */ + 11526 "11111100" // /* MW 2 */ + 11527 "00001111" // /* MW 1 */ + 11528 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11529 "00111101" // /* MW 3 */ + 11530 "11110000" // /* MW 2 */ + 11531 "00001111" // /* MW 1 */ + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first +.src_ref 7 "superkernels.cpp" 374 16 first + 11534 "10000100" // JNZ r16, #11680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11680 delay_slots=5 */ + 11535 "00000001" // /* MW 5 */ + 11536 "01000000" // /* MW 4 */ + 11537 "11010000" // /* MW 3 */ + 11538 "00010110" // /* MW 2 */ + 11539 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 30 first +.delay_slot + 11540 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "11111011" // /* MW 3 */ + 11542 "01100011" // /* MW 2 */ + 11543 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11544 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11545 "10100000" // /* MW 5 */ + 11546 "11001000" // /* MW 4 */ + 11547 "11000100" // /* MW 3 */ + 11548 "00000111" // /* MW 2 */ + 11549 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11550 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11551 "01110000" // /* MW 7 */ + 11552 "01100000" // /* MW 6 */ + 11553 "00110111" // /* MW 5 */ + 11554 "00000001" // /* MW 4 */ + 11555 "00110000" // /* MW 3 */ + 11556 "11000110" // /* MW 2 */ + 11557 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 11558 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11559 "11000000" // /* MW 3 */ + 11560 "11010110" // /* MW 2 */ + 11561 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 7 "superkernels.cpp" 379 28 +.src_ref 7 "superkernels.cpp" 381 42 +.src_ref 7 "superkernels.cpp" 393 2 +.delay_slot + 11562 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11563 "00010001" // /* MW 9 */ + 11564 "10100000" // /* MW 8 */ + 11565 "10110010" // /* MW 7 */ + 11566 "11110011" // /* MW 6 */ + 11567 "00000001" // /* MW 5 */ + 11568 "00000000" // /* MW 4 */ + 11569 "10110000" // /* MW 3 */ + 11570 "10100011" // /* MW 2 */ + 11571 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11572 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "00010001" // /* MW 9 */ + 11574 "00110100" // /* MW 8 */ + 11575 "00110010" // /* MW 7 */ + 11576 "11110001" // /* MW 6 */ + 11577 "00000001" // /* MW 5 */ + 11578 "00000000" // /* MW 4 */ + 11579 "01100000" // /* MW 3 */ + 11580 "10010001" // /* MW 2 */ + 11581 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11582 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11583 "00010000" // /* MW 9 */ + 11584 "00110010" // /* MW 8 */ + 11585 "00110010" // /* MW 7 */ + 11586 "11110001" // /* MW 6 */ + 11587 "00000001" // /* MW 5 */ + 11588 "00000000" // /* MW 4 */ + 11589 "11100000" // /* MW 3 */ + 11590 "11000000" // /* MW 2 */ + 11591 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11593 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11594 "00000100" // JL #11040 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */ + 11595 "00000001" // /* MW 5 */ + 11596 "00000000" // /* MW 4 */ + 11597 "10010000" // /* MW 3 */ + 11598 "00010101" // /* MW 2 */ + 11599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11603 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11604 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11605 "00110001" // /* MW 3 */ + 11606 "00100000" // /* MW 2 */ + 11607 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11608 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11609 "00000101" // /* MW 3 */ + 11610 "00100000" // /* MW 2 */ + 11611 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11612 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11613 "00010001" // /* MW 3 */ + 11614 "00000110" // /* MW 2 */ + 11615 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 381 42 first +.return_address + 11616 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11617 "00010000" // /* MW 9 */ + 11618 "00101000" // /* MW 8 */ + 11619 "10110010" // /* MW 7 */ + 11620 "11110000" // /* MW 6 */ + 11621 "00000001" // /* MW 5 */ + 11622 "00000000" // /* MW 4 */ + 11623 "11010000" // /* MW 3 */ + 11624 "11000010" // /* MW 2 */ + 11625 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 390 48 + 11626 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11627 "00010000" // /* MW 9 */ + 11628 "00101010" // /* MW 8 */ + 11629 "10110010" // /* MW 7 */ + 11630 "11110001" // /* MW 6 */ + 11631 "00000001" // /* MW 5 */ + 11632 "00000000" // /* MW 4 */ + 11633 "11010000" // /* MW 3 */ + 11634 "11000110" // /* MW 2 */ + 11635 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 28 first +.src_ref 7 "superkernels.cpp" 382 16 +.src_ref 7 "superkernels.cpp" 391 48 + 11636 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11637 "00010000" // /* MW 9 */ + 11638 "00101110" // /* MW 8 */ + 11639 "10110010" // /* MW 7 */ + 11640 "11110000" // /* MW 6 */ + 11641 "00000001" // /* MW 5 */ + 11642 "00000000" // /* MW 4 */ + 11643 "01010000" // /* MW 3 */ + 11644 "11001011" // /* MW 2 */ + 11645 "11101010" // /* MW 1 */ + 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11647 "00000000" // /* MW 1 */ + 11648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11649 "00000000" // /* MW 1 */ + 11650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11651 "00000000" // /* MW 1 */ + 11652 "10000100" // J #11696 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11696 delay_slots=5 */ + 11653 "00000000" // /* MW 5 */ + 11654 "00000000" // /* MW 4 */ + 11655 "11011000" // /* MW 3 */ + 11656 "00010110" // /* MW 2 */ + 11657 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 +.delay_slot + 11658 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11659 "11000000" // /* MW 5 */ + 11660 "11001000" // /* MW 4 */ + 11661 "11000100" // /* MW 3 */ + 11662 "00000111" // /* MW 2 */ + 11663 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 27 first +.delay_slot + 11664 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11665 "00001111" // /* MW 3 */ + 11666 "01100001" // /* MW 2 */ + 11667 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 first +.delay_slot + 11668 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11669 "01010001" // /* MW 3 */ + 11670 "00000110" // /* MW 2 */ + 11671 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 first +.delay_slot + 11672 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11673 "00010001" // /* MW 3 */ + 11674 "00000110" // /* MW 2 */ + 11675 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 382 16 first +.delay_slot + 11676 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11677 "00010001" // /* MW 3 */ + 11678 "00000110" // /* MW 2 */ + 11679 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 390 48 + 11680 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11681 "10101000" // /* MW 5 */ + 11682 "11001000" // /* MW 4 */ + 11683 "11000110" // /* MW 3 */ + 11684 "00000111" // /* MW 2 */ + 11685 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 + 11686 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11687 "00010000" // /* MW 9 */ + 11688 "00101110" // /* MW 8 */ + 11689 "10110010" // /* MW 7 */ + 11690 "11110000" // /* MW 6 */ + 11691 "00000001" // /* MW 5 */ + 11692 "00000000" // /* MW 4 */ + 11693 "11110000" // /* MW 3 */ + 11694 "00101100" // /* MW 2 */ + 11695 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 11696 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11697 "10000110" // /* MW 3 */ + 11698 "01100111" // /* MW 2 */ + 11699 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 11700 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11701 "00010000" // /* MW 9 */ + 11702 "00100000" // /* MW 8 */ + 11703 "00110010" // /* MW 7 */ + 11704 "11110001" // /* MW 6 */ + 11705 "00000001" // /* MW 5 */ + 11706 "00000000" // /* MW 4 */ + 11707 "11010000" // /* MW 3 */ + 11708 "11101110" // /* MW 2 */ + 11709 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11710 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11711 "00010110" // /* MW 3 */ + 11712 "11111110" // /* MW 2 */ + 11713 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11714 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11715 "00110110" // /* MW 3 */ + 11716 "11111110" // /* MW 2 */ + 11717 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first + 11718 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11719 "01010110" // /* MW 3 */ + 11720 "00000110" // /* MW 2 */ + 11721 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 11722 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11723 "01110110" // /* MW 3 */ + 11724 "01000110" // /* MW 2 */ + 11725 "00000000" // /* MW 1 */ + 11726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11727 "00000000" // /* MW 1 */ + 11728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11729 "00000000" // /* MW 1 */ + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11731 "00000000" // /* MW 1 */ + 11732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11733 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11734 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "00000010" // /* MW 3 */ + 11736 "01100001" // /* MW 2 */ + 11737 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 11738 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11739 "00001110" // /* MW 5 */ + 11740 "01000000" // /* MW 4 */ + 11741 "00111001" // /* MW 3 */ + 11742 "11000010" // /* MW 2 */ + 11743 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 + 11744 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11745 "00010001" // /* MW 3 */ + 11746 "00000110" // /* MW 2 */ + 11747 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 11748 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11749 "11111101" // /* MW 3 */ + 11750 "11100000" // /* MW 2 */ + 11751 "00010111" // /* MW 1 */ + 11752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11753 "00000000" // /* MW 1 */ + 11754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11755 "00000000" // /* MW 1 */ + 11756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11757 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 11758 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11759 "00001000" // /* MW 3 */ + 11760 "11010011" // /* MW 2 */ + 11761 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 11762 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11763 "00000110" // /* MW 3 */ + 11764 "01100111" // /* MW 2 */ + 11765 "00011010" // /* MW 1 */ + 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11767 "00000000" // /* MW 1 */ + 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11769 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 11770 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11771 "01110110" // /* MW 3 */ + 11772 "11111111" // /* MW 2 */ + 11773 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11774 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11775 "00110110" // /* MW 3 */ + 11776 "11111110" // /* MW 2 */ + 11777 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11778 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11779 "01010110" // /* MW 3 */ + 11780 "11111110" // /* MW 2 */ + 11781 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 11782 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11783 "01110110" // /* MW 3 */ + 11784 "01010110" // /* MW 2 */ + 11785 "00000010" // /* MW 1 */ + 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11787 "00000000" // /* MW 1 */ + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ + 11790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11791 "00000000" // /* MW 1 */ + 11792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11793 "00000000" // /* MW 1 */ + 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11795 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11796 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11797 "00010010" // /* MW 3 */ + 11798 "10100011" // /* MW 2 */ + 11799 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 11800 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11801 "00110001" // /* MW 3 */ + 11802 "00000110" // /* MW 2 */ + 11803 "00001010" // /* MW 1 */ + 11804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11805 "00000000" // /* MW 1 */ + 11806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11807 "00000000" // /* MW 1 */ + 11808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11809 "00000000" // /* MW 1 */ + 11810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 11812 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00001000" // /* MW 3 */ + 11814 "11010011" // /* MW 2 */ + 11815 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 +.src_ref 7 "superkernels.cpp" 391 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 11816 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11817 "01111001" // /* MW 9 */ + 11818 "01100000" // /* MW 8 */ + 11819 "11001110" // /* MW 7 */ + 11820 "00101001" // /* MW 6 */ + 11821 "00000000" // /* MW 5 */ + 11822 "00000001" // /* MW 4 */ + 11823 "01100000" // /* MW 3 */ + 11824 "00010001" // /* MW 2 */ + 11825 "11010001" // /* MW 1 */ + 11826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11827 "00000000" // /* MW 1 */ + 11828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11829 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 11830 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11831 "00011001" // /* MW 3 */ + 11832 "11101110" // /* MW 2 */ + 11833 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 48 first + 11834 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11835 "00111011" // /* MW 5 */ + 11836 "11011000" // /* MW 4 */ + 11837 "11011111" // /* MW 3 */ + 11838 "11000110" // /* MW 2 */ + 11839 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 first +.src_ref 7 "superkernels.cpp" 393 2 + 11840 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11841 "10000001" // /* MW 5 */ + 11842 "11011101" // /* MW 4 */ + 11843 "11010110" // /* MW 3 */ + 11844 "11010010" // /* MW 2 */ + 11845 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11846 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11847 "01010110" // /* MW 3 */ + 11848 "01001110" // /* MW 2 */ + 11849 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11850 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11851 "00011110" // /* MW 3 */ + 11852 "01011101" // /* MW 2 */ + 11853 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11854 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11855 "11000000" // /* MW 3 */ + 11856 "01100000" // /* MW 2 */ + 11857 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11859 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11860 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11861 "01110110" // /* MW 3 */ + 11862 "00000110" // /* MW 2 */ + 11863 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11865 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 393 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11866 "00000100" // JL #11184 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11184 delay_slots=5 */ + 11867 "00000001" // /* MW 5 */ + 11868 "00000000" // /* MW 4 */ + 11869 "11011000" // /* MW 3 */ + 11870 "00010101" // /* MW 2 */ + 11871 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11872 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11873 "11000000" // /* MW 3 */ + 11874 "11010100" // /* MW 2 */ + 11875 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 11876 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11877 "00001101" // /* MW 3 */ + 11878 "01100011" // /* MW 2 */ + 11879 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 first +.delay_slot + 11880 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11881 "00001101" // /* MW 3 */ + 11882 "00100001" // /* MW 2 */ + 11883 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 +.delay_slot + 11884 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11885 "01000001" // /* MW 3 */ + 11886 "01101001" // /* MW 2 */ + 11887 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11889 "00000000" // /* MW 15 */ + 11890 "00000000" // /* MW 14 */ + 11891 "10101000" // /* MW 13 */ + 11892 "11100010" // /* MW 12 */ + 11893 "00110100" // /* MW 11 */ + 11894 "00000000" // /* MW 10 */ + 11895 "00000000" // /* MW 9 */ + 11896 "00000000" // /* MW 8 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00000001" // /* MW 6 */ + 11899 "00100000" // /* MW 5 */ + 11900 "00000000" // /* MW 4 */ + 11901 "11110000" // /* MW 3 */ + 11902 "00101100" // /* MW 2 */ + 11903 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 11904 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11905 "01111000" // /* MW 9 */ + 11906 "11010000" // /* MW 8 */ + 11907 "10110011" // /* MW 7 */ + 11908 "00101000" // /* MW 6 */ + 11909 "00000000" // /* MW 5 */ + 11910 "00000001" // /* MW 4 */ + 11911 "11010000" // /* MW 3 */ + 11912 "11000110" // /* MW 2 */ + 11913 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 + 11914 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11915 "11000000" // /* MW 5 */ + 11916 "11001000" // /* MW 4 */ + 11917 "11001100" // /* MW 3 */ + 11918 "00000111" // /* MW 2 */ + 11919 "00000000" // /* MW 1 */ + 11920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11921 "00000000" // /* MW 1 */ + 11922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11923 "00000000" // /* MW 1 */ + 11924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11925 "00000000" // /* MW 1 */ + 11926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11927 "00000000" // /* MW 1 */ + 11928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11929 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11930 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11931 "00001000" // /* MW 3 */ + 11932 "01010001" // /* MW 2 */ + 11933 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 11934 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11935 "00110110" // /* MW 3 */ + 11936 "11110110" // /* MW 2 */ + 11937 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 11938 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11939 "00011001" // /* MW 3 */ + 11940 "11101101" // /* MW 2 */ + 11941 "00000111" // /* MW 1 */ + 11942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11943 "00000000" // /* MW 1 */ + 11944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11945 "00000000" // /* MW 1 */ + 11946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11947 "00000000" // /* MW 1 */ + 11948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11949 "00000000" // /* MW 1 */ + 11950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11951 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 11952 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11953 "00010001" // /* MW 3 */ + 11954 "00100011" // /* MW 2 */ + 11955 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 11956 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11957 "01100011" // /* MW 5 */ + 11958 "11101100" // /* MW 4 */ + 11959 "11010011" // /* MW 3 */ + 11960 "11000110" // /* MW 2 */ + 11961 "01001010" // /* MW 1 */ + 11962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11963 "00000000" // /* MW 1 */ + 11964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11965 "00000000" // /* MW 1 */ + 11966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11967 "00000000" // /* MW 1 */ + 11968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11969 "00000000" // /* MW 1 */ + 11970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11971 "00000000" // /* MW 1 */ + 11972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11973 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11974 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11975 "00001000" // /* MW 3 */ + 11976 "01010001" // /* MW 2 */ + 11977 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 +.src_ref 7 "superkernels.cpp" 398 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 11978 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11979 "00010000" // /* MW 9 */ + 11980 "00100000" // /* MW 8 */ + 11981 "10110010" // /* MW 7 */ + 11982 "11110000" // /* MW 6 */ + 11983 "00000001" // /* MW 5 */ + 11984 "00000000" // /* MW 4 */ + 11985 "11010000" // /* MW 3 */ + 11986 "11001110" // /* MW 2 */ + 11987 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 first + 11988 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11989 "01010110" // /* MW 3 */ + 11990 "00000110" // /* MW 2 */ + 11991 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 11992 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11993 "00110110" // /* MW 3 */ + 11994 "00000110" // /* MW 2 */ + 11995 "00000001" // /* MW 1 */ + 11996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11997 "00000000" // /* MW 1 */ + 11998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11999 "00000000" // /* MW 1 */ + 12000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12001 "00000000" // /* MW 1 */ + 12002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12003 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 12004 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12005 "00110001" // /* MW 3 */ + 12006 "00100001" // /* MW 2 */ + 12007 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 12008 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12009 "00010001" // /* MW 3 */ + 12010 "11100110" // /* MW 2 */ + 12011 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 16 first + 12012 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12013 "00101000" // /* MW 3 */ + 12014 "01100001" // /* MW 2 */ + 12015 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12016 "10000100" // JNZ r16, #12048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12048 delay_slots=5 */ + 12017 "00000001" // /* MW 5 */ + 12018 "01000000" // /* MW 4 */ + 12019 "10001000" // /* MW 3 */ + 12020 "00010111" // /* MW 2 */ + 12021 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12031 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 + 12032 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12033 "00000001" // /* MW 3 */ + 12034 "00100000" // /* MW 2 */ + 12035 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 first + 12036 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12037 "11000001" // /* MW 11 */ + 12038 "00001000" // /* MW 10 */ + 12039 "10000011" // /* MW 9 */ + 12040 "00000000" // /* MW 8 */ + 12041 "00000000" // /* MW 7 */ + 12042 "00000000" // /* MW 6 */ + 12043 "00100000" // /* MW 5 */ + 12044 "00000000" // /* MW 4 */ + 12045 "11110000" // /* MW 3 */ + 12046 "00101100" // /* MW 2 */ + 12047 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 400 + 12048 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12049 "00111001" // /* MW 3 */ + 12050 "11110000" // /* MW 2 */ + 12051 "00000111" // /* MW 1 */ + 12052 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12053 "11110001" // /* MW 3 */ + 12054 "11111101" // /* MW 2 */ + 12055 "00000111" // /* MW 1 */ + 12056 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12057 "10011001" // /* MW 3 */ + 12058 "11110111" // /* MW 2 */ + 12059 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12061 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12062 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12063 "11010001" // /* MW 3 */ + 12064 "11111001" // /* MW 2 */ + 12065 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12067 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12069 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12070 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12071 "00000000" // /* MW 3 */ + 12072 "00101000" // /* MW 2 */ + 12073 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12074 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12075 "00001011" // /* MW 3 */ + 12076 "10001110" // /* MW 2 */ + 12077 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 +.delay_slot + 12078 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12079 "00000001" // /* MW 5 */ + 12080 "00000000" // /* MW 4 */ + 12081 "00000000" // /* MW 3 */ + 12082 "11111000" // /* MW 2 */ + 12083 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12085 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12087 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 12089 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 2 "conv2d_dw_bf16_params.h" 211 first +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.function_start + 12096 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12097 "00010000" // /* MW 9 */ + 12098 "11000000" // /* MW 8 */ + 12099 "10110011" // /* MW 7 */ + 12100 "11110000" // /* MW 6 */ + 12101 "00000001" // /* MW 5 */ + 12102 "00000000" // /* MW 4 */ + 12103 "11010000" // /* MW 3 */ + 12104 "10000101" // /* MW 2 */ + 12105 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12106 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12107 "01011000" // /* MW 9 */ + 12108 "00000000" // /* MW 8 */ + 12109 "00001000" // /* MW 7 */ + 12110 "01001011" // /* MW 6 */ + 12111 "00000000" // /* MW 5 */ + 12112 "00000001" // /* MW 4 */ + 12113 "11010000" // /* MW 3 */ + 12114 "10000001" // /* MW 2 */ + 12115 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 211 + 12116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12117 "00000001" // /* MW 5 */ + 12118 "00000000" // /* MW 4 */ + 12119 "00000000" // /* MW 3 */ + 12120 "00001000" // /* MW 2 */ + 12121 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 + 12122 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12123 "00010001" // /* MW 9 */ + 12124 "11000000" // /* MW 8 */ + 12125 "10110011" // /* MW 7 */ + 12126 "11110011" // /* MW 6 */ + 12127 "00000001" // /* MW 5 */ + 12128 "00000000" // /* MW 4 */ + 12129 "10110000" // /* MW 3 */ + 12130 "11110011" // /* MW 2 */ + 12131 "11111110" // /* MW 1 */ + 12132 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12133 "00111101" // /* MW 3 */ + 12134 "11111100" // /* MW 2 */ + 12135 "00001111" // /* MW 1 */ + 12136 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "11110101" // /* MW 3 */ + 12138 "11111001" // /* MW 2 */ + 12139 "00001111" // /* MW 1 */ + 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12141 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12142 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12143 "00101001" // /* MW 3 */ + 12144 "00011100" // /* MW 2 */ + 12145 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12146 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12147 "00001001" // /* MW 3 */ + 12148 "00011100" // /* MW 2 */ + 12149 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12150 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12151 "00101110" // /* MW 3 */ + 12152 "00000100" // /* MW 2 */ + 12153 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12154 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12155 "00001110" // /* MW 3 */ + 12156 "00010100" // /* MW 2 */ + 12157 "00000000" // /* MW 1 */ + 12158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12159 "00000000" // /* MW 1 */ + 12160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12161 "00000000" // /* MW 1 */ + 12162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12163 "00000000" // /* MW 1 */ + 12164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12165 "00000000" // /* MW 1 */ + 12166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12167 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12168 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12169 "00101001" // /* MW 3 */ + 12170 "00000100" // /* MW 2 */ + 12171 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12172 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12173 "00001001" // /* MW 3 */ + 12174 "00010100" // /* MW 2 */ + 12175 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first + 12176 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12177 "00101010" // /* MW 3 */ + 12178 "01011110" // /* MW 2 */ + 12179 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 52 + 12180 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12181 "01001010" // /* MW 3 */ + 12182 "11101110" // /* MW 2 */ + 12183 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12184 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12185 "00101010" // /* MW 3 */ + 12186 "11101100" // /* MW 2 */ + 12187 "00000111" // /* MW 1 */ + 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12189 "00000000" // /* MW 1 */ + 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12191 "00000000" // /* MW 1 */ + 12192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12193 "00000000" // /* MW 1 */ + 12194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12195 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.no_stack_arguments + 12196 "00000100" // JL #14224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=14224 delay_slots=5 */ + 12197 "00000001" // /* MW 5 */ + 12198 "00000000" // /* MW 4 */ + 12199 "11001000" // /* MW 3 */ + 12200 "00011011" // /* MW 2 */ + 12201 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 38 +.delay_slot + 12202 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12203 "01000011" // /* MW 5 */ + 12204 "10111110" // /* MW 4 */ + 12205 "10111000" // /* MW 3 */ + 12206 "11001010" // /* MW 2 */ + 12207 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 +.delay_slot + 12208 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12209 "00010001" // /* MW 5 */ + 12210 "11000010" // /* MW 4 */ + 12211 "10110000" // /* MW 3 */ + 12212 "10000110" // /* MW 2 */ + 12213 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12214 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12215 "00010101" // /* MW 5 */ + 12216 "11101111" // /* MW 4 */ + 12217 "10110111" // /* MW 3 */ + 12218 "01000010" // /* MW 2 */ + 12219 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12220 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12221 "11110001" // /* MW 3 */ + 12222 "00100010" // /* MW 2 */ + 12223 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12224 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12225 "00000000" // /* MW 15 */ + 12226 "00000000" // /* MW 14 */ + 12227 "01111000" // /* MW 13 */ + 12228 "10100101" // /* MW 12 */ + 12229 "00000001" // /* MW 11 */ + 12230 "10010000" // /* MW 10 */ + 12231 "00001000" // /* MW 9 */ + 12232 "00011110" // /* MW 8 */ + 12233 "01011011" // /* MW 7 */ + 12234 "00000001" // /* MW 6 */ + 12235 "00100000" // /* MW 5 */ + 12236 "00000000" // /* MW 4 */ + 12237 "11110000" // /* MW 3 */ + 12238 "00101100" // /* MW 2 */ + 12239 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.return_address + 12240 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12241 "00000010" // /* MW 5 */ + 12242 "01000000" // /* MW 4 */ + 12243 "00100000" // /* MW 3 */ + 12244 "11010010" // /* MW 2 */ + 12245 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first + 12246 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12247 "01000011" // /* MW 5 */ + 12248 "01001000" // /* MW 4 */ + 12249 "01011000" // /* MW 3 */ + 12250 "11000101" // /* MW 2 */ + 12251 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 52 + 12252 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12253 "01101010" // /* MW 3 */ + 12254 "11101110" // /* MW 2 */ + 12255 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12256 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12257 "00110001" // /* MW 3 */ + 12258 "11101100" // /* MW 2 */ + 12259 "00000111" // /* MW 1 */ + 12260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12261 "00000000" // /* MW 1 */ + 12262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12263 "00000000" // /* MW 1 */ + 12264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12265 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first + 12266 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12267 "01000110" // /* MW 3 */ + 12268 "11101001" // /* MW 2 */ + 12269 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 + 12270 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12271 "00001010" // /* MW 3 */ + 12272 "00110111" // /* MW 2 */ + 12273 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first + 12274 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12275 "01100011" // /* MW 5 */ + 12276 "11000110" // /* MW 4 */ + 12277 "10111000" // /* MW 3 */ + 12278 "01001110" // /* MW 2 */ + 12279 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.no_stack_arguments + 12280 "00111010" // ST r17, [sp, #-32]; JL #14224 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=14224 delay_slots=5 */ + 12281 "01000001" // /* MW 9 */ + 12282 "00000000" // /* MW 8 */ + 12283 "00000000" // /* MW 7 */ + 12284 "11110010" // /* MW 6 */ + 12285 "00000110" // /* MW 5 */ + 12286 "00000000" // /* MW 4 */ + 12287 "10110000" // /* MW 3 */ + 12288 "01000110" // /* MW 2 */ + 12289 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12290 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12291 "00100010" // /* MW 3 */ + 12292 "10101001" // /* MW 2 */ + 12293 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12294 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12295 "00001010" // /* MW 3 */ + 12296 "01110111" // /* MW 2 */ + 12297 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.delay_slot + 12298 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12299 "00010001" // /* MW 3 */ + 12300 "00100101" // /* MW 2 */ + 12301 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12302 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12303 "01110000" // /* MW 3 */ + 12304 "00100110" // /* MW 2 */ + 12305 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 87 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12306 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12307 "01100000" // /* MW 13 */ + 12308 "00101011" // /* MW 12 */ + 12309 "00000000" // /* MW 11 */ + 12310 "00001001" // /* MW 10 */ + 12311 "10011000" // /* MW 9 */ + 12312 "00111101" // /* MW 8 */ + 12313 "00100010" // /* MW 7 */ + 12314 "01000001" // /* MW 6 */ + 12315 "00100100" // /* MW 5 */ + 12316 "00000000" // /* MW 4 */ + 12317 "11110000" // /* MW 3 */ + 12318 "00101100" // /* MW 2 */ + 12319 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 +.return_address + 12320 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12321 "01011000" // /* MW 9 */ + 12322 "01000010" // /* MW 8 */ + 12323 "00000000" // /* MW 7 */ + 12324 "11001000" // /* MW 6 */ + 12325 "00110111" // /* MW 5 */ + 12326 "00111111" // /* MW 4 */ + 12327 "00100000" // /* MW 3 */ + 12328 "00001110" // /* MW 2 */ + 12329 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12330 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12331 "01011000" // /* MW 9 */ + 12332 "11111100" // /* MW 8 */ + 12333 "00101001" // /* MW 7 */ + 12334 "00001000" // /* MW 6 */ + 12335 "10000000" // /* MW 5 */ + 12336 "00000001" // /* MW 4 */ + 12337 "00100000" // /* MW 3 */ + 12338 "11000010" // /* MW 2 */ + 12339 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 + 12340 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12341 "01011000" // /* MW 9 */ + 12342 "00000010" // /* MW 8 */ + 12343 "10001000" // /* MW 7 */ + 12344 "10001000" // /* MW 6 */ + 12345 "01100000" // /* MW 5 */ + 12346 "00000000" // /* MW 4 */ + 12347 "00100000" // /* MW 3 */ + 12348 "11011010" // /* MW 2 */ + 12349 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 + 12350 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12351 "01011000" // /* MW 9 */ + 12352 "00010111" // /* MW 8 */ + 12353 "10001000" // /* MW 7 */ + 12354 "00001011" // /* MW 6 */ + 12355 "01010001" // /* MW 5 */ + 12356 "00000000" // /* MW 4 */ + 12357 "01010000" // /* MW 3 */ + 12358 "01000101" // /* MW 2 */ + 12359 "11100001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 + 12360 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12361 "01011000" // /* MW 9 */ + 12362 "00100000" // /* MW 8 */ + 12363 "10000000" // /* MW 7 */ + 12364 "01001000" // /* MW 6 */ + 12365 "00100111" // /* MW 5 */ + 12366 "00111111" // /* MW 4 */ + 12367 "00100000" // /* MW 3 */ + 12368 "01010110" // /* MW 2 */ + 12369 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12370 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12371 "01011000" // /* MW 9 */ + 12372 "00000001" // /* MW 8 */ + 12373 "01001000" // /* MW 7 */ + 12374 "11001011" // /* MW 6 */ + 12375 "01110000" // /* MW 5 */ + 12376 "00000001" // /* MW 4 */ + 12377 "00100000" // /* MW 3 */ + 12378 "01111010" // /* MW 2 */ + 12379 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 + 12380 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12381 "01011000" // /* MW 9 */ + 12382 "11000000" // /* MW 8 */ + 12383 "11101111" // /* MW 7 */ + 12384 "00001011" // /* MW 6 */ + 12385 "11010000" // /* MW 5 */ + 12386 "00000101" // /* MW 4 */ + 12387 "10000000" // /* MW 3 */ + 12388 "11000000" // /* MW 2 */ + 12389 "11101001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12390 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12391 "00100001" // /* MW 3 */ + 12392 "00101000" // /* MW 2 */ + 12393 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12394 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12395 "00000110" // /* MW 3 */ + 12396 "11000111" // /* MW 2 */ + 12397 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 + 12398 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12399 "00000010" // /* MW 5 */ + 12400 "00110110" // /* MW 4 */ + 12401 "01010000" // /* MW 3 */ + 12402 "11110001" // /* MW 2 */ + 12403 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 + 12404 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12405 "11110101" // /* MW 5 */ + 12406 "00111111" // /* MW 4 */ + 12407 "01001011" // /* MW 3 */ + 12408 "00101000" // /* MW 2 */ + 12409 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12410 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12411 "00011101" // /* MW 5 */ + 12412 "00100000" // /* MW 4 */ + 12413 "11110001" // /* MW 3 */ + 12414 "11100001" // /* MW 2 */ + 12415 "01111000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12416 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12417 "01110000" // /* MW 3 */ + 12418 "00101000" // /* MW 2 */ + 12419 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 + 12420 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12421 "00000001" // /* MW 5 */ + 12422 "10100000" // /* MW 4 */ + 12423 "10010000" // /* MW 3 */ + 12424 "00000000" // /* MW 2 */ + 12425 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first + 12426 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12427 "00000001" // /* MW 5 */ + 12428 "10110100" // /* MW 4 */ + 12429 "10111101" // /* MW 3 */ + 12430 "11100111" // /* MW 2 */ + 12431 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first + 12432 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12433 "00000010" // /* MW 5 */ + 12434 "10100011" // /* MW 4 */ + 12435 "10110000" // /* MW 3 */ + 12436 "00001101" // /* MW 2 */ + 12437 "01111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 240 70 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first + 12438 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12439 "11111111" // /* MW 5 */ + 12440 "00110101" // /* MW 4 */ + 12441 "10110000" // /* MW 3 */ + 12442 "11001101" // /* MW 2 */ + 12443 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first + 12444 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12445 "00001111" // /* MW 3 */ + 12446 "11001101" // /* MW 2 */ + 12447 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first + 12448 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12449 "00011111" // /* MW 3 */ + 12450 "11011111" // /* MW 2 */ + 12451 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first +.src_ref 2 "conv2d_dw_bf16_params.h" 238 79 + 12452 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12453 "11111111" // /* MW 5 */ + 12454 "10110011" // /* MW 4 */ + 12455 "11111001" // /* MW 3 */ + 12456 "01101011" // /* MW 2 */ + 12457 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first + 12458 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12459 "00000111" // /* MW 3 */ + 12460 "00110111" // /* MW 2 */ + 12461 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first + 12462 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12463 "11011111" // /* MW 5 */ + 12464 "10010000" // /* MW 4 */ + 12465 "00110111" // /* MW 3 */ + 12466 "11010110" // /* MW 2 */ + 12467 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first + 12468 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12469 "01010010" // /* MW 3 */ + 12470 "00111000" // /* MW 2 */ + 12471 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first + 12472 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12473 "00101101" // /* MW 3 */ + 12474 "00100101" // /* MW 2 */ + 12475 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 + 12476 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12477 "00111111" // /* MW 5 */ + 12478 "11001000" // /* MW 4 */ + 12479 "00111000" // /* MW 3 */ + 12480 "01001010" // /* MW 2 */ + 12481 "11100101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first + 12482 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12483 "11111011" // /* MW 5 */ + 12484 "01110010" // /* MW 4 */ + 12485 "00111111" // /* MW 3 */ + 12486 "11110010" // /* MW 2 */ + 12487 "11111001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 47 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first + 12488 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12489 "00011111" // /* MW 5 */ + 12490 "01110000" // /* MW 4 */ + 12491 "00111001" // /* MW 3 */ + 12492 "11110010" // /* MW 2 */ + 12493 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first + 12494 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12495 "11111011" // /* MW 5 */ + 12496 "11001110" // /* MW 4 */ + 12497 "00111001" // /* MW 3 */ + 12498 "11001110" // /* MW 2 */ + 12499 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first + 12500 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12501 "11101010" // /* MW 5 */ + 12502 "10110011" // /* MW 4 */ + 12503 "10111001" // /* MW 3 */ + 12504 "00110101" // /* MW 2 */ + 12505 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first + 12506 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12507 "01011011" // /* MW 5 */ + 12508 "01111011" // /* MW 4 */ + 12509 "00111001" // /* MW 3 */ + 12510 "11111110" // /* MW 2 */ + 12511 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12512 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12513 "11100010" // /* MW 5 */ + 12514 "00110011" // /* MW 4 */ + 12515 "11111001" // /* MW 3 */ + 12516 "00100001" // /* MW 2 */ + 12517 "10010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first +.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first + 12518 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12519 "00000100" // /* MW 5 */ + 12520 "11110011" // /* MW 4 */ + 12521 "00111111" // /* MW 3 */ + 12522 "10000010" // /* MW 2 */ + 12523 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first + 12524 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12525 "01101101" // /* MW 3 */ + 12526 "11111111" // /* MW 2 */ + 12527 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 247 73 + 12528 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12529 "11111111" // /* MW 5 */ + 12530 "10111111" // /* MW 4 */ + 12531 "00111001" // /* MW 3 */ + 12532 "01100110" // /* MW 2 */ + 12533 "11110000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first + 12534 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12535 "11011011" // /* MW 5 */ + 12536 "11000110" // /* MW 4 */ + 12537 "00111000" // /* MW 3 */ + 12538 "10000110" // /* MW 2 */ + 12539 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first + 12540 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12541 "11111111" // /* MW 5 */ + 12542 "00110001" // /* MW 4 */ + 12543 "00111001" // /* MW 3 */ + 12544 "10100100" // /* MW 2 */ + 12545 "11000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12546 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12547 "11000011" // /* MW 5 */ + 12548 "11011011" // /* MW 4 */ + 12549 "00110011" // /* MW 3 */ + 12550 "11011010" // /* MW 2 */ + 12551 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12552 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12553 "01011011" // /* MW 5 */ + 12554 "01000011" // /* MW 4 */ + 12555 "00111000" // /* MW 3 */ + 12556 "11001010" // /* MW 2 */ + 12557 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12558 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12559 "01011011" // /* MW 5 */ + 12560 "11111100" // /* MW 4 */ + 12561 "00111001" // /* MW 3 */ + 12562 "10011110" // /* MW 2 */ + 12563 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12564 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12565 "11000001" // /* MW 5 */ + 12566 "11011010" // /* MW 4 */ + 12567 "00111110" // /* MW 3 */ + 12568 "11001110" // /* MW 2 */ + 12569 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first + 12570 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12571 "11110010" // /* MW 5 */ + 12572 "10111111" // /* MW 4 */ + 12573 "00011110" // /* MW 3 */ + 12574 "00100000" // /* MW 2 */ + 12575 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12576 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12577 "10100011" // /* MW 5 */ + 12578 "01000011" // /* MW 4 */ + 12579 "00111000" // /* MW 3 */ + 12580 "11011010" // /* MW 2 */ + 12581 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 + 12582 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12583 "01011001" // /* MW 9 */ + 12584 "11111111" // /* MW 8 */ + 12585 "00001111" // /* MW 7 */ + 12586 "01101110" // /* MW 6 */ + 12587 "01101101" // /* MW 5 */ + 12588 "00011111" // /* MW 4 */ + 12589 "00110000" // /* MW 3 */ + 12590 "11000010" // /* MW 2 */ + 12591 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first + 12592 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12593 "10000001" // /* MW 5 */ + 12594 "01101010" // /* MW 4 */ + 12595 "00111110" // /* MW 3 */ + 12596 "11001010" // /* MW 2 */ + 12597 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 253 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first + 12598 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12599 "11000011" // /* MW 5 */ + 12600 "01010010" // /* MW 4 */ + 12601 "00111010" // /* MW 3 */ + 12602 "11101010" // /* MW 2 */ + 12603 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 + 12604 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12605 "00001000" // /* MW 11 */ + 12606 "00010000" // /* MW 10 */ + 12607 "01101101" // /* MW 9 */ + 12608 "10110010" // /* MW 8 */ + 12609 "00001000" // /* MW 7 */ + 12610 "10101011" // /* MW 6 */ + 12611 "01110001" // /* MW 5 */ + 12612 "00011110" // /* MW 4 */ + 12613 "00000111" // /* MW 3 */ + 12614 "00010001" // /* MW 2 */ + 12615 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first + 12616 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12617 "01110001" // /* MW 3 */ + 12618 "00011110" // /* MW 2 */ + 12619 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first + 12620 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12621 "11111011" // /* MW 5 */ + 12622 "01010010" // /* MW 4 */ + 12623 "00111000" // /* MW 3 */ + 12624 "11000110" // /* MW 2 */ + 12625 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 + 12626 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12627 "10000011" // /* MW 5 */ + 12628 "01000010" // /* MW 4 */ + 12629 "00111100" // /* MW 3 */ + 12630 "11000010" // /* MW 2 */ + 12631 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first + 12632 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12633 "11111011" // /* MW 5 */ + 12634 "01010010" // /* MW 4 */ + 12635 "00111001" // /* MW 3 */ + 12636 "11000110" // /* MW 2 */ + 12637 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12638 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12639 "10000011" // /* MW 5 */ + 12640 "01000010" // /* MW 4 */ + 12641 "00111100" // /* MW 3 */ + 12642 "11000010" // /* MW 2 */ + 12643 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first + 12644 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12645 "01010001" // /* MW 3 */ + 12646 "00011110" // /* MW 2 */ + 12647 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first + 12648 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12649 "00110001" // /* MW 3 */ + 12650 "00011110" // /* MW 2 */ + 12651 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first + 12652 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12653 "00010001" // /* MW 3 */ + 12654 "00001010" // /* MW 2 */ + 12655 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first + 12656 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12657 "00001010" // /* MW 3 */ + 12658 "00000110" // /* MW 2 */ + 12659 "00000111" // /* MW 1 */ + 12660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12661 "00000000" // /* MW 1 */ + 12662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12663 "00000000" // /* MW 1 */ + 12664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12665 "00000000" // /* MW 1 */ + 12666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12667 "00000000" // /* MW 1 */ + 12668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12669 "00000000" // /* MW 1 */ + 12670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12671 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 58 + 12672 "10000100" // JZ r16, #12704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12704 delay_slots=5 */ + 12673 "00000001" // /* MW 5 */ + 12674 "00000000" // /* MW 4 */ + 12675 "11010000" // /* MW 3 */ + 12676 "00011000" // /* MW 2 */ + 12677 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12678 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12679 "01100000" // /* MW 3 */ + 12680 "00111011" // /* MW 2 */ + 12681 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12682 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12683 "00000000" // /* MW 5 */ + 12684 "10100000" // /* MW 4 */ + 12685 "00001001" // /* MW 3 */ + 12686 "01111111" // /* MW 2 */ + 12687 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12693 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12694 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12695 "00000001" // /* MW 9 */ + 12696 "00100110" // /* MW 8 */ + 12697 "00000000" // /* MW 7 */ + 12698 "00000000" // /* MW 6 */ + 12699 "01011011" // /* MW 5 */ + 12700 "00000001" // /* MW 4 */ + 12701 "11110000" // /* MW 3 */ + 12702 "00101100" // /* MW 2 */ + 12703 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 + 12704 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12705 "00010000" // /* MW 9 */ + 12706 "00110100" // /* MW 8 */ + 12707 "00110010" // /* MW 7 */ + 12708 "11110000" // /* MW 6 */ + 12709 "00000001" // /* MW 5 */ + 12710 "00000000" // /* MW 4 */ + 12711 "00100000" // /* MW 3 */ + 12712 "10000111" // /* MW 2 */ + 12713 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12714 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12715 "11100010" // /* MW 5 */ + 12716 "00000100" // /* MW 4 */ + 12717 "01010000" // /* MW 3 */ + 12718 "11000000" // /* MW 2 */ + 12719 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 + 12720 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12721 "11101001" // /* MW 5 */ + 12722 "00000010" // /* MW 4 */ + 12723 "00100001" // /* MW 3 */ + 12724 "10000011" // /* MW 2 */ + 12725 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12726 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12727 "00100101" // /* MW 5 */ + 12728 "00000001" // /* MW 4 */ + 12729 "00100000" // /* MW 3 */ + 12730 "00111110" // /* MW 2 */ + 12731 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first + 12732 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12733 "00000001" // /* MW 5 */ + 12734 "00000000" // /* MW 4 */ + 12735 "00000000" // /* MW 3 */ + 12736 "11111000" // /* MW 2 */ + 12737 "11111111" // /* MW 1 */ + 12738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12739 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12741 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12742 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12743 "00010111" // /* MW 3 */ + 12744 "00000010" // /* MW 2 */ + 12745 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12746 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 12747 "01000001" // /* MW 5 */ + 12748 "01110000" // /* MW 4 */ + 12749 "00001111" // /* MW 3 */ + 12750 "00000000" // /* MW 2 */ + 12751 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12752 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12753 "00010110" // /* MW 3 */ + 12754 "01000000" // /* MW 2 */ + 12755 "00001000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12756 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12757 "11000000" // /* MW 3 */ + 12758 "01100000" // /* MW 2 */ + 12759 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12760 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12761 "00000001" // /* MW 3 */ + 12762 "00000001" // /* MW 2 */ + 12763 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 12767 "00000000" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 2 "conv2d_dw_bf16.h" 199 first +.function_start + 12768 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12769 "11000000" // /* MW 3 */ + 12770 "01010110" // /* MW 2 */ + 12771 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 2 "conv2d_dw_bf16.h" 204 82 + 12772 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12773 "10101001" // /* MW 5 */ + 12774 "00000001" // /* MW 4 */ + 12775 "11011110" // /* MW 3 */ + 12776 "10010011" // /* MW 2 */ + 12777 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 1 "io_buffer_main.h" 125 25 + 12778 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12779 "00000010" // /* MW 5 */ + 12780 "11010001" // /* MW 4 */ + 12781 "11010110" // /* MW 3 */ + 12782 "10000011" // /* MW 2 */ + 12783 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 204 82 first + 12784 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12785 "10001010" // /* MW 3 */ + 12786 "11101000" // /* MW 2 */ + 12787 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 first + 12788 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12789 "01000110" // /* MW 3 */ + 12790 "11111101" // /* MW 2 */ + 12791 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 12792 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12793 "00100110" // /* MW 3 */ + 12794 "00111101" // /* MW 2 */ + 12795 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 12796 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12797 "01000110" // /* MW 3 */ + 12798 "11111111" // /* MW 2 */ + 12799 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 12800 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12801 "00100110" // /* MW 3 */ + 12802 "00101111" // /* MW 2 */ + 12803 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 12804 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12805 "00000110" // /* MW 3 */ + 12806 "00101101" // /* MW 2 */ + 12807 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 first + 12808 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12809 "01000110" // /* MW 3 */ + 12810 "11111100" // /* MW 2 */ + 12811 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 12812 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12813 "00100110" // /* MW 3 */ + 12814 "00111100" // /* MW 2 */ + 12815 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 12816 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12817 "01000110" // /* MW 3 */ + 12818 "11111110" // /* MW 2 */ + 12819 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 12820 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12821 "00100110" // /* MW 3 */ + 12822 "00101110" // /* MW 2 */ + 12823 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 12824 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12825 "00000110" // /* MW 3 */ + 12826 "00101100" // /* MW 2 */ + 12827 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 first + 12828 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12829 "11000110" // /* MW 3 */ + 12830 "11111100" // /* MW 2 */ + 12831 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 12832 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12833 "10100110" // /* MW 3 */ + 12834 "00111100" // /* MW 2 */ + 12835 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 12836 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12837 "11000110" // /* MW 3 */ + 12838 "11111110" // /* MW 2 */ + 12839 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 12840 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12841 "10100110" // /* MW 3 */ + 12842 "00101110" // /* MW 2 */ + 12843 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 12844 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12845 "10000110" // /* MW 3 */ + 12846 "00101100" // /* MW 2 */ + 12847 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 first + 12848 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12849 "11000110" // /* MW 3 */ + 12850 "11111111" // /* MW 2 */ + 12851 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 + 12852 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12853 "10100110" // /* MW 3 */ + 12854 "00101111" // /* MW 2 */ + 12855 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 12856 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12857 "00010000" // /* MW 9 */ + 12858 "00110100" // /* MW 8 */ + 12859 "00110010" // /* MW 7 */ + 12860 "11110010" // /* MW 6 */ + 12861 "00000001" // /* MW 5 */ + 12862 "00000000" // /* MW 4 */ + 12863 "11010000" // /* MW 3 */ + 12864 "11110000" // /* MW 2 */ + 12865 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 12866 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12867 "10000001" // /* MW 5 */ + 12868 "11000101" // /* MW 4 */ + 12869 "01011000" // /* MW 3 */ + 12870 "10011000" // /* MW 2 */ + 12871 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 12872 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12873 "00010000" // /* MW 3 */ + 12874 "00001111" // /* MW 2 */ + 12875 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 204 43 + 12876 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12877 "01011000" // /* MW 11 */ + 12878 "00000000" // /* MW 10 */ + 12879 "01100000" // /* MW 9 */ + 12880 "01101010" // /* MW 8 */ + 12881 "00100000" // /* MW 7 */ + 12882 "00000000" // /* MW 6 */ + 12883 "01101000" // /* MW 5 */ + 12884 "00111011" // /* MW 4 */ + 12885 "01110000" // /* MW 3 */ + 12886 "10000101" // /* MW 2 */ + 12887 "10000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 first +.src_ref 2 "conv2d_dw_bf16.h" 225 4 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 12888 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12889 "01100000" // /* MW 13 */ + 12890 "00001001" // /* MW 12 */ + 12891 "01100010" // /* MW 11 */ + 12892 "00001011" // /* MW 10 */ + 12893 "00010000" // /* MW 9 */ + 12894 "11100000" // /* MW 8 */ + 12895 "00101101" // /* MW 7 */ + 12896 "00000100" // /* MW 6 */ + 12897 "11101001" // /* MW 5 */ + 12898 "00111000" // /* MW 4 */ + 12899 "11010000" // /* MW 3 */ + 12900 "10111000" // /* MW 2 */ + 12901 "01111111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 12902 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12903 "01110010" // /* MW 9 */ + 12904 "10010000" // /* MW 8 */ + 12905 "10000000" // /* MW 7 */ + 12906 "00000010" // /* MW 6 */ + 12907 "01001011" // /* MW 5 */ + 12908 "00001100" // /* MW 4 */ + 12909 "11010001" // /* MW 3 */ + 12910 "10110100" // /* MW 2 */ + 12911 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 12912 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12913 "01111110" // /* MW 9 */ + 12914 "11000000" // /* MW 8 */ + 12915 "11100001" // /* MW 7 */ + 12916 "00000011" // /* MW 6 */ + 12917 "10010000" // /* MW 5 */ + 12918 "10101011" // /* MW 4 */ + 12919 "11010001" // /* MW 3 */ + 12920 "00110000" // /* MW 2 */ + 12921 "01101101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 12922 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12923 "01011110" // /* MW 9 */ + 12924 "10010000" // /* MW 8 */ + 12925 "00000111" // /* MW 7 */ + 12926 "00000010" // /* MW 6 */ + 12927 "11110100" // /* MW 5 */ + 12928 "11110000" // /* MW 4 */ + 12929 "11010001" // /* MW 3 */ + 12930 "00001010" // /* MW 2 */ + 12931 "01111001" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 12932 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12933 "10000010" // /* MW 5 */ + 12934 "00000000" // /* MW 4 */ + 12935 "01010000" // /* MW 3 */ + 12936 "00011110" // /* MW 2 */ + 12937 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first + 12938 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13040 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12939 "00010000" // /* MW 11 */ + 12940 "01111000" // /* MW 10 */ + 12941 "01111001" // /* MW 9 */ + 12942 "00001100" // /* MW 8 */ + 12943 "00000000" // /* MW 7 */ + 12944 "00000000" // /* MW 6 */ + 12945 "01001011" // /* MW 5 */ + 12946 "00010000" // /* MW 4 */ + 12947 "11010110" // /* MW 3 */ + 12948 "11000000" // /* MW 2 */ + 12949 "01101001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 + 12950 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13136 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12951 "00010000" // /* MW 11 */ + 12952 "10101000" // /* MW 10 */ + 12953 "10111001" // /* MW 9 */ + 12954 "00001101" // /* MW 8 */ + 12955 "00000000" // /* MW 7 */ + 12956 "00000000" // /* MW 6 */ + 12957 "01001011" // /* MW 5 */ + 12958 "00010000" // /* MW 4 */ + 12959 "11010010" // /* MW 3 */ + 12960 "10010010" // /* MW 2 */ + 12961 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first + 12962 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12963 "00000101" // /* MW 5 */ + 12964 "01100001" // /* MW 4 */ + 12965 "10000100" // /* MW 3 */ + 12966 "00010110" // /* MW 2 */ + 12967 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 + 12968 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12969 "10001010" // /* MW 3 */ + 12970 "00000000" // /* MW 2 */ + 12971 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 244 4 + 12972 "10111010" // LDA r5, [p3]; MOVXM p3, #13200 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12973 "00010000" // /* MW 9 */ + 12974 "11001000" // /* MW 8 */ + 12975 "10110001" // /* MW 7 */ + 12976 "00001101" // /* MW 6 */ + 12977 "00000000" // /* MW 5 */ + 12978 "00000000" // /* MW 4 */ + 12979 "11010000" // /* MW 3 */ + 12980 "10010110" // /* MW 2 */ + 12981 "01100000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first + 12982 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12983 "10101000" // /* MW 9 */ + 12984 "00000001" // /* MW 8 */ + 12985 "10001110" // /* MW 7 */ + 12986 "00001010" // /* MW 6 */ + 12987 "00010100" // /* MW 5 */ + 12988 "00000000" // /* MW 4 */ + 12989 "11110000" // /* MW 3 */ + 12990 "00101100" // /* MW 2 */ + 12991 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.src_ref 2 "conv2d_dw_bf16.h" 271 12 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 +.src_ref 2 "conv2d_dw_bf16.h" 273 12 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 +.src_ref 2 "conv2d_dw_bf16.h" 275 12 +.src_ref 2 "conv2d_dw_bf16.h" 276 12 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 + 12992 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12993 "00000000" // /* MW 15 */ + 12994 "00000000" // /* MW 14 */ + 12995 "01111000" // /* MW 13 */ + 12996 "10111001" // /* MW 12 */ + 12997 "00001110" // /* MW 11 */ + 12998 "00001000" // /* MW 10 */ + 12999 "00110110" // /* MW 9 */ + 13000 "00000000" // /* MW 8 */ + 13001 "01011011" // /* MW 7 */ + 13002 "00000001" // /* MW 6 */ + 13003 "00100000" // /* MW 5 */ + 13004 "00000000" // /* MW 4 */ + 13005 "00000000" // /* MW 3 */ + 13006 "10010001" // /* MW 2 */ + 13007 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13008 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13009 "01101010" // /* MW 15 */ + 13010 "01100011" // /* MW 14 */ + 13011 "10101100" // /* MW 13 */ + 13012 "00000011" // /* MW 12 */ + 13013 "00001110" // /* MW 11 */ + 13014 "00000010" // /* MW 10 */ + 13015 "11010100" // /* MW 9 */ + 13016 "00001101" // /* MW 8 */ + 13017 "01001011" // /* MW 7 */ + 13018 "00010000" // /* MW 6 */ + 13019 "00100000" // /* MW 5 */ + 13020 "00000000" // /* MW 4 */ + 13021 "11110000" // /* MW 3 */ + 13022 "00101100" // /* MW 2 */ + 13023 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13024 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13025 "00011010" // /* MW 15 */ + 13026 "01001000" // /* MW 14 */ + 13027 "11001100" // /* MW 13 */ + 13028 "00111111" // /* MW 12 */ + 13029 "10111001" // /* MW 11 */ + 13030 "11011010" // /* MW 10 */ + 13031 "00101111" // /* MW 9 */ + 13032 "00000100" // /* MW 8 */ + 13033 "01001011" // /* MW 7 */ + 13034 "00010000" // /* MW 6 */ + 13035 "00100101" // /* MW 5 */ + 13036 "00000000" // /* MW 4 */ + 13037 "11010000" // /* MW 3 */ + 13038 "10100011" // /* MW 2 */ + 13039 "01000000" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 13040 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13041 "01101110" // /* MW 9 */ + 13042 "10000001" // /* MW 8 */ + 13043 "10000100" // /* MW 7 */ + 13044 "00000010" // /* MW 6 */ + 13045 "11110100" // /* MW 5 */ + 13046 "11110000" // /* MW 4 */ + 13047 "01110001" // /* MW 3 */ + 13048 "10110011" // /* MW 2 */ + 13049 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13050 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13051 "00000001" // /* MW 9 */ + 13052 "10001001" // /* MW 8 */ + 13053 "10001010" // /* MW 7 */ + 13054 "01000110" // /* MW 6 */ + 13055 "00001011" // /* MW 5 */ + 13056 "10011100" // /* MW 4 */ + 13057 "11101010" // /* MW 3 */ + 13058 "00111000" // /* MW 2 */ + 13059 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13060 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13061 "00000001" // /* MW 9 */ + 13062 "00110101" // /* MW 8 */ + 13063 "10001001" // /* MW 7 */ + 13064 "11000110" // /* MW 6 */ + 13065 "10000110" // /* MW 5 */ + 13066 "00110000" // /* MW 4 */ + 13067 "01101010" // /* MW 3 */ + 13068 "10110001" // /* MW 2 */ + 13069 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13070 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13071 "00000110" // /* MW 3 */ + 13072 "10001001" // /* MW 2 */ + 13073 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13074 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13075 "10100001" // /* MW 7 */ + 13076 "01001000" // /* MW 6 */ + 13077 "10001100" // /* MW 5 */ + 13078 "11000110" // /* MW 4 */ + 13079 "10001110" // /* MW 3 */ + 13080 "10110000" // /* MW 2 */ + 13081 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13082 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13083 "10100001" // /* MW 7 */ + 13084 "00110110" // /* MW 6 */ + 13085 "10001010" // /* MW 5 */ + 13086 "01000110" // /* MW 4 */ + 13087 "00001111" // /* MW 3 */ + 13088 "10011100" // /* MW 2 */ + 13089 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13090 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13091 "00001110" // /* MW 3 */ + 13092 "10001001" // /* MW 2 */ + 13093 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13094 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13095 "11100001" // /* MW 7 */ + 13096 "10010010" // /* MW 6 */ + 13097 "10001011" // /* MW 5 */ + 13098 "01000110" // /* MW 4 */ + 13099 "00000011" // /* MW 3 */ + 13100 "00011100" // /* MW 2 */ + 13101 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13102 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13103 "11100001" // /* MW 7 */ + 13104 "01010110" // /* MW 6 */ + 13105 "10001000" // /* MW 5 */ + 13106 "01000110" // /* MW 4 */ + 13107 "00000111" // /* MW 3 */ + 13108 "00011100" // /* MW 2 */ + 13109 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13110 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13111 "01101110" // /* MW 9 */ + 13112 "01000001" // /* MW 8 */ + 13113 "00011000" // /* MW 7 */ + 13114 "00000001" // /* MW 6 */ + 13115 "00010000" // /* MW 5 */ + 13116 "00000000" // /* MW 4 */ + 13117 "11110000" // /* MW 3 */ + 13118 "00101100" // /* MW 2 */ + 13119 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13120 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13121 "01101010" // /* MW 15 */ + 13122 "01100011" // /* MW 14 */ + 13123 "01111100" // /* MW 13 */ + 13124 "10100101" // /* MW 12 */ + 13125 "00000001" // /* MW 11 */ + 13126 "00000000" // /* MW 10 */ + 13127 "00000000" // /* MW 9 */ + 13128 "00000000" // /* MW 8 */ + 13129 "01011011" // /* MW 7 */ + 13130 "00000001" // /* MW 6 */ + 13131 "00100000" // /* MW 5 */ + 13132 "00000000" // /* MW 4 */ + 13133 "11110000" // /* MW 3 */ + 13134 "00101100" // /* MW 2 */ + 13135 "00000000" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 13136 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13137 "00011010" // /* MW 15 */ + 13138 "01001000" // /* MW 14 */ + 13139 "01111100" // /* MW 13 */ + 13140 "10100101" // /* MW 12 */ + 13141 "00000001" // /* MW 11 */ + 13142 "00000000" // /* MW 10 */ + 13143 "00000000" // /* MW 9 */ + 13144 "00000000" // /* MW 8 */ + 13145 "01011011" // /* MW 7 */ + 13146 "00000001" // /* MW 6 */ + 13147 "00100000" // /* MW 5 */ + 13148 "00000000" // /* MW 4 */ + 13149 "11110000" // /* MW 3 */ + 13150 "00101100" // /* MW 2 */ + 13151 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13152 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13153 "01101110" // /* MW 9 */ + 13154 "10000001" // /* MW 8 */ + 13155 "10000100" // /* MW 7 */ + 13156 "00000010" // /* MW 6 */ + 13157 "10010000" // /* MW 5 */ + 13158 "01110011" // /* MW 4 */ + 13159 "11110100" // /* MW 3 */ + 13160 "00001100" // /* MW 2 */ + 13161 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13162 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13163 "00000001" // /* MW 7 */ + 13164 "10001001" // /* MW 6 */ + 13165 "10001010" // /* MW 5 */ + 13166 "01000110" // /* MW 4 */ + 13167 "00001011" // /* MW 3 */ + 13168 "10011100" // /* MW 2 */ + 13169 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13170 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13171 "00000001" // /* MW 7 */ + 13172 "00110101" // /* MW 6 */ + 13173 "10001001" // /* MW 5 */ + 13174 "11000110" // /* MW 4 */ + 13175 "10000110" // /* MW 3 */ + 13176 "00110000" // /* MW 2 */ + 13177 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13178 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13179 "00000110" // /* MW 3 */ + 13180 "10001001" // /* MW 2 */ + 13181 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13182 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13183 "10100001" // /* MW 7 */ + 13184 "01001000" // /* MW 6 */ + 13185 "10001100" // /* MW 5 */ + 13186 "01000110" // /* MW 4 */ + 13187 "00001111" // /* MW 3 */ + 13188 "10011100" // /* MW 2 */ + 13189 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13190 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13191 "10100001" // /* MW 9 */ + 13192 "00110110" // /* MW 8 */ + 13193 "10001010" // /* MW 7 */ + 13194 "11000010" // /* MW 6 */ + 13195 "10001110" // /* MW 5 */ + 13196 "10110000" // /* MW 4 */ + 13197 "11110100" // /* MW 3 */ + 13198 "00101100" // /* MW 2 */ + 13199 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13200 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13201 "00011101" // /* MW 5 */ + 13202 "00010010" // /* MW 4 */ + 13203 "10001011" // /* MW 3 */ + 13204 "00011110" // /* MW 2 */ + 13205 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13206 "01011010" // MOVXM le, #13376; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13207 "11100001" // /* MW 9 */ + 13208 "10010010" // /* MW 8 */ + 13209 "10001011" // /* MW 7 */ + 13210 "00000010" // /* MW 6 */ + 13211 "01000100" // /* MW 5 */ + 13212 "10110111" // /* MW 4 */ + 13213 "00000001" // /* MW 3 */ + 13214 "00000000" // /* MW 2 */ + 13215 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13216 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13296; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13217 "11100001" // /* MW 11 */ + 13218 "01010110" // /* MW 10 */ + 13219 "10001000" // /* MW 9 */ + 13220 "00000010" // /* MW 8 */ + 13221 "00111111" // /* MW 7 */ + 13222 "10001111" // /* MW 6 */ + 13223 "00000001" // /* MW 5 */ + 13224 "00000000" // /* MW 4 */ + 13225 "01110000" // /* MW 3 */ + 13226 "10000101" // /* MW 2 */ + 13227 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13228 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13229 "01111111" // /* MW 3 */ + 13230 "01110010" // /* MW 2 */ + 13231 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13232 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13233 "10011011" // /* MW 3 */ + 13234 "00011101" // /* MW 2 */ + 13235 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13236 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13237 "01110100" // /* MW 3 */ + 13238 "00011100" // /* MW 2 */ + 13239 "00111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13240 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13241 "10110100" // /* MW 3 */ + 13242 "01011000" // /* MW 2 */ + 13243 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13244 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13245 "10010110" // /* MW 3 */ + 13246 "00010001" // /* MW 2 */ + 13247 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13248 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13249 "00010110" // /* MW 3 */ + 13250 "00010000" // /* MW 2 */ + 13251 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13252 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13253 "01101100" // /* MW 3 */ + 13254 "01010000" // /* MW 2 */ + 13255 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13256 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13257 "00010100" // /* MW 3 */ + 13258 "01010011" // /* MW 2 */ + 13259 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13260 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13261 "01110000" // /* MW 7 */ + 13262 "00110110" // /* MW 6 */ + 13263 "10101000" // /* MW 5 */ + 13264 "00000010" // /* MW 4 */ + 13265 "01100000" // /* MW 3 */ + 13266 "01000010" // /* MW 2 */ + 13267 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13268 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13269 "00000011" // /* MW 3 */ + 13270 "00011100" // /* MW 2 */ + 13271 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13272 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13273 "01110000" // /* MW 7 */ + 13274 "01000101" // /* MW 6 */ + 13275 "10000000" // /* MW 5 */ + 13276 "00000001" // /* MW 4 */ + 13277 "01100000" // /* MW 3 */ + 13278 "01010010" // /* MW 2 */ + 13279 "01000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13280 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13281 "01000001" // /* MW 7 */ + 13282 "01101101" // /* MW 6 */ + 13283 "10001100" // /* MW 5 */ + 13284 "01000110" // /* MW 4 */ + 13285 "00000111" // /* MW 3 */ + 13286 "00011100" // /* MW 2 */ + 13287 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13288 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13289 "01000001" // /* MW 7 */ + 13290 "00000011" // /* MW 6 */ + 13291 "10001001" // /* MW 5 */ + 13292 "11000110" // /* MW 4 */ + 13293 "10000010" // /* MW 3 */ + 13294 "00110000" // /* MW 2 */ + 13295 "00000010" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13297 "01101110" // /* MW 9 */ + 13298 "10000001" // /* MW 8 */ + 13299 "10000100" // /* MW 7 */ + 13300 "00000010" // /* MW 6 */ + 13301 "11110100" // /* MW 5 */ + 13302 "11110000" // /* MW 4 */ + 13303 "01110001" // /* MW 3 */ + 13304 "10110011" // /* MW 2 */ + 13305 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13307 "00000001" // /* MW 9 */ + 13308 "10001001" // /* MW 8 */ + 13309 "10001010" // /* MW 7 */ + 13310 "01000110" // /* MW 6 */ + 13311 "00001011" // /* MW 5 */ + 13312 "10011100" // /* MW 4 */ + 13313 "11101010" // /* MW 3 */ + 13314 "00111000" // /* MW 2 */ + 13315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13317 "00000001" // /* MW 9 */ + 13318 "00110101" // /* MW 8 */ + 13319 "10001001" // /* MW 7 */ + 13320 "11000110" // /* MW 6 */ + 13321 "10000110" // /* MW 5 */ + 13322 "00110000" // /* MW 4 */ + 13323 "01101010" // /* MW 3 */ + 13324 "10110001" // /* MW 2 */ + 13325 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13327 "00000110" // /* MW 3 */ + 13328 "10001001" // /* MW 2 */ + 13329 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13331 "10100001" // /* MW 7 */ + 13332 "01001000" // /* MW 6 */ + 13333 "10001100" // /* MW 5 */ + 13334 "11000110" // /* MW 4 */ + 13335 "10001110" // /* MW 3 */ + 13336 "10110000" // /* MW 2 */ + 13337 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13339 "10100001" // /* MW 7 */ + 13340 "00110110" // /* MW 6 */ + 13341 "10001010" // /* MW 5 */ + 13342 "01000110" // /* MW 4 */ + 13343 "00001111" // /* MW 3 */ + 13344 "10011100" // /* MW 2 */ + 13345 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13347 "00001110" // /* MW 3 */ + 13348 "10001001" // /* MW 2 */ + 13349 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13351 "11100001" // /* MW 7 */ + 13352 "10010010" // /* MW 6 */ + 13353 "10001011" // /* MW 5 */ + 13354 "01000110" // /* MW 4 */ + 13355 "00000011" // /* MW 3 */ + 13356 "00011100" // /* MW 2 */ + 13357 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13359 "11100001" // /* MW 7 */ + 13360 "01010110" // /* MW 6 */ + 13361 "10001000" // /* MW 5 */ + 13362 "01000110" // /* MW 4 */ + 13363 "00000111" // /* MW 3 */ + 13364 "00011100" // /* MW 2 */ + 13365 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13366 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13367 "00000101" // /* MW 5 */ + 13368 "01100001" // /* MW 4 */ + 13369 "11110100" // /* MW 3 */ + 13370 "00101100" // /* MW 2 */ + 13371 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13372 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13373 "01000001" // /* MW 3 */ + 13374 "01101101" // /* MW 2 */ + 13375 "10001100" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13377 "00011010" // /* MW 15 */ + 13378 "01001000" // /* MW 14 */ + 13379 "01111100" // /* MW 13 */ + 13380 "10100101" // /* MW 12 */ + 13381 "00000001" // /* MW 11 */ + 13382 "00000000" // /* MW 10 */ + 13383 "00000000" // /* MW 9 */ + 13384 "00000000" // /* MW 8 */ + 13385 "01011011" // /* MW 7 */ + 13386 "00000001" // /* MW 6 */ + 13387 "00100000" // /* MW 5 */ + 13388 "00000000" // /* MW 4 */ + 13389 "11110000" // /* MW 3 */ + 13390 "00101100" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 244 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13392 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 13393 "01101000" // /* MW 11 */ + 13394 "10000001" // /* MW 10 */ + 13395 "10000100" // /* MW 9 */ + 13396 "00000010" // /* MW 8 */ + 13397 "00100111" // /* MW 7 */ + 13398 "00000100" // /* MW 6 */ + 13399 "00100000" // /* MW 5 */ + 13400 "11100111" // /* MW 4 */ + 13401 "11111000" // /* MW 3 */ + 13402 "00001100" // /* MW 2 */ + 13403 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13404 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13405 "00000001" // /* MW 7 */ + 13406 "10001001" // /* MW 6 */ + 13407 "10001010" // /* MW 5 */ + 13408 "01000110" // /* MW 4 */ + 13409 "00001011" // /* MW 3 */ + 13410 "10011100" // /* MW 2 */ + 13411 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13412 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13413 "00000001" // /* MW 7 */ + 13414 "00110101" // /* MW 6 */ + 13415 "10001001" // /* MW 5 */ + 13416 "11000110" // /* MW 4 */ + 13417 "10000110" // /* MW 3 */ + 13418 "00110000" // /* MW 2 */ + 13419 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13420 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13421 "00000110" // /* MW 3 */ + 13422 "10001001" // /* MW 2 */ + 13423 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13424 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13425 "10100001" // /* MW 7 */ + 13426 "01001000" // /* MW 6 */ + 13427 "10001100" // /* MW 5 */ + 13428 "01000110" // /* MW 4 */ + 13429 "00001111" // /* MW 3 */ + 13430 "10011100" // /* MW 2 */ + 13431 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13432 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13433 "10100001" // /* MW 7 */ + 13434 "00110110" // /* MW 6 */ + 13435 "10001010" // /* MW 5 */ + 13436 "11000110" // /* MW 4 */ + 13437 "10001110" // /* MW 3 */ + 13438 "10110000" // /* MW 2 */ + 13439 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13440 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13441 "00001110" // /* MW 3 */ + 13442 "10001001" // /* MW 2 */ + 13443 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13444 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13445 "11100001" // /* MW 3 */ + 13446 "10010010" // /* MW 2 */ + 13447 "10001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13448 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13449 "11100001" // /* MW 3 */ + 13450 "01010110" // /* MW 2 */ + 13451 "10001000" // /* MW 1 */ + 13452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13453 "00000000" // /* MW 1 */ + 13454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13455 "00000000" // /* MW 1 */ + 13456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13457 "00000000" // /* MW 1 */ + 13458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13459 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first + 13460 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13461 "10010110" // /* MW 3 */ + 13462 "00010001" // /* MW 2 */ + 13463 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 290 first + 13464 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13465 "00000000" // /* MW 5 */ + 13466 "01010000" // /* MW 4 */ + 13467 "11000000" // /* MW 3 */ + 13468 "00000010" // /* MW 2 */ + 13469 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13470 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13471 "01101100" // /* MW 3 */ + 13472 "01010000" // /* MW 2 */ + 13473 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.delay_slot + 13474 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13475 "00010100" // /* MW 3 */ + 13476 "01010011" // /* MW 2 */ + 13477 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13478 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13479 "01101100" // /* MW 3 */ + 13480 "01010000" // /* MW 2 */ + 13481 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.delay_slot + 13482 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13483 "00010011" // /* MW 3 */ + 13484 "10001010" // /* MW 2 */ + 13485 "00001010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 13486 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13487 "10010011" // /* MW 3 */ + 13488 "00111010" // /* MW 2 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 13489 "00001010" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 444 first +.src_ref 7 "superkernels.cpp" 449 6 +.function_start + 13504 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13505 "10000000" // /* MW 5 */ + 13506 "11001000" // /* MW 4 */ + 13507 "11001000" // /* MW 3 */ + 13508 "00000111" // /* MW 2 */ + 13509 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 first + 13510 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13511 "01000001" // /* MW 5 */ + 13512 "00101111" // /* MW 4 */ + 13513 "11010000" // /* MW 3 */ + 13514 "11000010" // /* MW 2 */ + 13515 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 444 + 13516 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13517 "00000001" // /* MW 5 */ + 13518 "00000000" // /* MW 4 */ + 13519 "00000000" // /* MW 3 */ + 13520 "00010000" // /* MW 2 */ + 13521 "00000000" // /* MW 1 */ + 13522 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13523 "01110000" // /* MW 7 */ + 13524 "01110000" // /* MW 6 */ + 13525 "00101101" // /* MW 5 */ + 13526 "00000010" // /* MW 4 */ + 13527 "10110000" // /* MW 3 */ + 13528 "00111010" // /* MW 2 */ + 13529 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 + 13530 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13531 "01110000" // /* MW 7 */ + 13532 "11110000" // /* MW 6 */ + 13533 "10101000" // /* MW 5 */ + 13534 "00000001" // /* MW 4 */ + 13535 "10110000" // /* MW 3 */ + 13536 "10110110" // /* MW 2 */ + 13537 "11111111" // /* MW 1 */ + 13538 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13539 "00011101" // /* MW 3 */ + 13540 "11101100" // /* MW 2 */ + 13541 "00001111" // /* MW 1 */ + 13542 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13543 "10011101" // /* MW 3 */ + 13544 "11110111" // /* MW 2 */ + 13545 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 + 13546 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13547 "01110000" // /* MW 7 */ + 13548 "01100000" // /* MW 6 */ + 13549 "11001010" // /* MW 5 */ + 13550 "00000001" // /* MW 4 */ + 13551 "10110000" // /* MW 3 */ + 13552 "00000010" // /* MW 2 */ + 13553 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 +.src_ref 7 "superkernels.cpp" 449 16 + 13554 "10000100" // JNZ r16, #13680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13680 delay_slots=5 */ + 13555 "00000001" // /* MW 5 */ + 13556 "01000000" // /* MW 4 */ + 13557 "10111000" // /* MW 3 */ + 13558 "00011010" // /* MW 2 */ + 13559 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 13560 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13561 "11000000" // /* MW 3 */ + 13562 "11010110" // /* MW 2 */ + 13563 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 22 first +.delay_slot + 13564 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13565 "10010000" // /* MW 3 */ + 13566 "01100010" // /* MW 2 */ + 13567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 30 +.delay_slot + 13568 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13569 "11111011" // /* MW 3 */ + 13570 "01100011" // /* MW 2 */ + 13571 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13572 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13573 "10100000" // /* MW 5 */ + 13574 "11001000" // /* MW 4 */ + 13575 "11000110" // /* MW 3 */ + 13576 "00000111" // /* MW 2 */ + 13577 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13578 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13579 "00110001" // /* MW 3 */ + 13580 "00000110" // /* MW 2 */ + 13581 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13582 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13583 "00010001" // /* MW 9 */ + 13584 "00110100" // /* MW 8 */ + 13585 "10110010" // /* MW 7 */ + 13586 "11110000" // /* MW 6 */ + 13587 "00000001" // /* MW 5 */ + 13588 "00000000" // /* MW 4 */ + 13589 "01100000" // /* MW 3 */ + 13590 "10010001" // /* MW 2 */ + 13591 "11110000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13592 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13593 "00010000" // /* MW 11 */ + 13594 "00110010" // /* MW 10 */ + 13595 "10110010" // /* MW 9 */ + 13596 "11110000" // /* MW 8 */ + 13597 "00000001" // /* MW 7 */ + 13598 "00000000" // /* MW 6 */ + 13599 "10001011" // /* MW 5 */ + 13600 "10001000" // /* MW 4 */ + 13601 "11100000" // /* MW 3 */ + 13602 "11000000" // /* MW 2 */ + 13603 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13605 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 13606 "00000100" // JL #12096 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12096 delay_slots=5 */ + 13607 "00000001" // /* MW 5 */ + 13608 "00000000" // /* MW 4 */ + 13609 "10100000" // /* MW 3 */ + 13610 "00010111" // /* MW 2 */ + 13611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13615 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13617 "00110001" // /* MW 3 */ + 13618 "00100000" // /* MW 2 */ + 13619 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 13620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13621 "00000101" // /* MW 3 */ + 13622 "00100000" // /* MW 2 */ + 13623 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 13624 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13625 "01110000" // /* MW 7 */ + 13626 "10100101" // /* MW 6 */ + 13627 "00000001" // /* MW 5 */ + 13628 "00000000" // /* MW 4 */ + 13629 "00110000" // /* MW 3 */ + 13630 "11000010" // /* MW 2 */ + 13631 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 +.src_ref 7 "superkernels.cpp" 461 2 +.return_address + 13632 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13633 "00000000" // /* MW 7 */ + 13634 "10000010" // /* MW 6 */ + 13635 "00110011" // /* MW 5 */ + 13636 "00000001" // /* MW 4 */ + 13637 "01100000" // /* MW 3 */ + 13638 "10010001" // /* MW 2 */ + 13639 "00110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 17 first + 13640 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13641 "00111010" // /* MW 3 */ + 13642 "00000110" // /* MW 2 */ + 13643 "00000010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 +.src_ref 7 "superkernels.cpp" 453 15 first + 13644 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13645 "00010000" // /* MW 9 */ + 13646 "00110000" // /* MW 8 */ + 13647 "00110010" // /* MW 7 */ + 13648 "11110001" // /* MW 6 */ + 13649 "00000001" // /* MW 5 */ + 13650 "00000000" // /* MW 4 */ + 13651 "01010000" // /* MW 3 */ + 13652 "11000011" // /* MW 2 */ + 13653 "01000100" // /* MW 1 */ + 13654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13655 "00000000" // /* MW 1 */ + 13656 "10000100" // J #13696 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13696 delay_slots=5 */ + 13657 "00000000" // /* MW 5 */ + 13658 "00000000" // /* MW 4 */ + 13659 "11000000" // /* MW 3 */ + 13660 "00011010" // /* MW 2 */ + 13661 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 +.src_ref 7 "superkernels.cpp" 457 26 +.delay_slot + 13662 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13663 "10110000" // /* MW 5 */ + 13664 "11001000" // /* MW 4 */ + 13665 "11000110" // /* MW 3 */ + 13666 "00000111" // /* MW 2 */ + 13667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13671 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 first +.delay_slot + 13672 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13673 "00110001" // /* MW 3 */ + 13674 "00000110" // /* MW 2 */ + 13675 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 first +.delay_slot + 13676 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13677 "00010001" // /* MW 3 */ + 13678 "00000110" // /* MW 2 */ + 13679 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 7 "superkernels.cpp" 457 26 + 13680 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13681 "00000000" // /* MW 15 */ + 13682 "00000000" // /* MW 14 */ + 13683 "00010000" // /* MW 13 */ + 13684 "00101100" // /* MW 12 */ + 13685 "10110010" // /* MW 11 */ + 13686 "11110001" // /* MW 10 */ + 13687 "00000001" // /* MW 9 */ + 13688 "00000000" // /* MW 8 */ + 13689 "01011011" // /* MW 7 */ + 13690 "00000001" // /* MW 6 */ + 13691 "00100000" // /* MW 5 */ + 13692 "00000000" // /* MW 4 */ + 13693 "11110000" // /* MW 3 */ + 13694 "00101100" // /* MW 2 */ + 13695 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 218 49 first + 13696 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13697 "10000110" // /* MW 3 */ + 13698 "01100111" // /* MW 2 */ + 13699 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 +.src_ref 1 "io_buffer_main.h" 218 49 + 13700 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13701 "00010000" // /* MW 9 */ + 13702 "00101000" // /* MW 8 */ + 13703 "00110010" // /* MW 7 */ + 13704 "11110010" // /* MW 6 */ + 13705 "00000001" // /* MW 5 */ + 13706 "00000000" // /* MW 4 */ + 13707 "11010000" // /* MW 3 */ + 13708 "11101110" // /* MW 2 */ + 13709 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 13710 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13711 "00010110" // /* MW 3 */ + 13712 "11111110" // /* MW 2 */ + 13713 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 13714 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13715 "00110110" // /* MW 3 */ + 13716 "11111110" // /* MW 2 */ + 13717 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 13718 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13719 "01010110" // /* MW 3 */ + 13720 "01000110" // /* MW 2 */ + 13721 "00000010" // /* MW 1 */ + 13722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13723 "00000000" // /* MW 1 */ + 13724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13725 "00000000" // /* MW 1 */ + 13726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13727 "00000000" // /* MW 1 */ + 13728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13729 "00000000" // /* MW 1 */ + 13730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13731 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 13732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13733 "00000010" // /* MW 3 */ + 13734 "01100001" // /* MW 2 */ + 13735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 13736 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13737 "00010001" // /* MW 3 */ + 13738 "00000110" // /* MW 2 */ + 13739 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 13740 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13741 "11111101" // /* MW 3 */ + 13742 "11100000" // /* MW 2 */ + 13743 "00010111" // /* MW 1 */ + 13744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13745 "00000000" // /* MW 1 */ + 13746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13747 "00000000" // /* MW 1 */ + 13748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13749 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 13750 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13751 "00001000" // /* MW 3 */ + 13752 "10010011" // /* MW 2 */ + 13753 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 +.src_ref 7 "superkernels.cpp" 459 47 +.src_ref 7 "superkernels.cpp" 464 6 +.src_ref 7 "superkernels.cpp" 465 16 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 13754 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13755 "00010000" // /* MW 9 */ + 13756 "00100000" // /* MW 8 */ + 13757 "10110010" // /* MW 7 */ + 13758 "11110011" // /* MW 6 */ + 13759 "00000001" // /* MW 5 */ + 13760 "00000000" // /* MW 4 */ + 13761 "00000000" // /* MW 3 */ + 13762 "00101111" // /* MW 2 */ + 13763 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 + 13764 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13765 "11000001" // /* MW 5 */ + 13766 "00101011" // /* MW 4 */ + 13767 "00101000" // /* MW 3 */ + 13768 "00000000" // /* MW 2 */ + 13769 "00000110" // /* MW 1 */ + 13770 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13771 "01011010" // /* MW 3 */ + 13772 "01101000" // /* MW 2 */ + 13773 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 13774 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13775 "10000001" // /* MW 5 */ + 13776 "00101001" // /* MW 4 */ + 13777 "00100111" // /* MW 3 */ + 13778 "11010011" // /* MW 2 */ + 13779 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 first + 13780 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13781 "00110110" // /* MW 3 */ + 13782 "00000110" // /* MW 2 */ + 13783 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 26 +.src_ref 7 "superkernels.cpp" 461 2 + 13784 "10111010" // LDA r16, [p3]; MOVXM p3, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13785 "00010000" // /* MW 9 */ + 13786 "11000000" // /* MW 8 */ + 13787 "10110011" // /* MW 7 */ + 13788 "11110001" // /* MW 6 */ + 13789 "00000001" // /* MW 5 */ + 13790 "00000000" // /* MW 4 */ + 13791 "11010000" // /* MW 3 */ + 13792 "11000010" // /* MW 2 */ + 13793 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 13794 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13795 "01010110" // /* MW 3 */ + 13796 "00000110" // /* MW 2 */ + 13797 "00000111" // /* MW 1 */ + 13798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13799 "00000000" // /* MW 1 */ + 13800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13801 "00000000" // /* MW 1 */ + 13802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 13804 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13805 "01110110" // /* MW 3 */ + 13806 "00000110" // /* MW 2 */ + 13807 "00000101" // /* MW 1 */ + 13808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13809 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 24 first + 13810 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13811 "00001111" // /* MW 3 */ + 13812 "01100001" // /* MW 2 */ + 13813 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 13814 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13815 "00000111" // /* MW 3 */ + 13816 "10100010" // /* MW 2 */ + 13817 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first + 13818 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13819 "11111101" // /* MW 3 */ + 13820 "00100000" // /* MW 2 */ + 13821 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 first +.no_stack_arguments + 13822 "00000100" // JL #12768 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12768 delay_slots=5 */ + 13823 "00000001" // /* MW 5 */ + 13824 "00000000" // /* MW 4 */ + 13825 "11110000" // /* MW 3 */ + 13826 "00011000" // /* MW 2 */ + 13827 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first +.delay_slot + 13828 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13829 "00110001" // /* MW 3 */ + 13830 "00000110" // /* MW 2 */ + 13831 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first +.delay_slot + 13832 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13833 "11000001" // /* MW 3 */ + 13834 "01001001" // /* MW 2 */ + 13835 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 201 10 first +.delay_slot + 13836 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13837 "00100101" // /* MW 3 */ + 13838 "10110100" // /* MW 2 */ + 13839 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 first +.delay_slot + 13840 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13841 "00010101" // /* MW 3 */ + 13842 "10111011" // /* MW 2 */ + 13843 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 +.delay_slot + 13844 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13845 "11000001" // /* MW 11 */ + 13846 "10001010" // /* MW 10 */ + 13847 "11011111" // /* MW 9 */ + 13848 "00000011" // /* MW 8 */ + 13849 "00000000" // /* MW 7 */ + 13850 "00000000" // /* MW 6 */ + 13851 "00100000" // /* MW 5 */ + 13852 "00000000" // /* MW 4 */ + 13853 "11110000" // /* MW 3 */ + 13854 "00101100" // /* MW 2 */ + 13855 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 13856 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13857 "00001010" // /* MW 3 */ + 13858 "01100111" // /* MW 2 */ + 13859 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 13860 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13861 "00010110" // /* MW 3 */ + 13862 "00000110" // /* MW 2 */ + 13863 "00000010" // /* MW 1 */ + 13864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13865 "00000000" // /* MW 1 */ + 13866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13867 "00000000" // /* MW 1 */ + 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13869 "00000000" // /* MW 1 */ + 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13871 "00000000" // /* MW 1 */ + 13872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13873 "00000000" // /* MW 1 */ + 13874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13875 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 13876 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13877 "11111000" // /* MW 3 */ + 13878 "00010000" // /* MW 2 */ + 13879 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 13880 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13881 "00010000" // /* MW 9 */ + 13882 "00110000" // /* MW 8 */ + 13883 "10110010" // /* MW 7 */ + 13884 "11110000" // /* MW 6 */ + 13885 "00000001" // /* MW 5 */ + 13886 "00000000" // /* MW 4 */ + 13887 "11010000" // /* MW 3 */ + 13888 "11000010" // /* MW 2 */ + 13889 "01011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 first + 13890 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13891 "01010110" // /* MW 3 */ + 13892 "00000110" // /* MW 2 */ + 13893 "00000001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 13894 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13895 "00110110" // /* MW 3 */ + 13896 "00000110" // /* MW 2 */ + 13897 "00000111" // /* MW 1 */ + 13898 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13899 "10011001" // /* MW 3 */ + 13900 "11110100" // /* MW 2 */ + 13901 "00000111" // /* MW 1 */ + 13902 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13903 "11010001" // /* MW 3 */ + 13904 "11111001" // /* MW 2 */ + 13905 "00000111" // /* MW 1 */ + 13906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13907 "00000000" // /* MW 1 */ + 13908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 13910 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13911 "00000001" // /* MW 3 */ + 13912 "11100001" // /* MW 2 */ + 13913 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 13914 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13915 "00010001" // /* MW 3 */ + 13916 "11100110" // /* MW 2 */ + 13917 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 16 first + 13918 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13919 "00101000" // /* MW 3 */ + 13920 "01100001" // /* MW 2 */ + 13921 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 13922 "10000100" // JNZ r16, #13952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13952 delay_slots=5 */ + 13923 "00000001" // /* MW 5 */ + 13924 "01000000" // /* MW 4 */ + 13925 "01000000" // /* MW 3 */ + 13926 "00011011" // /* MW 2 */ + 13927 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 +.delay_slot + 13928 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13929 "00000001" // /* MW 3 */ + 13930 "00110000" // /* MW 2 */ + 13931 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13939 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 first + 13940 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13941 "11000001" // /* MW 11 */ + 13942 "10001000" // /* MW 10 */ + 13943 "10000011" // /* MW 9 */ + 13944 "00000011" // /* MW 8 */ + 13945 "00000000" // /* MW 7 */ + 13946 "00000000" // /* MW 6 */ + 13947 "00100000" // /* MW 5 */ + 13948 "00000000" // /* MW 4 */ + 13949 "11110000" // /* MW 3 */ + 13950 "00101100" // /* MW 2 */ + 13951 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 467 + 13952 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13953 "01000001" // /* MW 5 */ + 13954 "11101101" // /* MW 4 */ + 13955 "00101110" // /* MW 3 */ + 13956 "10110110" // /* MW 2 */ + 13957 "11111111" // /* MW 1 */ + 13958 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13959 "11110001" // /* MW 3 */ + 13960 "11110001" // /* MW 2 */ + 13961 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 first + 13962 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13963 "00000000" // /* MW 3 */ + 13964 "00101000" // /* MW 2 */ + 13965 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 +.delay_slot + 13966 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13967 "00000001" // /* MW 5 */ + 13968 "00000000" // /* MW 4 */ + 13969 "00000000" // /* MW 3 */ + 13970 "11110000" // /* MW 2 */ + 13971 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13977 "00000000" // /* MW 1 */ +.delay_slot + 13978 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13979 "11000000" // /* MW 3 */ + 13980 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 13981 "00011111" // /* MW 1 */ +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function _b896_wrapper _Z13_b896_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 20 first +.src_ref 0 "0_0_reloadable3.cc" 22 79 +.function_start + 13984 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13985 "11000000" // /* MW 3 */ + 13986 "01100000" // /* MW 2 */ + 13987 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 22 79 first + 13988 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13989 "00011110" // /* MW 3 */ + 13990 "00011100" // /* MW 2 */ + 13991 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 23 79 first + 13992 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13993 "10011110" // /* MW 3 */ + 13994 "00101100" // /* MW 2 */ + 13995 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 25 81 first + 13996 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13997 "10011110" // /* MW 3 */ + 13998 "11110101" // /* MW 2 */ + 13999 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 24 47 first + 14000 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14001 "00011110" // /* MW 3 */ + 14002 "00000101" // /* MW 2 */ + 14003 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 21 4 first +.tail_call + 14004 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */ + 14005 "00000000" // /* MW 5 */ + 14006 "00000000" // /* MW 4 */ + 14007 "01110000" // /* MW 3 */ + 14008 "00001101" // /* MW 2 */ + 14009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14015 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14017 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + 14019 "00000000" // /* MW 1 */ +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function _b901_wrapper _Z13_b901_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 29 first +.src_ref 0 "0_0_reloadable3.cc" 31 79 +.function_start + 14032 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14033 "11000000" // /* MW 3 */ + 14034 "01100000" // /* MW 2 */ + 14035 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 31 79 first + 14036 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14037 "00011110" // /* MW 3 */ + 14038 "00101100" // /* MW 2 */ + 14039 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 33 81 first + 14040 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14041 "00011110" // /* MW 3 */ + 14042 "11110101" // /* MW 2 */ + 14043 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 32 47 first + 14044 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14045 "10011110" // /* MW 3 */ + 14046 "00000100" // /* MW 2 */ + 14047 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 30 4 first +.tail_call + 14048 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */ + 14049 "00000000" // /* MW 5 */ + 14050 "00000000" // /* MW 4 */ + 14051 "00011000" // /* MW 3 */ + 14052 "00010000" // /* MW 2 */ + 14053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14057 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14059 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 + 14063 "00000000" // /* MW 1 */ +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function _b906_wrapper _Z13_b906_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 37 first +.src_ref 0 "0_0_reloadable3.cc" 39 79 +.function_start + 14064 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14065 "11000000" // /* MW 3 */ + 14066 "01100000" // /* MW 2 */ + 14067 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 39 79 first + 14068 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14069 "00011110" // /* MW 3 */ + 14070 "00101100" // /* MW 2 */ + 14071 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 41 81 first + 14072 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14073 "00011110" // /* MW 3 */ + 14074 "11110101" // /* MW 2 */ + 14075 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 40 47 first + 14076 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14077 "10011110" // /* MW 3 */ + 14078 "00000100" // /* MW 2 */ + 14079 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 38 4 first +.tail_call + 14080 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */ + 14081 "00000000" // /* MW 5 */ + 14082 "00000000" // /* MW 4 */ + 14083 "11001000" // /* MW 3 */ + 14084 "00010001" // /* MW 2 */ + 14085 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14087 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14089 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14091 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14093 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 + 14095 "00000000" // /* MW 1 */ +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function _b881_wrapper _Z13_b881_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 45 first +.src_ref 0 "0_0_reloadable3.cc" 47 79 +.function_start + 14096 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14097 "11000000" // /* MW 3 */ + 14098 "01100000" // /* MW 2 */ + 14099 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 47 79 first + 14100 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14101 "00011110" // /* MW 3 */ + 14102 "00101100" // /* MW 2 */ + 14103 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 49 81 first + 14104 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14105 "00011110" // /* MW 3 */ + 14106 "11110101" // /* MW 2 */ + 14107 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 48 47 first + 14108 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14109 "10011110" // /* MW 3 */ + 14110 "00000100" // /* MW 2 */ + 14111 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 46 4 first +.tail_call + 14112 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */ + 14113 "00000000" // /* MW 5 */ + 14114 "00000000" // /* MW 4 */ + 14115 "10001000" // /* MW 3 */ + 14116 "00010100" // /* MW 2 */ + 14117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14121 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 + 14127 "00000000" // /* MW 1 */ +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function _b891_wrapper _Z13_b891_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 53 first +.src_ref 0 "0_0_reloadable3.cc" 55 79 +.function_start + 14128 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14129 "11000000" // /* MW 3 */ + 14130 "01100000" // /* MW 2 */ + 14131 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 55 79 first + 14132 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14133 "00011110" // /* MW 3 */ + 14134 "00111100" // /* MW 2 */ + 14135 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 56 47 first + 14136 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14137 "10011110" // /* MW 3 */ + 14138 "11101100" // /* MW 2 */ + 14139 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 58 81 first + 14140 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14141 "10011110" // /* MW 3 */ + 14142 "00010101" // /* MW 2 */ + 14143 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 57 80 first + 14144 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14145 "00011110" // /* MW 3 */ + 14146 "00000101" // /* MW 2 */ + 14147 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 54 4 first +.tail_call + 14148 "10000100" // J #11488 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11488 delay_slots=5 */ + 14149 "00000000" // /* MW 5 */ + 14150 "00000000" // /* MW 4 */ + 14151 "01110000" // /* MW 3 */ + 14152 "00010110" // /* MW 2 */ + 14153 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14155 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14161 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + 14163 "00000000" // /* MW 1 */ +.label __Z13_b919_wrapperPPv___func_begin0 +.label _Z13_b919_wrapperPPv +.function _b919_wrapper _Z13_b919_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 62 first +.src_ref 0 "0_0_reloadable3.cc" 64 79 +.function_start + 14176 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14177 "11000000" // /* MW 3 */ + 14178 "01100000" // /* MW 2 */ + 14179 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 64 79 first + 14180 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14181 "00011110" // /* MW 3 */ + 14182 "00011100" // /* MW 2 */ + 14183 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 65 79 first + 14184 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14185 "10011110" // /* MW 3 */ + 14186 "00101100" // /* MW 2 */ + 14187 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 67 81 first + 14188 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14189 "10011110" // /* MW 3 */ + 14190 "11110101" // /* MW 2 */ + 14191 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 66 47 first + 14192 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14193 "00011110" // /* MW 3 */ + 14194 "00000101" // /* MW 2 */ + 14195 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 63 4 first +.tail_call + 14196 "10000100" // J #13504 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13504 delay_slots=5 */ + 14197 "00000000" // /* MW 5 */ + 14198 "00000000" // /* MW 4 */ + 14199 "01100000" // /* MW 3 */ + 14200 "00011010" // /* MW 2 */ + 14201 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14205 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b919_wrapperPPv__end +.label __Z13_b919_wrapperPPv___func_end0 + 14211 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 115 4 first +.function_start + 14224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14225 "01000001" // /* MW 5 */ + 14226 "10100000" // /* MW 4 */ + 14227 "00101111" // /* MW 3 */ + 14228 "11000000" // /* MW 2 */ + 14229 "00000000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14231 "00011100" // /* MW 3 */ + 14232 "11000110" // /* MW 2 */ + 14233 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14235 "00011100" // /* MW 3 */ + 14236 "11000110" // /* MW 2 */ + 14237 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14239 "00011100" // /* MW 3 */ + 14240 "11000110" // /* MW 2 */ + 14241 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14243 "00011100" // /* MW 3 */ + 14244 "11000110" // /* MW 2 */ + 14245 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14247 "00011100" // /* MW 3 */ + 14248 "11000110" // /* MW 2 */ + 14249 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14251 "00011100" // /* MW 3 */ + 14252 "11000110" // /* MW 2 */ + 14253 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14255 "00011100" // /* MW 3 */ + 14256 "11000110" // /* MW 2 */ + 14257 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14259 "00011100" // /* MW 3 */ + 14260 "11000110" // /* MW 2 */ + 14261 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14263 "00011100" // /* MW 3 */ + 14264 "11000110" // /* MW 2 */ + 14265 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14267 "00011100" // /* MW 3 */ + 14268 "11000110" // /* MW 2 */ + 14269 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14271 "00011100" // /* MW 3 */ + 14272 "11000110" // /* MW 2 */ + 14273 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14275 "00011100" // /* MW 3 */ + 14276 "11000110" // /* MW 2 */ + 14277 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14279 "00011100" // /* MW 3 */ + 14280 "11000110" // /* MW 2 */ + 14281 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14283 "00011100" // /* MW 3 */ + 14284 "11000110" // /* MW 2 */ + 14285 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14287 "00011100" // /* MW 3 */ + 14288 "11000110" // /* MW 2 */ + 14289 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14291 "00011100" // /* MW 3 */ + 14292 "11000110" // /* MW 2 */ + 14293 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14295 "00011100" // /* MW 3 */ + 14296 "11000110" // /* MW 2 */ + 14297 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14299 "00011100" // /* MW 3 */ + 14300 "11000110" // /* MW 2 */ + 14301 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14303 "00011100" // /* MW 3 */ + 14304 "11000110" // /* MW 2 */ + 14305 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14307 "00011100" // /* MW 3 */ + 14308 "11000110" // /* MW 2 */ + 14309 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14311 "00011100" // /* MW 3 */ + 14312 "11000110" // /* MW 2 */ + 14313 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14315 "00011100" // /* MW 3 */ + 14316 "11000110" // /* MW 2 */ + 14317 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14319 "00011100" // /* MW 3 */ + 14320 "11000110" // /* MW 2 */ + 14321 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14323 "00011100" // /* MW 3 */ + 14324 "11000110" // /* MW 2 */ + 14325 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14327 "00011100" // /* MW 3 */ + 14328 "11000110" // /* MW 2 */ + 14329 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14331 "00011100" // /* MW 3 */ + 14332 "11000110" // /* MW 2 */ + 14333 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14335 "00011100" // /* MW 3 */ + 14336 "11000110" // /* MW 2 */ + 14337 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14339 "00011100" // /* MW 3 */ + 14340 "11000110" // /* MW 2 */ + 14341 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 119 first + 14342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 14343 "00000000" // /* MW 3 */ + 14344 "00101000" // /* MW 2 */ + 14345 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 first +.delay_slot + 14346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14347 "00011100" // /* MW 3 */ + 14348 "11000110" // /* MW 2 */ + 14349 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 14350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14351 "00011100" // /* MW 3 */ + 14352 "11000110" // /* MW 2 */ + 14353 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 14354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14355 "00011100" // /* MW 3 */ + 14356 "11000110" // /* MW 2 */ + 14357 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 14358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14359 "00011100" // /* MW 3 */ + 14360 "11000110" // /* MW 2 */ + 14361 "00010000" // /* MW 1 */ +.delay_slot + 14362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14363 "10100000" // /* MW 3 */ + 14364 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 14365 "00011000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.lst new file mode 100644 index 0000000000000000000000000000000000000000..dbc805287627de5330bc9a4f15514421ea46bfa2 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.lst @@ -0,0 +1,4814 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:02 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable3 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2352 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2352 0x00 0xc6 0xd1 0x21 0x41 0xd4 LDA r17, [p0]; MOV r2, r1 + 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2364 0xfe 0xf3 0xb0 0x00 0x2b 0xd0 0x70 0x02 ST p7, [sp, #-12]; MOV r1, r15 + 2372 0xff 0x87 0xb0 0x01 0xe8 0x90 0x70 0x02 ST lr, [sp, #-4]; MOV r15, r2 + 2380 0xff 0x06 0xb7 0xc1 0xe0 0x5c ST r1, [sp, #-8]; NEZ r16, r15 + 2386 0x1e 0x98 0x20 0xf8 MOV r26, r16 + 2390 0x00 0x00 NOPX + 2392 0x1f 0x68 0x82 0x18 ADD.NC p7, r17, #4 + 2396 0x07 0x1e 0x36 0x98 LDA r17, [p7], #4 + 2400 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2404 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2408 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2412 0x00 0x00 NOPX + 2414 0x00 0x00 NOPX + 2416 0x00 0x00 NOPX + 2418 0x00 0x00 NOPX + 2420 0x00 0x00 NOPX + 2422 0x00 0x00 NOPX + 2424 0x14 0x63 0x32 0x18 SEL.EQZ r17, r17, r19, r27 + 2428 0x0f 0xd6 0x31 0x98 ST r17, [p7, #-12] + 2432 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 2436 0x00 0x00 NOPX + 2438 0x00 0x00 NOPX + 2440 0x00 0x00 NOPX + 2442 0x14 0x97 0x18 0x18 ACQ.COND r18, r17, r26 + 2446 0x10 0x24 0x09 0x18 MOVX r18, #2 + 2450 0x14 0x29 0x2d 0x98 LSHL r20, r16, r18 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2454 0x18 0x8a 0x20 0xf8 MOV dj0, r20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2458 0x00 0x4e 0xdf 0xd8 0x8b 0x0c LDA r19, [p0, dj0]; ST dj0, [sp, #-20] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2464 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2466 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2468 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2470 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2472 0x10 0x26 0x05 0x18 MOVX r19, #1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2476 0x14 0xf4 0xfc 0x98 LTU r26, r19, r15 + 2480 0xfe 0x6a 0xb0 0x03 0xb4 0xc1 0x00 0x02 ST r26, [sp, #-16]; ADD.NC p7, r19, #4 + 2488 0x07 0x1e 0x76 0x98 LDA r19, [p7], #4 + 2492 0x07 0x3e 0xb6 0x98 LDA r21, [p7], #12 + 2496 0x07 0xee 0x96 0x98 LDA r20, [p7], #-8 + 2500 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2504 0x00 0x00 NOPX + 2506 0x00 0x00 NOPX + 2508 0x00 0x00 NOPX + 2510 0x00 0x00 NOPX + 2512 0x00 0x00 NOPX + 2514 0x00 0x00 NOPX + 2516 0x14 0xe7 0x52 0x18 SEL.EQZ r19, r19, r21, r27 + 2520 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2524 0x00 0x00 NOPX + 2526 0x00 0x00 NOPX + 2528 0x00 0x00 NOPX + 2530 0x00 0x00 NOPX + 2532 0x15 0x17 0x18 0x18 ACQ.COND r20, r17, r26 + 2536 0x10 0x23 0x2d 0x98 LSHL r17, r0, r18 + 2540 0x18 0x88 0xa0 0xf8 MOV dj0, r17 + 2544 0x00 0x07 0xce 0xc9 0x00 0x44 MOVXM p7, #509056 + 2550 0xe0 0x13 0xdf 0xd4 0x2b 0x0c LDA p1, [p7, dj0]; ST r16, [sp, #-24] + 2556 0x00 0x00 NOPX + 2558 0x00 0x00 NOPX + 2560 0x00 0x00 NOPX + 2562 0x00 0x00 NOPX + 2564 0x00 0x00 NOPX + 2566 0x00 0x00 NOPX +.no_stack_arguments + 2568 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2572 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2580 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2582 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 2592 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 2598 0x07 0xeb 0x51 0x18 LDA r26, [sp, #-24] + 2602 0x07 0xec 0x41 0x18 LDA dj0, [sp, #-20] + 2606 0x07 0xf0 0x29 0x18 LDA el0, [sp, #-16] + 2610 0x00 0x00 NOPX + 2612 0x00 0x00 NOPX + 2614 0x00 0x00 NOPX + 2616 0x19 0x68 0x88 0x18 ADD.NC p1, r17, #16 + 2620 0x01 0x06 0x36 0x98 LDA r17, [p1] + 2624 0x00 0x00 NOPX + 2626 0x00 0x00 NOPX + 2628 0x00 0x00 NOPX + 2630 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2632 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2634 0x1e 0xa0 0x1c 0xf8 MOV r26, el0 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2638 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2642 0x3e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p1, #-4]; MOV r27, r15 + 2648 0x07 0x02 0x56 0x98 LDA r18, [p7, dj0] + 2652 0x00 0x00 NOPX + 2654 0x00 0x00 NOPX + 2656 0x00 0x00 NOPX + 2658 0x00 0x00 NOPX + 2660 0x00 0x00 NOPX + 2662 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2666 0x8c 0x66 0x40 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 + 2672 0x00 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p1, #-4] + 2678 0x00 0x00 NOPX + 2680 0x00 0x00 NOPX + 2682 0x00 0x00 NOPX + 2684 0x00 0x00 NOPX + 2686 0x00 0x00 NOPX + 2688 0x00 0x00 NOPX + 2690 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2694 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 2698 0x00 0xf6 0x36 0x98 LDA r17, [p0, #-4] + 2702 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] + 2706 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] + 2710 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 2716 0x00 0x00 NOPX + 2718 0x00 0x00 NOPX + 2720 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 2724 0x1e 0xe0 0x1c 0xf8 MOV r27, el0 +.delay_slot + 2728 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot + 2732 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 2736 0x08 0xf6 0x11 0x98 ST r16, [p0, #-4] +.delay_slot +.swstall delay_slot + 2740 0x00 0x00 NOPX +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 2752 +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function_start + 2752 0x03 0x85 0xd0 0x00 0x40 0x88 0x49 0x60 0x78 0xba LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 + 2762 0x03 0x81 0xd0 0x3e 0x57 0xe9 0x30 0x82 0x48 0xba LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 + 2772 0xff 0x81 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r1, #-4; PADDXM [sp], #64 + 2782 0x01 0x86 0x07 0xfd 0xb5 0x81 0x00 0x28 0x00 0x10 0x58 0x76 MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 + 2794 0x00 0x63 0x07 0xf9 0xd5 0xbf 0x57 0xaa 0x88 0x0f 0x58 0x76 MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 + 2806 0xfe 0xbe 0xb0 0x60 0x02 0x5c ST r15, [sp, #-12]; MOVX r24, #0 + 2812 0x00 0x00 NOPX + 2814 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2818 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2822 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 2826 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 2830 0x00 0x00 NOPX + 2832 0x00 0x00 NOPX + 2834 0x00 0x00 NOPX + 2836 0x00 0x00 NOPX + 2838 0x00 0x00 NOPX + 2840 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2844 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2848 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 2852 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 2856 0x00 0x00 NOPX + 2858 0x00 0x00 NOPX + 2860 0x00 0x00 NOPX + 2862 0x00 0x00 NOPX + 2864 0x00 0x00 NOPX + 2866 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2870 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2874 0x00 0x04 0x0e 0x98 LDA eh0, [p0] + 2878 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4] + 2882 0x00 0x00 NOPX + 2884 0x00 0x00 NOPX + 2886 0x00 0x00 NOPX + 2888 0x00 0x00 NOPX + 2890 0x00 0x00 NOPX + 2892 0x09 0x04 0x09 0x98 ST eh0, [p1] + 2896 0x09 0x14 0x29 0x98 ST el0, [p1, #4] + 2900 0x02 0xdd 0xaa 0x98 LDA.u8 r13, [p2], #-3 + 2904 0x02 0x1e 0x2a 0x98 LDA.u8 r17, [p2], #1 + 2908 0x02 0xbd 0xca 0x98 LDA.u8 r14, [p2], #-5 + 2912 0x02 0xfd 0xfa 0x98 LDA.u16 r15, [p2], #-2 + 2916 0x02 0x0a 0x6a 0x98 LDA.u8 r19, [p2], m0 + 2920 0x02 0xac 0xea 0x98 LDA.u8 r7, [p2], #-6 + 2924 0x00 0x00 NOPX + 2926 0x13 0x42 0x1d 0x98 LSHL r1, r13, r1 + 2930 0x0c 0x20 0xf9 0x31 0x01 0x24 EQ r16, r1, r16; ADD.NC r18, r17, #1 + 2936 0x14 0xa4 0x5d 0x98 LSHL r18, r18, r5 + 2940 0x13 0xf6 0x47 0x98 EQ r27, r15, r4 + 2944 0xc1 0x4a 0x40 0xb7 0x39 0xe4 SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 + 2950 0x14 0x7b 0x22 0x18 SEL.EQZ r29, r17, r18, r27 + 2954 0x11 0xcc 0x67 0x98 EQ r6, r7, r6 + 2958 0x11 0xb7 0x04 0x98 AND r27, r6, r16 + 2962 0x7b 0xeb 0xbc 0xbb 0x41 0xe4 LSHL r15, r15, r21; MOV r25, r27 + 2968 0xfd 0xbe 0xb3 0x9b 0x04 0x5c ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 + 2974 0xc8 0x05 0xf8 0x40 0x01 0x84 JNZ r25, #3056 +.delay_slot + 2980 0x11 0xb6 0x47 0x98 EQ r27, r6, r4 +.delay_slot + 2984 0x13 0x71 0x44 0x98 AND r24, r13, r20 +.delay_slot + 2988 0x14 0xfc 0x5d 0x98 LSHL r30, r19, r5 +.delay_slot + 2992 0x16 0xe8 0x4d 0x98 LSHL r20, r27, r4 +.delay_slot + 2996 0x11 0x8c 0x32 0x18 SEL.EQZ r6, r6, r3, r27 + 3000 0xd8 0x05 0xf8 0x40 0x01 0x84 JNZ r27, #3056 +.delay_slot +.swstall delay_slot + 3006 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3008 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3010 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3012 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3014 0x00 0x00 NOPX + 3016 0x00 0x2f 0x00 0x00 0x01 0x84 0x00 0x00 0x20 0xba MOVA r15, #1; J #3104 +.delay_slot + 3026 0x00 0x1a 0x00 0x3e 0x57 0xab 0x88 0x0c 0x58 0xba MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 +.delay_slot + 3036 0x05 0x42 0x21 0x20 0x41 0x64 MOVX r21, #4; MOV r2, #16 +.delay_slot + 3042 0x10 0x1a 0x0d 0x18 MOVX r13, #3 +.delay_slot + 3046 0x10 0x0e 0x3d 0x18 MOVX r7, #15 +.delay_slot + 3050 0x00 0x2c 0xff 0x91 0xe2 0x2c NOPA; MOVX r4, #-4 +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 3056 0x02 0x02 0x81 0x82 0x0b 0x01 0x50 0x88 0x8f 0xfc 0x58 0x76 MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 + 3068 0x20 0x18 0xe0 0x01 0xa0 0x0b 0x88 0x0c 0x58 0xba ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 + 3078 0x02 0x02 0x00 0x3e 0x57 0xa9 0xe8 0x01 0x58 0xba MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 + 3088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x71 0xe9 0xa8 0x03 0x58 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 + 3104 0x5d 0xc5 0x50 0x1b 0xb3 0x3c 0x00 0x3c 0x58 0xba LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 + 3114 0x41 0x05 0x50 0x03 0x2d 0x12 0x87 0xcd 0x58 0xba LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 + 3124 0x00 0x57 0x00 0x3b 0xda 0x91 0x80 0x37 0x58 0xba MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 + 3134 0x01 0x03 0x00 0x2b 0xb0 0x3d 0x07 0xbc 0x58 0xba MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 + 3144 0x40 0x10 0x00 0x1f 0x6c 0x6c 0x80 0x70 0x58 0xba MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 + 3154 0xb5 0x92 0x08 0x1e 0x5d 0x64 EXTEND.u8 r22, r22; MOV m4, #-105 + 3160 0xfe 0x5a 0xb0 0x2d 0x61 0x6f 0x80 0x31 0x59 0x3a ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 + 3170 0xf7 0xba 0x3c 0x1f 0x05 0x64 SUB r30, r30, r29; MOV m6, #-63 + 3176 0x13 0xc2 0x11 0x98 SUB r1, r15, r1 + 3180 0x8f 0xc3 0xf0 0xa0 0x1d 0x64 MUL r31, r17, r1; MOV r1, #7 + 3186 0x16 0xa3 0x21 0x98 SUB r17, r26, r18 + 3190 0x17 0xfe 0x1d 0x98 LSHL r31, r31, r1 + 3194 0x55 0x7e 0x30 0x3b 0xf1 0xee 0x80 0x57 0x59 0x3a ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 + 3204 0x4d 0x55 0x50 0x2f 0x30 0x3d 0x87 0xb2 0x58 0xba LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3214 0xfd 0x4e 0xb9 0xcc 0x7b 0x5c ST r19, [sp, #-24]; LSHL r19, r19, r3 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3220 0x49 0x54 0xe0 0x3f 0x6b 0x2d 0x00 0xf6 0x58 0xba ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3230 0x45 0x43 0x50 0x27 0x38 0x10 0x87 0x50 0x58 0xba LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3240 0x15 0xfe 0x67 0x98 EQ r31, r23, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3244 0x10 0xe0 0x67 0x98 EQ r16, r3, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3248 0x17 0xf7 0x05 0x98 OR r27, r31, r16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3252 0x11 0xeb 0x54 0x98 AND r21, r7, r21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3256 0xf7 0xa3 0xd8 0xa0 0x61 0x64 ASHL r30, r30, r17; MOV r17, #24 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3262 0xfc 0x42 0xb0 0x1f 0x29 0x6f 0xcf 0x80 0x49 0x3a ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 + 3272 0x43 0xea 0x3f 0x46 0x3b 0x5c ST r26, [p2], #4; LSHL r17, r30, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3278 0x51 0x6a 0x30 0x02 0x00 0xa8 0x50 0x02 ST r26, [p2], m4; MOV m4, #168 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3286 0x5d 0x49 0x57 0xe7 0xf5 0xa7 0xb0 0x2c 0x0d 0xce 0x78 0x76 LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3298 0x16 0xe3 0x15 0x98 OR r17, r27, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3302 0x16 0xb7 0x81 0x98 SUB r27, r26, r24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3306 0x14 0xb0 0x90 0x18 EXTEND.u8 r24, r18 + 3310 0x00 0x00 NOPX + 3312 0x00 0x00 NOPX + 3314 0x00 0x00 NOPX + 3316 0x13 0xe5 0x21 0x98 SUB r18, r15, r18 + 3320 0x0a 0xca 0x51 0x98 ST r18, [p2], m6 + 3324 0x02 0xaa 0x4a 0x98 LDA.u8 r18, [p2], m5 + 3328 0x00 0x00 NOPX + 3330 0x00 0x00 NOPX + 3332 0x00 0x00 NOPX + 3334 0x00 0x00 NOPX + 3336 0x00 0x00 NOPX + 3338 0x00 0x00 NOPX + 3340 0x14 0xa4 0xe1 0x98 SUB r18, r18, r14 + 3344 0x14 0xa5 0xbe 0x98 ASHL r18, r18, r27 + 3348 0x14 0xa4 0x2d 0x98 LSHL r18, r18, r2 + 3352 0x00 0x01 0x0d 0xa0 0x00 0x44 MOVXM r27, #65536 + 3358 0x16 0xe5 0x20 0x98 ADD r18, r27, r18 + 3362 0x00 0xff 0x0d 0xa0 0x00 0x44 MOVXM r27, #16711680 + 3368 0xde 0xe4 0x99 0x3f 0xc1 0x64 AND r27, r27, r18; MOV r18, #-16 + 3374 0xde 0xe2 0xb8 0xbf 0xe1 0x64 OR r27, r27, r17; MOV r17, #-8 + 3380 0x43 0xee 0x39 0xce 0x3b 0x5c ST r27, [p2], #4; LSHL r19, r19, r17 + 3386 0x16 0xb5 0x31 0x98 SUB r26, r26, r19 + 3390 0x15 0x29 0xad 0x98 LSHL r20, r20, r26 + 3394 0x13 0xb5 0x65 0x98 OR r26, r14, r22 + 3398 0x4d 0x6a 0x3f 0x69 0x20 0x5c ST r26, [p2], m3; EXTEND.u8 r26, r30 + 3404 0x49 0x65 0x50 0x37 0x49 0x6f 0xce 0xa8 0xa8 0xba LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3414 0xb5 0xa3 0xb8 0xa3 0xf9 0x64 LSHL r22, r22, r17; MOV r17, #254 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3420 0x45 0x64 0xed 0x6b 0x1f 0x2c ST.s8 r25, [p2], m1; MUL r26, r26, r24 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3426 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3428 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3430 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3432 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3434 0x86 0x5f 0xbd 0xb5 0xca 0xa4 LSHL r25, r16, r15; ADD.NC r27, r21, r25 + 3440 0xf8 0x06 0xf8 0x40 0x01 0x84 JNZ r31, #3568 +.delay_slot + 3446 0x9d 0x41 0xed 0xbb 0xf2 0xa4 ADD r21, r19, #3; ADD.NC r27, r27, r30 +.delay_slot + 3452 0x16 0xeb 0x5d 0x98 LSHL r21, r27, r21 +.delay_slot + 3456 0x16 0x63 0x14 0x98 AND r17, r25, r17 +.delay_slot + 3460 0x51 0x46 0x30 0x0d 0xbe 0x3e 0x28 0x01 0x59 0x3a ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 +.delay_slot + 3470 0x18 0x9b 0x9c 0xf8 MOV el1, r27 + 3474 0x07 0xe3 0x91 0x18 LDA r28, [sp, #-32] + 3478 0x00 0x00 NOPX + 3480 0x00 0x00 NOPX + 3482 0x00 0x00 NOPX + 3484 0x00 0x00 NOPX + 3486 0x00 0x00 NOPX + 3488 0x00 0x00 NOPX + 3490 0xe0 0x06 0xf8 0x40 0x01 0x84 JNZ r28, #3568 +.delay_slot +.swstall delay_slot + 3496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3502 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3504 0x00 0x00 NOPX + 3506 0x00 0xbc 0x00 0x01 0x10 0x8b 0x28 0x40 0x58 0xba MOVA r28, #5; MOVX r17, #4; MOV r25, #64 + 3516 0x14 0x7e 0xd2 0x18 SEL.EQZ r31, r17, r13, r27 + 3520 0x16 0x76 0x67 0x98 EQ r27, r25, r6 + 3524 0xff 0x38 0x4f 0xa0 0x01 0x64 SEL.EQZ r28, r31, r28, r27; MOV r31, #0 + 3530 0x10 0x32 0x50 0x18 EXTEND.s8 r25, r0 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3534 0x16 0x7d 0xef 0x98 MUL r30, r25, r30 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3538 0xce 0xe3 0x5d 0xc4 0x39 0xe4 LT r27, r25, r17; MOV r27, el1 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3544 0x13 0xe3 0x82 0x18 SEL.EQZ r17, r15, r24, r27 + 3548 0x14 0x63 0xef 0x98 MUL r17, r17, r30 + 3552 0x17 0xf9 0xc1 0x98 SUB r28, r31, r28 + 3556 0x14 0x63 0xce 0x98 ASHL r17, r17, r28 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3560 0x00 0x23 0x14 0x81 0x00 0x00 0x1c 0x22 EXTEND.u8 r17, r17; NOPV +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3568 0x00 0x19 0x00 0x3f 0xc7 0xeb 0x70 0x0e 0x78 0xba MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3578 0x16 0x7f 0xc2 0x18 SEL.EQZ r31, r25, r28, r27 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3582 0xfd 0x6e 0x20 0x21 0x04 0x83 0x4f 0x74 0xa8 0xba LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3592 0x09 0x1e 0x00 0x29 0x44 0x83 0xa8 0x09 0x58 0xba MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 0x17 0x73 0xe2 0x18 SEL.EQZ r25, r29, r30, r27 + 3606 0x15 0xf9 0x88 0x98 NE r28, r23, r24 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3610 0x17 0x7b 0x3d 0x98 LSHL r29, r29, r19 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3614 0xfd 0xde 0x20 0x00 0x00 0x03 0x0a 0x04 0x10 0xba LDA r23, [sp, #-20]; MOVXM r24, #1032 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3624 0xcc 0xe7 0xbf 0x3a 0xff 0x24 LSHL r19, r25, r19; ADD.NC r30, r26, #-1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3630 0x14 0xcf 0xe6 0x18 MAC r7, r7, r19, r30 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3634 0xb4 0xd2 0x0b 0xa8 0x29 0x64 EXTEND.u8 r19, r22; MOV r23, #522 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3640 0xbd 0xb0 0x4d 0x21 0x01 0x64 SEL.EQZ r22, r23, r24, r27; MOV r26, #64 + 3646 0x31 0xb5 0x1d 0xc2 0x39 0xe4 NE r6, r6, r26; MOV r27, eh0 + 3652 0x11 0xcf 0x24 0x98 AND r7, r7, r18 + 3656 0xbd 0xde 0x4d 0xa6 0x41 0xe4 SEL.EQZ r23, r23, r15, r27; MOV r27, r6 + 3662 0x29 0x08 0x49 0x20 0x7d 0x64 SEL.EQZ r4, r5, r4, r27; MOV r18, #31 + 3668 0x15 0xef 0x24 0x98 AND r23, r23, r18 + 3672 0xdc 0x1e 0x00 0x20 0x42 0x6e 0x4f 0x70 0x58 0xba MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 + 3682 0x17 0xbd 0x22 0x18 SEL.EQZ r30, r30, r18, r27 + 3686 0x12 0x05 0x00 0x2f 0xa9 0xfe 0x09 0x20 0x58 0xba MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 + 3696 0x14 0x20 0x52 0x18 SEL.EQZ r16, r16, r5, r27 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3700 0x8e 0x09 0xfd 0xbd 0xf2 0xa4 MUL r24, r17, r4; ADD.NC r27, r29, r30 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3706 0x84 0x3f 0xbd 0xc4 0x39 0xe4 LSHL r16, r16, r31; MOV r27, el1 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3712 0xfb 0xee 0xb7 0x6b 0x5f 0x5c ST r27, [sp, #-36]; MUL r26, r14, r26 + 3718 0x10 0x84 0x32 0x18 SEL.EQZ r2, r2, r3, r27 + 3722 0x13 0x7e 0x0c 0x98 LTU r31, r13, r0 + 3726 0x15 0x31 0x8f 0x98 MUL r24, r20, r24 + 3730 0x17 0xf7 0xc5 0x98 OR r27, r31, r28 + 3734 0x10 0x03 0x07 0xee 0x95 0xb7 0xc0 0xee 0x89 0x00 0x58 0x76 MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 + 3746 0x1d 0x28 0x40 0xb7 0x39 0xe4 SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 + 3752 0x00 0x00 0x0f 0xac 0x0c 0x44 MOVXM r31, #1542 + 3758 0xfd 0x12 0xb0 0x1f 0xb0 0x3c 0x89 0x3f 0xc9 0x3a ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 + 3768 0xed 0x8c 0x82 0x1c 0x91 0xad 0xff 0x92 0xcc 0x7f 0xc8 0x76 MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 + 3780 0x4d 0x5a 0x30 0x2b 0x57 0xef 0x70 0x8e 0x79 0x3a ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 + 3790 0x02 0xd9 0x02 0x1f 0x51 0xa9 0x4e 0x0e 0x00 0x58 0x58 0x76 MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3802 0x51 0x62 0x3f 0xe3 0x24 0x5c ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3808 0xfc 0x6e 0x22 0xef 0x91 0xab 0xce 0x0f 0x69 0x90 0x78 0x76 LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3820 0x22 0xf3 0x00 0x29 0xce 0x12 0x8c 0xff 0xc8 0xba MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3830 0xf7 0x1d 0x00 0x3b 0xea 0x73 0x70 0x8e 0x78 0xba MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3840 0xfc 0xee 0x2e 0xca 0x44 0x2c LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3846 0x04 0x1f 0x00 0x3f 0x39 0x93 0x69 0x90 0x78 0xba MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3856 0x17 0xc4 0x22 0x18 SEL.EQZ r2, r31, r2, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3860 0x10 0xeb 0x51 0x98 SUB r21, r3, r21 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3864 0x47 0x8e 0x30 0x04 0x27 0x90 0x6f 0xc0 0x59 0x3a ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3874 0x17 0x38 0x32 0x18 SEL.EQZ r28, r28, r3, r27 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 3878 0x15 0xfe 0x7f 0x98 MUL r31, r23, r7 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 3882 0xfb 0xc6 0x2e 0x0c 0x64 0x2c LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3888 0x5d 0x8e 0x30 0x35 0x29 0x7c 0x80 0x28 0x59 0x3a ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3898 0x45 0x56 0x31 0x0d 0xe4 0x5c ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3904 0xfe 0x02 0x20 0x06 0x38 0xfe 0xa9 0xfc 0xa8 0xba LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3914 0xfd 0xc6 0x22 0x1f 0x11 0x80 0x05 0x06 0x06 0x7a LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3924 0x4f 0xce 0x30 0x00 0x00 0x3e 0x6f 0xf8 0x11 0x3a ST r19, [p2], #28; MOVXM r19, #65520 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3934 0x43 0xd2 0x30 0x3f 0x49 0xa4 0x4b 0xbf 0xc9 0x3a ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3944 0x43 0xc6 0x31 0x56 0x9c 0x5c ST r17, [p2], #4; MSC r21, r21, r2, r20 + 3950 0x43 0x8a 0x3f 0x7a 0x81 0x5c ST r2, [p2], #4; ADD r30, r30, r20 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 3956 0x43 0xfa 0x38 0x73 0xe3 0x5c ST r30, [p2], #4; SUB r28, r16, r31 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 3962 0x43 0xd6 0x30 0x2d 0xf8 0x30 0x60 0x00 0x59 0x3a ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3972 0x43 0x8c 0x30 0x3e 0x20 0x7e 0x2c 0x7f 0xc9 0x3a ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 + 3982 0x43 0x8c 0x38 0xb8 0x0c 0x5c ST dc0, [p2], #4; MAC r14, r14, r17, r0 + 3988 0x43 0xda 0x30 0x27 0x01 0x24 0x48 0x00 0x59 0x3a ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 + 3998 0x43 0xf2 0x30 0x05 0x1f 0x8f 0x70 0x0e 0x79 0x3a ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 + 4008 0x43 0x92 0x3b 0xb9 0xdf 0x5c ST r4, [p2], #4; MUL r14, r23, r14 + 4014 0x43 0xc6 0x30 0x21 0x0f 0x8c 0x08 0x06 0x59 0x3a ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4024 0x09 0x00 0x02 0x1e 0x11 0x9b 0x00 0x13 0x69 0x90 0x78 0x76 MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4036 0xfd 0x16 0x20 0x14 0xa4 0x2c LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4042 0x11 0x63 0xaf 0x98 MUL r17, r5, r26 + 4046 0x43 0x8c 0x30 0x07 0x08 0x6d 0x07 0xc8 0x59 0x3a ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 + 4056 0xfc 0x63 0x02 0x48 0x61 0xa0 0xf7 0xed 0xa8 0xc1 0xc8 0x76 MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 + 4068 0x41 0x0a 0x36 0xc0 0x7b 0x5c ST r2, [p2], m0; LSHL r16, r13, r3 + 4074 0x43 0xda 0x38 0x8e 0x41 0x5c ST r22, [p2], #4; ADD r3, r17, r18 + 4080 0x43 0xca 0x38 0xc8 0x9c 0x5c ST r18, [p2], #4; MSC r18, r18, r17, r4 + 4086 0x43 0x92 0x32 0x94 0xdb 0x5c ST r4, [p2], #4; LSHL r5, r5, r6 + 4092 0x43 0x8e 0x30 0x1a 0x38 0x04 0x0f 0xfd 0x59 0x3a ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 + 4102 0x10 0xc0 0x0e 0x98 ASHL r0, r3, r0 + 4106 0x43 0xca 0x37 0x10 0x1f 0x5c ST r18, [p2], #4; MUL r4, r14, r0 + 4112 0x43 0x8c 0x30 0x0c 0x3b 0x5c ST dc0, [p2], #4; LSHL r3, r0, r1 + 4118 0xff 0xb6 0x22 0x1c 0x61 0x80 0x03 0xc6 0x31 0xfa LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 + 4128 0xff 0x3a 0x22 0x1c 0x91 0xba 0x70 0x30 0x28 0x3f 0xc8 0x76 LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 + 4140 0xfe 0xbe 0x22 0x1c 0x31 0x80 0x01 0x41 0xaf 0xfa LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 + 4150 0x43 0x8e 0x30 0x50 0x00 0x5c ST r3, [p2], #4; RET lr +.delay_slot + 4156 0x0a 0x5c 0xf1 0x98 ST r7, [p2], #20 +.delay_slot + 4160 0x0a 0x1c 0x11 0x98 ST r0, [p2], #4 +.delay_slot + 4164 0x0a 0x1c 0x51 0x98 ST r2, [p2], #4 +.delay_slot + 4168 0x0a 0x04 0x51 0x98 ST r2, [p2] +.delay_slot + 4172 0x42 0x8a 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r2, [p2, #4]; PADDXM [sp], #-64 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + +.text_segment PM 4192 +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function_start + 4192 0x00 0x03 0x82 0x84 0x8b 0x01 0x80 0x08 0x0a 0x60 0x78 0x76 MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 + 4204 0x00 0x06 0x88 0x28 0x28 0x34 0x01 0x36 0x00 0x21 0x20 0x09 0x60 0x7e MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 + 4218 0x63 0x94 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA dn1, [p3], #4; MOVXM p4, #509032 + 4228 0x63 0x90 0xd0 0x00 0x00 0x04 0x78 0x78 0x10 0xba LDA m1, [p3], #4; MOVXM ls, #4336 + 4238 0x60 0x80 0xd0 0x00 0x00 0x05 0xb8 0x90 0x10 0xba LDA m0, [p3]; MOVXM le, #4384 + 4248 0x7a 0x82 0xd1 0x00 0x01 0x54 LDA r0, [p3, #-12]; MOV dj0, #0 + 4254 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 4258 0x00 0x00 NOPX + 4260 0x00 0x00 NOPX + 4262 0x00 0x0a 0x80 0x85 0x01 0xf4 VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 + 4268 0x3e 0x30 0x14 0x18 VLDB.POP.512.2D x0, [p0, lf0, r24, d1] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4272 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4276 0x00 0x0a 0x8a 0xe0 0xfd 0x34 VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4282 0xc6 0x02 0x80 0xf5 0x00 0x1c VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4288 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4292 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4294 0x00 0x2c 0xf0 0x00 0x54 0x00 0x01 0xa5 0x7e 0xba NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4304 0x00 0x2c 0xfc 0x60 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4320 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4336 0x00 0x2c 0xf8 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4352 0x00 0x2c 0xf0 0x00 0xad 0x80 0x03 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4368 0x00 0x2c 0xfc 0x60 0x29 0x00 0x03 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4384 0x00 0x2c 0xf0 0x00 0x23 0x00 0x03 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4400 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 0x0d 0x80 0x03 0x18 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 0x20 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4414 0x18 0x81 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x0 + 4418 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 4422 0xb0 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 + 4430 0x20 0x00 0x60 0x00 0x40 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 + 4438 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 4442 0xb0 0x00 0x60 0x50 0x00 0x5c VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr +.delay_slot + 4448 0x09 0x00 0x03 0x18 VST.FLUSH.512.CONV [p2, sf, r26] +.delay_slot + 4452 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] +.delay_slot +.swstall delay_slot + 4456 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4458 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4460 0x00 0x00 NOPX +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + +.text_segment PM 4464 +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function_start + 4464 0xf5 0xe0 0x86 0x3f 0x20 0x00 0x80 0x00 0x00 0x0e 0x91 0x11 0x60 0x7e MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 + 4478 0x00 0x73 0x07 0xf1 0x95 0xbf 0xc5 0x0a 0x2b 0x60 0x78 0x76 MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 + 4490 0x00 0x19 0x07 0xda 0x35 0x81 0x10 0x29 0x34 0x47 0x08 0x76 MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 + 4502 0x40 0xca 0xd7 0xf5 0x35 0x80 0x40 0x03 0xa8 0x00 0x10 0x76 LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 + 4514 0x0b 0x18 0x87 0xfd 0xd5 0x80 0x7f 0xff 0xef 0xff 0x90 0x76 MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 + 4526 0x00 0xb4 0x07 0xe1 0xb5 0x81 0x61 0x0a 0x07 0xec 0x58 0x76 MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 + 4538 0x01 0x95 0x07 0xed 0xf5 0x87 0x77 0xca 0x87 0xc4 0x58 0x76 MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 + 4550 0xff 0x73 0xb0 0x03 0x80 0x40 0x50 0x02 ST p7, [sp, #-8]; MOV m7, #64 + 4558 0x0f 0xe4 0x3d 0x98 ST lr, [sp, #-28] + 4562 0x00 0x00 NOPX + 4564 0x17 0x59 0x20 0x98 ADD r12, r29, r18 + 4568 0x41 0x32 0x36 0x77 0x9b 0x5c ST r12, [p2], m0; LSHL r29, r12, r28 + 4574 0x5b 0xf9 0x5e 0xf2 0x2f 0x2c LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 + 4580 0x02 0xc9 0x2a 0x98 LDA.u8 r9, [p2], m6 + 4584 0x00 0x00 NOPX + 4586 0x00 0x00 NOPX + 4588 0x00 0x00 NOPX + 4590 0x00 0x00 NOPX + 4592 0x00 0x00 NOPX + 4594 0x17 0x77 0xec 0x98 LTU r27, r29, r30 + 4598 0x16 0x5d 0x32 0x18 SEL.EQZ r14, r25, r19, r27 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4602 0x17 0xf6 0xcc 0x98 LTU r27, r31, r12 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4606 0x51 0x70 0xee 0xb7 0xcf 0x2c ST.s8 r28, [p2], m4; EQ r13, r29, r30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4612 0x13 0x7f 0x1d 0x98 LSHL r31, r13, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4616 0x16 0x58 0xe2 0x18 SEL.EQZ r12, r25, r14, r27 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4620 0x17 0xf9 0xc5 0x98 OR r28, r31, r28 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4624 0x8e 0xfd 0x9e 0x3c 0x62 0xa4 LTU r27, r17, r30; ADD.NC r28, r28, r12 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4630 0x16 0x79 0xc2 0x18 SEL.EQZ r28, r25, r28, r27 + 4634 0x14 0x7f 0xcc 0x98 LTU r31, r17, r28 + 4638 0x55 0x7e 0x3e 0xf7 0xd1 0x5c ST r31, [p2], m5; NE r29, r29, r30 + 4644 0x5d 0x79 0x54 0xb2 0x31 0x2c LDA.u8 r30, [p2], m7; NE r12, r9, r17 + 4650 0x00 0x00 NOPX + 4652 0x00 0x00 NOPX + 4654 0x00 0x00 NOPX + 4656 0x00 0x00 NOPX + 4658 0x00 0x00 NOPX + 4660 0x00 0x00 NOPX + 4662 0xf5 0xad 0x1f 0xbe 0xfc 0x24 NE r22, r30, r22; ADD.NC r31, r30, #-4 + 4668 0x60 0x09 0x40 0x40 0x01 0x84 JNZ r12, #4736 +.delay_slot + 4674 0x17 0x93 0x48 0x98 NE r9, r30, r20 +.delay_slot + 4678 0x17 0xfe 0x90 0x18 EXTEND.u8 r31, r31 +.delay_slot + 4682 0x12 0x6d 0x64 0x98 AND r22, r9, r22 +.delay_slot + 4686 0x17 0xef 0x7c 0x98 LTU r23, r31, r23 +.delay_slot + 4690 0x15 0xe1 0x64 0x98 AND r16, r23, r22 + 4694 0xe8 0x09 0x40 0x40 0x01 0x84 JNZ r29, #4736 +.delay_slot + 4700 0x0f 0xeb 0x1d 0x98 ST p6, [sp, #-24] +.delay_slot +.swstall delay_slot + 4704 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4706 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4708 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4710 0x00 0x00 NOPX + 4712 0x00 0x3b 0x00 0x00 0x02 0x56 0x00 0x00 0x20 0xba MOVA r27, #1; J #4784 +.delay_slot + 4722 0x18 0x19 0x9c 0xf8 MOV el0, r25 +.delay_slot + 4726 0x10 0x26 0x05 0x18 MOVX r19, #1 +.delay_slot +.swstall delay_slot + 4730 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4732 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4734 0x00 0x00 NOPX +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 4736 0x00 0x95 0x07 0xeb 0x1d 0xab 0xbf 0x3c 0x0c 0xce 0x78 0x76 MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 + 4748 0x17 0xab 0x5d 0x98 LSHL r21, r30, r21 + 4752 0x15 0x6b 0x92 0x18 SEL.EQZ r21, r21, r25, r27 + 4756 0x14 0xf7 0xe7 0x98 EQ r27, r19, r30 + 4760 0xac 0xf2 0x4d 0xb0 0x41 0xe4 SEL.EQZ r19, r21, r25, r27; MOV r27, r16 + 4766 0x16 0x67 0x32 0x18 SEL.EQZ r19, r25, r19, r27 + 4770 0x17 0x29 0x44 0x98 AND r20, r28, r20 + 4774 0x15 0x36 0xf0 0x18 NEZ r27, r20 + 4778 0x00 0x2c 0xf9 0xcf 0x8b 0x2c NOPA; OR r19, r19, r28 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 + 4784 0x01 0x90 0x82 0x6f 0x71 0xba 0x02 0x5c 0x10 0x00 0x60 0x76 MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 +.delay_slot + 4796 0x02 0x8a 0x67 0x18 ST.s8 r19, [p2], m4 +.delay_slot +.swstall delay_slot + 4800 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4802 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4804 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4806 0x00 0x00 NOPX + 4808 0x00 0xff 0xfa 0x3f 0xfe 0x44 MOVXM r20, #16777215 + 4814 0x14 0xa5 0x44 0x98 AND r18, r18, r20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4818 0x00 0x2c 0xf6 0xec 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r18, [p3, #28]; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4832 0x51 0xd2 0xd0 0x27 0x44 0x82 0xcf 0xfd 0x58 0xba LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4842 0x00 0x52 0x00 0x29 0x5f 0xfa 0x00 0x24 0x58 0xba MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4852 0x51 0x5a 0xd7 0xd0 0x2d 0xab 0x6b 0x26 0x07 0xcc 0x58 0x76 LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4864 0x51 0x5e 0xd7 0xde 0xd5 0xbf 0x37 0xea 0x00 0xc4 0x58 0x76 LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 + 4876 0x02 0xff 0xb6 0x98 LDA r29, [p2], #-4 + 4880 0x02 0x8b 0xf6 0x98 LDA r31, [p2], m4 + 4884 0x01 0x06 0xb6 0x98 LDA r21, [p1] + 4888 0x00 0xd2 0xda 0x26 0x5b 0x2c LDA r20, [p0]; LSHL r9, r20, r18 + 4894 0x04 0x07 0xd6 0x98 LDA r30, [p4] + 4898 0x15 0xad 0x2d 0x98 LSHL r22, r22, r18 + 4902 0x00 0x00 NOPX + 4904 0x17 0x67 0x3e 0x98 ASHL r19, r29, r19 + 4908 0x17 0xe3 0x18 0x98 NE r17, r31, r17 + 4912 0x88 0x09 0xd0 0x40 0x01 0x84 JNZ r17, #5024 +.delay_slot + 4918 0xbd 0xa5 0xba 0xb5 0xb2 0xa4 LSHL r22, r23, r18; ADD.NC r21, r21, r22 +.delay_slot + 4924 0x9d 0x65 0xb0 0x95 0xb2 0xa4 LSHL r21, r19, r18; ADD.NC dn0, r21, r22 +.delay_slot + 4930 0xfa 0x84 0xb0 0x01 0xca 0x68 0xa0 0x02 ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 +.delay_slot + 4938 0x1b 0xd0 0x80 0xf8 MOV r15, dn0 +.delay_slot + 4942 0x1e 0x6a 0xf9 0x58 ADD.NC p6, r21, r30 + 4946 0x00 0x07 0xce 0xc8 0xd0 0x44 MOVXM p7, #509032 + 4952 0xe0 0xc4 0x50 0xb4 0x80 0x2c LDA.s8 r17, [p7]; MOVX vaddSign0, #1 + 4958 0x00 0x00 NOPX + 4960 0xff 0x7f 0x0a 0x20 0x00 0x44 MOVXM r20, #-8454144 + 4966 0x18 0x02 0x91 0x78 VINSERT.32 x0, x0, #0, r20 + 4970 0x1d 0x15 0xe0 0xf8 MOV r20, sp +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4974 0x1f 0x6a 0x5f 0x18 ADD.NC p7, r20, #-66 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4978 0xe0 0xc6 0xe0 0x01 0x25 0xd4 ST.s16 r17, [p7]; VMOV bmll0, x0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4984 0x14 0x7a 0x80 0x18 MOVX crRnd, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4988 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4992 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4994 0x1c 0x41 0x01 0xb8 VEXTRACT.16 r17, x0, #0, vaddSign0 + 4998 0x00 0x00 NOPX + 5000 0x00 0x00 NOPX + 5002 0x07 0x06 0x32 0x98 LDA.s16 r17, [p7] + 5006 0x00 0x00 NOPX + 5008 0x00 0x00 NOPX + 5010 0x00 0x00 NOPX + 5012 0x00 0x00 NOPX + 5014 0x00 0x00 NOPX + 5016 0x00 0x00 NOPX + 5018 0x00 0x2c 0xff 0xa4 0x6b 0x0c NOPA; ST r17, [sp, #-48] +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5024 0x0b 0x90 0x81 0x8e 0x0b 0x00 0x01 0xf1 0xb2 0x34 0x10 0x76 MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5036 0x51 0x45 0x50 0x84 0x8b 0x33 0x19 0x92 0x68 0x0b 0x58 0x76 LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5048 0x61 0x96 0x00 0x39 0xb9 0x65 0xaa 0x60 0x78 0xba MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5058 0x03 0x06 0x67 0x18 ST.s8 r19, [p3] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5062 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5064 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5070 0x14 0x6b 0x2d 0x98 LSHL r21, r17, r18 +.delay_slot + 5074 0x1f 0x6a 0xf9 0x58 ADD.NC p7, r21, r30 +.delay_slot + 5078 0x16 0x63 0x11 0x98 SUB r17, r25, r17 +.delay_slot + 5082 0x8c 0x65 0xba 0x2c 0x35 0x64 LSHL r17, r17, r18; MOV r20, #781 +.delay_slot + 5088 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2c 0x9a 0x11 0x8b 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV +.return_address + 5104 0x07 0xd4 0x99 0x18 LDA p1, [sp, #-44] +.no_stack_arguments + 5108 0x00 0x08 0x30 0x00 0x01 0x04 JL #4192 +.delay_slot +.swstall delay_slot + 5114 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5116 0x00 0x00 NOPX +.delay_slot + 5118 0x1b 0x56 0x90 0x18 ADD.NC r13, r13, #32 +.delay_slot + 5122 0x1a 0x66 0xa0 0xf8 MOV p2, r13 +.delay_slot + 5126 0x00 0x2c 0xf0 0x8f 0x0b 0x00 0x00 0x00 0x00 0x7a NOPA; MOVS p0, r15; NOPX +.return_address + 5136 0xd6 0x9a 0x80 0x01 0x37 0xea 0x33 0x63 0x08 0xba MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 + 5146 0x83 0x84 0xd0 0x34 0x62 0x2c LDA dn0, [p4], #4; MOVX r13, #12 + 5152 0x04 0x1c 0x46 0x98 LDA dj0, [p4], #4 + 5156 0x04 0x1e 0x26 0x98 LDA dn4, [p4], #4 + 5160 0x04 0x1e 0x46 0x98 LDA dj4, [p4], #4 + 5164 0x04 0x1c 0x06 0x98 LDA m0, [p4], #4 + 5168 0x04 0x1c 0x66 0x98 LDA dc0, [p4], #4 + 5172 0x04 0x1e 0x66 0x98 LDA dc4, [p4], #4 + 5176 0x04 0x1e 0xd6 0x98 LDA r22, [p4], #4 + 5180 0x04 0x1e 0x36 0x98 LDA r17, [p4], #4 + 5184 0x04 0x1f 0x96 0x98 LDA r28, [p4], #4 + 5188 0x04 0x1e 0xb6 0x98 LDA r21, [p4], #4 + 5192 0x04 0x1e 0xf6 0x98 LDA r23, [p4], #4 + 5196 0x04 0x1d 0x9e 0x98 LDA p3, [p4], #4 + 5200 0x04 0x1d 0x26 0x98 LDA dn2, [p4], #4 + 5204 0x04 0x1c 0xa6 0x98 LDA dn1, [p4], #4 + 5208 0x04 0x1c 0xc6 0x98 LDA dj1, [p4], #4 + 5212 0x04 0x1e 0xa6 0x98 LDA dn5, [p4], #4 + 5216 0x04 0x1f 0xd6 0x98 LDA r30, [p4], #4 + 5220 0x04 0x1f 0xb6 0x98 LDA r29, [p4], #4 + 5224 0x04 0x1c 0xe6 0x98 LDA dc1, [p4], #4 + 5228 0x04 0xc2 0x4a 0x98 LDA.u8 r18, [p4, dj6] + 5232 0x07 0xd2 0x91 0x18 LDA r20, [sp, #-48] + 5236 0x04 0x04 0x56 0x98 LDA r2, [p4] + 5240 0x00 0x00 NOPX + 5242 0x00 0x00 NOPX + 5244 0x00 0x00 NOPX + 5246 0x00 0x00 NOPX + 5248 0x14 0xe7 0x2c 0x98 LTU r19, r19, r18 + 5252 0x98 0x0c 0x10 0x40 0x01 0x84 JNZ r19, #6176 +.delay_slot + 5258 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.delay_slot + 5264 0x02 0x05 0xa7 0x18 ST.s8 r13, [p2] +.delay_slot + 5268 0x1c 0xd1 0x72 0xf8 VBCST.16 x9, r20 +.delay_slot +.swstall delay_slot + 5272 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5274 0x00 0x00 NOPX + 5276 0xfb 0x43 0x20 0x1b 0xb9 0x3f 0x80 0x84 0x58 0xba LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 + 5286 0x00 0x13 0x00 0x3d 0x20 0x0a 0x00 0x3c 0x58 0xba MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 + 5296 0xf8 0x14 0x80 0x01 0xa0 0x0b 0xe4 0xd0 0x78 0xba MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 + 5306 0xef 0x98 0x82 0x1c 0x4b 0x1b 0xd4 0x01 0xa7 0xc0 0x78 0x76 MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5318 0xfa 0x96 0x26 0x1c 0x4b 0x01 0xf7 0x89 0xe8 0x07 0x58 0x76 LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5330 0xfb 0xca 0x20 0x00 0x00 0x05 0x32 0xa0 0x10 0xba LDA r18, [sp, #-36]; MOVXM p2, #5440 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5340 0xfc 0x36 0x20 0x34 0x69 0x12 0x8b 0x0c 0x58 0xba LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 + 5350 0xfc 0x87 0x29 0xd7 0x20 0x01 0x90 0x0b 0x08 0x00 0x58 0xb6 LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 + 5362 0x04 0x88 0x16 0x98 LDA r0, [p4], m4 + 5366 0x04 0xab 0x26 0x98 LDA dn6, [p4], m5 + 5370 0x04 0x2f 0x76 0x98 LDA r27, [p4], #8 + 5374 0x04 0x1e 0x86 0x98 LDA m5, [p4], #4 + 5378 0x04 0x8a 0xc6 0x98 LDA dj5, [p4], m4 + 5382 0x04 0x9e 0x06 0x98 LDA m4, [p4], #-28 + 5386 0x04 0x1c 0x36 0x98 LDA r1, [p4], #4 + 5390 0x99 0x02 0xdd 0x06 0x02 0x94 LDA r0, [p4], m6; ADD.NC dj6, r6, r0 + 5396 0x04 0x14 0x76 0x98 LDA r3, [p4, #4] + 5400 0x04 0x04 0x96 0x98 LDA r4, [p4] + 5404 0x19 0xda 0x00 0xf8 MOV r7, m5 + 5408 0x1a 0x83 0x99 0x58 ADD.NC dj2, r7, r6 + 5412 0x1c 0x1b 0x00 0xf8 MOV r16, dj5 + 5416 0x1a 0x0d 0x99 0x58 ADD.NC m2, r27, r6 + 5420 0x1e 0x03 0xe0 0x18 ADD.NC m6, r7, #-64 + 5424 0x18 0xff 0xee 0x10 0xc0 0x24 ADD r3, r3, #-1; ADD.NC m7, r16, #-64 + 5430 0x00 0x2c 0xf0 0x00 0x10 0x00 0x82 0x80 0x7e 0xba NOPA; NOPB; MOV m1, dj2 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.loop_nesting 1 + 5440 0xc3 0x85 0x71 0x85 0x0b 0x04 0xe7 0xec 0x33 0x90 0x78 0x76 VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 + 5452 0x22 0x81 0x78 0x28 0x2b 0x0e 0x4b 0x02 0x33 0x98 0xa0 0xf6 VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 + 5464 0xa0 0x39 0x78 0x28 0x2f 0x5a 0x4b 0x03 0xc6 0x80 0x70 0xf6 VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 + 5476 0xd9 0x0d 0x74 0x03 0x2b 0x53 0x0b 0x01 0x82 0x00 0x70 0xf6 VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 + 5488 0x71 0x41 0x74 0x12 0xd4 0x01 0xc0 0x00 0x5e 0xba VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 + 5498 0xc3 0x95 0x78 0x28 0x28 0x00 0x00 0x05 0xbb 0x40 0x10 0xb6 VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 + 5510 0xdd 0x1d 0x78 0x28 0x28 0x00 0x00 0x04 0x7b 0x28 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 + 5522 0x80 0xb5 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] + 5528 0xc3 0xa5 0x78 0x22 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] + 5534 0xd9 0x2d 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5540 0x22 0x81 0x78 0x28 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5546 0x83 0xbd 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5552 0x80 0xcd 0x74 0x11 0x14 0x02 0x9a 0xc3 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5562 0x80 0xc5 0x78 0x28 0x2c 0x98 0x8b 0x01 0x9a 0xc1 0xe0 0xf6 VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5574 0x02 0x81 0x73 0x00 0x54 0x1d 0x48 0x14 0xe9 0x4a VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5584 0xcf 0x35 0x76 0x94 0x96 0x00 0x00 0x5c 0x58 0x07 0x49 0x2c 0xe9 0x6e VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5598 0x82 0xbd 0x7a 0x38 0x96 0x00 0x00 0x4c 0x90 0x3e 0x4a 0x55 0x09 0x6e VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5612 0x71 0x01 0x74 0x98 0x96 0x00 0x00 0x54 0x90 0x1e 0xf8 0x60 0x3d 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5626 0x22 0x81 0x70 0x04 0xf9 0x64 0x3d 0x62 VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5634 0xa0 0x09 0x70 0x04 0xfa 0x88 0x3d 0x62 VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5642 0x71 0x01 0x70 0x04 0x4b 0x6d 0x09 0x62 VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5650 0x22 0x81 0x74 0x01 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5656 0x3c 0x11 0x14 0x18 VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5660 0xa0 0x09 0x78 0x28 0x2d 0x72 0x7d 0x82 0xfb 0x8c 0x3d 0x66 VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5672 0x71 0x01 0x74 0x14 0x14 0x1d 0xa0 0x06 0x29 0x4a VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5682 0x22 0x81 0x74 0x01 0x28 0x00 0x00 0x58 0xaa 0x0f 0xa2 0x46 0x09 0x4e VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5696 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5712 0xa0 0x09 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x01 0x49 0x01 0xed 0x1b 0x50 0x4b VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5728 0x71 0x01 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x00 0x31 0x4b VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 0x22 0x81 0x74 0x01 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x12 0x30 0x4b VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5760 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.loop_nesting 1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5776 0xa0 0x09 0x7c 0xbc 0x96 0x00 0x00 0x54 0x90 0x1e 0xa3 0x6a 0x09 0x6e VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5790 0x71 0x01 0x7e 0x1c 0x96 0x00 0x00 0x7c 0x38 0x07 0xa0 0x06 0x29 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5804 0x61 0x91 0x61 0x55 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5814 0x6a 0xc1 0x61 0x92 0x07 0xc4 0xa1 0x2a 0x29 0x4a MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5824 0xeb 0x81 0x62 0x92 0x03 0xc4 0xa3 0x6a 0x09 0x4a MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5834 0xb3 0x91 0x6f 0x57 0x22 0x8f 0x00 0xe6 0xa0 0x06 0x29 0x66 PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5846 0x93 0x91 0x62 0x06 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 0x02 0x92 0x03 0xc6 0xa1 0x2a 0x29 0x62 VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5864 0x01 0x92 0x07 0xc6 0xa3 0x6a 0x09 0x62 VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 + 5872 0x1f 0x8b 0x00 0xf8 MOV dj7, dj5 + 5876 0x03 0x0b 0xa0 0xe6 0xa1 0x2a 0x29 0x62 MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 + 5884 0x03 0x88 0xa0 0xe6 0xa0 0x06 0x29 0x62 MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 + 5892 0x00 0xf7 0x23 0x05 0x00 0xe6 0xa3 0x6a 0x09 0x4a PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 + 5902 0x71 0x89 0x6e 0xd7 0x25 0x82 0xa0 0xe6 0xa2 0x46 0x09 0x66 PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 + 5914 0x62 0x89 0x60 0x03 0xc5 0x80 0x70 0x02 MOVS dc3, dc5; MOV dj7, dj5 + 5922 0xa0 0x41 0x60 0x01 0x81 0x00 0x70 0x02 MOVS dc5, r2; MOV m3, m1 + 5930 0xb2 0x12 0xc0 0x00 0x87 0x50 0x70 0x02 VCONV.bf16.fp32 x11, cml1; MOV m1, r29 + 5938 0xa2 0x02 0xc0 0x02 0xc7 0x90 0x70 0x02 VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 + 5946 0x13 0x91 0x61 0x3b 0x90 0x01 0xc8 0x60 0x76 0xba PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 + 5956 0x62 0x0a 0xc0 0x00 0x83 0x00 0x70 0x02 VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 + 5964 0x52 0x22 0xc0 0x57 0x20 0x24 0x02 0xfa 0x00 0x00 0x60 0x36 PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 +.delay_slot + 5976 0x72 0x1a 0xc0 0x00 0xa9 0x60 0x70 0x02 VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 +.delay_slot + 5984 0x82 0x32 0xc0 0x03 0xa7 0xc0 0x70 0x02 VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 +.delay_slot + 5992 0x12 0x3a 0xc5 0x2b 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 +.delay_slot + 6002 0x22 0x2a 0xc0 0x02 0xc2 0x80 0x70 0x02 VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 +.delay_slot + 6010 0xe1 0x89 0x60 0x00 0x4d 0xc0 0x70 0x02 MOVS dc7, dc3; MOV r2, dc5 + 6018 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 6022 0x1b 0xbc 0xec 0xf8 VMAX_LT.bf16 x7, r16, x7, x9 + 6026 0x3c 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 + 6034 0xa2 0xba 0x60 0x01 0xda 0x76 0x70 0x02 VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 + 6042 0x20 0xd2 0x60 0x00 0x02 0xfe 0x00 0x00 0x21 0x3a VST x10, [p1]; J #6128 +.delay_slot + 6052 0x22 0xba 0x60 0x02 0xa2 0x76 0x70 0x02 VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 +.delay_slot + 6060 0x1b 0x8c 0xec 0xf8 VMAX_LT.bf16 x7, r16, x1, x9 +.delay_slot + 6064 0x00 0xd2 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 +.delay_slot + 6072 0x02 0xba 0x60 0x00 0x8a 0x76 0x70 0x02 VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 +.delay_slot + 6080 0x00 0x2c 0xf0 0x00 0x24 0xa2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 + 6096 0x09 0xe0 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p1, dj7] + 6100 0x0d 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p5, #64] + 6104 0x09 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p1] + 6108 0x09 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p1, #64] + 6112 0x08 0x06 0x13 0x18 VST x8, [p0] + 6116 0x08 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p0, #64] + 6120 0x94 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 + 6128 0xe2 0x92 0x6f 0x57 0x20 0x06 0x35 0x01 0x40 0x00 0x58 0x36 PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 +.delay_slot + 6140 0x1b 0x44 0x80 0xf8 MOV dn3, dn2 +.delay_slot + 6144 0x1a 0x49 0xa0 0xf8 MOV dn2, r19 +.delay_slot + 6148 0xeb 0x72 0x05 0x1e 0x01 0xf4 PADDB.3D [p7], d2; MOV dj2, dj7 +.delay_slot + 6154 0x1a 0x4e 0x80 0xf8 MOV dn2, dn7 +.delay_slot +.swstall delay_slot + 6158 0x00 0x00 NOPX +.loop_nesting 0 + 6160 0x00 0x0d 0x58 0x00 0x00 0x84 J #6832 +.delay_slot +.swstall delay_slot + 6166 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6168 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6170 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6172 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6174 0x00 0x00 NOPX +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 + 6176 0xfb 0x7e 0x22 0x0c 0x8b 0x04 0xe1 0x08 0xb3 0x90 0x78 0x76 LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 + 6188 0x07 0x90 0x82 0x56 0x0b 0x1b 0xd4 0x03 0x62 0x40 0x78 0x76 MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 + 6200 0x07 0x94 0x00 0x19 0x31 0x89 0x05 0xd0 0x78 0xba MOVA r20, #60; MOVX r19, #780; MOV m2, r23 + 6210 0xef 0x98 0x86 0x5c 0x0b 0x01 0x20 0xca 0xc7 0x90 0x78 0x76 MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 + 6222 0xfa 0x83 0x25 0x02 0x0b 0x01 0x90 0x08 0x87 0x50 0x78 0x76 LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 + 6234 0xfb 0xd6 0x20 0x01 0x80 0x0b 0x45 0x50 0x78 0xba LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 + 6244 0xfc 0x36 0x20 0x00 0x00 0x05 0x34 0x58 0x10 0xba LDA r13, [sp, #-32]; MOVXM p2, #6320 + 6254 0xfc 0x87 0x26 0xdf 0x72 0x94 LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 + 6260 0x03 0x1d 0xc6 0x98 LDA dj3, [p3], #4 + 6264 0x03 0x8a 0x06 0x98 LDA m4, [p3], m4 + 6268 0x03 0x9e 0x86 0x98 LDA m5, [p3], #-28 + 6272 0x03 0x1e 0xd6 0x98 LDA r22, [p3], #4 + 6276 0x03 0xca 0xf6 0x98 LDA r23, [p3], m6 + 6280 0x03 0x17 0xb6 0x98 LDA r29, [p3, #4] + 6284 0x03 0x07 0x96 0x98 LDA r28, [p3] + 6288 0x00 0x00 NOPX + 6290 0x1f 0x98 0x00 0xf8 MOV r30, m4 + 6294 0x1e 0x07 0x00 0xf8 MOV m6, dj3 + 6298 0x1f 0xdc 0x00 0xf8 MOV r31, m6 + 6302 0x1b 0x0f 0xe0 0x18 ADD.NC m3, r31, #-64 + 6306 0xef 0x7f 0xee 0x1e 0xc0 0x24 ADD r29, r29, #-1; ADD.NC m7, r30, #-64 + 6312 0x00 0x2b 0x60 0x03 0xc7 0x90 0x70 0x02 NOPS; MOV dj7, r30 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.loop_nesting 1 + 6320 0xc3 0x85 0x7a 0x28 0x28 0x00 0x00 0x8f 0x96 0x02 0x71 0x81 0x60 0x7e VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 + 6334 0xcd 0x0d 0x7a 0x28 0x28 0x00 0x00 0x05 0xbc 0xc8 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 + 6346 0x02 0x81 0x76 0x05 0x28 0x05 0xe9 0x6e 0xbf 0x3f 0x48 0xb6 VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 + 6358 0x55 0x59 0x73 0x01 0x14 0x01 0x47 0x90 0x7e 0xba VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 + 6368 0xc3 0x95 0x76 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] + 6374 0xdd 0x1d 0x7a 0x21 0xa8 0x3c VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] + 6380 0xc3 0xa5 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] + 6386 0xcd 0x2d 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] + 6392 0xc3 0xb5 0x76 0x00 0xa8 0x3c VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] + 6398 0xdd 0x3d 0x76 0x03 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] + 6404 0x68 0x45 0x76 0x03 0xa8 0x3c VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] + 6410 0x68 0x4d 0x75 0x12 0x14 0x01 0x69 0x2d 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 + 6420 0x02 0x81 0x75 0x14 0x14 0x02 0xa9 0x2f 0xee 0xba VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 + 6430 0x55 0x01 0x7a 0x28 0x2a 0x11 0xdb 0xc2 0x48 0x0b 0x69 0x66 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 + 6442 0x02 0x81 0x75 0x11 0xdf 0xc2 0x49 0x35 0x69 0x4a VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 + 6452 0x4a 0x49 0x69 0x48 VMAC.f dm2, dm2, ex4, ex11, r9 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6456 0x4b 0x75 0x69 0x48 VMAC.f dm3, dm3, ex10, ex11, r9 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6460 0x06 0x00 0xaa 0x8b 0x5f 0xc6 0xa1 0x84 0x3d 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6470 0x03 0x01 0x94 0x00 0xa0 0x80 0x3d 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6478 0x03 0x01 0xd4 0x00 0xa2 0x88 0x3d 0x62 VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6486 0x55 0x01 0x75 0x12 0x14 0x1d 0xa3 0x8c 0x3d 0x4a VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6496 0xa2 0x82 0x82 0x16 0xb7 0xb4 VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6502 0x0a 0x28 0x2a 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x4a VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6512 0x06 0x00 0xa9 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6522 0x03 0x01 0x94 0x00 0x9b 0x68 0x09 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6530 0x02 0x81 0x76 0x03 0xa8 0x00 0x00 0x00 0x05 0x6c 0x9a 0x46 0x09 0x6e VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6544 0x55 0x01 0x7a 0x24 0x28 0x01 0x5b 0x00 0x00 0x01 0x45 0xaf 0xe8 0x00 0x00 0xe1 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV +.loop_nesting 1 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6560 0x07 0x0c 0xff 0x97 0x25 0x9c 0x8b 0x00 0x85 0xad 0xe0 0xf6 PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6572 0x93 0x91 0x6f 0x17 0x22 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x66 PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6584 0x73 0x91 0x6f 0x97 0x21 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x66 PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6596 0x02 0x88 0xa0 0xe6 0x9b 0x68 0x09 0x62 MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6604 0x02 0xb7 0x20 0x9b 0x80 0xe6 0x9a 0x46 0x09 0x4a PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 + 6614 0x19 0x0b 0x5b 0xd8 VSHUFFLE ex2, ex1, ex6, r22 + 6618 0x1a 0x8b 0x5f 0xd8 VSHUFFLE ex5, ex1, ex6, r23 + 6622 0x01 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x62 VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 + 6630 0x02 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x62 VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 + 6638 0x9a 0x46 0x09 0x48 VMAC.f dm2, dm2, ex3, ex0, r19 + 6642 0x9b 0x68 0x09 0x48 VMAC.f dm3, dm3, ex4, ex0, r19 + 6646 0x00 0x00 NOPX + 6648 0x00 0x00 NOPX + 6650 0x0d 0x10 0x16 0x18 VCONV.bf16.fp32 x10, cml0 + 6654 0x0d 0x90 0x96 0x18 VCONV.bf16.fp32 x11, cml1 + 6658 0x12 0x1a 0xc0 0x2a 0x03 0x4e 0x00 0x00 0x61 0x3a VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 +.delay_slot + 6668 0x0b 0x10 0x56 0x18 VCONV.bf16.fp32 x6, cmh0 +.delay_slot + 6672 0x09 0x11 0x96 0x18 VCONV.bf16.fp32 x2, cml3 +.delay_slot + 6676 0x0b 0x91 0xd6 0x18 VCONV.bf16.fp32 x7, cmh3 +.delay_slot + 6680 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2 +.delay_slot + 6684 0x0c 0x11 0x56 0x18 VCONV.bf16.fp32 x8, cmh2 + 6688 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 6692 0x18 0x8c 0xec 0xf8 VMAX_LT.bf16 x1, r16, x1, x9 + 6696 0xac 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 + 6704 0x82 0x8a 0x60 0x00 0x5a 0x76 0x70 0x02 VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 + 6712 0xa0 0xd2 0x60 0x00 0x03 0x52 0x00 0x00 0x21 0x3a VST x10, [p5]; J #6800 +.delay_slot + 6722 0xa2 0x8a 0x60 0x02 0x8a 0x76 0x70 0x02 VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 +.delay_slot + 6730 0x18 0xbc 0xec 0xf8 VMAX_LT.bf16 x1, r16, x7, x9 +.delay_slot + 6734 0x6c 0x52 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 +.delay_slot + 6742 0x00 0x2c 0xf7 0x14 0x53 0x02 0x22 0x76 0x72 0xba NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 +.delay_slot + 6752 0x00 0x2c 0xf0 0x00 0x24 0xe2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 + 6768 0x0d 0x60 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p5, dj3] + 6772 0x0c 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p4, #64] + 6776 0x0d 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p5] + 6780 0x0d 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p5, #64] + 6784 0x0b 0x61 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p3, dj3] + 6788 0x0f 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p7, #64] + 6792 0x9c 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 + 6800 0x62 0xc2 0x6e 0xf5 0x40 0x5c VST x8, [p3, #64]; JNZD r29, r29, p2 +.delay_slot + 6806 0x3f 0x8b 0x90 0x18 PADDB [p7], m4 +.delay_slot +.swstall delay_slot + 6810 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6812 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6814 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6816 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6832 0x07 0xed 0xf1 0x18 LDA r15, [sp, #-20] + 6836 0x07 0xf1 0x91 0x18 LDA r12, [sp, #-16] + 6840 0x07 0xf5 0x31 0x18 LDA r9, [sp, #-12] + 6844 0x07 0xeb 0x19 0x18 LDA p6, [sp, #-24] + 6848 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 6852 0x07 0xfd 0xd1 0x18 LDA r14, [sp, #-4] + 6856 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 6860 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 6866 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6868 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6870 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6872 0x00 0x00 NOPX +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + +.text_segment PM 6880 +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 6880 0x00 0x20 0x00 0x00 0x01 0xf2 0x32 0x20 0x10 0xba MOVA r0, #1; MOVXM p4, #508992 + 6890 0x80 0xc2 0xd0 0x00 0x10 0x08 0x4b 0xd0 0x78 0xba LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 + 6900 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 6906 0x0f 0xf0 0x55 0x98 ST r2, [sp, #-16] + 6910 0x00 0x00 NOPX + 6912 0x00 0x00 NOPX + 6914 0x00 0x00 NOPX + 6916 0x00 0x00 NOPX + 6918 0x80 0x0d 0xd8 0x40 0x01 0x84 JNZ r16, #7088 +.delay_slot + 6924 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 6928 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4] +.delay_slot + 6932 0x0f 0xed 0x9d 0x98 ST p3, [sp, #-20] +.delay_slot + 6936 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 6940 0x00 0x07 0xc7 0xab 0x80 0x44 MOVXM r15, #509376 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6946 0xd0 0x91 0x60 0x00 0x01 0xf3 0xb2 0x34 0x11 0x3a MOVS p6, p1; MOVXM p7, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6956 0xe0 0xc0 0xe1 0x8f 0x0b 0x00 0x01 0xf3 0xb2 0x32 0x10 0x76 ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6968 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6970 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6972 0x00 0x05 0x60 0x00 0x01 0x04 JL #2752 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6978 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6980 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 6984 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 6988 0xe0 0xc2 0x30 0x03 0xb0 0x60 0x70 0x02 ST r16, [p7]; MOV p7, p0 +.delay_slot + 6996 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x32 0x60 0x70 0xf6 NOPA; NOPB; NOPS; MOV p0, p2 +.return_address + 7008 0x1a 0x67 0x85 0x98 ADD.NC p2, r15, #11 + 7012 0x4f 0xc1 0x50 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA.u8 r16, [p2], #7; MOVXM p1, #508996 + 7022 0x43 0xcf 0x50 0x00 0x01 0xf0 0x32 0x30 0x10 0xba LDA.u16 r19, [p2], #2; MOVXM p0, #509024 + 7032 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 7036 0x00 0x00 NOPX + 7038 0x02 0x16 0x5a 0x98 LDA.u16 r18, [p2, #2] + 7042 0x00 0x00 NOPX + 7044 0x00 0x00 NOPX + 7046 0x20 0xc2 0x30 0x00 0xb6 0x60 0x70 0x02 ST r16, [p1]; MOV p1, p6 + 7054 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16 + 7058 0x00 0x00 NOPX + 7060 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 7064 0x00 0x00 NOPX + 7066 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16 + 7070 0x00 0x00 NOPX + 7072 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 7088 0xfd 0xbe 0x20 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r15, [sp, #-20]; MOVXM p6, #509000 + 7098 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x26 0x10 0xba LDA r16, [p6]; MOVXM p2, #509004 + 7108 0x40 0xc6 0xd0 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba LDA r17, [p2]; MOVXM p7, #508992 + 7118 0x07 0x06 0x56 0x98 LDA r18, [p7] + 7122 0x00 0x00 NOPX + 7124 0x00 0x00 NOPX + 7126 0x00 0x00 NOPX + 7128 0x00 0x00 NOPX + 7130 0x80 0x0e 0x18 0x40 0x01 0x84 JNZ r16, #7216 +.delay_slot + 7136 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 7140 0x40 0xc6 0x39 0x44 0x0e 0x5c ST r17, [p2]; ADD r17, r18, #1 +.delay_slot + 7146 0x14 0x26 0x07 0x18 ADD r19, r16, #1 +.delay_slot + 7150 0x0e 0x06 0x71 0x98 ST r19, [p6] +.delay_slot + 7154 0x0f 0x06 0x31 0x98 ST r17, [p7] + 7158 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 7162 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 7166 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 7170 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7174 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7176 0x02 0x46 0x16 0x98 LDA r16, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7180 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7182 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7184 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7186 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7188 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 0x0a 0x06 0x11 0x98 ST r16, [p2] + 7196 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 7200 0x00 0x00 NOPX + 7202 0x00 0x00 NOPX + 7204 0x00 0x00 NOPX + 7206 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17 +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.no_stack_arguments + 7216 0x00 0x08 0xb8 0x00 0x01 0x04 JL #4464 +.delay_slot + 7222 0x00 0x07 0xc6 0xcb 0x80 0x44 MOVXM p3, #509376 +.delay_slot +.swstall delay_slot + 7228 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7230 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7232 0x00 0x00 NOPX +.delay_slot + 7234 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x53 0x3d 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p2, r15; NOPV +.return_address + 7248 0xc0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r16, [p6]; MOVXM p1, #508996 + 7258 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7262 0x07 0xf0 0x11 0x18 LDA r0, [sp, #-16] + 7266 0x00 0x00 NOPX + 7268 0x00 0x00 NOPX + 7270 0x00 0x00 NOPX + 7272 0x00 0x00 NOPX + 7274 0x00 0x00 NOPX + 7276 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 7280 0x80 0x0e 0x60 0x40 0x01 0x84 JNZ r16, #7360 +.delay_slot + 7286 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 7290 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7292 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7294 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7296 0x00 0x00 NOPX + 7298 0x04 0x00 0xa2 0xcf 0x14 0x24 MOVX r16, #1; ADD.NC p1, r15, #20 + 7304 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7308 0x00 0x00 NOPX + 7310 0x00 0x00 NOPX + 7312 0x00 0x00 NOPX + 7314 0x00 0x00 NOPX + 7316 0x00 0x00 NOPX + 7318 0x00 0x00 NOPX + 7320 0x14 0x51 0x08 0x18 REL r17, r16 + 7324 0x3c 0xc6 0xdc 0x0e 0x23 0x0c LDA r17, [p1, #-8]; ST r24, [p6] + 7330 0x00 0x00 NOPX + 7332 0x00 0x00 NOPX + 7334 0x00 0x00 NOPX + 7336 0x00 0x00 NOPX + 7338 0x00 0x00 NOPX + 7340 0x00 0x00 NOPX + 7342 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 7346 0x00 0x2c 0xf3 0xcc 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p1, #-8]; NOPM; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 + 7360 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x30 0x10 0xba LDA r16, [p7]; MOVXM p6, #509024 + 7370 0x06 0x06 0x36 0x98 LDA r17, [p6] + 7374 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8] + 7378 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 7382 0x00 0x00 NOPX + 7384 0x00 0x00 NOPX + 7386 0x00 0x00 NOPX + 7388 0x00 0x00 NOPX + 7390 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 7394 0x80 0x0e 0x80 0x40 0x01 0x84 JNZ r16, #7424 +.delay_slot +.swstall delay_slot + 7400 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7402 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7404 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7406 0x00 0x00 NOPX +.delay_slot + 7408 0x1b 0xd0 0x20 0xf8 MOV r15, r0 + 7412 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7424 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] + 7428 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7432 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7442 0x00 0x00 NOPX +.delay_slot + 7444 0x0f 0x84 0x8b 0x18 MOVS p7, p1 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 7456 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function_start + 7456 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64 +.delay_slot + 7462 0x18 0x50 0xc0 0xf8 MOV r1, p0 +.delay_slot + 7466 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32 +.delay_slot + 7470 0x08 0x04 0x11 0x98 ST r0, [p0] +.delay_slot + 7474 0x08 0x14 0x11 0x98 ST r0, [p0, #4] +.delay_slot +.swstall delay_slot + 7478 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 7488 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 7488 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7492 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7498 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] + 7502 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4] + 7506 0x00 0x00 NOPX + 7508 0x00 0x00 NOPX + 7510 0x00 0x00 NOPX + 7512 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7516 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7520 0x00 0x00 NOPX + 7522 0x00 0x00 NOPX + 7524 0x00 0x00 NOPX + 7526 0x00 0x00 NOPX + 7528 0x00 0x00 NOPX + 7530 0x00 0x00 NOPX + 7532 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7536 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 7540 0x00 0x00 NOPX + 7542 0x00 0x00 NOPX + 7544 0x00 0x00 NOPX + 7546 0x00 0x00 NOPX + 7548 0x00 0x00 NOPX + 7550 0x00 0x00 NOPX + 7552 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7556 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 7560 0x00 0x00 NOPX + 7562 0x00 0x00 NOPX +.no_stack_arguments + 7564 0x00 0x0e 0x90 0x00 0x01 0x04 JL #7456 +.delay_slot +.swstall delay_slot + 7570 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7572 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7574 0x00 0x00 NOPX +.delay_slot + 7576 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 7580 0x1b 0xd0 0xc0 0xf8 MOV r15, p0 +.return_address + 7584 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 7594 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 7604 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 7614 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 7618 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7620 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7622 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7626 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7630 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7634 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7640 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7644 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 7648 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 7664 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function_start + 7664 0x02 0x80 0x80 0x00 0x10 0xc8 0x08 0x60 0x78 0xba MOVA m0, #20; MOVX r1, #6; MOV r0, p0 + 7674 0x00 0x00 0xa0 0xc0 0x0c 0x24 MOVX r0, #1; ADD.NC p0, r0, #12 + 7680 0x00 0x08 0x4a 0x98 LDA.u8 r2, [p0], m0 + 7684 0x00 0x00 NOPX + 7686 0x00 0x00 NOPX + 7688 0x00 0x00 NOPX + 7690 0x00 0x00 NOPX + 7692 0x00 0x00 NOPX + 7694 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7698 0x10 0x80 0x08 0x98 NE r0, r2, r0 +.delay_slot + 7702 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1 +.delay_slot + 7706 0x02 0x82 0x31 0x0d 0xe0 0x5c ST r0, [p0, #4]; NEZ r3, r2 +.delay_slot + 7712 0x10 0xc4 0x1d 0x98 LSHL r2, r3, r1 +.delay_slot + 7716 0x08 0x04 0x51 0x98 ST r2, [p0] +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 7728 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 7728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7734 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 7738 0x00 0x0e 0xa0 0x00 0x01 0x04 JL #7488 +.delay_slot + 7744 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 7748 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 7752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7754 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7756 0x00 0x01 0x67 0x98 NOPA +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7760 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7764 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7768 0x00 0x0e 0xf8 0x00 0x00 0x84 J #7664 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7774 0x18 0x6e 0xc0 0xf8 MOV p0, p7 +.delay_slot + 7778 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7786 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7788 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 7792 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function_start + 7792 0x67 0x82 0xd0 0x00 0x51 0x54 LDA r0, [p3], #12; MOV m0, #20 + 7798 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3 + 7804 0x00 0x00 NOPX + 7806 0x00 0x00 NOPX + 7808 0x00 0x00 NOPX + 7810 0x00 0x00 NOPX + 7812 0x00 0x00 NOPX + 7814 0x00 0x00 NOPX + 7816 0x08 0x0f 0x60 0x40 0x01 0x84 JNZ r1, #7872 +.delay_slot + 7822 0x17 0xc4 0xe9 0x18 MOVX r2, #-6 +.delay_slot + 7826 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2 +.delay_slot +.swstall delay_slot + 7830 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7832 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7834 0x00 0x00 NOPX + 7836 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0] + 7840 0x00 0x00 NOPX + 7842 0x00 0x00 NOPX + 7844 0x00 0x00 NOPX + 7846 0x00 0x0f 0x70 0x00 0x00 0x84 J #7904 +.delay_slot +.swstall delay_slot + 7852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7854 0x00 0x00 NOPX +.delay_slot + 7856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot +.swstall delay_slot + 7860 0x00 0x00 NOPX +.delay_slot + 7862 0x00 0x2c 0xf0 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p0]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 + 7872 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1] + 7876 0x00 0x00 NOPX + 7878 0x00 0x00 NOPX + 7880 0x00 0x00 NOPX + 7882 0x00 0x00 NOPX + 7884 0x00 0x00 NOPX + 7886 0x00 0x00 NOPX + 7888 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 + 7892 0x00 0x00 NOPX + 7894 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 + 7904 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 + 7914 0x62 0x90 0xd0 0x00 0x00 0x04 0x7f 0xa8 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #8016 + 7924 0x00 0x00 0x16 0xfe 0xe0 0x44 MOVXM le, #8048 + 7930 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032 + 7936 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 7940 0x00 0x00 NOPX + 7942 0x00 0x00 NOPX + 7944 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 + 7948 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1 + 7952 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7956 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7960 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7966 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7974 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7978 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7986 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7990 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7998 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8002 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8016 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8020 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8032 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8048 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8064 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8066 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8074 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8076 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8084 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8086 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr +.delay_slot +.swstall delay_slot + 8092 0x00 0x00 NOPX +.delay_slot + 8094 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64 +.delay_slot +.swstall delay_slot + 8098 0x00 0x00 NOPX +.delay_slot + 8100 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64 +.delay_slot +.swstall delay_slot + 8104 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 8112 +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function_start + 8112 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 8118 0xff 0x87 0xb0 0x02 0x0a 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p2 + 8126 0x50 0x91 0x60 0x01 0xb4 0x03 0x00 0x02 MOVS p2, p1; ADD.NC p3, r16, #12 + 8134 0x65 0xed 0x58 0x21 0x81 0xd4 LDA.u8 r27, [p3], #2; MOV r16, p0 + 8140 0x73 0xca 0x58 0xab 0xc1 0xd4 LDA.s16 r18, [p3], #-14; MOV r17, sp + 8146 0x18 0x68 0xc0 0x18 ADD.NC p0, r17, #-128 + 8150 0x08 0x07 0x2b 0x18 VST sfh, [p0] + 8154 0x00 0x06 0x57 0x18 ST.s16 r18, [p0] + 8158 0x00 0x00 NOPX + 8160 0x00 0x00 NOPX +.no_stack_arguments + 8162 0x00 0x0f 0x38 0x00 0x01 0x04 JL #7792 +.delay_slot + 8168 0x1c 0x50 0xc0 0xf8 MOV r17, p0 +.delay_slot +.swstall delay_slot + 8172 0x00 0x00 NOPX +.delay_slot + 8174 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27 +.delay_slot + 8178 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18 +.delay_slot + 8184 0x00 0x2b 0x60 0x00 0x34 0x10 0x70 0x02 NOPS; MOV p0, r16 +.return_address + 8192 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 8196 0x00 0x00 NOPX + 8198 0x00 0x00 NOPX + 8200 0x00 0x00 NOPX + 8202 0x00 0x00 NOPX + 8204 0x00 0x00 NOPX + 8206 0x00 0x00 NOPX + 8208 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8212 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 8218 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8220 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8222 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8224 0x00 0x00 NOPX +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 8240 +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 8240 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 8246 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 8252 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 8258 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 8266 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 8276 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 8280 0x00 0x00 NOPX + 8282 0x00 0x00 NOPX + 8284 0x80 0x10 0x80 0x40 0x01 0x84 JNZ r16, #8448 +.delay_slot + 8290 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 8294 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 8298 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 8302 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 8310 0x00 0x07 0xc0 0xc9 0x80 0x44 MOVXM p0, #509120 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8316 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8322 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8332 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8334 0x00 0x0f 0x18 0x00 0x01 0x04 JL #7728 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8340 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8342 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8344 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 8348 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 8352 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 8368 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 8374 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r16, [p2]; MOVXM p2, #509120 + 8384 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x60 0x10 0xba LDA r17, [p2]; MOVXM p2, #509120 + 8394 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 + 8404 0x00 0x00 NOPX + 8406 0x00 0x00 NOPX + 8408 0x00 0x10 0x88 0x00 0x00 0x84 J #8464 +.delay_slot + 8414 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024 +.delay_slot +.swstall delay_slot + 8420 0x00 0x00 NOPX +.delay_slot + 8422 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 8426 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 8432 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 8448 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 8464 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 8472 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 8482 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 8486 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 8490 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 8494 0x00 0x00 NOPX + 8496 0x00 0x00 NOPX + 8498 0x00 0x00 NOPX + 8500 0x00 0x00 NOPX + 8502 0x00 0x00 NOPX + 8504 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 8508 0x0f 0x06 0x11 0x98 ST r16, [p7] + 8512 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 8516 0x00 0x00 NOPX + 8518 0x00 0x00 NOPX + 8520 0x00 0x00 NOPX + 8522 0x14 0x93 0x08 0x18 ACQ r18, r16 + 8526 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 8532 0x00 0x00 NOPX + 8534 0x00 0x00 NOPX + 8536 0x00 0x06 0x36 0x98 LDA r17, [p0] + 8540 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 8546 0x01 0x06 0x76 0x98 LDA r19, [p1] + 8550 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 8554 0x00 0x00 NOPX +.no_stack_arguments + 8556 0x00 0x0f 0xd8 0x00 0x01 0x04 JL #8112 +.delay_slot +.swstall delay_slot + 8562 0x00 0x00 NOPX +.delay_slot + 8564 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 8568 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 8572 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 8576 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 8592 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 8602 0x10 0x20 0x05 0x18 MOVX r16, #1 + 8606 0x00 0x00 NOPX + 8608 0x00 0x00 NOPX + 8610 0x00 0x00 NOPX + 8612 0x00 0x00 NOPX + 8614 0x00 0x00 NOPX + 8616 0x14 0x51 0x08 0x18 REL r17, r16 + 8620 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024 + 8630 0x06 0x06 0x36 0x98 LDA r17, [p6] + 8634 0x02 0x06 0x56 0x98 LDA r18, [p2] + 8638 0x00 0x00 NOPX + 8640 0x00 0x00 NOPX + 8642 0x00 0x00 NOPX + 8644 0x00 0x00 NOPX + 8646 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 8650 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 8654 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 8658 0x80 0x10 0xf8 0x40 0x01 0x84 JNZ r16, #8688 +.delay_slot +.swstall delay_slot + 8664 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8666 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8668 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8670 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8672 0x00 0x00 NOPX + 8674 0x10 0x20 0x01 0x18 MOVX r16, #0 + 8678 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 8688 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 8692 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 8696 0x00 0x00 NOPX + 8698 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8700 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8702 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8706 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8708 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8712 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 8716 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 8722 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8724 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8726 0x00 0x00 NOPX +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 8736 +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function_start + 8736 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8740 0x00 0x00 NOPX + 8742 0x00 0x00 NOPX + 8744 0x00 0x00 NOPX + 8746 0x00 0x00 NOPX + 8748 0x00 0x00 NOPX + 8750 0x00 0x00 NOPX + 8752 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 8756 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8760 0x00 0x00 NOPX + 8762 0x00 0x00 NOPX + 8764 0x00 0x00 NOPX + 8766 0x00 0x00 NOPX + 8768 0x00 0x00 NOPX + 8770 0x00 0x00 NOPX + 8772 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 8776 0x01 0x6c 0x2e 0x98 LDA el0, [p1], #24 + 8780 0x01 0x04 0x12 0x98 LDA.s16 r0, [p1] + 8784 0x00 0x00 NOPX + 8786 0x00 0x00 NOPX + 8788 0x00 0x00 NOPX + 8790 0x00 0x00 NOPX + 8792 0x00 0x00 NOPX + 8794 0x08 0x6c 0x29 0x98 ST el0, [p0], #24 + 8798 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] + 8802 0x00 0x00 NOPX + 8804 0x00 0x00 NOPX + 8806 0x00 0x00 NOPX + 8808 0x00 0x00 NOPX + 8810 0x00 0x00 NOPX + 8812 0x00 0x00 NOPX + 8814 0x01 0x24 0x12 0x98 LDA.s16 r0, [p1, #4] + 8818 0x00 0x14 0x17 0x18 ST.s16 r0, [p0, #2] + 8822 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 8826 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8828 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8830 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8832 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8834 0x00 0x00 NOPX +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + +.text_segment PM 8848 +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function_start + 8848 0xfb 0xc2 0x80 0x3a 0x68 0x00 0x00 0x08 0x79 0x88 0x10 0xb6 MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 + 8860 0xff 0x51 0x00 0x39 0x68 0x00 0x00 0x09 0xb9 0xa0 0x10 0xb6 MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 + 8872 0x18 0x14 0xc0 0xf8 MOV r0, p2 + 8876 0x1a 0x60 0x10 0x18 ADD.NC p2, r0, #32 + 8880 0x02 0x1c 0x52 0x98 LDA.s16 r2, [p2], #2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8884 0x02 0x00 0x16 0x98 LDA r0, [p2, dj0] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8888 0x40 0x86 0x50 0x3a 0x68 0x3c LDA.s16 r1, [p2]; VLDB x4, [p0], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8894 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8898 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8900 0x38 0x1c 0xb4 0x18 VLDB x2, [p0], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 0x18 0x09 0x72 0xf8 VBCST.16 x0, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8908 0x00 0x3a 0x68 0x01 0x18 0xed 0x50 0x36 0x78 0x3a VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8918 0x1d 0x78 0xfe 0x98 ADD.NC lc, r17, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8922 0x18 0x85 0x72 0xf8 VBCST.16 x1, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8926 0x19 0xa8 0xac 0xf8 VMIN_GE.bf16 x3, r16, x5, x1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 0x00 0x2c 0xf0 0x39 0x68 0x00 0x00 0x31 0x06 0xcf 0x00 0x2b 0x60 0x7e NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8944 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8960 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8976 0x00 0x2c 0xf0 0x3a 0x69 0x1d 0xd3 0x00 0x00 0x00 0xd4 0x56 0x78 0x00 0x00 0xe1 NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8992 0x00 0x2c 0xf0 0x39 0x68 0x01 0x5b 0x00 0x00 0x01 0x88 0x36 0x78 0x00 0x00 0xe1 NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9008 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9024 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9040 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9048 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0 + 9052 0x23 0x9a 0x60 0x01 0xd8 0x56 0x70 0x02 VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 + 9060 0x05 0x00 0x05 0x40 0xd9 0xe4 RET lr; VMAX_LT.bf16 x5, r16, x4, x0 +.delay_slot + 9066 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 +.delay_slot + 9074 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0 +.delay_slot + 9078 0x1b 0xb0 0xac 0xf8 VMIN_GE.bf16 x7, r16, x6, x1 +.delay_slot + 9082 0x09 0x1c 0xd3 0x18 VST x3, [p1], #64 +.delay_slot + 9086 0x09 0x1d 0xd3 0x18 VST x7, [p1], #64 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + +.text_segment PM 9104 +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 9104 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 9110 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 9116 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9122 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 9130 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 9140 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 9144 0x00 0x00 NOPX + 9146 0x00 0x00 NOPX + 9148 0x80 0x12 0x30 0x40 0x01 0x84 JNZ r16, #9312 +.delay_slot + 9154 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 9158 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 9162 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 9166 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 9174 0x00 0x07 0xc0 0xcb 0x00 0x44 MOVXM p0, #509312 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9180 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9186 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9196 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9198 0x00 0x11 0x10 0x00 0x01 0x04 JL #8736 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9204 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9206 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 9212 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 9216 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 9232 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 9238 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0xc0 0x10 0xba LDA r16, [p2]; MOVXM p2, #509312 + 9248 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0xc0 0x10 0xba LDA r17, [p2]; MOVXM p2, #509312 + 9258 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 + 9268 0x00 0x00 NOPX + 9270 0x00 0x00 NOPX + 9272 0x00 0x12 0x38 0x00 0x00 0x84 J #9328 +.delay_slot + 9278 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024 +.delay_slot +.swstall delay_slot + 9284 0x00 0x00 NOPX +.delay_slot + 9286 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 9290 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 9296 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 9312 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 9328 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 9336 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 9346 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 9350 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 9354 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 9358 0x00 0x00 NOPX + 9360 0x00 0x00 NOPX + 9362 0x00 0x00 NOPX + 9364 0x00 0x00 NOPX + 9366 0x00 0x00 NOPX + 9368 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 9372 0x0f 0x06 0x11 0x98 ST r16, [p7] + 9376 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 9380 0x00 0x00 NOPX + 9382 0x00 0x00 NOPX + 9384 0x00 0x00 NOPX + 9386 0x14 0x93 0x08 0x18 ACQ r18, r16 + 9390 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 9396 0x00 0x00 NOPX + 9398 0x00 0x00 NOPX + 9400 0x00 0x06 0x36 0x98 LDA r17, [p0] + 9404 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 9410 0x01 0x06 0x76 0x98 LDA r19, [p1] + 9414 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 9418 0x00 0x00 NOPX +.no_stack_arguments + 9420 0x00 0x11 0x48 0x00 0x01 0x04 JL #8848 +.delay_slot +.swstall delay_slot + 9426 0x00 0x00 NOPX +.delay_slot + 9428 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 9432 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 9436 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 9440 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 9456 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 9466 0x10 0x20 0x05 0x18 MOVX r16, #1 + 9470 0x00 0x00 NOPX + 9472 0x00 0x00 NOPX + 9474 0x00 0x00 NOPX + 9476 0x00 0x00 NOPX + 9478 0x00 0x00 NOPX + 9480 0x14 0x51 0x08 0x18 REL r17, r16 + 9484 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024 + 9494 0x06 0x06 0x36 0x98 LDA r17, [p6] + 9498 0x02 0x06 0x56 0x98 LDA r18, [p2] + 9502 0x00 0x00 NOPX + 9504 0x00 0x00 NOPX + 9506 0x00 0x00 NOPX + 9508 0x00 0x00 NOPX + 9510 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 9514 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 9518 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 9522 0x80 0x12 0xa8 0x40 0x01 0x84 JNZ r16, #9552 +.delay_slot +.swstall delay_slot + 9528 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9530 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9532 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9534 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9536 0x00 0x00 NOPX + 9538 0x10 0x20 0x01 0x18 MOVX r16, #0 + 9542 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 9552 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 9556 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 9560 0x00 0x00 NOPX + 9562 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9564 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9566 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9570 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9572 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9576 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 9580 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 9586 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9588 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9590 0x00 0x00 NOPX +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 9600 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function_start + 9600 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 + 9610 0x17 0x80 0x01 0x18 MOVX r0, #-128 + 9614 0x00 0x00 NOPX + 9616 0x00 0x00 NOPX + 9618 0x00 0x00 NOPX + 9620 0x00 0x00 NOPX + 9622 0x00 0x00 NOPX + 9624 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9628 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9632 0x00 0x00 NOPX + 9634 0x00 0x00 NOPX + 9636 0x00 0x00 NOPX + 9638 0x00 0x00 NOPX + 9640 0x00 0x00 NOPX + 9642 0x00 0x00 NOPX + 9644 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9648 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 9652 0x00 0x00 NOPX + 9654 0x00 0x00 NOPX + 9656 0x00 0x00 NOPX + 9658 0x00 0x00 NOPX + 9660 0x00 0x00 NOPX + 9662 0x00 0x00 NOPX + 9664 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9668 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 9672 0x00 0x00 NOPX + 9674 0x00 0x00 NOPX + 9676 0x00 0x00 NOPX + 9678 0x00 0x00 NOPX + 9680 0x00 0x00 NOPX + 9682 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9684 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9688 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9692 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9696 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9702 0x10 0xc2 0x14 0x98 AND r1, r3, r1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9706 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9710 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 9714 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 9728 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 9728 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9734 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 9738 0x00 0x12 0xc0 0x00 0x01 0x04 JL #9600 +.delay_slot + 9744 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] +.delay_slot + 9748 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 9752 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9754 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9756 0x00 0x01 0x67 0x98 NOPA +.return_address + 9760 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 9764 0x00 0x00 NOPX + 9766 0x00 0x00 NOPX + 9768 0x00 0x00 NOPX + 9770 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9772 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9774 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9778 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9782 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9784 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9786 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9788 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9792 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 9808 +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function_start + 9808 0x18 0x16 0xc0 0xf8 MOV r0, p3 + 9812 0x1b 0x60 0x07 0x18 ADD.NC p3, r0, #14 + 9816 0x03 0x1c 0x52 0x98 LDA.s16 r2, [p3], #2 + 9820 0x03 0x04 0x96 0x98 LDA r4, [p3] + 9824 0x00 0x00 NOPX + 9826 0x00 0x00 NOPX + 9828 0x00 0x00 NOPX + 9830 0x00 0x00 NOPX + 9832 0x10 0x06 0x09 0x18 MOVX r3, #2 + 9836 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 9842 0x10 0xc6 0x4c 0x98 LTU r3, r3, r4 + 9846 0x00 0x01 0x00 0x06 0x04 0xe2 0x10 0x00 0x60 0xba MOVA r1, #0; JNZ r3, #10000 +.delay_slot + 9856 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot + 9860 0x18 0x5e 0xc0 0xf8 MOV r1, p7 +.delay_slot + 9864 0x1f 0x65 0xe0 0xf8 MOV p7, sp +.delay_slot + 9868 0xff 0xf2 0x0a 0xdd 0x81 0xf4 PADDB [p7], #-64; MOV p5, p7 +.delay_slot + 9874 0x0f 0x04 0x13 0x18 VST x0, [p7] + 9878 0x01 0x82 0x84 0x80 0x0b 0x00 0x04 0xb9 0x72 0xba MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 + 9888 0x80 0x01 0x54 0x01 0x01 0x54 LDA.u8 r0, [p4, dj0]; MOV m2, #64 + 9894 0x00 0x00 NOPX + 9896 0x00 0x00 NOPX + 9898 0x00 0x00 NOPX + 9900 0x00 0x00 NOPX + 9902 0x00 0x00 NOPX + 9904 0x00 0x00 NOPX + 9906 0x00 0x13 0x70 0x40 0x01 0x84 JNZ r0, #9952 +.delay_slot + 9912 0x18 0x00 0x00 0xb8 MOV m0, #0 +.delay_slot + 9916 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032 +.delay_slot +.swstall delay_slot + 9922 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9924 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9926 0x00 0x00 NOPX + 9928 0x00 0x04 0x80 0x00 0x04 0xde 0x00 0x00 0x20 0xba MOVA m1, #0; J #9968 +.delay_slot +.swstall delay_slot + 9938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9940 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9942 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9944 0x00 0x00 NOPX +.delay_slot + 9946 0x00 0x2c 0xf0 0x08 0x26 0x0c NOPA; VST x0, [p0] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 9952 0x19 0x00 0x80 0xb8 MOV m1, #64 + 9956 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0 +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 9968 0x00 0x13 0xc8 0x00 0x00 0x84 J #10128 +.delay_slot + 9974 0x13 0x91 0x60 0x03 0xb0 0x60 0x70 0x02 MOVS p0, p7; MOV p7, p0 +.delay_slot +.swstall delay_slot + 9982 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9984 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9986 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9988 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 + 10000 0x10 0x04 0x0d 0x18 MOVX r2, #3 + 10004 0x10 0x84 0x47 0x98 EQ r2, r2, r4 + 10008 0x10 0x13 0xa0 0x40 0x01 0x84 JNZ r2, #10048 +.delay_slot + 10014 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216 +.delay_slot + 10020 0x00 0x07 0xc8 0xc8 0xd0 0x44 MOVXM p4, #509032 +.delay_slot +.swstall delay_slot + 10026 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10028 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10030 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10032 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10048 0x80 0x80 0x50 0x02 0xd2 0x00 0x47 0xbe 0x58 0xba LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10058 0x18 0x00 0x80 0xb8 MOV m0, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10062 0x19 0x00 0x00 0xb8 MOV m1, #0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10066 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10070 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10072 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10076 0xa0 0x02 0xe2 0x01 0x25 0xd4 ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10082 0x10 0x3a 0x80 0x18 MOVX crRnd, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10086 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10090 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10092 0x18 0x01 0x01 0xb8 VEXTRACT.16 r0, x0, #0, vaddSign0 + 10096 0x00 0x00 NOPX + 10098 0x00 0x00 NOPX + 10100 0x05 0x00 0x12 0x98 LDA.s16 r0, [p5, dj0] + 10104 0x00 0x00 NOPX + 10106 0x00 0x00 NOPX + 10108 0x00 0x00 NOPX + 10110 0x00 0x00 NOPX + 10112 0x00 0x00 NOPX + 10114 0x00 0x00 NOPX + 10116 0x18 0x01 0x72 0xf8 VBCST.16 x0, r0 + 10120 0x00 0x00 NOPX + 10122 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 + 10128 0x78 0x8a 0xde 0x50 0xe8 0x00 0x00 0x08 0x7c 0x00 0x10 0xb6 LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10140 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x09 0xbc 0x18 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10152 0x01 0x05 0x7e 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 0x80 0x90 0x52 0x90 0x68 0x3c LDA.s8 r4, [p4]; VLDB x0, [p1], m2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10166 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10168 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10172 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10174 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10178 0x05 0x71 0x7e 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10186 0x29 0x03 0x7e 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p7], m1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x09 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10240 0x29 0x03 0x7e 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10304 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10310 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10314 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10316 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10320 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10322 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10326 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 10330 0x1f 0x60 0xa0 0xf8 MOV p7, r1 +.delay_slot +.swstall delay_slot + 10334 0x00 0x00 NOPX +.delay_slot + 10336 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 10340 0x00 0x00 NOPX +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + +.text_segment PM 10352 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function_start + 10352 0x01 0x82 0x83 0x88 0x8b 0x00 0x60 0xf0 0x72 0xba MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr + 10362 0x40 0x01 0x54 0xc5 0x81 0xd4 LDA.u8 r0, [p2, dj0]; MOV p2, p1 + 10368 0x00 0x00 NOPX + 10370 0x00 0x00 NOPX + 10372 0x00 0x00 NOPX + 10374 0x00 0x00 NOPX + 10376 0x00 0x00 NOPX + 10378 0x00 0x00 NOPX + 10380 0x00 0x14 0x68 0x00 0x01 0x84 JZ r0, #10448 +.delay_slot + 10386 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 10392 0x18 0x55 0xe0 0xf8 MOV r1, sp +.delay_slot + 10396 0x19 0x60 0xe0 0x18 ADD.NC p1, r1, #-64 +.delay_slot + 10400 0x09 0x07 0x2b 0x18 VST sfh, [p1] +.delay_slot +.swstall delay_slot + 10404 0x00 0x00 NOPX +.no_stack_arguments + 10406 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808 +.delay_slot +.swstall delay_slot + 10412 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10416 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10418 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10420 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 10432 0x00 0x14 0x78 0x00 0x00 0x84 J #10480 +.delay_slot +.swstall delay_slot + 10438 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10440 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10442 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10444 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10446 0x00 0x00 NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.no_stack_arguments + 10448 0x00 0x13 0x28 0x00 0x01 0x04 JL #9808 +.delay_slot + 10454 0x10 0x91 0x60 0x00 0xb0 0x60 0x70 0x02 MOVS p0, p1; MOV p1, p0 +.delay_slot +.swstall delay_slot + 10462 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10464 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10466 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10468 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.return_address + 10480 0x1f 0x71 0x80 0xf8 MOV lr, dc0 + 10484 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10488 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 10494 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10496 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10498 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10500 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 10512 +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 10512 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 10518 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 10524 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 10530 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 10538 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 10548 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 10552 0x00 0x00 NOPX + 10554 0x00 0x00 NOPX + 10556 0x80 0x14 0xf0 0x40 0x01 0x84 JNZ r16, #10720 +.delay_slot + 10562 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 10566 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 10570 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 10574 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 10582 0x00 0x07 0xc0 0xca 0x00 0x44 MOVXM p0, #509184 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10588 0x00 0x07 0xc4 0xc8 0xd0 0x44 MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10594 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10604 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10606 0x00 0x13 0x00 0x00 0x01 0x04 JL #9728 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10612 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10614 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10616 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 10620 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 10624 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 10640 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 10646 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #509184 + 10656 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #509184 + 10666 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 + 10676 0x00 0x00 NOPX + 10678 0x00 0x00 NOPX + 10680 0x00 0x14 0xf8 0x00 0x00 0x84 J #10736 +.delay_slot + 10686 0x00 0x07 0xc0 0xc8 0xc0 0x44 MOVXM p0, #509024 +.delay_slot +.swstall delay_slot + 10692 0x00 0x00 NOPX +.delay_slot + 10694 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 10698 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 10704 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 10720 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 10736 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 10744 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 10754 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 10758 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 10762 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 10766 0x00 0x00 NOPX + 10768 0x00 0x00 NOPX + 10770 0x00 0x00 NOPX + 10772 0x00 0x00 NOPX + 10774 0x00 0x00 NOPX + 10776 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 10780 0x0f 0x06 0x11 0x98 ST r16, [p7] + 10784 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 10788 0x00 0x00 NOPX + 10790 0x00 0x00 NOPX + 10792 0x00 0x00 NOPX + 10794 0x14 0x93 0x08 0x18 ACQ r18, r16 + 10798 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 10804 0x00 0x00 NOPX + 10806 0x00 0x00 NOPX + 10808 0x00 0x06 0x36 0x98 LDA r17, [p0] + 10812 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 10818 0x01 0x06 0x76 0x98 LDA r19, [p1] + 10822 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 10826 0x00 0x00 NOPX +.no_stack_arguments + 10828 0x00 0x14 0x38 0x00 0x01 0x04 JL #10352 +.delay_slot +.swstall delay_slot + 10834 0x00 0x00 NOPX +.delay_slot + 10836 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 10840 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 10844 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 10848 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 10864 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 10874 0x10 0x20 0x05 0x18 MOVX r16, #1 + 10878 0x00 0x00 NOPX + 10880 0x00 0x00 NOPX + 10882 0x00 0x00 NOPX + 10884 0x00 0x00 NOPX + 10886 0x00 0x00 NOPX + 10888 0x14 0x51 0x08 0x18 REL r17, r16 + 10892 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509024 + 10902 0x06 0x06 0x36 0x98 LDA r17, [p6] + 10906 0x02 0x06 0x56 0x98 LDA r18, [p2] + 10910 0x00 0x00 NOPX + 10912 0x00 0x00 NOPX + 10914 0x00 0x00 NOPX + 10916 0x00 0x00 NOPX + 10918 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 10922 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 10926 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 10930 0x80 0x15 0x68 0x40 0x01 0x84 JNZ r16, #10960 +.delay_slot +.swstall delay_slot + 10936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10938 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10940 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10942 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10944 0x00 0x00 NOPX + 10946 0x10 0x20 0x01 0x18 MOVX r16, #0 + 10950 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 10960 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 10964 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 10968 0x00 0x00 NOPX + 10970 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10972 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10974 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10978 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10980 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10984 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 10988 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 10994 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10996 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10998 0x00 0x00 NOPX +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 11008 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function_start + 11008 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64 +.delay_slot + 11014 0x18 0x50 0xc0 0xf8 MOV r1, p0 +.delay_slot + 11018 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32 +.delay_slot + 11022 0x08 0x04 0x11 0x98 ST r0, [p0] +.delay_slot + 11026 0x08 0x14 0x11 0x98 ST r0, [p0, #4] +.delay_slot +.swstall delay_slot + 11030 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 11040 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 11040 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11044 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11050 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 11054 0x00 0x00 NOPX + 11056 0x00 0x00 NOPX + 11058 0x00 0x00 NOPX + 11060 0x00 0x00 NOPX + 11062 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11066 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11070 0x00 0x00 NOPX + 11072 0x00 0x00 NOPX + 11074 0x00 0x00 NOPX + 11076 0x00 0x00 NOPX + 11078 0x00 0x00 NOPX + 11080 0x00 0x00 NOPX + 11082 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11086 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 11090 0x00 0x00 NOPX + 11092 0x00 0x00 NOPX + 11094 0x00 0x00 NOPX + 11096 0x00 0x00 NOPX + 11098 0x00 0x00 NOPX + 11100 0x00 0x00 NOPX + 11102 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11106 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 11110 0x00 0x00 NOPX + 11112 0x00 0x00 NOPX +.no_stack_arguments + 11114 0x00 0x15 0x80 0x00 0x01 0x04 JL #11008 +.delay_slot + 11120 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 11124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11126 0x00 0x00 NOPX +.delay_slot + 11128 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 11132 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.return_address + 11136 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 11140 0x00 0x00 NOPX + 11142 0x00 0x00 NOPX + 11144 0x00 0x00 NOPX + 11146 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11148 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11150 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11154 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11158 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11160 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11162 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11164 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11168 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 11184 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function_start + 11184 0x04 0x00 0x80 0x00 0x00 0x08 0x7e 0x30 0x10 0xba MOVA m0, #32; MOVXM ls, #11360 + 11194 0x61 0x0e 0xd0 0x00 0x00 0x09 0xbe 0x38 0x10 0xba LDA r3, [p3], m0; MOVXM le, #11376 + 11204 0x60 0x90 0xd0 0x3e 0x17 0x48 0x0b 0x3c 0x58 0xba LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 + 11214 0x62 0x80 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m0, [p3, #4]; MOVXM p4, #509032 + 11224 0x04 0x04 0x42 0x98 LDA.s8 r2, [p4] + 11228 0x00 0x00 NOPX + 11230 0x00 0x00 NOPX + 11232 0x00 0x00 NOPX + 11234 0x10 0xc2 0x1d 0x98 LSHL r1, r3, r1 + 11238 0x05 0x0e 0x8a 0xe1 0xf9 0x34 VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11244 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11250 0x21 0x1b 0x70 0x50 0xe8 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11258 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11264 0x21 0x1b 0x70 0x50 0xe8 0x3c VLDA x3, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11270 0x01 0x08 0x9b 0x98 VLDA x2, [p1], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11274 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11280 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11286 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11296 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11306 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11316 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11326 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11336 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11346 0x21 0x1b 0x70 0x50 0x68 0x00 0x00 0x08 0x70 0x8c 0x00 0xe2 0x41 0x6e VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11360 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11376 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11392 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11400 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11408 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11416 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11424 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11432 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11440 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11448 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11452 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11458 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11462 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 11466 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 11470 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 11474 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 11488 +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 11488 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992 + 11494 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 11500 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11506 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 11516 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 11524 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 11528 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 11532 0x00 0x00 NOPX + 11534 0x80 0x16 0xd0 0x40 0x01 0x84 JNZ r16, #11680 +.delay_slot + 11540 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 11544 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 +.delay_slot + 11550 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 11558 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 11562 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0xa0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509248 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11572 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x34 0x11 0x3a MOVS p0, p7; MOVXM p2, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11582 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x32 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11592 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11594 0x00 0x15 0x90 0x00 0x01 0x04 JL #11040 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11600 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11602 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11604 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 11608 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 11612 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 11616 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x28 0x10 0xba LDA r16, [p7]; MOVXM p1, #509008 + 11626 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb2 0x2a 0x10 0xba LDA r17, [p1]; MOVXM p3, #509012 + 11636 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 + 11646 0x00 0x00 NOPX + 11648 0x00 0x00 NOPX + 11650 0x00 0x00 NOPX + 11652 0x00 0x16 0xd8 0x00 0x00 0x84 J #11696 +.delay_slot + 11658 0x00 0x07 0xc4 0xc8 0xc0 0x44 MOVXM p2, #509024 +.delay_slot + 11664 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 11668 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 11672 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 11676 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 11680 0x00 0x07 0xc6 0xc8 0xa8 0x44 MOVXM p3, #509012 + 11686 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb2 0x2e 0x10 0xba NOPA; MOVXM p1, #509020 +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 11696 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 11700 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508992 + 11710 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 11714 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 11718 0x02 0x06 0x56 0x98 LDA r18, [p2] + 11722 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 11726 0x00 0x00 NOPX + 11728 0x00 0x00 NOPX + 11730 0x00 0x00 NOPX + 11732 0x00 0x00 NOPX + 11734 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 11738 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 11744 0x0a 0x06 0x11 0x98 ST r16, [p2] + 11748 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 11752 0x00 0x00 NOPX + 11754 0x00 0x00 NOPX + 11756 0x00 0x00 NOPX + 11758 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 11762 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 11766 0x00 0x00 NOPX + 11768 0x00 0x00 NOPX + 11770 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 11774 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 11778 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 11782 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 11786 0x00 0x00 NOPX + 11788 0x00 0x00 NOPX + 11790 0x00 0x00 NOPX + 11792 0x00 0x00 NOPX + 11794 0x00 0x00 NOPX + 11796 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 11800 0x0a 0x06 0x31 0x98 ST r17, [p2] + 11804 0x00 0x00 NOPX + 11806 0x00 0x00 NOPX + 11808 0x00 0x00 NOPX + 11810 0x00 0x00 NOPX + 11812 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 11816 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 11826 0x00 0x00 NOPX + 11828 0x00 0x00 NOPX + 11830 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 11834 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 11840 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11846 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11850 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11854 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11858 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11860 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11864 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11866 0x00 0x15 0xd8 0x00 0x01 0x04 JL #11184 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11872 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 11876 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 11880 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 11884 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 11888 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 11904 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 11914 0x00 0x07 0xcc 0xc8 0xc0 0x44 MOVXM p6, #509024 + 11920 0x00 0x00 NOPX + 11922 0x00 0x00 NOPX + 11924 0x00 0x00 NOPX + 11926 0x00 0x00 NOPX + 11928 0x00 0x00 NOPX + 11930 0x14 0x51 0x08 0x18 REL r17, r16 + 11934 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 11938 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 11942 0x00 0x00 NOPX + 11944 0x00 0x00 NOPX + 11946 0x00 0x00 NOPX + 11948 0x00 0x00 NOPX + 11950 0x00 0x00 NOPX + 11952 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 11956 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 11962 0x00 0x00 NOPX + 11964 0x00 0x00 NOPX + 11966 0x00 0x00 NOPX + 11968 0x00 0x00 NOPX + 11970 0x00 0x00 NOPX + 11972 0x00 0x00 NOPX + 11974 0x14 0x51 0x08 0x18 REL r17, r16 + 11978 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb2 0x20 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508992 + 11988 0x06 0x06 0x56 0x98 LDA r18, [p6] + 11992 0x01 0x06 0x36 0x98 LDA r17, [p1] + 11996 0x00 0x00 NOPX + 11998 0x00 0x00 NOPX + 12000 0x00 0x00 NOPX + 12002 0x00 0x00 NOPX + 12004 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 12008 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 12012 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 12016 0x80 0x17 0x88 0x40 0x01 0x84 JNZ r16, #12048 +.delay_slot +.swstall delay_slot + 12022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12024 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12026 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12028 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12030 0x00 0x00 NOPX + 12032 0x10 0x20 0x01 0x18 MOVX r16, #0 + 12036 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 12048 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 12052 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 12056 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12060 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12062 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12066 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12068 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12070 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12074 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 12078 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 12084 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12086 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12088 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 12096 +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function_start + 12096 0x03 0x85 0xd0 0x00 0x01 0xf0 0xb3 0xc0 0x10 0xba LDA el0, [p0], #4; MOVXM p1, #509824 + 12106 0x03 0x81 0xd0 0x01 0x00 0x4b 0x08 0x00 0x58 0xba LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 + 12116 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 12122 0xfe 0xf3 0xb0 0x00 0x01 0xf3 0xb3 0xc0 0x11 0x3a ST p7, [sp, #-12]; MOVXM p7, #509824 + 12132 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 12136 0x0f 0xf9 0xf5 0x98 ST r15, [sp, #-8] + 12140 0x00 0x00 NOPX + 12142 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 12146 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 12150 0x00 0x04 0x2e 0x98 LDA el0, [p0] + 12154 0x00 0x14 0x0e 0x98 LDA eh0, [p0, #4] + 12158 0x00 0x00 NOPX + 12160 0x00 0x00 NOPX + 12162 0x00 0x00 NOPX + 12164 0x00 0x00 NOPX + 12166 0x00 0x00 NOPX + 12168 0x09 0x04 0x29 0x98 ST el0, [p1] + 12172 0x09 0x14 0x09 0x98 ST eh0, [p1, #4] + 12176 0x07 0x5e 0x2a 0x98 LDA.u8 r17, [p7], #5 + 12180 0x07 0xee 0x4a 0x98 LDA.u8 r18, [p7], #-2 + 12184 0x07 0xec 0x2a 0x98 LDA.u8 r1, [p7], #-2 + 12188 0x00 0x00 NOPX + 12190 0x00 0x00 NOPX + 12192 0x00 0x00 NOPX + 12194 0x00 0x00 NOPX +.no_stack_arguments + 12196 0x00 0x1b 0xc8 0x00 0x01 0x04 JL #14224 +.delay_slot + 12202 0xfc 0xca 0xb8 0xbe 0x43 0x5c ST r18, [sp, #-28]; SUB r15, r17, r18 +.delay_slot + 12208 0xfd 0x86 0xb0 0xc2 0x11 0x5c ST r1, [sp, #-20]; NE r16, r1, r16 +.delay_slot + 12214 0xfe 0x42 0xb7 0xef 0x15 0x5c ST r16, [sp, #-16]; LT r27, r15, r24 +.delay_slot + 12220 0x16 0x22 0xf1 0x98 SUB r17, r24, r15 +.delay_slot + 12224 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x1e 0x08 0x90 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV +.return_address + 12240 0xfd 0xd2 0x20 0x40 0x02 0x2c LDA r20, [sp, #-20]; MOVX r16, #0 + 12246 0xe7 0xc5 0x58 0x48 0x43 0x2c LDA.u8 r17, [p7], #3; SUB r18, r16, r2 + 12252 0x07 0xee 0x6a 0x98 LDA.u8 r19, [p7], #-2 + 12256 0x07 0xec 0x31 0x18 LDA r1, [sp, #-20] + 12260 0x00 0x00 NOPX + 12262 0x00 0x00 NOPX + 12264 0x00 0x00 NOPX + 12266 0x13 0xe9 0x46 0x98 XOR r20, r15, r20 + 12270 0x15 0x37 0x0a 0x98 LT r27, r20, r16 + 12274 0xfd 0x4e 0xb8 0xc6 0x63 0x5c ST r19, [sp, #-24]; SUB r17, r17, r19 +.no_stack_arguments + 12280 0xfc 0x46 0xb0 0x00 0x06 0xf2 0x00 0x00 0x41 0x3a ST r17, [sp, #-32]; JL #14224 +.delay_slot + 12290 0x10 0xa9 0x22 0x18 SEL.EQZ r20, r2, r18, r27 +.delay_slot + 12294 0x14 0x77 0x0a 0x98 LT r27, r17, r16 +.delay_slot + 12298 0x14 0x25 0x11 0x98 SUB r18, r16, r17 +.delay_slot + 12302 0x15 0x26 0x70 0x18 EXTEND.s16 r19, r20 +.delay_slot + 12306 0x00 0x2c 0xf0 0x00 0x24 0x41 0x22 0x3d 0x98 0x09 0x00 0x2b 0x60 0x7e NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 +.return_address + 12320 0xfc 0x0e 0x20 0x3f 0x37 0xc8 0x00 0x42 0x58 0xba LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 + 12330 0xfd 0xc2 0x20 0x01 0x80 0x08 0x29 0xfc 0x58 0xba LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 + 12340 0xfc 0xda 0x20 0x00 0x60 0x88 0x88 0x02 0x58 0xba LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 + 12350 0xe1 0x45 0x50 0x00 0x51 0x0b 0x88 0x17 0x58 0xba LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 + 12360 0xfd 0x56 0x20 0x3f 0x27 0x48 0x80 0x20 0x58 0xba LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 + 12370 0xfe 0x7a 0x20 0x01 0x70 0xcb 0x48 0x01 0x58 0xba LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 + 12380 0xe9 0xc0 0x80 0x05 0xd0 0x0b 0xef 0xc0 0x58 0xba MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 + 12390 0x16 0x28 0x21 0x98 SUB r20, r24, r2 + 12394 0x10 0xc7 0x06 0x98 XOR r3, r3, r16 + 12398 0x1e 0xf1 0x50 0x36 0x02 0x24 LT r27, r3, r24; ADD.NC r0, r22, #2 + 12404 0x15 0x28 0x4b 0x3f 0xf5 0x64 SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 + 12410 0x78 0xe1 0xf1 0x20 0x1d 0x64 MUL r3, r15, r16; MOV r2, #7 + 12416 0x15 0x28 0x70 0x18 EXTEND.s16 r20, r20 + 12420 0x08 0x00 0x90 0xa0 0x01 0x24 AND r0, r1, r0; ADD.NC r1, r0, #1 + 12426 0x0c 0xe7 0xbd 0xb4 0x01 0x24 LSHL r19, r1, r19; ADD.NC r27, r20, #1 + 12432 0x7d 0x0d 0xb0 0xa3 0x02 0xa4 LSHL r20, r15, r6; ADD.NC r1, r3, r0 + 12438 0x09 0xcd 0xb0 0x35 0xff 0x24 LSHL r7, r1, r6; ADD.NC r0, r21, #-1 + 12444 0x16 0xcd 0x0f 0x98 MUL r6, r27, r16 + 12448 0x13 0xdf 0x1f 0x98 MUL r15, r15, r17 + 12452 0x9d 0x6b 0xf9 0xb3 0xff 0x24 MUL r21, r19, r21; ADD.NC r19, r19, #-1 + 12458 0x11 0x37 0x07 0x98 EQ r27, r4, r16 + 12462 0xff 0xd6 0x37 0x90 0xdf 0x5c ST r21, [p7], #-4; MUL r4, r15, r6 + 12468 0x17 0x38 0x52 0x18 SEL.EQZ r28, r28, r5, r27 + 12472 0x11 0x25 0x2d 0x98 LSHL r18, r4, r18 + 12476 0xe5 0x4a 0x38 0xc8 0x3f 0x5c ST r18, [p7], m1; MUL r18, r17, r1 + 12482 0xf9 0xf2 0x3f 0x72 0xfb 0x5c ST r28, [p7], #-16; LSHL r28, r30, r23 + 12488 0xed 0xf2 0x39 0x70 0x1f 0x5c ST r28, [p7], #24; MUL r28, r18, r0 + 12494 0xe3 0xce 0x39 0xce 0xfb 0x5c ST r19, [p7], #4; LSHL r19, r19, r23 + 12500 0xe7 0x35 0xb9 0xb3 0xea 0xa4 LSHL r28, r28, r26; ADD.NC r19, r19, r29 + 12506 0xe3 0xfe 0x39 0x7b 0x5b 0x5c ST r31, [p7], #4; LSHL r30, r18, r26 + 12512 0x94 0x21 0xf9 0x33 0xe2 0xa4 MUL r16, r18, r16; ADD.NC r18, r19, r28 + 12518 0xe3 0x82 0x3f 0xf3 0x04 0x5c ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 + 12524 0x10 0xff 0x6d 0x98 LSHL r31, r3, r22 + 12528 0xf0 0x66 0x39 0xbf 0xff 0x24 SUB r1, r30, r19; ADD.NC r19, r31, #-1 + 12534 0xe3 0x86 0x38 0xc6 0xdb 0x5c ST r1, [p7], #4; LSHL r17, r17, r22 + 12540 0xc5 0xa4 0x39 0x31 0xff 0x24 SUB r22, r24, r18; ADD.NC r18, r17, #-1 + 12546 0xe3 0xda 0x33 0xdb 0xc3 0x5c ST r22, [p7], #4; SUB r22, r7, r30 + 12552 0xe3 0xca 0x38 0x43 0x5b 0x5c ST r18, [p7], #4; LSHL r16, r16, r26 + 12558 0xe3 0x9e 0x39 0xfc 0x5b 0x5c ST r7, [p7], #4; LSHL r31, r19, r2 + 12564 0xe3 0xce 0x3e 0xda 0xc1 0x5c ST r19, [p7], #4; ADD r22, r29, r22 + 12570 0x3c 0x20 0x1e 0xbf 0xf2 0xa4 ADD r16, r7, r16; ADD.NC r29, r31, r30 + 12576 0xe3 0xda 0x38 0x43 0xa3 0x5c ST r22, [p7], #4; SUB r16, r16, r29 + 12582 0xe3 0xc2 0x30 0x1f 0x6d 0x6e 0x0f 0xff 0x59 0x3a ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 + 12592 0xe3 0xca 0x3e 0x6a 0x81 0x5c ST r18, [p7], #4; ADD r26, r28, r20 + 12598 0xe3 0xea 0x3a 0x52 0xc3 0x5c ST r26, [p7], #4; SUB r20, r20, r22 + 12604 0x08 0x11 0x07 0x1e 0x71 0xab 0x08 0xb2 0x6d 0x10 0x08 0x76 MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 + 12616 0x0f 0x1e 0x71 0x98 ST r19, [p7], #4 + 12620 0xe3 0xc6 0x38 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r16, r23 + 12626 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20 + 12632 0xe3 0xc6 0x39 0x52 0xfb 0x5c ST r17, [p7], #4; LSHL r20, r18, r23 + 12638 0xe3 0xc2 0x3c 0x42 0x83 0x5c ST r16, [p7], #4; SUB r16, r24, r20 + 12644 0x0f 0x1e 0x51 0x98 ST r18, [p7], #4 + 12648 0x0f 0x1e 0x31 0x98 ST r17, [p7], #4 + 12652 0x0f 0x0a 0x11 0x98 ST r16, [p7], m0 + 12656 0x07 0x06 0x0a 0x98 LDA.u8 r16, [p7] + 12660 0x00 0x00 NOPX + 12662 0x00 0x00 NOPX + 12664 0x00 0x00 NOPX + 12666 0x00 0x00 NOPX + 12668 0x00 0x00 NOPX + 12670 0x00 0x00 NOPX + 12672 0x80 0x18 0xd0 0x00 0x01 0x84 JZ r16, #12704 +.delay_slot + 12678 0x19 0x3b 0x60 0xf8 MOV vaddSign0, crMCDEn +.delay_slot + 12682 0xff 0x7f 0x09 0xa0 0x00 0x44 MOVXM r19, #-8454144 +.delay_slot +.swstall delay_slot + 12688 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12690 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12692 0x00 0x00 NOPX + 12694 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x00 0x26 0x01 0x7a NOPA; NOPS; MOVX r19, #0 +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 + 12704 0xff 0x87 0x20 0x00 0x01 0xf0 0x32 0x34 0x10 0xba LDA lr, [sp, #-4]; MOVXM p0, #509032 + 12714 0x00 0xc0 0x50 0x04 0xe2 0xd4 LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 + 12720 0xfe 0x83 0x21 0x02 0xe9 0x54 LDA p0, [sp, #-12]; MOV dj0, #186 + 12726 0xff 0x3e 0x20 0x01 0x25 0xd4 LDA r15, [sp, #-8]; VMOV bmll0, x0 + 12732 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 + 12738 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12740 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12742 0x07 0x02 0x17 0x18 ST.s16 r16, [p7, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12746 0x05 0x00 0x0f 0x70 0x41 0xe4 RET lr; MOV crRnd, r16 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12752 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12756 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12760 0x1c 0x01 0x01 0xb8 VEXTRACT.16 r16, x0, #0, vaddSign0 +.delay_slot +.swstall delay_slot + 12764 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12766 0x00 0x00 NOPX +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function_start + 12768 0x1c 0x56 0xc0 0xf8 MOV r17, p3 + 12772 0x20 0x93 0xde 0x01 0xa9 0x54 LDA p1, [p1]; MOV m7, #106 + 12778 0x00 0x83 0xd6 0xd1 0x02 0x14 LDA p0, [p0]; ADD.NC p3, r17, #2 + 12784 0x03 0xe8 0x8a 0x98 LDA.u8 r4, [p3], m7 + 12788 0x03 0xfd 0x46 0x98 LDA dj2, [p3], #-4 + 12792 0x03 0x3d 0x26 0x98 LDA dn2, [p3], #12 + 12796 0x03 0xff 0x46 0x98 LDA dj6, [p3], #-4 + 12800 0x03 0x2f 0x26 0x98 LDA dn6, [p3], #8 + 12804 0x03 0x2d 0x06 0x98 LDA m2, [p3], #8 + 12808 0x03 0xfc 0x46 0x98 LDA dj0, [p3], #-4 + 12812 0x03 0x3c 0x26 0x98 LDA dn0, [p3], #12 + 12816 0x03 0xfe 0x46 0x98 LDA dj4, [p3], #-4 + 12820 0x03 0x2e 0x26 0x98 LDA dn4, [p3], #8 + 12824 0x03 0x2c 0x06 0x98 LDA m0, [p3], #8 + 12828 0x03 0xfc 0xc6 0x98 LDA dj1, [p3], #-4 + 12832 0x03 0x3c 0xa6 0x98 LDA dn1, [p3], #12 + 12836 0x03 0xfe 0xc6 0x98 LDA dj5, [p3], #-4 + 12840 0x03 0x2e 0xa6 0x98 LDA dn5, [p3], #8 + 12844 0x03 0x2c 0x86 0x98 LDA m1, [p3], #8 + 12848 0x03 0xff 0xc6 0x98 LDA dj7, [p3], #-4 + 12852 0x03 0x2f 0xa6 0x98 LDA dn7, [p3], #8 + 12856 0x65 0xf0 0xd0 0x00 0x01 0xf2 0x32 0x34 0x10 0xba LDA m7, [p3], #8; MOVXM p4, #509032 + 12866 0x80 0x98 0x58 0xc5 0x81 0xd4 LDA.s8 r6, [p4]; MOV p4, p1 + 12872 0x1b 0x0f 0x10 0xb8 MOV m3, #-120 + 12876 0x80 0x85 0x70 0x3b 0x68 0x00 0x20 0x6a 0x60 0x00 0x58 0xb6 VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 + 12888 0x7f 0xb8 0xd0 0x38 0xe9 0x04 0x2d 0xe0 0x10 0x0b 0x62 0x09 0x60 0x7e LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 + 12902 0x65 0xb4 0xd1 0x0c 0x4b 0x02 0x80 0x90 0x72 0xba LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 + 12912 0x6d 0x30 0xd1 0xab 0x90 0x03 0xe1 0xc0 0x7e 0xba LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 + 12922 0x79 0x0a 0xd1 0xf0 0xf4 0x02 0x07 0x90 0x5e 0xba LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 + 12932 0x71 0x1e 0x50 0x00 0x82 0x2c LDA.s16 r7, [p3], m4; MOVX r0, #16 + 12938 0x69 0xc0 0xd6 0x10 0x4b 0x00 0x00 0x0c 0x79 0x78 0x10 0x76 LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13040 + 12950 0x72 0x92 0xd2 0x10 0x4b 0x00 0x00 0x0d 0xb9 0xa8 0x10 0x76 LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13136 + 12962 0x0b 0x16 0x84 0x61 0x05 0xb4 VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 + 12968 0x1b 0x00 0x8a 0xf8 VMOV cml3, cml0 + 12972 0x60 0x96 0xd0 0x00 0x00 0x0d 0xb1 0xc8 0x10 0xba LDA r5, [p3]; MOVXM p3, #13200 + 12982 0x00 0x2c 0xf0 0x00 0x14 0x0a 0x8e 0x01 0xa8 0xba NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 + 12992 0x07 0x91 0x00 0x00 0x20 0x01 0x5b 0x00 0x36 0x08 0x0e 0xb9 0x78 0x00 0x00 0xe1 MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV + 13008 0x00 0x2c 0xf0 0x00 0x20 0x10 0x4b 0x0d 0xd4 0x02 0x0e 0x03 0xac 0x63 0x6a 0x0b NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13024 0x40 0xa3 0xd0 0x00 0x25 0x10 0x4b 0x04 0x2f 0xda 0xb9 0x3f 0xcc 0x48 0x1a 0x0b LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13040 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13050 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13060 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13070 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13074 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13082 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13090 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 13094 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 + 13102 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 + 13110 0x00 0x2c 0xf0 0x00 0x10 0x01 0x18 0x41 0x6e 0xba NOPA; NOPB; VSHIFT x4, x6, x1, r0 + 13120 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x63 0x6a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368 +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 13136 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13152 0x03 0x0c 0xf4 0x73 0x90 0x02 0x84 0x81 0x6e 0xba PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13162 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13170 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13178 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13182 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13190 0x00 0x2c 0xf4 0xb0 0x8e 0xc2 0x8a 0x36 0xa1 0x4a NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432 +.loop_nesting 1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13200 0x3e 0x1e 0x8b 0x12 0x1d 0xb4 VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13206 0x00 0x00 0x01 0xb7 0x44 0x02 0x8b 0x92 0xe1 0x5a MOVXM le, #13376; VMAC.f dm3, dm4, x9, x7, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13216 0x80 0x85 0x70 0x00 0x01 0x8f 0x3f 0x02 0x88 0x56 0xe1 0x46 VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13296; VMAC.f dm0, dm2, x11, x7, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13228 0x1d 0x72 0x7f 0x98 ADD.NC lc, r4, #-1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13232 0x00 0x1d 0x9b 0x98 VLDA x6, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13236 0x38 0x1c 0x74 0x18 VLDB x1, [p0], #64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13240 0x38 0x58 0xb4 0x18 VLDB.3D x2, [p0], d2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13244 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13248 0x0b 0x10 0x16 0x18 VCONV.bf16.fp32 x6, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13252 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13256 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13260 0x51 0x42 0x60 0x02 0xa8 0x36 0x70 0x02 VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13268 0x1d 0x1c 0x03 0x58 VEXTBCST.128 x10, x3, #0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13272 0x47 0x52 0x60 0x01 0x80 0x45 0x70 0x02 VST.3D x10, [p2], d1; VMOV cml3, cml0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13280 0x04 0x1c 0x07 0x46 0x8c 0x6d 0x41 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13288 0x02 0x30 0x82 0xc6 0x89 0x03 0x41 0x62 VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt + 13296 0x03 0xb3 0x71 0xf0 0xf4 0x02 0x84 0x81 0x6e 0xba VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13306 0x00 0x38 0xea 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x4a VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13316 0x00 0xb1 0x6a 0x30 0x86 0xc6 0x89 0x35 0x01 0x4a VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13326 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13330 0x04 0xb0 0x8e 0xc6 0x8c 0x48 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13338 0x03 0x9c 0x0f 0x46 0x8a 0x36 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13346 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 13350 0x05 0x1c 0x03 0x46 0x8b 0x92 0xe1 0x62 VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 + 13358 0x04 0x1c 0x07 0x46 0x88 0x56 0xe1 0x62 VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 + 13366 0x00 0x2c 0xf4 0x61 0x05 0x94 NOPA; VSHIFT x4, x6, x1, r0 + 13372 0x8c 0x6d 0x41 0x48 VMAC.f dm4, dm3, x6, x10, r17 +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608 +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13376 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7c 0x48 0x1a 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 +.loop_nesting 1 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13392 0x03 0x0c 0xf8 0xe7 0x20 0x04 0x27 0x02 0x84 0x81 0x68 0xb6 PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13404 0x02 0x9c 0x0b 0x46 0x8a 0x89 0x01 0x62 VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13412 0x02 0x30 0x86 0xc6 0x89 0x35 0x01 0x62 VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13420 0x1d 0x89 0x06 0xd8 VSHIFT x11, x1, x2, r1 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13424 0x03 0x9c 0x0f 0x46 0x8c 0x48 0xa1 0x62 VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13432 0x04 0xb0 0x8e 0xc6 0x8a 0x36 0xa1 0x62 VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 +.loop_nesting 0 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13440 0x1d 0x89 0x0e 0xd8 VSHIFT x11, x1, x2, r3 + 13444 0x8b 0x92 0xe1 0x48 VMAC.f dm3, dm4, x9, x7, r17 + 13448 0x88 0x56 0xe1 0x48 VMAC.f dm0, dm2, x11, x7, r17 + 13452 0x00 0x00 NOPX + 13454 0x00 0x00 NOPX + 13456 0x00 0x00 NOPX + 13458 0x00 0x00 NOPX + 13460 0x0d 0x11 0x96 0x18 VCONV.bf16.fp32 x10, cml3 + 13464 0x62 0x02 0xc0 0x50 0x00 0x5c VCONV.bf16.fp32 x6, cml0; RET lr +.delay_slot + 13470 0x1c 0x50 0x6c 0xf8 VMAX_LT.bf16 x8, r16, x10, x0 +.delay_slot + 13474 0x1d 0x53 0x14 0x78 VSHUFFLE x10, x10, x6, r5 +.delay_slot + 13478 0x1d 0x50 0x6c 0xf8 VMAX_LT.bf16 x10, r16, x10, x0 +.delay_slot + 13482 0x0a 0x8a 0x13 0x18 VST x8, [p2], m4 +.delay_slot + 13486 0x0a 0x3a 0x93 0x18 VST.3D x10, [p2], d1 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + +.text_segment PM 13504 +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 13504 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992 + 13510 0x80 0xc2 0xd0 0x2f 0x41 0xd4 LDA r16, [p4]; MOV r0, r15 + 13516 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 13522 0xff 0x3a 0xb0 0x02 0x2d 0x70 0x70 0x02 ST r14, [sp, #-8]; MOV r17, CORE_ID + 13530 0xff 0xb6 0xb0 0x01 0xa8 0xf0 0x70 0x02 ST r13, [sp, #-4]; MOV r13, lr + 13538 0x0f 0xec 0x1d 0x98 ST p0, [sp, #-20] + 13542 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 13546 0xfe 0x02 0xb0 0x01 0xca 0x60 0x70 0x02 ST r0, [sp, #-16]; MOV r14, p2 + 13554 0x80 0x1a 0xb8 0x40 0x01 0x84 JNZ r16, #13680 +.delay_slot + 13560 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 13564 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 13568 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 13572 0x00 0x07 0xc6 0xc8 0xa0 0x44 MOVXM p3, #509008 +.delay_slot + 13578 0x0b 0x06 0x31 0x98 ST r17, [p3] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13582 0xf0 0x91 0x60 0x00 0x01 0xf0 0xb2 0x34 0x11 0x3a MOVS p7, p1; MOVXM p1, #509032 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13592 0x20 0xc0 0xe0 0x88 0x8b 0x00 0x01 0xf0 0xb2 0x32 0x10 0x76 ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13604 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 13606 0x00 0x17 0xa0 0x00 0x01 0x04 JL #12096 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13612 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13614 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13616 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 13620 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 13624 0x20 0xc2 0x30 0x00 0x01 0xa5 0x70 0x02 ST r16, [p1]; NOPM +.return_address + 13632 0x33 0x91 0x60 0x01 0x33 0x82 0x00 0x02 MOVS p1, p7; ADD.NC p2, r14, #8 + 13640 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 13644 0x44 0xc3 0x50 0x00 0x01 0xf1 0x32 0x30 0x10 0xba LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 + 13654 0x00 0x00 NOPX + 13656 0x00 0x1a 0xc0 0x00 0x00 0x84 J #13696 +.delay_slot + 13662 0x00 0x07 0xc6 0xc8 0xb0 0x44 MOVXM p3, #509016 +.delay_slot +.swstall delay_slot + 13668 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13670 0x00 0x00 NOPX +.delay_slot + 13672 0x0b 0x06 0x31 0x98 ST r17, [p3] +.delay_slot + 13676 0x0a 0x06 0x11 0x98 ST r16, [p2] +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 + 13680 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x01 0xf1 0xb2 0x2c 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 + 13696 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 13700 0x5f 0xee 0xd0 0x00 0x01 0xf2 0x32 0x28 0x10 0xba LDA r27, [p2], #-4; MOVXM p4, #509008 + 13710 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 13714 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 13718 0x02 0x46 0x56 0x98 LDA r18, [p2, #16] + 13722 0x00 0x00 NOPX + 13724 0x00 0x00 NOPX + 13726 0x00 0x00 NOPX + 13728 0x00 0x00 NOPX + 13730 0x00 0x00 NOPX + 13732 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 13736 0x0a 0x06 0x11 0x98 ST r16, [p2] + 13740 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 13744 0x00 0x00 NOPX + 13746 0x00 0x00 NOPX + 13748 0x00 0x00 NOPX + 13750 0x14 0x93 0x08 0x18 ACQ r18, r16 + 13754 0x00 0x2f 0x00 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba MOVA r15, #1; MOVXM p7, #508992 + 13764 0x06 0x00 0x28 0x2b 0xc1 0xe4 MOVX r24, #0; MOV r16, sp + 13770 0x18 0x68 0x5a 0x18 ADD.NC p0, r16, #-76 + 13774 0xfd 0xd3 0x27 0x29 0x81 0xd4 LDA p5, [sp, #-20]; MOV r14, p2 + 13780 0x04 0x06 0x36 0x98 LDA r17, [p4] + 13784 0x60 0xc2 0xd0 0x00 0x01 0xf1 0xb3 0xc0 0x10 0xba LDA r16, [p3]; MOVXM p3, #509824 + 13794 0x07 0x06 0x56 0x98 LDA r18, [p7] + 13798 0x00 0x00 NOPX + 13800 0x00 0x00 NOPX + 13802 0x00 0x00 NOPX + 13804 0x05 0x06 0x76 0x98 LDA r19, [p5] + 13808 0x00 0x00 NOPX + 13810 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 13814 0x14 0xa2 0x07 0x18 ADD r17, r18, #1 + 13818 0x14 0x20 0xfd 0x98 LSHL r16, r16, r15 +.no_stack_arguments + 13822 0x00 0x18 0xf0 0x00 0x01 0x04 JL #12768 +.delay_slot + 13828 0x0f 0x06 0x31 0x98 ST r17, [p7] +.delay_slot + 13832 0x18 0x49 0xc1 0x58 ADD.NC dn0, r19, r16 +.delay_slot + 13836 0x0f 0xb4 0x25 0x98 ST dn0, [sp, #-76] +.delay_slot + 13840 0x0f 0xbb 0x15 0x98 ST r24, [sp, #-72] +.delay_slot + 13844 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0xdf 0x8a 0xc1 0x36 NOPA; NOPB; ST r24, [sp, #-68]; NOPX +.return_address + 13856 0x1a 0x67 0x0a 0x18 ADD.NC p2, r14, #20 + 13860 0x02 0x06 0x16 0x98 LDA r16, [p2] + 13864 0x00 0x00 NOPX + 13866 0x00 0x00 NOPX + 13868 0x00 0x00 NOPX + 13870 0x00 0x00 NOPX + 13872 0x00 0x00 NOPX + 13874 0x00 0x00 NOPX + 13876 0x14 0x10 0xf8 0x18 REL r16, r15 + 13880 0x5c 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x30 0x10 0xba LDA r16, [p2, #-8]; MOVXM p1, #509024 + 13890 0x01 0x06 0x56 0x98 LDA r18, [p1] + 13894 0x07 0x06 0x36 0x98 LDA r17, [p7] + 13898 0x07 0xf4 0x99 0x18 LDA p1, [sp, #-12] + 13902 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] + 13906 0x00 0x00 NOPX + 13908 0x00 0x00 NOPX + 13910 0x13 0xe1 0x01 0x98 SUB r16, r15, r16 + 13914 0x0a 0xe6 0x11 0x98 ST r16, [p2, #-8] + 13918 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 13922 0x80 0x1b 0x40 0x40 0x01 0x84 JNZ r16, #13952 +.delay_slot + 13928 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 13932 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13934 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13938 0x00 0x00 NOPX + 13940 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 13952 0xff 0xb6 0x2e 0xed 0x41 0xd4 LDA r13, [sp, #-4]; MOV lr, r13 + 13958 0x07 0xf1 0xf1 0x18 LDA r15, [sp, #-16] + 13962 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 13966 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 13972 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13974 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13976 0x00 0x00 NOPX +.delay_slot + 13978 0x1f 0x62 0xc0 0xf8 MOV p7, p1 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 13984 +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function_start + 13984 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 13988 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 13992 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 13996 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 14000 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 14004 0x00 0x0d 0x70 0x00 0x00 0x84 J #6880 +.delay_slot +.swstall delay_slot + 14010 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14012 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14014 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14016 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14018 0x00 0x00 NOPX +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + +.text_segment PM 14032 +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function_start + 14032 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 14036 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 14040 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 14044 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 14048 0x00 0x10 0x18 0x00 0x00 0x84 J #8240 +.delay_slot +.swstall delay_slot + 14054 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14056 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14058 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14060 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14062 0x00 0x00 NOPX +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function_start + 14064 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 14068 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 14072 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 14076 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 14080 0x00 0x11 0xc8 0x00 0x00 0x84 J #9104 +.delay_slot +.swstall delay_slot + 14086 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14088 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14090 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14092 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14094 0x00 0x00 NOPX +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function_start + 14096 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 14100 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 14104 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 14108 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 14112 0x00 0x14 0x88 0x00 0x00 0x84 J #10512 +.delay_slot +.swstall delay_slot + 14118 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14120 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14122 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14124 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14126 0x00 0x00 NOPX +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function_start + 14128 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 14132 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 14136 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 14140 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 14144 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 14148 0x00 0x16 0x70 0x00 0x00 0x84 J #11488 +.delay_slot +.swstall delay_slot + 14154 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14156 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14158 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14160 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14162 0x00 0x00 NOPX +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + +.text_segment PM 14176 +.label __Z13_b919_wrapperPPv___func_begin0 +.label _Z13_b919_wrapperPPv +.function_start + 14176 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 14180 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 14184 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 14188 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 14192 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 14196 0x00 0x1a 0x60 0x00 0x00 0x84 J #13504 +.delay_slot +.swstall delay_slot + 14202 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14204 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14206 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14208 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14210 0x00 0x00 NOPX +.label _Z13_b919_wrapperPPv__end +.label __Z13_b919_wrapperPPv___func_end0 + +.text_segment PM 14224 +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function_start + 14224 0x00 0xc0 0x2f 0xa0 0x41 0xe4 MOVX r3, #0; MOV r31, r0 + 14230 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14234 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14238 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14242 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14246 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14250 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14254 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14258 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14262 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14266 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14270 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14274 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14278 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14282 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14286 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14290 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14294 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14298 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14302 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14306 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14310 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14314 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14318 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14322 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14326 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14330 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14334 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14338 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 + 14342 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 14346 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 14350 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 14354 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 14358 0x10 0xc6 0x1c 0x18 DIVS r3, r31, r3, r1 +.delay_slot + 14362 0x18 0x9f 0xa0 0xf8 MOV r2, r31 +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + +.bss_segment DMb 508992 32 + +.data_segment DMb 509024 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 509028 4 + +.bss_segment DMb 509032 1 + +.rodata_segment DMb 509056 +.label _ZL20g_uniformKernelFuncs + 0xa0 + 0x36 + 0x0 + 0x0 + 0xd0 + 0x36 + 0x0 + 0x0 + 0xf0 + 0x36 + 0x0 + 0x0 + 0x10 + 0x37 + 0x0 + 0x0 + 0x30 + 0x37 + 0x0 + 0x0 + 0x60 + 0x37 + 0x0 + 0x0 + +.bss_segment DMb 509120 960 + +.stack DM_stack 506560 508928 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.map new file mode 100644 index 0000000000000000000000000000000000000000..9dbbb173fa1fb48bf48811e71fa4be69d5d5295a --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.map @@ -0,0 +1,287 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:02 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable3 ../Release/0_0_reloadable3.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable3.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3342 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 2368 + + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 3393 + + 0x00000000..0x0007babf ( 506560 items) : Reserved + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + 0x0007c400..0x0007c43f ( 64 items) : Reserved + 0x0007c440..0x0007c443 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c444..0x0007c447 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL14num_depth_iter (Data, Local, .bss.DMb.4) + 0x0007c448..0x0007c44b ( 4 items) : ../Release/0_0_reloadable3.o::_ZL10depth_iter (Data, Local, .bss.DMb.4) + 0x0007c44c..0x0007c44f ( 4 items) : ../Release/0_0_reloadable3.o::_ZL11total_iters (Data, Local, .bss.DMb.4) + 0x0007c450..0x0007c453 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c454..0x0007c457 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c458..0x0007c45b ( 4 items) : ../Release/0_0_reloadable3.o::_ZL10ifmsv_size (Data, Local, .bss.DMb.4) + 0x0007c45c..0x0007c45f ( 4 items) : ../Release/0_0_reloadable3.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4) + 0x0007c460..0x0007c463 ( 4 items) : ../Release/0_0_reloadable3.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c464..0x0007c467 ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c468..0x0007c468 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c480..0x0007c497 ( 24 items) : ../Release/0_0_reloadable3.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z13_b896_wrapperPPv + _Z13_b901_wrapperPPv + _Z13_b906_wrapperPPv + _Z13_b881_wrapperPPv + _Z13_b891_wrapperPPv + _Z13_b919_wrapperPPv + + 0x0007c4c0..0x0007c4ff ( 64 items) : ../Release/0_0_reloadable3.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable3.o::mul1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c540..0x0007c57f ( 64 items) : ../Release/0_0_reloadable3.o::mul1d_params (Data, Global, .bss.DMb.64) + 0x0007c580..0x0007c5bf ( 64 items) : ../Release/0_0_reloadable3.o::clip1d_params (Data, Global, .bss.DMb.64) + 0x0007c5c0..0x0007c77f ( 448 items) : ../Release/0_0_reloadable3.o::conv2d_params (Data, Global, .bss.DMb.64) + 0x0007c780..0x0007c87f ( 256 items) : ../Release/0_0_reloadable3.o::conv2d_dw_params (Data, Global, .bss.DMb.64) + 0x0007ccc0..0x000fffff ( 537408 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 11754 + + 0x00000000..0x0000092f ( 2352 items) : Reserved + 0x00000930..0x00000ab5 ( 390 items) : ../Release/0_0_reloadable3.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000ac0..0x00001055 ( 1430 items) : ../Release/0_0_reloadable3.o::_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (Function, Weak, .text) (stack frame size = 64) + 0x00001060..0x0000116d ( 270 items) : ../Release/0_0_reloadable3.o::_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001170..0x00001ad9 ( 2410 items) : ../Release/0_0_reloadable3.o::_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (Function, Weak, .text) (stack frame size = 128) + + Called functions : _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001ae0..0x00001d17 ( 568 items) : ../Release/0_0_reloadable3.o::_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + + Referenced symbols: _ZL9curr_iter + conv2d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL14num_depth_iter + _ZL8num_iter + _ZL10depth_iter + _ZL11total_iters + + 0x00001d20..0x00001d37 ( 24 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + 0x00001d40..0x00001de1 ( 162 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + + 0x00001df0..0x00001e27 ( 56 items) : ../Release/0_0_reloadable3.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + 0x00001e30..0x00001e6d ( 62 items) : ../Release/0_0_reloadable3.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + + 0x00001e70..0x00001fa9 ( 314 items) : ../Release/0_0_reloadable3.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001fb0..0x00002021 ( 114 items) : ../Release/0_0_reloadable3.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + 0x00002030..0x00002217 ( 488 items) : ../Release/0_0_reloadable3.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002220..0x00002283 ( 100 items) : ../Release/0_0_reloadable3.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002290..0x00002381 ( 242 items) : ../Release/0_0_reloadable3.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + 0x00002390..0x00002577 ( 488 items) : ../Release/0_0_reloadable3.o::_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + clip1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002580..0x000025f3 ( 116 items) : ../Release/0_0_reloadable3.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002600..0x00002649 ( 74 items) : ../Release/0_0_reloadable3.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + + 0x00002650..0x00002865 ( 534 items) : ../Release/0_0_reloadable3.o::_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (Function, Local, .text) (stack frame size = 128) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002870..0x00002905 ( 150 items) : ../Release/0_0_reloadable3.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + 0x00002910..0x00002af7 ( 488 items) : ../Release/0_0_reloadable3.o::_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002b00..0x00002b17 ( 24 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + 0x00002b20..0x00002ba9 ( 138 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + + 0x00002bb0..0x00002cd3 ( 292 items) : ../Release/0_0_reloadable3.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002ce0..0x00002f39 ( 602 items) : ../Release/0_0_reloadable3.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00002f40..0x000031df ( 672 items) : ../Release/0_0_reloadable3.o::_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh (Function, Local, .text) (stack frame size = 64) + + Called functions : _ZN12me_primitive10udiv_dstepEjjRjS0_ + + Referenced symbols: conv2d_dw_params + _ZN12me_primitive11control_rndE + + 0x000031e0..0x000034b1 ( 722 items) : ../Release/0_0_reloadable3.o::_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x000034c0..0x0000369d ( 478 items) : ../Release/0_0_reloadable3.o::_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 128) + + Called functions : _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL8num_iter + _ZL10ifmsv_size + conv2d_dw_params + + 0x000036a0..0x000036c3 ( 36 items) : ../Release/0_0_reloadable3.o::_Z13_b896_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x000036d0..0x000036ef ( 32 items) : ../Release/0_0_reloadable3.o::_Z13_b901_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x000036f0..0x0000370f ( 32 items) : ../Release/0_0_reloadable3.o::_Z13_b906_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003710..0x0000372f ( 32 items) : ../Release/0_0_reloadable3.o::_Z13_b881_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003730..0x00003753 ( 36 items) : ../Release/0_0_reloadable3.o::_Z13_b891_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00003760..0x00003783 ( 36 items) : ../Release/0_0_reloadable3.o::_Z13_b919_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003790..0x0000381d ( 142 items) : me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive10udiv_dstepEjjRjS0_ (Function, Global, .text) (stack frame size = 0) + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x381e + _pc_start = 0x930 + _sp_end_DM_stack = 0x7c400 + _sp_start_DM_stack = 0x7bac0 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 2368 + ---------- ---------- + 2368 Total + +Section summary for memory 'DMb': + + .bss .data .rodata File + ---------- ---------- ---------- ---------- + 992 4 24 ../Release/0_0_reloadable3.o + 5 0 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- ---------- + 997 4 24 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 11612 ../Release/0_0_reloadable3.o + 142 me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- + 11754 Total + +File summary: + +../Release/0_0_reloadable3.o + DMb 1020 + PM 11612 + +me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + +me_div.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + PM 142 + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.sdr new file mode 100644 index 0000000000000000000000000000000000000000..531300c36b89212abd7e8ea03e380e975fc5f93c --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.sdr @@ -0,0 +1,123 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:02 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable3 ../Release/0_0_reloadable3.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable3.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork3342 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol _ZN12me_primitive11control_satE 0x0007c464 +_symbol _ZN12me_primitive11control_rndE 0x0007c468 +_symbol add1d_attribute_broadcasting_params 0x0007c4c0 +_symbol mul1d_attribute_broadcasting_params 0x0007c500 +_symbol mul1d_params 0x0007c540 +_symbol clip1d_params 0x0007c580 +_symbol conv2d_params 0x0007c5c0 +_symbol conv2d_dw_params 0x0007c780 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x00000930 +_symbol _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh 0x00000ac0 +_symbol _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams 0x00001060 +_symbol _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params 0x00001170 +_symbol _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001ae0 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00001d20 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001d40 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00001df0 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001e30 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00001e70 +_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00001fb0 +_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002030 +_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x00002220 +_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00002290 +_symbol _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002390 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv 0x00002580 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002600 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E 0x00002870 +_symbol _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002910 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x00002b00 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002b20 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x00002bb0 +_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00002ce0 +_symbol _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params 0x000031e0 +_symbol _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x000034c0 +_symbol _Z13_b896_wrapperPPv 0x000036a0 +_symbol _Z13_b901_wrapperPPv 0x000036d0 +_symbol _Z13_b906_wrapperPPv 0x000036f0 +_symbol _Z13_b881_wrapperPPv 0x00003710 +_symbol _Z13_b891_wrapperPPv 0x00003730 +_symbol _Z13_b919_wrapperPPv 0x00003760 +_symbol _ZN12me_primitive10udiv_dstepEjjRjS0_ 0x00003790 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.srv new file mode 100644 index 0000000000000000000000000000000000000000..f6eec0e2b8bd493ef0112849914646d03f76489e --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.srv @@ -0,0 +1,17226 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:48:03 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable3 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable3.cc" 82 first +.src_ref 0 "0_0_reloadable3.cc" 84 60 first +.src_ref 0 "0_0_reloadable3.cc" 84 110 +.src_ref 0 "0_0_reloadable3.cc" 86 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.function_start + 2352 "11010100" // LDA r17, [p0]; MOV r2, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "01000001" // /* MW 5 */ + 2354 "00100001" // /* MW 4 */ + 2355 "11010001" // /* MW 3 */ + 2356 "11000110" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 82 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ + 2364 "00000010" // ST p7, [sp, #-12]; MOV r1, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2365 "01110000" // /* MW 7 */ + 2366 "11010000" // /* MW 6 */ + 2367 "00101011" // /* MW 5 */ + 2368 "00000000" // /* MW 4 */ + 2369 "10110000" // /* MW 3 */ + 2370 "11110011" // /* MW 2 */ + 2371 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 84 110 +.src_ref 0 "0_0_reloadable3.cc" 86 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2372 "00000010" // ST lr, [sp, #-4]; MOV r15, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2373 "01110000" // /* MW 7 */ + 2374 "10010000" // /* MW 6 */ + 2375 "11101000" // /* MW 5 */ + 2376 "00000001" // /* MW 4 */ + 2377 "10110000" // /* MW 3 */ + 2378 "10000111" // /* MW 2 */ + 2379 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 84 110 first + 2380 "01011100" // ST r1, [sp, #-8]; NEZ r16, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2381 "11100000" // /* MW 5 */ + 2382 "11000001" // /* MW 4 */ + 2383 "10110111" // /* MW 3 */ + 2384 "00000110" // /* MW 2 */ + 2385 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2386 "11111000" // MOV r26, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2387 "00100000" // /* MW 3 */ + 2388 "10011000" // /* MW 2 */ + 2389 "00011110" // /* MW 1 */ + 2390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2391 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2392 "00011000" // ADD.NC p7, r17, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2393 "10000010" // /* MW 3 */ + 2394 "01101000" // /* MW 2 */ + 2395 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2396 "10011000" // LDA r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2397 "00110110" // /* MW 3 */ + 2398 "00011110" // /* MW 2 */ + 2399 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2400 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2401 "01110110" // /* MW 3 */ + 2402 "00111110" // /* MW 2 */ + 2403 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2404 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2405 "01010110" // /* MW 3 */ + 2406 "11101110" // /* MW 2 */ + 2407 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2408 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2409 "01110110" // /* MW 3 */ + 2410 "00000111" // /* MW 2 */ + 2411 "00000111" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ + 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2423 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2424 "00011000" // SEL.EQZ r17, r17, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2425 "00110010" // /* MW 3 */ + 2426 "01100011" // /* MW 2 */ + 2427 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2428 "10011000" // ST r17, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2429 "00110001" // /* MW 3 */ + 2430 "11010110" // /* MW 2 */ + 2431 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 + 2432 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2433 "11111101" // /* MW 3 */ + 2434 "11100010" // /* MW 2 */ + 2435 "00010111" // /* MW 1 */ + 2436 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2437 "00000000" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ + 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2441 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2442 "00011000" // ACQ.COND r18, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2443 "00011000" // /* MW 3 */ + 2444 "10010111" // /* MW 2 */ + 2445 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 60 +.src_ref 0 "0_0_reloadable3.cc" 90 7 + 2446 "00011000" // MOVX r18, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2447 "00001001" // /* MW 3 */ + 2448 "00100100" // /* MW 2 */ + 2449 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 60 first + 2450 "10011000" // LSHL r20, r16, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2451 "00101101" // /* MW 3 */ + 2452 "00101001" // /* MW 2 */ + 2453 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 60 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2454 "11111000" // MOV dj0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2455 "00100000" // /* MW 3 */ + 2456 "10001010" // /* MW 2 */ + 2457 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 60 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2458 "00001100" // LDA r19, [p0, dj0]; ST dj0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2459 "10001011" // /* MW 5 */ + 2460 "11011000" // /* MW 4 */ + 2461 "11011111" // /* MW 3 */ + 2462 "01001110" // /* MW 2 */ + 2463 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2465 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2467 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2469 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2471 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 110 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 2472 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2473 "00000101" // /* MW 3 */ + 2474 "00100110" // /* MW 2 */ + 2475 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 86 110 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2476 "10011000" // LTU r26, r19, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2477 "11111100" // /* MW 3 */ + 2478 "11110100" // /* MW 2 */ + 2479 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2480 "00000010" // ST r26, [sp, #-16]; ADD.NC p7, r19, #4 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2481 "00000000" // /* MW 7 */ + 2482 "11000001" // /* MW 6 */ + 2483 "10110100" // /* MW 5 */ + 2484 "00000011" // /* MW 4 */ + 2485 "10110000" // /* MW 3 */ + 2486 "01101010" // /* MW 2 */ + 2487 "11111110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2488 "10011000" // LDA r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2489 "01110110" // /* MW 3 */ + 2490 "00011110" // /* MW 2 */ + 2491 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2492 "10011000" // LDA r21, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2493 "10110110" // /* MW 3 */ + 2494 "00111110" // /* MW 2 */ + 2495 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2496 "10011000" // LDA r20, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2497 "10010110" // /* MW 3 */ + 2498 "11101110" // /* MW 2 */ + 2499 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2500 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2501 "01110110" // /* MW 3 */ + 2502 "00000111" // /* MW 2 */ + 2503 "00000111" // /* MW 1 */ + 2504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2505 "00000000" // /* MW 1 */ + 2506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2507 "00000000" // /* MW 1 */ + 2508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2509 "00000000" // /* MW 1 */ + 2510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2511 "00000000" // /* MW 1 */ + 2512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2513 "00000000" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2516 "00011000" // SEL.EQZ r19, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2517 "01010010" // /* MW 3 */ + 2518 "11100111" // /* MW 2 */ + 2519 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2520 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2521 "01110001" // /* MW 3 */ + 2522 "11010110" // /* MW 2 */ + 2523 "00001111" // /* MW 1 */ + 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2525 "00000000" // /* MW 1 */ + 2526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2527 "00000000" // /* MW 1 */ + 2528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2529 "00000000" // /* MW 1 */ + 2530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2531 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2532 "00011000" // ACQ.COND r20, r17, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2533 "00011000" // /* MW 3 */ + 2534 "00010111" // /* MW 2 */ + 2535 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 7 first + 2536 "10011000" // LSHL r17, r0, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2537 "00101101" // /* MW 3 */ + 2538 "00100011" // /* MW 2 */ + 2539 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 7 + 2540 "11111000" // MOV dj0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2541 "10100000" // /* MW 3 */ + 2542 "10001000" // /* MW 2 */ + 2543 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 7 + 2544 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2545 "00000000" // /* MW 5 */ + 2546 "11001001" // /* MW 4 */ + 2547 "11001110" // /* MW 3 */ + 2548 "00000111" // /* MW 2 */ + 2549 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 7 + 2550 "00001100" // LDA p1, [p7, dj0]; ST r16, [sp, #-24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2551 "00101011" // /* MW 5 */ + 2552 "11010100" // /* MW 4 */ + 2553 "11011111" // /* MW 3 */ + 2554 "00010011" // /* MW 2 */ + 2555 "11100000" // /* MW 1 */ + 2556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2557 "00000000" // /* MW 1 */ + 2558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2559 "00000000" // /* MW 1 */ + 2560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2561 "00000000" // /* MW 1 */ + 2562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2563 "00000000" // /* MW 1 */ + 2564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2565 "00000000" // /* MW 1 */ + 2566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2567 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 90 4 +.no_stack_arguments + 2568 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2569 "01000000" // /* MW 3 */ + 2570 "00110000" // /* MW 2 */ + 2571 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 93 60 +.src_ref 0 "0_0_reloadable3.cc" 95 60 +.delay_slot + 2572 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2573 "11000000" // /* MW 3 */ + 2574 "01100000" // /* MW 2 */ + 2575 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2581 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2582 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2583 "01111110" // /* MW 9 */ + 2584 "10100101" // /* MW 8 */ + 2585 "00000001" // /* MW 7 */ + 2586 "00000000" // /* MW 6 */ + 2587 "00010000" // /* MW 5 */ + 2588 "00000000" // /* MW 4 */ + 2589 "11110000" // /* MW 3 */ + 2590 "00101100" // /* MW 2 */ + 2591 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 93 60 first +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.return_address + 2592 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2593 "00001010" // /* MW 5 */ + 2594 "01000000" // /* MW 4 */ + 2595 "11010000" // /* MW 3 */ + 2596 "11000110" // /* MW 2 */ + 2597 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2598 "00011000" // LDA r26, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2599 "01010001" // /* MW 3 */ + 2600 "11101011" // /* MW 2 */ + 2601 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 95 60 + 2602 "00011000" // LDA dj0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2603 "01000001" // /* MW 3 */ + 2604 "11101100" // /* MW 2 */ + 2605 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2606 "00011000" // LDA el0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2607 "00101001" // /* MW 3 */ + 2608 "11110000" // /* MW 2 */ + 2609 "00000111" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ + 2612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2613 "00000000" // /* MW 1 */ + 2614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2616 "00011000" // ADD.NC p1, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "10001000" // /* MW 3 */ + 2618 "01101000" // /* MW 2 */ + 2619 "00011001" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2620 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2621 "00110110" // /* MW 3 */ + 2622 "00000110" // /* MW 2 */ + 2623 "00000001" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ + 2628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2629 "00000000" // /* MW 1 */ + 2630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2631 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2633 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2634 "11111000" // MOV r26, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2635 "00011100" // /* MW 3 */ + 2636 "10100000" // /* MW 2 */ + 2637 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2638 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2639 "00001000" // /* MW 3 */ + 2640 "01010101" // /* MW 2 */ + 2641 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2642 "11010100" // LDA r17, [p1, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2643 "01000001" // /* MW 5 */ + 2644 "10101111" // /* MW 4 */ + 2645 "11011101" // /* MW 3 */ + 2646 "11000110" // /* MW 2 */ + 2647 "00111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 95 60 first + 2648 "10011000" // LDA r18, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2649 "01010110" // /* MW 3 */ + 2650 "00000010" // /* MW 2 */ + 2651 "00000111" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ + 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2655 "00000000" // /* MW 1 */ + 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2657 "00000000" // /* MW 1 */ + 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2659 "00000000" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2662 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2663 "00010001" // /* MW 3 */ + 2664 "00100111" // /* MW 2 */ + 2665 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2666 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2667 "00010000" // /* MW 5 */ + 2668 "11010010" // /* MW 4 */ + 2669 "01000000" // /* MW 3 */ + 2670 "01100110" // /* MW 2 */ + 2671 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2672 "00001100" // LDA r17, [p0]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2673 "01100011" // /* MW 5 */ + 2674 "11101100" // /* MW 4 */ + 2675 "11010011" // /* MW 3 */ + 2676 "11000110" // /* MW 2 */ + 2677 "00000000" // /* MW 1 */ + 2678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2679 "00000000" // /* MW 1 */ + 2680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2681 "00000000" // /* MW 1 */ + 2682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2683 "00000000" // /* MW 1 */ + 2684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2685 "00000000" // /* MW 1 */ + 2686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2687 "00000000" // /* MW 1 */ + 2688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2689 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2690 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2691 "00001000" // /* MW 3 */ + 2692 "01010101" // /* MW 2 */ + 2693 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 98 + 2694 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2695 "00111001" // /* MW 3 */ + 2696 "11111100" // /* MW 2 */ + 2697 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2698 "10011000" // LDA r17, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2699 "00110110" // /* MW 3 */ + 2700 "11110110" // /* MW 2 */ + 2701 "00000000" // /* MW 1 */ + 2702 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "10011001" // /* MW 3 */ + 2704 "11110111" // /* MW 2 */ + 2705 "00000111" // /* MW 1 */ + 2706 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "11110001" // /* MW 3 */ + 2708 "11111001" // /* MW 2 */ + 2709 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 98 first + 2710 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2711 "00000001" // /* MW 5 */ + 2712 "00000000" // /* MW 4 */ + 2713 "00000000" // /* MW 3 */ + 2714 "11111000" // /* MW 2 */ + 2715 "11111111" // /* MW 1 */ + 2716 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2717 "00000000" // /* MW 1 */ + 2718 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2719 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 98 + 2720 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2721 "00000000" // /* MW 3 */ + 2722 "00101000" // /* MW 2 */ + 2723 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2724 "11111000" // MOV r27, el0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2725 "00011100" // /* MW 3 */ + 2726 "11100000" // /* MW 2 */ + 2727 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot + 2728 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2729 "00010001" // /* MW 3 */ + 2730 "00100001" // /* MW 2 */ + 2731 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2733 "00000010" // /* MW 3 */ + 2734 "01100001" // /* MW 2 */ + 2735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2736 "10011000" // ST r16, [p0, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2737 "00010001" // /* MW 3 */ + 2738 "11110110" // /* MW 2 */ + 2739 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2741 "00000000" // /* MW 1 */ +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 432 first +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.function_start + 2752 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2753 "01111000" // /* MW 9 */ + 2754 "01100000" // /* MW 8 */ + 2755 "01001001" // /* MW 7 */ + 2756 "10001000" // /* MW 6 */ + 2757 "01000000" // /* MW 5 */ + 2758 "00000000" // /* MW 4 */ + 2759 "11010000" // /* MW 3 */ + 2760 "10000101" // /* MW 2 */ + 2761 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2762 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2763 "01001000" // /* MW 9 */ + 2764 "10000010" // /* MW 8 */ + 2765 "00110000" // /* MW 7 */ + 2766 "11101001" // /* MW 6 */ + 2767 "01010111" // /* MW 5 */ + 2768 "00111110" // /* MW 4 */ + 2769 "11010000" // /* MW 3 */ + 2770 "10000001" // /* MW 2 */ + 2771 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 432 +.src_ref 2 "conv2d_bf16_params.h" 444 52 + 2772 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2773 "01110000" // /* MW 9 */ + 2774 "00000000" // /* MW 8 */ + 2775 "00000000" // /* MW 7 */ + 2776 "00000000" // /* MW 6 */ + 2777 "00000010" // /* MW 5 */ + 2778 "00000000" // /* MW 4 */ + 2779 "00000000" // /* MW 3 */ + 2780 "10000001" // /* MW 2 */ + 2781 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 30 +.src_ref 2 "conv2d_bf16_params.h" 458 30 + 2782 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2783 "01011000" // /* MW 11 */ + 2784 "00010000" // /* MW 10 */ + 2785 "00000000" // /* MW 9 */ + 2786 "00101000" // /* MW 8 */ + 2787 "00000000" // /* MW 7 */ + 2788 "10000001" // /* MW 6 */ + 2789 "10110101" // /* MW 5 */ + 2790 "11111101" // /* MW 4 */ + 2791 "00000111" // /* MW 3 */ + 2792 "10000110" // /* MW 2 */ + 2793 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2794 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2795 "01011000" // /* MW 11 */ + 2796 "00001111" // /* MW 10 */ + 2797 "10001000" // /* MW 9 */ + 2798 "10101010" // /* MW 8 */ + 2799 "01010111" // /* MW 7 */ + 2800 "10111111" // /* MW 6 */ + 2801 "11010101" // /* MW 5 */ + 2802 "11111001" // /* MW 4 */ + 2803 "00000111" // /* MW 3 */ + 2804 "01100011" // /* MW 2 */ + 2805 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2806 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2807 "00000010" // /* MW 5 */ + 2808 "01100000" // /* MW 4 */ + 2809 "10110000" // /* MW 3 */ + 2810 "10111110" // /* MW 2 */ + 2811 "11111110" // /* MW 1 */ + 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2814 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2815 "00101001" // /* MW 3 */ + 2816 "00011100" // /* MW 2 */ + 2817 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2818 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2819 "00001001" // /* MW 3 */ + 2820 "00011100" // /* MW 2 */ + 2821 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2822 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2823 "00101110" // /* MW 3 */ + 2824 "00011100" // /* MW 2 */ + 2825 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2826 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2827 "00001110" // /* MW 3 */ + 2828 "00011100" // /* MW 2 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2840 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2841 "00101001" // /* MW 3 */ + 2842 "00011100" // /* MW 2 */ + 2843 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2844 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2845 "00001001" // /* MW 3 */ + 2846 "00011100" // /* MW 2 */ + 2847 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2848 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2849 "00101110" // /* MW 3 */ + 2850 "00011100" // /* MW 2 */ + 2851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2852 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2853 "00001110" // /* MW 3 */ + 2854 "00011100" // /* MW 2 */ + 2855 "00000000" // /* MW 1 */ + 2856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2857 "00000000" // /* MW 1 */ + 2858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2859 "00000000" // /* MW 1 */ + 2860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2861 "00000000" // /* MW 1 */ + 2862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2863 "00000000" // /* MW 1 */ + 2864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2865 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2866 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2867 "00101001" // /* MW 3 */ + 2868 "00011100" // /* MW 2 */ + 2869 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2870 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2871 "00001001" // /* MW 3 */ + 2872 "00011100" // /* MW 2 */ + 2873 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2874 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2875 "00001110" // /* MW 3 */ + 2876 "00000100" // /* MW 2 */ + 2877 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2878 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2879 "00101110" // /* MW 3 */ + 2880 "00010100" // /* MW 2 */ + 2881 "00000000" // /* MW 1 */ + 2882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2883 "00000000" // /* MW 1 */ + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ + 2886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2887 "00000000" // /* MW 1 */ + 2888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2889 "00000000" // /* MW 1 */ + 2890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2891 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2892 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2893 "00001001" // /* MW 3 */ + 2894 "00000100" // /* MW 2 */ + 2895 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2896 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2897 "00101001" // /* MW 3 */ + 2898 "00010100" // /* MW 2 */ + 2899 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 40 first + 2900 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2901 "10101010" // /* MW 3 */ + 2902 "11011101" // /* MW 2 */ + 2903 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 447 34 first + 2904 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2905 "00101010" // /* MW 3 */ + 2906 "00011110" // /* MW 2 */ + 2907 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 448 34 first + 2908 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2909 "11001010" // /* MW 3 */ + 2910 "10111101" // /* MW 2 */ + 2911 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2912 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2913 "11111010" // /* MW 3 */ + 2914 "11111101" // /* MW 2 */ + 2915 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first + 2916 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2917 "01101010" // /* MW 3 */ + 2918 "00001010" // /* MW 2 */ + 2919 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 20 first + 2920 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2921 "11101010" // /* MW 3 */ + 2922 "10101100" // /* MW 2 */ + 2923 "00000010" // /* MW 1 */ + 2924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2925 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first + 2926 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2927 "00011101" // /* MW 3 */ + 2928 "01000010" // /* MW 2 */ + 2929 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first +.src_ref 2 "conv2d_bf16_params.h" 462 7 first + 2930 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2931 "00000001" // /* MW 5 */ + 2932 "00110001" // /* MW 4 */ + 2933 "11111001" // /* MW 3 */ + 2934 "00100000" // /* MW 2 */ + 2935 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2936 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2937 "01011101" // /* MW 3 */ + 2938 "10100100" // /* MW 2 */ + 2939 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 2940 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2941 "01000111" // /* MW 3 */ + 2942 "11110110" // /* MW 2 */ + 2943 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2944 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2945 "00111001" // /* MW 5 */ + 2946 "10110111" // /* MW 4 */ + 2947 "01000000" // /* MW 3 */ + 2948 "01001010" // /* MW 2 */ + 2949 "11000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2950 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2951 "00100010" // /* MW 3 */ + 2952 "01111011" // /* MW 2 */ + 2953 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first + 2954 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2955 "01100111" // /* MW 3 */ + 2956 "11001100" // /* MW 2 */ + 2957 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 + 2958 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2959 "00000100" // /* MW 3 */ + 2960 "10110111" // /* MW 2 */ + 2961 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 first + 2962 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2963 "01000001" // /* MW 5 */ + 2964 "10111011" // /* MW 4 */ + 2965 "10111100" // /* MW 3 */ + 2966 "11101011" // /* MW 2 */ + 2967 "01111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first + 2968 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2969 "00000100" // /* MW 5 */ + 2970 "10011011" // /* MW 4 */ + 2971 "10110011" // /* MW 3 */ + 2972 "10111110" // /* MW 2 */ + 2973 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first + 2974 "10000100" // JNZ r25, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 2975 "00000001" // /* MW 5 */ + 2976 "01000000" // /* MW 4 */ + 2977 "11111000" // /* MW 3 */ + 2978 "00000101" // /* MW 2 */ + 2979 "11001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first +.delay_slot + 2980 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2981 "01000111" // /* MW 3 */ + 2982 "10110110" // /* MW 2 */ + 2983 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first +.delay_slot + 2984 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2985 "01000100" // /* MW 3 */ + 2986 "01110001" // /* MW 2 */ + 2987 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.delay_slot + 2988 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2989 "01011101" // /* MW 3 */ + 2990 "11111100" // /* MW 2 */ + 2991 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 first +.delay_slot + 2992 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2993 "01001101" // /* MW 3 */ + 2994 "11101000" // /* MW 2 */ + 2995 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.delay_slot + 2996 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2997 "00110010" // /* MW 3 */ + 2998 "10001100" // /* MW 2 */ + 2999 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 + 3000 "10000100" // JNZ r27, #3056 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3056 delay_slots=5 */ + 3001 "00000001" // /* MW 5 */ + 3002 "01000000" // /* MW 4 */ + 3003 "11111000" // /* MW 3 */ + 3004 "00000101" // /* MW 2 */ + 3005 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3015 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3016 "10111010" // MOVA r15, #1; J #3104 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3104 delay_slots=5 */ + 3017 "00100000" // /* MW 9 */ + 3018 "00000000" // /* MW 8 */ + 3019 "00000000" // /* MW 7 */ + 3020 "10000100" // /* MW 6 */ + 3021 "00000001" // /* MW 5 */ + 3022 "00000000" // /* MW 4 */ + 3023 "00000000" // /* MW 3 */ + 3024 "00101111" // /* MW 2 */ + 3025 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3026 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3027 "01011000" // /* MW 9 */ + 3028 "00001100" // /* MW 8 */ + 3029 "10001000" // /* MW 7 */ + 3030 "10101011" // /* MW 6 */ + 3031 "01010111" // /* MW 5 */ + 3032 "00111110" // /* MW 4 */ + 3033 "00000000" // /* MW 3 */ + 3034 "00011010" // /* MW 2 */ + 3035 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3036 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3037 "01000001" // /* MW 5 */ + 3038 "00100000" // /* MW 4 */ + 3039 "00100001" // /* MW 3 */ + 3040 "01000010" // /* MW 2 */ + 3041 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.delay_slot + 3042 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3043 "00001101" // /* MW 3 */ + 3044 "00011010" // /* MW 2 */ + 3045 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.delay_slot + 3046 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3047 "00111101" // /* MW 3 */ + 3048 "00001110" // /* MW 2 */ + 3049 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3050 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3051 "11100010" // /* MW 5 */ + 3052 "10010001" // /* MW 4 */ + 3053 "11111111" // /* MW 3 */ + 3054 "00101100" // /* MW 2 */ + 3055 "00000000" // /* MW 1 */ +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3056 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3057 "01011000" // /* MW 11 */ + 3058 "11111100" // /* MW 10 */ + 3059 "10001111" // /* MW 9 */ + 3060 "10001000" // /* MW 8 */ + 3061 "01010000" // /* MW 7 */ + 3062 "00000001" // /* MW 6 */ + 3063 "00001011" // /* MW 5 */ + 3064 "10000010" // /* MW 4 */ + 3065 "10000001" // /* MW 3 */ + 3066 "00000010" // /* MW 2 */ + 3067 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3068 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3069 "01011000" // /* MW 9 */ + 3070 "00001100" // /* MW 8 */ + 3071 "10001000" // /* MW 7 */ + 3072 "00001011" // /* MW 6 */ + 3073 "10100000" // /* MW 5 */ + 3074 "00000001" // /* MW 4 */ + 3075 "11100000" // /* MW 3 */ + 3076 "00011000" // /* MW 2 */ + 3077 "00100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3078 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3079 "01011000" // /* MW 9 */ + 3080 "00000001" // /* MW 8 */ + 3081 "11101000" // /* MW 7 */ + 3082 "10101001" // /* MW 6 */ + 3083 "01010111" // /* MW 5 */ + 3084 "00111110" // /* MW 4 */ + 3085 "00000000" // /* MW 3 */ + 3086 "00000010" // /* MW 2 */ + 3087 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 87 + 3088 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3089 "00000000" // /* MW 15 */ + 3090 "00000000" // /* MW 14 */ + 3091 "01011000" // /* MW 13 */ + 3092 "00000011" // /* MW 12 */ + 3093 "10101000" // /* MW 11 */ + 3094 "11101001" // /* MW 10 */ + 3095 "01110001" // /* MW 9 */ + 3096 "00000000" // /* MW 8 */ + 3097 "01011011" // /* MW 7 */ + 3098 "00000001" // /* MW 6 */ + 3099 "00100000" // /* MW 5 */ + 3100 "00000000" // /* MW 4 */ + 3101 "11110000" // /* MW 3 */ + 3102 "00101100" // /* MW 2 */ + 3103 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.src_ref 2 "conv2d_bf16_params.h" 495 68 first +.src_ref 2 "conv2d_bf16_params.h" 495 112 +.src_ref 2 "conv2d_bf16_params.h" 682 38 + 3104 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3105 "01011000" // /* MW 9 */ + 3106 "00111100" // /* MW 8 */ + 3107 "00000000" // /* MW 7 */ + 3108 "00111100" // /* MW 6 */ + 3109 "10110011" // /* MW 5 */ + 3110 "00011011" // /* MW 4 */ + 3111 "01010000" // /* MW 3 */ + 3112 "11000101" // /* MW 2 */ + 3113 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 first +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 495 112 + 3114 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3115 "01011000" // /* MW 9 */ + 3116 "11001101" // /* MW 8 */ + 3117 "10000111" // /* MW 7 */ + 3118 "00010010" // /* MW 6 */ + 3119 "00101101" // /* MW 5 */ + 3120 "00000011" // /* MW 4 */ + 3121 "01010000" // /* MW 3 */ + 3122 "00000101" // /* MW 2 */ + 3123 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 first +.src_ref 2 "conv2d_bf16_params.h" 496 68 +.src_ref 2 "conv2d_bf16_params.h" 504 35 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 578 47 + 3124 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3125 "01011000" // /* MW 9 */ + 3126 "00110111" // /* MW 8 */ + 3127 "10000000" // /* MW 7 */ + 3128 "10010001" // /* MW 6 */ + 3129 "11011010" // /* MW 5 */ + 3130 "00111011" // /* MW 4 */ + 3131 "00000000" // /* MW 3 */ + 3132 "01010111" // /* MW 2 */ + 3133 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.src_ref 2 "conv2d_bf16_params.h" 504 45 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 +.src_ref 2 "conv2d_bf16_params.h" 519 42 +.src_ref 2 "conv2d_bf16_params.h" 700 34 + 3134 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3135 "01011000" // /* MW 9 */ + 3136 "10111100" // /* MW 8 */ + 3137 "00000111" // /* MW 7 */ + 3138 "00111101" // /* MW 6 */ + 3139 "10110000" // /* MW 5 */ + 3140 "00101011" // /* MW 4 */ + 3141 "00000000" // /* MW 3 */ + 3142 "00000011" // /* MW 2 */ + 3143 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 492 25 first +.src_ref 2 "conv2d_bf16_params.h" 497 46 +.src_ref 2 "conv2d_bf16_params.h" 509 50 + 3144 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3145 "01011000" // /* MW 9 */ + 3146 "01110000" // /* MW 8 */ + 3147 "10000000" // /* MW 7 */ + 3148 "01101100" // /* MW 6 */ + 3149 "01101100" // /* MW 5 */ + 3150 "00011111" // /* MW 4 */ + 3151 "00000000" // /* MW 3 */ + 3152 "00010000" // /* MW 2 */ + 3153 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 520 34 first + 3154 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3155 "01011101" // /* MW 5 */ + 3156 "00011110" // /* MW 4 */ + 3157 "00001000" // /* MW 3 */ + 3158 "10010010" // /* MW 2 */ + 3159 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 +.src_ref 2 "conv2d_bf16_params.h" 520 48 + 3160 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3161 "01011001" // /* MW 9 */ + 3162 "00110001" // /* MW 8 */ + 3163 "10000000" // /* MW 7 */ + 3164 "01101111" // /* MW 6 */ + 3165 "01100001" // /* MW 5 */ + 3166 "00101101" // /* MW 4 */ + 3167 "10110000" // /* MW 3 */ + 3168 "01011010" // /* MW 2 */ + 3169 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 +.src_ref 2 "conv2d_bf16_params.h" 507 42 first + 3170 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3171 "00000101" // /* MW 5 */ + 3172 "00011111" // /* MW 4 */ + 3173 "00111100" // /* MW 3 */ + 3174 "10111010" // /* MW 2 */ + 3175 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 99 first + 3176 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3177 "00010001" // /* MW 3 */ + 3178 "11000010" // /* MW 2 */ + 3179 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 610 64 +.src_ref 2 "conv2d_bf16_params.h" 709 96 + 3180 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3181 "00011101" // /* MW 5 */ + 3182 "10100000" // /* MW 4 */ + 3183 "11110000" // /* MW 3 */ + 3184 "11000011" // /* MW 2 */ + 3185 "10001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first + 3186 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3187 "00100001" // /* MW 3 */ + 3188 "10100011" // /* MW 2 */ + 3189 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 first + 3190 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3191 "00011101" // /* MW 3 */ + 3192 "11111110" // /* MW 2 */ + 3193 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 506 48 +.src_ref 2 "conv2d_bf16_params.h" 519 42 first + 3194 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3195 "01011001" // /* MW 9 */ + 3196 "01010111" // /* MW 8 */ + 3197 "10000000" // /* MW 7 */ + 3198 "11101110" // /* MW 6 */ + 3199 "11110001" // /* MW 5 */ + 3200 "00111011" // /* MW 4 */ + 3201 "00110000" // /* MW 3 */ + 3202 "01111110" // /* MW 2 */ + 3203 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 68 first +.src_ref 2 "conv2d_bf16_params.h" 504 35 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 + 3204 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3205 "01011000" // /* MW 9 */ + 3206 "10110010" // /* MW 8 */ + 3207 "10000111" // /* MW 7 */ + 3208 "00111101" // /* MW 6 */ + 3209 "00110000" // /* MW 5 */ + 3210 "00101111" // /* MW 4 */ + 3211 "01010000" // /* MW 3 */ + 3212 "01010101" // /* MW 2 */ + 3213 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3214 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3215 "01111011" // /* MW 5 */ + 3216 "11001100" // /* MW 4 */ + 3217 "10111001" // /* MW 3 */ + 3218 "01001110" // /* MW 2 */ + 3219 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 first +.src_ref 2 "conv2d_bf16_params.h" 520 19 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3220 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3221 "01011000" // /* MW 9 */ + 3222 "11110110" // /* MW 8 */ + 3223 "00000000" // /* MW 7 */ + 3224 "00101101" // /* MW 6 */ + 3225 "01101011" // /* MW 5 */ + 3226 "00111111" // /* MW 4 */ + 3227 "11100000" // /* MW 3 */ + 3228 "01010100" // /* MW 2 */ + 3229 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 46 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3230 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3231 "01011000" // /* MW 9 */ + 3232 "01010000" // /* MW 8 */ + 3233 "10000111" // /* MW 7 */ + 3234 "00010000" // /* MW 6 */ + 3235 "00111000" // /* MW 5 */ + 3236 "00100111" // /* MW 4 */ + 3237 "01010000" // /* MW 3 */ + 3238 "01000011" // /* MW 2 */ + 3239 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3240 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3241 "01100111" // /* MW 3 */ + 3242 "11111110" // /* MW 2 */ + 3243 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3244 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3245 "01100111" // /* MW 3 */ + 3246 "11100000" // /* MW 2 */ + 3247 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3248 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3249 "00000101" // /* MW 3 */ + 3250 "11110111" // /* MW 2 */ + 3251 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3252 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3253 "01010100" // /* MW 3 */ + 3254 "11101011" // /* MW 2 */ + 3255 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3256 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3257 "01100001" // /* MW 5 */ + 3258 "10100000" // /* MW 4 */ + 3259 "11011000" // /* MW 3 */ + 3260 "10100011" // /* MW 2 */ + 3261 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 first +.src_ref 2 "conv2d_bf16_params.h" 507 34 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3262 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3263 "01001001" // /* MW 9 */ + 3264 "10000000" // /* MW 8 */ + 3265 "11001111" // /* MW 7 */ + 3266 "01101111" // /* MW 6 */ + 3267 "00101001" // /* MW 5 */ + 3268 "00011111" // /* MW 4 */ + 3269 "10110000" // /* MW 3 */ + 3270 "01000010" // /* MW 2 */ + 3271 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 first + 3272 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3273 "00111011" // /* MW 5 */ + 3274 "01000110" // /* MW 4 */ + 3275 "00111111" // /* MW 3 */ + 3276 "11101010" // /* MW 2 */ + 3277 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3278 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3279 "01010000" // /* MW 7 */ + 3280 "10101000" // /* MW 6 */ + 3281 "00000000" // /* MW 5 */ + 3282 "00000010" // /* MW 4 */ + 3283 "00110000" // /* MW 3 */ + 3284 "01101010" // /* MW 2 */ + 3285 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 first +.src_ref 2 "conv2d_bf16_params.h" 509 19 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3286 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3287 "01111000" // /* MW 11 */ + 3288 "11001110" // /* MW 10 */ + 3289 "00001101" // /* MW 9 */ + 3290 "00101100" // /* MW 8 */ + 3291 "10110000" // /* MW 7 */ + 3292 "10100111" // /* MW 6 */ + 3293 "11110101" // /* MW 5 */ + 3294 "11100111" // /* MW 4 */ + 3295 "01010111" // /* MW 3 */ + 3296 "01001001" // /* MW 2 */ + 3297 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 19 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3298 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3299 "00010101" // /* MW 3 */ + 3300 "11100011" // /* MW 2 */ + 3301 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3302 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3303 "10000001" // /* MW 3 */ + 3304 "10110111" // /* MW 2 */ + 3305 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 47 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3306 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3307 "10010000" // /* MW 3 */ + 3308 "10110000" // /* MW 2 */ + 3309 "00010100" // /* MW 1 */ + 3310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3311 "00000000" // /* MW 1 */ + 3312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3313 "00000000" // /* MW 1 */ + 3314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3315 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 57 first + 3316 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3317 "00100001" // /* MW 3 */ + 3318 "11100101" // /* MW 2 */ + 3319 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 + 3320 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3321 "01010001" // /* MW 3 */ + 3322 "11001010" // /* MW 2 */ + 3323 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 48 first + 3324 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3325 "01001010" // /* MW 3 */ + 3326 "10101010" // /* MW 2 */ + 3327 "00000010" // /* MW 1 */ + 3328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3329 "00000000" // /* MW 1 */ + 3330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3331 "00000000" // /* MW 1 */ + 3332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3333 "00000000" // /* MW 1 */ + 3334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3335 "00000000" // /* MW 1 */ + 3336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3337 "00000000" // /* MW 1 */ + 3338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3339 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 62 + 3340 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3341 "11100001" // /* MW 3 */ + 3342 "10100100" // /* MW 2 */ + 3343 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 + 3344 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3345 "10111110" // /* MW 3 */ + 3346 "10100101" // /* MW 2 */ + 3347 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 first + 3348 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3349 "00101101" // /* MW 3 */ + 3350 "10100100" // /* MW 2 */ + 3351 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3352 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3353 "00000000" // /* MW 5 */ + 3354 "10100000" // /* MW 4 */ + 3355 "00001101" // /* MW 3 */ + 3356 "00000001" // /* MW 2 */ + 3357 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3358 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3359 "00100000" // /* MW 3 */ + 3360 "11100101" // /* MW 2 */ + 3361 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3362 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3363 "00000000" // /* MW 5 */ + 3364 "10100000" // /* MW 4 */ + 3365 "00001101" // /* MW 3 */ + 3366 "11111111" // /* MW 2 */ + 3367 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 642 99 + 3368 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3369 "11000001" // /* MW 5 */ + 3370 "00111111" // /* MW 4 */ + 3371 "10011001" // /* MW 3 */ + 3372 "11100100" // /* MW 2 */ + 3373 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 19 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3374 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "11100001" // /* MW 5 */ + 3376 "10111111" // /* MW 4 */ + 3377 "10111000" // /* MW 3 */ + 3378 "11100010" // /* MW 2 */ + 3379 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 512 64 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 first + 3380 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3381 "00111011" // /* MW 5 */ + 3382 "11001110" // /* MW 4 */ + 3383 "00111001" // /* MW 3 */ + 3384 "11101110" // /* MW 2 */ + 3385 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3386 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3387 "00110001" // /* MW 3 */ + 3388 "10110101" // /* MW 2 */ + 3389 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3390 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3391 "10101101" // /* MW 3 */ + 3392 "00101001" // /* MW 2 */ + 3393 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 520 19 first + 3394 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3395 "01100101" // /* MW 3 */ + 3396 "10110101" // /* MW 2 */ + 3397 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 36 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 first + 3398 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3399 "00100000" // /* MW 5 */ + 3400 "01101001" // /* MW 4 */ + 3401 "00111111" // /* MW 3 */ + 3402 "01101010" // /* MW 2 */ + 3403 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 65 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 first +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3404 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3405 "10101000" // /* MW 9 */ + 3406 "10101000" // /* MW 8 */ + 3407 "11001110" // /* MW 7 */ + 3408 "01101111" // /* MW 6 */ + 3409 "01001001" // /* MW 5 */ + 3410 "00110111" // /* MW 4 */ + 3411 "01010000" // /* MW 3 */ + 3412 "01100101" // /* MW 2 */ + 3413 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3414 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3415 "11111001" // /* MW 5 */ + 3416 "10100011" // /* MW 4 */ + 3417 "10111000" // /* MW 3 */ + 3418 "10100011" // /* MW 2 */ + 3419 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 45 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3420 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3421 "00011111" // /* MW 5 */ + 3422 "01101011" // /* MW 4 */ + 3423 "11101101" // /* MW 3 */ + 3424 "01100100" // /* MW 2 */ + 3425 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3427 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3429 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3431 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3433 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 48 first +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3434 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3435 "11001010" // /* MW 5 */ + 3436 "10110101" // /* MW 4 */ + 3437 "10111101" // /* MW 3 */ + 3438 "01011111" // /* MW 2 */ + 3439 "10000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3440 "10000100" // JNZ r31, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3441 "00000001" // /* MW 5 */ + 3442 "01000000" // /* MW 4 */ + 3443 "11111000" // /* MW 3 */ + 3444 "00000110" // /* MW 2 */ + 3445 "11111000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 76 first +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3446 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3447 "11110010" // /* MW 5 */ + 3448 "10111011" // /* MW 4 */ + 3449 "11101101" // /* MW 3 */ + 3450 "01000001" // /* MW 2 */ + 3451 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3452 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3453 "01011101" // /* MW 3 */ + 3454 "11101011" // /* MW 2 */ + 3455 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 first +.delay_slot + 3456 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3457 "00010100" // /* MW 3 */ + 3458 "01100011" // /* MW 2 */ + 3459 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.src_ref 2 "conv2d_bf16_params.h" 539 139 first +.src_ref 2 "conv2d_bf16_params.h" 555 59 +.src_ref 2 "conv2d_bf16_params.h" 559 59 +.src_ref 2 "conv2d_bf16_params.h" 700 17 +.delay_slot + 3460 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3461 "01011001" // /* MW 9 */ + 3462 "00000001" // /* MW 8 */ + 3463 "00101000" // /* MW 7 */ + 3464 "00111110" // /* MW 6 */ + 3465 "10111110" // /* MW 5 */ + 3466 "00001101" // /* MW 4 */ + 3467 "00110000" // /* MW 3 */ + 3468 "01000110" // /* MW 2 */ + 3469 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3470 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3471 "10011100" // /* MW 3 */ + 3472 "10011011" // /* MW 2 */ + 3473 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3474 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3475 "10010001" // /* MW 3 */ + 3476 "11100011" // /* MW 2 */ + 3477 "00000111" // /* MW 1 */ + 3478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3479 "00000000" // /* MW 1 */ + 3480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3481 "00000000" // /* MW 1 */ + 3482 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3483 "00000000" // /* MW 1 */ + 3484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3485 "00000000" // /* MW 1 */ + 3486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3487 "00000000" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3490 "10000100" // JNZ r28, #3568 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3568 delay_slots=5 */ + 3491 "00000001" // /* MW 5 */ + 3492 "01000000" // /* MW 4 */ + 3493 "11111000" // /* MW 3 */ + 3494 "00000110" // /* MW 2 */ + 3495 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3503 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3505 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3506 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3507 "01011000" // /* MW 9 */ + 3508 "01000000" // /* MW 8 */ + 3509 "00101000" // /* MW 7 */ + 3510 "10001011" // /* MW 6 */ + 3511 "00010000" // /* MW 5 */ + 3512 "00000001" // /* MW 4 */ + 3513 "00000000" // /* MW 3 */ + 3514 "10111100" // /* MW 2 */ + 3515 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3516 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3517 "11010010" // /* MW 3 */ + 3518 "01111110" // /* MW 2 */ + 3519 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3520 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3521 "01100111" // /* MW 3 */ + 3522 "01110110" // /* MW 2 */ + 3523 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3524 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3525 "00000001" // /* MW 5 */ + 3526 "10100000" // /* MW 4 */ + 3527 "01001111" // /* MW 3 */ + 3528 "00111000" // /* MW 2 */ + 3529 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 46 + 3530 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3531 "01010000" // /* MW 3 */ + 3532 "00110010" // /* MW 2 */ + 3533 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 44 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3534 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3535 "11101111" // /* MW 3 */ + 3536 "01111101" // /* MW 2 */ + 3537 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3538 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3539 "00111001" // /* MW 5 */ + 3540 "11000100" // /* MW 4 */ + 3541 "01011101" // /* MW 3 */ + 3542 "11100011" // /* MW 2 */ + 3543 "11001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3544 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3545 "10000010" // /* MW 3 */ + 3546 "11100011" // /* MW 2 */ + 3547 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 79 + 3548 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3549 "11101111" // /* MW 3 */ + 3550 "01100011" // /* MW 2 */ + 3551 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3552 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3553 "11000001" // /* MW 3 */ + 3554 "11111001" // /* MW 2 */ + 3555 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3556 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3557 "11001110" // /* MW 3 */ + 3558 "01100011" // /* MW 2 */ + 3559 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 55 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3560 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3561 "00011100" // /* MW 7 */ + 3562 "00000000" // /* MW 6 */ + 3563 "00000000" // /* MW 5 */ + 3564 "10000001" // /* MW 4 */ + 3565 "00010100" // /* MW 3 */ + 3566 "00100011" // /* MW 2 */ + 3567 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3568 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3569 "01111000" // /* MW 9 */ + 3570 "00001110" // /* MW 8 */ + 3571 "01110000" // /* MW 7 */ + 3572 "11101011" // /* MW 6 */ + 3573 "11000111" // /* MW 5 */ + 3574 "00111111" // /* MW 4 */ + 3575 "00000000" // /* MW 3 */ + 3576 "00011001" // /* MW 2 */ + 3577 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3578 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3579 "11000010" // /* MW 3 */ + 3580 "01111111" // /* MW 2 */ + 3581 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 34 first +.src_ref 2 "conv2d_bf16_params.h" 641 32 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3582 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3583 "10101000" // /* MW 9 */ + 3584 "01110100" // /* MW 8 */ + 3585 "01001111" // /* MW 7 */ + 3586 "10000011" // /* MW 6 */ + 3587 "00000100" // /* MW 5 */ + 3588 "00100001" // /* MW 4 */ + 3589 "00100000" // /* MW 3 */ + 3590 "01101110" // /* MW 2 */ + 3591 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 61 first +.src_ref 2 "conv2d_bf16_params.h" 640 16 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3592 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3593 "01011000" // /* MW 9 */ + 3594 "00001001" // /* MW 8 */ + 3595 "10101000" // /* MW 7 */ + 3596 "10000011" // /* MW 6 */ + 3597 "01000100" // /* MW 5 */ + 3598 "00101001" // /* MW 4 */ + 3599 "00000000" // /* MW 3 */ + 3600 "00011110" // /* MW 2 */ + 3601 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3602 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3603 "11100010" // /* MW 3 */ + 3604 "01110011" // /* MW 2 */ + 3605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 47 first + 3606 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3607 "10001000" // /* MW 3 */ + 3608 "11111001" // /* MW 2 */ + 3609 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 640 16 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3610 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3611 "00111101" // /* MW 3 */ + 3612 "01111011" // /* MW 2 */ + 3613 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3614 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3615 "00010000" // /* MW 9 */ + 3616 "00000100" // /* MW 8 */ + 3617 "00001010" // /* MW 7 */ + 3618 "00000011" // /* MW 6 */ + 3619 "00000000" // /* MW 5 */ + 3620 "00000000" // /* MW 4 */ + 3621 "00100000" // /* MW 3 */ + 3622 "11011110" // /* MW 2 */ + 3623 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 641 44 first +.src_ref 2 "conv2d_bf16_params.h" 642 45 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3624 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3625 "11111111" // /* MW 5 */ + 3626 "00111010" // /* MW 4 */ + 3627 "10111111" // /* MW 3 */ + 3628 "11100111" // /* MW 2 */ + 3629 "11001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3630 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "11100110" // /* MW 3 */ + 3632 "11001111" // /* MW 2 */ + 3633 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 55 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3634 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3635 "00101001" // /* MW 5 */ + 3636 "10101000" // /* MW 4 */ + 3637 "00001011" // /* MW 3 */ + 3638 "11010010" // /* MW 2 */ + 3639 "10110100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3640 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3641 "00000001" // /* MW 5 */ + 3642 "00100001" // /* MW 4 */ + 3643 "01001101" // /* MW 3 */ + 3644 "10110000" // /* MW 2 */ + 3645 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3646 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3647 "00111001" // /* MW 5 */ + 3648 "11000010" // /* MW 4 */ + 3649 "00011101" // /* MW 3 */ + 3650 "10110101" // /* MW 2 */ + 3651 "00110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 99 first + 3652 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3653 "00100100" // /* MW 3 */ + 3654 "11001111" // /* MW 2 */ + 3655 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3656 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3657 "01000001" // /* MW 5 */ + 3658 "10100110" // /* MW 4 */ + 3659 "01001101" // /* MW 3 */ + 3660 "11011110" // /* MW 2 */ + 3661 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3662 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3663 "01111101" // /* MW 5 */ + 3664 "00100000" // /* MW 4 */ + 3665 "01001001" // /* MW 3 */ + 3666 "00001000" // /* MW 2 */ + 3667 "00101001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 first + 3668 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3669 "00100100" // /* MW 3 */ + 3670 "11101111" // /* MW 2 */ + 3671 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 15 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3672 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3673 "01011000" // /* MW 9 */ + 3674 "01110000" // /* MW 8 */ + 3675 "01001111" // /* MW 7 */ + 3676 "01101110" // /* MW 6 */ + 3677 "01000010" // /* MW 5 */ + 3678 "00100000" // /* MW 4 */ + 3679 "00000000" // /* MW 3 */ + 3680 "00011110" // /* MW 2 */ + 3681 "11011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3682 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3683 "00100010" // /* MW 3 */ + 3684 "10111101" // /* MW 2 */ + 3685 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 85 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3686 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3687 "01011000" // /* MW 9 */ + 3688 "00100000" // /* MW 8 */ + 3689 "00001001" // /* MW 7 */ + 3690 "11111110" // /* MW 6 */ + 3691 "10101001" // /* MW 5 */ + 3692 "00101111" // /* MW 4 */ + 3693 "00000000" // /* MW 3 */ + 3694 "00000101" // /* MW 2 */ + 3695 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3696 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3697 "01010010" // /* MW 3 */ + 3698 "00100000" // /* MW 2 */ + 3699 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 559 59 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3700 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3701 "11110010" // /* MW 5 */ + 3702 "10111101" // /* MW 4 */ + 3703 "11111101" // /* MW 3 */ + 3704 "00001001" // /* MW 2 */ + 3705 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 41 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3706 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3707 "00111001" // /* MW 5 */ + 3708 "11000100" // /* MW 4 */ + 3709 "10111101" // /* MW 3 */ + 3710 "00111111" // /* MW 2 */ + 3711 "10000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 117 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3712 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3713 "01011111" // /* MW 5 */ + 3714 "01101011" // /* MW 4 */ + 3715 "10110111" // /* MW 3 */ + 3716 "11101110" // /* MW 2 */ + 3717 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 700 34 first + 3718 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3719 "00110010" // /* MW 3 */ + 3720 "10000100" // /* MW 2 */ + 3721 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 52 first + 3722 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3723 "00001100" // /* MW 3 */ + 3724 "01111110" // /* MW 2 */ + 3725 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 92 first + 3726 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3727 "10001111" // /* MW 3 */ + 3728 "00110001" // /* MW 2 */ + 3729 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 36 first + 3730 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3731 "11000101" // /* MW 3 */ + 3732 "11110111" // /* MW 2 */ + 3733 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 64 first +.src_ref 2 "conv2d_bf16_params.h" 611 47 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 629 82 + 3734 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3735 "01011000" // /* MW 11 */ + 3736 "00000000" // /* MW 10 */ + 3737 "10001001" // /* MW 9 */ + 3738 "11101110" // /* MW 8 */ + 3739 "11000000" // /* MW 7 */ + 3740 "10110111" // /* MW 6 */ + 3741 "10010101" // /* MW 5 */ + 3742 "11101110" // /* MW 4 */ + 3743 "00000111" // /* MW 3 */ + 3744 "00000011" // /* MW 2 */ + 3745 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 649 41 + 3746 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3747 "00111001" // /* MW 5 */ + 3748 "10110111" // /* MW 4 */ + 3749 "01000000" // /* MW 3 */ + 3750 "00101000" // /* MW 2 */ + 3751 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3752 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3753 "00001100" // /* MW 5 */ + 3754 "10101100" // /* MW 4 */ + 3755 "00001111" // /* MW 3 */ + 3756 "00000000" // /* MW 2 */ + 3757 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 60 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first + 3758 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3759 "11001001" // /* MW 9 */ + 3760 "00111111" // /* MW 8 */ + 3761 "10001001" // /* MW 7 */ + 3762 "00111100" // /* MW 6 */ + 3763 "10110000" // /* MW 5 */ + 3764 "00011111" // /* MW 4 */ + 3765 "10110000" // /* MW 3 */ + 3766 "00010010" // /* MW 2 */ + 3767 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 53 +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 555 59 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3768 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3769 "11001000" // /* MW 11 */ + 3770 "01111111" // /* MW 10 */ + 3771 "11001100" // /* MW 9 */ + 3772 "10010010" // /* MW 8 */ + 3773 "11111111" // /* MW 7 */ + 3774 "10101101" // /* MW 6 */ + 3775 "10010001" // /* MW 5 */ + 3776 "00011100" // /* MW 4 */ + 3777 "10000010" // /* MW 3 */ + 3778 "10001100" // /* MW 2 */ + 3779 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 240 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3780 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3781 "01111001" // /* MW 9 */ + 3782 "10001110" // /* MW 8 */ + 3783 "01110000" // /* MW 7 */ + 3784 "11101111" // /* MW 6 */ + 3785 "01010111" // /* MW 5 */ + 3786 "00101011" // /* MW 4 */ + 3787 "00110000" // /* MW 3 */ + 3788 "01011010" // /* MW 2 */ + 3789 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 53 first +.src_ref 2 "conv2d_bf16_params.h" 559 53 +.src_ref 2 "conv2d_bf16_params.h" 621 140 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3790 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3791 "01011000" // /* MW 11 */ + 3792 "01011000" // /* MW 10 */ + 3793 "00000000" // /* MW 9 */ + 3794 "00001110" // /* MW 8 */ + 3795 "01001110" // /* MW 7 */ + 3796 "10101001" // /* MW 6 */ + 3797 "01010001" // /* MW 5 */ + 3798 "00011111" // /* MW 4 */ + 3799 "00000010" // /* MW 3 */ + 3800 "11011001" // /* MW 2 */ + 3801 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 53 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3802 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3803 "00100100" // /* MW 5 */ + 3804 "11100011" // /* MW 4 */ + 3805 "00111111" // /* MW 3 */ + 3806 "01100010" // /* MW 2 */ + 3807 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 47 first +.src_ref 2 "conv2d_bf16_params.h" 621 222 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3808 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3809 "01111000" // /* MW 11 */ + 3810 "10010000" // /* MW 10 */ + 3811 "01101001" // /* MW 9 */ + 3812 "00001111" // /* MW 8 */ + 3813 "11001110" // /* MW 7 */ + 3814 "10101011" // /* MW 6 */ + 3815 "10010001" // /* MW 5 */ + 3816 "11101111" // /* MW 4 */ + 3817 "00100010" // /* MW 3 */ + 3818 "01101110" // /* MW 2 */ + 3819 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 661 61 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3820 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3821 "11001000" // /* MW 9 */ + 3822 "11111111" // /* MW 8 */ + 3823 "10001100" // /* MW 7 */ + 3824 "00010010" // /* MW 6 */ + 3825 "11001110" // /* MW 5 */ + 3826 "00101001" // /* MW 4 */ + 3827 "00000000" // /* MW 3 */ + 3828 "11110011" // /* MW 2 */ + 3829 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 710 60 +.src_ref 2 "conv2d_bf16_params.h" 710 65 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3830 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3831 "01111000" // /* MW 9 */ + 3832 "10001110" // /* MW 8 */ + 3833 "01110000" // /* MW 7 */ + 3834 "01110011" // /* MW 6 */ + 3835 "11101010" // /* MW 5 */ + 3836 "00111011" // /* MW 4 */ + 3837 "00000000" // /* MW 3 */ + 3838 "00011101" // /* MW 2 */ + 3839 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3840 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3841 "01000100" // /* MW 5 */ + 3842 "11001010" // /* MW 4 */ + 3843 "00101110" // /* MW 3 */ + 3844 "11101110" // /* MW 2 */ + 3845 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 649 41 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3846 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01111000" // /* MW 9 */ + 3848 "10010000" // /* MW 8 */ + 3849 "01101001" // /* MW 7 */ + 3850 "10010011" // /* MW 6 */ + 3851 "00111001" // /* MW 5 */ + 3852 "00111111" // /* MW 4 */ + 3853 "00000000" // /* MW 3 */ + 3854 "00011111" // /* MW 2 */ + 3855 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3856 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "00100010" // /* MW 3 */ + 3858 "11000100" // /* MW 2 */ + 3859 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 82 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3860 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3861 "01010001" // /* MW 3 */ + 3862 "11101011" // /* MW 2 */ + 3863 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 611 47 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3864 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3865 "01011001" // /* MW 9 */ + 3866 "11000000" // /* MW 8 */ + 3867 "01101111" // /* MW 7 */ + 3868 "10010000" // /* MW 6 */ + 3869 "00100111" // /* MW 5 */ + 3870 "00000100" // /* MW 4 */ + 3871 "00110000" // /* MW 3 */ + 3872 "10001110" // /* MW 2 */ + 3873 "01000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3874 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3875 "00110010" // /* MW 3 */ + 3876 "00111000" // /* MW 2 */ + 3877 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 643 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 3878 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3879 "01111111" // /* MW 3 */ + 3880 "11111110" // /* MW 2 */ + 3881 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 3882 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3883 "01100100" // /* MW 5 */ + 3884 "00001100" // /* MW 4 */ + 3885 "00101110" // /* MW 3 */ + 3886 "11000110" // /* MW 2 */ + 3887 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 47 first +.src_ref 2 "conv2d_bf16_params.h" 629 45 +.src_ref 2 "conv2d_bf16_params.h" 684 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3888 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3889 "01011001" // /* MW 9 */ + 3890 "00101000" // /* MW 8 */ + 3891 "10000000" // /* MW 7 */ + 3892 "01111100" // /* MW 6 */ + 3893 "00101001" // /* MW 5 */ + 3894 "00110101" // /* MW 4 */ + 3895 "00110000" // /* MW 3 */ + 3896 "10001110" // /* MW 2 */ + 3897 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 45 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3898 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3899 "11100100" // /* MW 5 */ + 3900 "00001101" // /* MW 4 */ + 3901 "00110001" // /* MW 3 */ + 3902 "01010110" // /* MW 2 */ + 3903 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 644 22 +.src_ref 2 "conv2d_bf16_params.h" 700 17 first +.src_ref 2 "conv2d_bf16_params.h" 705 50 +.src_ref 2 "conv2d_bf16_params.h" 705 61 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3904 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3905 "10101000" // /* MW 9 */ + 3906 "11111100" // /* MW 8 */ + 3907 "10101001" // /* MW 7 */ + 3908 "11111110" // /* MW 6 */ + 3909 "00111000" // /* MW 5 */ + 3910 "00000110" // /* MW 4 */ + 3911 "00100000" // /* MW 3 */ + 3912 "00000010" // /* MW 2 */ + 3913 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 38 first +.src_ref 2 "conv2d_bf16_params.h" 700 111 +.src_ref 2 "conv2d_bf16_params.h" 700 149 +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3914 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3915 "00000110" // /* MW 9 */ + 3916 "00000110" // /* MW 8 */ + 3917 "00000101" // /* MW 7 */ + 3918 "10000000" // /* MW 6 */ + 3919 "00010001" // /* MW 5 */ + 3920 "00011111" // /* MW 4 */ + 3921 "00100010" // /* MW 3 */ + 3922 "11000110" // /* MW 2 */ + 3923 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 +.src_ref 2 "conv2d_bf16_params.h" 649 38 first +.src_ref 2 "conv2d_bf16_params.h" 674 24 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3924 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3925 "00010001" // /* MW 9 */ + 3926 "11111000" // /* MW 8 */ + 3927 "01101111" // /* MW 7 */ + 3928 "00111110" // /* MW 6 */ + 3929 "00000000" // /* MW 5 */ + 3930 "00000000" // /* MW 4 */ + 3931 "00110000" // /* MW 3 */ + 3932 "11001110" // /* MW 2 */ + 3933 "01001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 first +.src_ref 2 "conv2d_bf16_params.h" 662 61 +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 3934 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3935 "11001001" // /* MW 9 */ + 3936 "10111111" // /* MW 8 */ + 3937 "01001011" // /* MW 7 */ + 3938 "10100100" // /* MW 6 */ + 3939 "01001001" // /* MW 5 */ + 3940 "00111111" // /* MW 4 */ + 3941 "00110000" // /* MW 3 */ + 3942 "11010010" // /* MW 2 */ + 3943 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 663 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3944 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3945 "10011100" // /* MW 5 */ + 3946 "01010110" // /* MW 4 */ + 3947 "00110001" // /* MW 3 */ + 3948 "11000110" // /* MW 2 */ + 3949 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first + 3950 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3951 "10000001" // /* MW 5 */ + 3952 "01111010" // /* MW 4 */ + 3953 "00111111" // /* MW 3 */ + 3954 "10001010" // /* MW 2 */ + 3955 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 3956 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3957 "11100011" // /* MW 5 */ + 3958 "01110011" // /* MW 4 */ + 3959 "00111000" // /* MW 3 */ + 3960 "11111010" // /* MW 2 */ + 3961 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 3962 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3963 "01011001" // /* MW 9 */ + 3964 "00000000" // /* MW 8 */ + 3965 "01100000" // /* MW 7 */ + 3966 "00110000" // /* MW 6 */ + 3967 "11111000" // /* MW 5 */ + 3968 "00101101" // /* MW 4 */ + 3969 "00110000" // /* MW 3 */ + 3970 "11010110" // /* MW 2 */ + 3971 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3972 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3973 "11001001" // /* MW 9 */ + 3974 "01111111" // /* MW 8 */ + 3975 "00101100" // /* MW 7 */ + 3976 "01111110" // /* MW 6 */ + 3977 "00100000" // /* MW 5 */ + 3978 "00111110" // /* MW 4 */ + 3979 "00110000" // /* MW 3 */ + 3980 "10001100" // /* MW 2 */ + 3981 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 705 50 first +.src_ref 2 "conv2d_bf16_params.h" 705 61 first + 3982 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3983 "00001100" // /* MW 5 */ + 3984 "10111000" // /* MW 4 */ + 3985 "00111000" // /* MW 3 */ + 3986 "10001100" // /* MW 2 */ + 3987 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 +.src_ref 2 "conv2d_bf16_params.h" 674 24 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 682 38 +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.src_ref 2 "conv2d_bf16_params.h" 720 50 + 3988 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3989 "01011001" // /* MW 9 */ + 3990 "00000000" // /* MW 8 */ + 3991 "01001000" // /* MW 7 */ + 3992 "00100100" // /* MW 6 */ + 3993 "00000001" // /* MW 5 */ + 3994 "00100111" // /* MW 4 */ + 3995 "00110000" // /* MW 3 */ + 3996 "11011010" // /* MW 2 */ + 3997 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 3998 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3999 "01111001" // /* MW 9 */ + 4000 "00001110" // /* MW 8 */ + 4001 "01110000" // /* MW 7 */ + 4002 "10001111" // /* MW 6 */ + 4003 "00011111" // /* MW 5 */ + 4004 "00000101" // /* MW 4 */ + 4005 "00110000" // /* MW 3 */ + 4006 "11110010" // /* MW 2 */ + 4007 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 707 61 first + 4008 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4009 "11011111" // /* MW 5 */ + 4010 "10111001" // /* MW 4 */ + 4011 "00111011" // /* MW 3 */ + 4012 "10010010" // /* MW 2 */ + 4013 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 674 22 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4014 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4015 "01011001" // /* MW 9 */ + 4016 "00000110" // /* MW 8 */ + 4017 "00001000" // /* MW 7 */ + 4018 "10001100" // /* MW 6 */ + 4019 "00001111" // /* MW 5 */ + 4020 "00100001" // /* MW 4 */ + 4021 "00110000" // /* MW 3 */ + 4022 "11000110" // /* MW 2 */ + 4023 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4024 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4025 "01111000" // /* MW 11 */ + 4026 "10010000" // /* MW 10 */ + 4027 "01101001" // /* MW 9 */ + 4028 "00010011" // /* MW 8 */ + 4029 "00000000" // /* MW 7 */ + 4030 "10011011" // /* MW 6 */ + 4031 "00010001" // /* MW 5 */ + 4032 "00011110" // /* MW 4 */ + 4033 "00000010" // /* MW 3 */ + 4034 "00000000" // /* MW 2 */ + 4035 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4036 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4037 "10100100" // /* MW 5 */ + 4038 "00010100" // /* MW 4 */ + 4039 "00100000" // /* MW 3 */ + 4040 "00010110" // /* MW 2 */ + 4041 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 691 56 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4042 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4043 "10101111" // /* MW 3 */ + 4044 "01100011" // /* MW 2 */ + 4045 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 709 71 first + 4046 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4047 "01011001" // /* MW 9 */ + 4048 "11001000" // /* MW 8 */ + 4049 "00000111" // /* MW 7 */ + 4050 "01101101" // /* MW 6 */ + 4051 "00001000" // /* MW 5 */ + 4052 "00000111" // /* MW 4 */ + 4053 "00110000" // /* MW 3 */ + 4054 "10001100" // /* MW 2 */ + 4055 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 706 23 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 4056 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4057 "11001000" // /* MW 11 */ + 4058 "11000001" // /* MW 10 */ + 4059 "10101000" // /* MW 9 */ + 4060 "11101101" // /* MW 8 */ + 4061 "11110111" // /* MW 7 */ + 4062 "10100000" // /* MW 6 */ + 4063 "01100001" // /* MW 5 */ + 4064 "01001000" // /* MW 4 */ + 4065 "00000010" // /* MW 3 */ + 4066 "01100011" // /* MW 2 */ + 4067 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 682 38 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4068 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4069 "01111011" // /* MW 5 */ + 4070 "11000000" // /* MW 4 */ + 4071 "00110110" // /* MW 3 */ + 4072 "00001010" // /* MW 2 */ + 4073 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 first + 4074 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4075 "01000001" // /* MW 5 */ + 4076 "10001110" // /* MW 4 */ + 4077 "00111000" // /* MW 3 */ + 4078 "11011010" // /* MW 2 */ + 4079 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 + 4080 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4081 "10011100" // /* MW 5 */ + 4082 "11001000" // /* MW 4 */ + 4083 "00111000" // /* MW 3 */ + 4084 "11001010" // /* MW 2 */ + 4085 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4086 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4087 "11011011" // /* MW 5 */ + 4088 "10010100" // /* MW 4 */ + 4089 "00110010" // /* MW 3 */ + 4090 "10010010" // /* MW 2 */ + 4091 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 706 28 first + 4092 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4093 "01011001" // /* MW 9 */ + 4094 "11111101" // /* MW 8 */ + 4095 "00001111" // /* MW 7 */ + 4096 "00000100" // /* MW 6 */ + 4097 "00111000" // /* MW 5 */ + 4098 "00011010" // /* MW 4 */ + 4099 "00110000" // /* MW 3 */ + 4100 "10001110" // /* MW 2 */ + 4101 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4102 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4103 "00001110" // /* MW 3 */ + 4104 "11000000" // /* MW 2 */ + 4105 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 707 66 first + 4106 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4107 "00011111" // /* MW 5 */ + 4108 "00010000" // /* MW 4 */ + 4109 "00110111" // /* MW 3 */ + 4110 "11001010" // /* MW 2 */ + 4111 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 709 96 first + 4112 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4113 "00111011" // /* MW 5 */ + 4114 "00001100" // /* MW 4 */ + 4115 "00110000" // /* MW 3 */ + 4116 "10001100" // /* MW 2 */ + 4117 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 709 90 + 4118 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4119 "00110001" // /* MW 9 */ + 4120 "11000110" // /* MW 8 */ + 4121 "00000011" // /* MW 7 */ + 4122 "10000000" // /* MW 6 */ + 4123 "01100001" // /* MW 5 */ + 4124 "00011100" // /* MW 4 */ + 4125 "00100010" // /* MW 3 */ + 4126 "10110110" // /* MW 2 */ + 4127 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 707 50 first +.src_ref 2 "conv2d_bf16_params.h" 708 59 +.src_ref 2 "conv2d_bf16_params.h" 710 60 first +.src_ref 2 "conv2d_bf16_params.h" 710 65 first + 4128 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4129 "11001000" // /* MW 11 */ + 4130 "00111111" // /* MW 10 */ + 4131 "00101000" // /* MW 9 */ + 4132 "00110000" // /* MW 8 */ + 4133 "01110000" // /* MW 7 */ + 4134 "10111010" // /* MW 6 */ + 4135 "10010001" // /* MW 5 */ + 4136 "00011100" // /* MW 4 */ + 4137 "00100010" // /* MW 3 */ + 4138 "00111010" // /* MW 2 */ + 4139 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 708 48 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4140 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4141 "10101111" // /* MW 9 */ + 4142 "01000001" // /* MW 8 */ + 4143 "00000001" // /* MW 7 */ + 4144 "10000000" // /* MW 6 */ + 4145 "00110001" // /* MW 5 */ + 4146 "00011100" // /* MW 4 */ + 4147 "00100010" // /* MW 3 */ + 4148 "10111110" // /* MW 2 */ + 4149 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 709 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first + 4150 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4151 "00000000" // /* MW 5 */ + 4152 "01010000" // /* MW 4 */ + 4153 "00110000" // /* MW 3 */ + 4154 "10001110" // /* MW 2 */ + 4155 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 710 50 first +.delay_slot + 4156 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4157 "11110001" // /* MW 3 */ + 4158 "01011100" // /* MW 2 */ + 4159 "00001010" // /* MW 1 */ +.delay_slot + 4160 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4161 "00010001" // /* MW 3 */ + 4162 "00011100" // /* MW 2 */ + 4163 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 first +.delay_slot + 4164 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4165 "01010001" // /* MW 3 */ + 4166 "00011100" // /* MW 2 */ + 4167 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.delay_slot + 4168 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4169 "01010001" // /* MW 3 */ + 4170 "00000100" // /* MW 2 */ + 4171 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 720 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first +.delay_slot + 4172 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4173 "01110001" // /* MW 9 */ + 4174 "00000000" // /* MW 8 */ + 4175 "00000000" // /* MW 7 */ + 4176 "00000000" // /* MW 6 */ + 4177 "11111110" // /* MW 5 */ + 4178 "00111111" // /* MW 4 */ + 4179 "00110000" // /* MW 3 */ + 4180 "10001010" // /* MW 2 */ +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + 4181 "01000010" // /* MW 1 */ +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 689 first +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 704 12 +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.function_start + 4192 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4193 "01111000" // /* MW 11 */ + 4194 "01100000" // /* MW 10 */ + 4195 "00001010" // /* MW 9 */ + 4196 "00001000" // /* MW 8 */ + 4197 "10000000" // /* MW 7 */ + 4198 "00000001" // /* MW 6 */ + 4199 "10001011" // /* MW 5 */ + 4200 "10000100" // /* MW 4 */ + 4201 "10000010" // /* MW 3 */ + 4202 "00000011" // /* MW 2 */ + 4203 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 526 11 +.src_ref 2 "conv2d_bf16.h" 698 28 first +.src_ref 2 "conv2d_bf16.h" 704 12 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 + 4204 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4205 "01100000" // /* MW 13 */ + 4206 "00001001" // /* MW 12 */ + 4207 "00100000" // /* MW 11 */ + 4208 "00100001" // /* MW 10 */ + 4209 "00000000" // /* MW 9 */ + 4210 "00110110" // /* MW 8 */ + 4211 "00000001" // /* MW 7 */ + 4212 "00110100" // /* MW 6 */ + 4213 "00101000" // /* MW 5 */ + 4214 "00101000" // /* MW 4 */ + 4215 "10001000" // /* MW 3 */ + 4216 "00000110" // /* MW 2 */ + 4217 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 702 37 + 4218 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4219 "00010000" // /* MW 9 */ + 4220 "00110100" // /* MW 8 */ + 4221 "00110010" // /* MW 7 */ + 4222 "11110010" // /* MW 6 */ + 4223 "00000001" // /* MW 5 */ + 4224 "00000000" // /* MW 4 */ + 4225 "11010000" // /* MW 3 */ + 4226 "10010100" // /* MW 2 */ + 4227 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 43 +.src_ref 2 "conv2d_bf16.h" 702 4 first + 4228 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4336 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4229 "00010000" // /* MW 9 */ + 4230 "01111000" // /* MW 8 */ + 4231 "01111000" // /* MW 7 */ + 4232 "00000100" // /* MW 6 */ + 4233 "00000000" // /* MW 5 */ + 4234 "00000000" // /* MW 4 */ + 4235 "11010000" // /* MW 3 */ + 4236 "10010000" // /* MW 2 */ + 4237 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 699 43 first +.src_ref 2 "conv2d_bf16.h" 702 4 + 4238 "10111010" // LDA m0, [p3]; MOVXM le, #4384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4239 "00010000" // /* MW 9 */ + 4240 "10010000" // /* MW 8 */ + 4241 "10111000" // /* MW 7 */ + 4242 "00000101" // /* MW 6 */ + 4243 "00000000" // /* MW 5 */ + 4244 "00000000" // /* MW 4 */ + 4245 "11010000" // /* MW 3 */ + 4246 "10000000" // /* MW 2 */ + 4247 "01100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 702 37 first + 4248 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4249 "00000001" // /* MW 5 */ + 4250 "00000000" // /* MW 4 */ + 4251 "11010001" // /* MW 3 */ + 4252 "10000010" // /* MW 2 */ + 4253 "01111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 37 + 4254 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4255 "00100010" // /* MW 3 */ + 4256 "00000100" // /* MW 2 */ + 4257 "00000100" // /* MW 1 */ + 4258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4259 "00000000" // /* MW 1 */ + 4260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4261 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 705 66 first + 4262 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4263 "00000001" // /* MW 5 */ + 4264 "10000101" // /* MW 4 */ + 4265 "10000000" // /* MW 3 */ + 4266 "00001010" // /* MW 2 */ + 4267 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first + 4268 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4269 "00010100" // /* MW 3 */ + 4270 "00110000" // /* MW 2 */ + 4271 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4272 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4273 "00010100" // /* MW 3 */ + 4274 "00010100" // /* MW 2 */ + 4275 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 4 first +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4276 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4277 "11111101" // /* MW 5 */ + 4278 "11100000" // /* MW 4 */ + 4279 "10001010" // /* MW 3 */ + 4280 "00001010" // /* MW 2 */ + 4281 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4282 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4283 "00000000" // /* MW 5 */ + 4284 "11110101" // /* MW 4 */ + 4285 "10000000" // /* MW 3 */ + 4286 "00000010" // /* MW 2 */ + 4287 "11000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4288 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4289 "00010100" // /* MW 3 */ + 4290 "00010100" // /* MW 2 */ + 4291 "00111100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4293 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4294 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4295 "01111110" // /* MW 9 */ + 4296 "10100101" // /* MW 8 */ + 4297 "00000001" // /* MW 7 */ + 4298 "00000000" // /* MW 6 */ + 4299 "01010100" // /* MW 5 */ + 4300 "00000000" // /* MW 4 */ + 4301 "11110000" // /* MW 3 */ + 4302 "00101100" // /* MW 2 */ + 4303 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4304 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4305 "00000000" // /* MW 15 */ + 4306 "00000000" // /* MW 14 */ + 4307 "01111000" // /* MW 13 */ + 4308 "11000101" // /* MW 12 */ + 4309 "00000001" // /* MW 11 */ + 4310 "00000000" // /* MW 10 */ + 4311 "00000000" // /* MW 9 */ + 4312 "00000000" // /* MW 8 */ + 4313 "01011011" // /* MW 7 */ + 4314 "00000001" // /* MW 6 */ + 4315 "00101000" // /* MW 5 */ + 4316 "01100000" // /* MW 4 */ + 4317 "11111100" // /* MW 3 */ + 4318 "00101100" // /* MW 2 */ + 4319 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4320 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4321 "00000000" // /* MW 15 */ + 4322 "00000000" // /* MW 14 */ + 4323 "01111000" // /* MW 13 */ + 4324 "11000101" // /* MW 12 */ + 4325 "01000000" // /* MW 11 */ + 4326 "00000000" // /* MW 10 */ + 4327 "00000000" // /* MW 9 */ + 4328 "00000000" // /* MW 8 */ + 4329 "01011011" // /* MW 7 */ + 4330 "00000001" // /* MW 6 */ + 4331 "00100000" // /* MW 5 */ + 4332 "00000000" // /* MW 4 */ + 4333 "11110000" // /* MW 3 */ + 4334 "00101100" // /* MW 2 */ + 4335 "00000000" // /* MW 1 */ +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.src_ref 2 "conv2d_bf16.h" 704 12 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4336 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4337 "00000000" // /* MW 15 */ + 4338 "00000000" // /* MW 14 */ + 4339 "01111000" // /* MW 13 */ + 4340 "10100101" // /* MW 12 */ + 4341 "00000001" // /* MW 11 */ + 4342 "00000000" // /* MW 10 */ + 4343 "00000000" // /* MW 9 */ + 4344 "00000000" // /* MW 8 */ + 4345 "01011011" // /* MW 7 */ + 4346 "00000001" // /* MW 6 */ + 4347 "00101000" // /* MW 5 */ + 4348 "00101000" // /* MW 4 */ + 4349 "11111000" // /* MW 3 */ + 4350 "00101100" // /* MW 2 */ + 4351 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4352 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4353 "00000000" // /* MW 15 */ + 4354 "00000000" // /* MW 14 */ + 4355 "01111000" // /* MW 13 */ + 4356 "10100101" // /* MW 12 */ + 4357 "00000001" // /* MW 11 */ + 4358 "00000000" // /* MW 10 */ + 4359 "00000000" // /* MW 9 */ + 4360 "00000000" // /* MW 8 */ + 4361 "00000011" // /* MW 7 */ + 4362 "10000000" // /* MW 6 */ + 4363 "10101101" // /* MW 5 */ + 4364 "00000000" // /* MW 4 */ + 4365 "11110000" // /* MW 3 */ + 4366 "00101100" // /* MW 2 */ + 4367 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4368 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4369 "00000000" // /* MW 15 */ + 4370 "00000000" // /* MW 14 */ + 4371 "01111000" // /* MW 13 */ + 4372 "11000101" // /* MW 12 */ + 4373 "00000001" // /* MW 11 */ + 4374 "00000000" // /* MW 10 */ + 4375 "00000000" // /* MW 9 */ + 4376 "00000000" // /* MW 8 */ + 4377 "00000011" // /* MW 7 */ + 4378 "00000000" // /* MW 6 */ + 4379 "00101001" // /* MW 5 */ + 4380 "01100000" // /* MW 4 */ + 4381 "11111100" // /* MW 3 */ + 4382 "00101100" // /* MW 2 */ + 4383 "00000000" // /* MW 1 */ +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 3 "utils.h" 531 4 first +.src_ref 2 "conv2d_bf16.h" 706 18 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4384 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4385 "00000000" // /* MW 15 */ + 4386 "00000000" // /* MW 14 */ + 4387 "01111000" // /* MW 13 */ + 4388 "11000101" // /* MW 12 */ + 4389 "01000000" // /* MW 11 */ + 4390 "00000000" // /* MW 10 */ + 4391 "00000000" // /* MW 9 */ + 4392 "00000000" // /* MW 8 */ + 4393 "00000011" // /* MW 7 */ + 4394 "00000000" // /* MW 6 */ + 4395 "00100011" // /* MW 5 */ + 4396 "00000000" // /* MW 4 */ + 4397 "11110000" // /* MW 3 */ + 4398 "00101100" // /* MW 2 */ + 4399 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4401 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4402 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4403 "00000011" // /* MW 3 */ + 4404 "10000000" // /* MW 2 */ + 4405 "00001101" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4406 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4407 "01110000" // /* MW 7 */ + 4408 "11000101" // /* MW 6 */ + 4409 "00000001" // /* MW 5 */ + 4410 "00000000" // /* MW 4 */ + 4411 "01100000" // /* MW 3 */ + 4412 "00000000" // /* MW 2 */ + 4413 "00100000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4414 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4415 "10001010" // /* MW 3 */ + 4416 "10000001" // /* MW 2 */ + 4417 "00011000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4418 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4419 "00000011" // /* MW 3 */ + 4420 "00000000" // /* MW 2 */ + 4421 "00001011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first + 4422 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4423 "01110000" // /* MW 7 */ + 4424 "11000101" // /* MW 6 */ + 4425 "00000001" // /* MW 5 */ + 4426 "00000000" // /* MW 4 */ + 4427 "01100000" // /* MW 3 */ + 4428 "00000000" // /* MW 2 */ + 4429 "10110000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.src_ref 2 "conv2d_bf16.h" 708 12 first + 4430 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4431 "01110000" // /* MW 7 */ + 4432 "11000101" // /* MW 6 */ + 4433 "01000000" // /* MW 5 */ + 4434 "00000000" // /* MW 4 */ + 4435 "01100000" // /* MW 3 */ + 4436 "00000000" // /* MW 2 */ + 4437 "00100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4438 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4439 "00000011" // /* MW 3 */ + 4440 "00000000" // /* MW 2 */ + 4441 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.src_ref 2 "conv2d_bf16.h" 723 first + 4442 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4443 "00000000" // /* MW 5 */ + 4444 "01010000" // /* MW 4 */ + 4445 "01100000" // /* MW 3 */ + 4446 "00000000" // /* MW 2 */ + 4447 "10110000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 708 12 first +.delay_slot + 4448 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "00000011" // /* MW 3 */ + 4450 "00000000" // /* MW 2 */ + 4451 "00001001" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first +.delay_slot + 4452 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4453 "00000011" // /* MW 3 */ + 4454 "00000000" // /* MW 2 */ + 4455 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4457 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4459 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4460 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + 4461 "00000000" // /* MW 1 */ +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.function_start + 4464 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4465 "01100000" // /* MW 13 */ + 4466 "00010001" // /* MW 12 */ + 4467 "10010001" // /* MW 11 */ + 4468 "00001110" // /* MW 10 */ + 4469 "00000000" // /* MW 9 */ + 4470 "00000000" // /* MW 8 */ + 4471 "10000000" // /* MW 7 */ + 4472 "00000000" // /* MW 6 */ + 4473 "00100000" // /* MW 5 */ + 4474 "00111111" // /* MW 4 */ + 4475 "10000110" // /* MW 3 */ + 4476 "11100000" // /* MW 2 */ + 4477 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 241 95 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4478 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4479 "01111000" // /* MW 11 */ + 4480 "01100000" // /* MW 10 */ + 4481 "00101011" // /* MW 9 */ + 4482 "00001010" // /* MW 8 */ + 4483 "11000101" // /* MW 7 */ + 4484 "10111111" // /* MW 6 */ + 4485 "10010101" // /* MW 5 */ + 4486 "11110001" // /* MW 4 */ + 4487 "00000111" // /* MW 3 */ + 4488 "01110011" // /* MW 2 */ + 4489 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 94 +.src_ref 2 "conv2d_bf16_params.h" 242 100 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 245 28 +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4490 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4491 "00001000" // /* MW 11 */ + 4492 "01000111" // /* MW 10 */ + 4493 "00110100" // /* MW 9 */ + 4494 "00101001" // /* MW 8 */ + 4495 "00010000" // /* MW 7 */ + 4496 "10000001" // /* MW 6 */ + 4497 "00110101" // /* MW 5 */ + 4498 "11011010" // /* MW 4 */ + 4499 "00000111" // /* MW 3 */ + 4500 "00011001" // /* MW 2 */ + 4501 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 240 68 first + 4502 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4503 "00010000" // /* MW 11 */ + 4504 "00000000" // /* MW 10 */ + 4505 "10101000" // /* MW 9 */ + 4506 "00000011" // /* MW 8 */ + 4507 "01000000" // /* MW 7 */ + 4508 "10000000" // /* MW 6 */ + 4509 "00110101" // /* MW 5 */ + 4510 "11110101" // /* MW 4 */ + 4511 "11010111" // /* MW 3 */ + 4512 "11001010" // /* MW 2 */ + 4513 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.src_ref 2 "conv2d_bf16_params.h" 245 20 + 4514 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4515 "10010000" // /* MW 11 */ + 4516 "11111111" // /* MW 10 */ + 4517 "11101111" // /* MW 9 */ + 4518 "11111111" // /* MW 8 */ + 4519 "01111111" // /* MW 7 */ + 4520 "10000000" // /* MW 6 */ + 4521 "11010101" // /* MW 5 */ + 4522 "11111101" // /* MW 4 */ + 4523 "10000111" // /* MW 3 */ + 4524 "00011000" // /* MW 2 */ + 4525 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4526 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4527 "01011000" // /* MW 11 */ + 4528 "11101100" // /* MW 10 */ + 4529 "00000111" // /* MW 9 */ + 4530 "00001010" // /* MW 8 */ + 4531 "01100001" // /* MW 7 */ + 4532 "10000001" // /* MW 6 */ + 4533 "10110101" // /* MW 5 */ + 4534 "11100001" // /* MW 4 */ + 4535 "00000111" // /* MW 3 */ + 4536 "10110100" // /* MW 2 */ + 4537 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 39 +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.src_ref 2 "conv2d_bf16_params.h" 250 71 + 4538 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4539 "01011000" // /* MW 11 */ + 4540 "11000100" // /* MW 10 */ + 4541 "10000111" // /* MW 9 */ + 4542 "11001010" // /* MW 8 */ + 4543 "01110111" // /* MW 7 */ + 4544 "10000111" // /* MW 6 */ + 4545 "11110101" // /* MW 5 */ + 4546 "11101101" // /* MW 4 */ + 4547 "00000111" // /* MW 3 */ + 4548 "10010101" // /* MW 2 */ + 4549 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 + 4550 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4551 "01010000" // /* MW 7 */ + 4552 "01000000" // /* MW 6 */ + 4553 "10000000" // /* MW 5 */ + 4554 "00000011" // /* MW 4 */ + 4555 "10110000" // /* MW 3 */ + 4556 "01110011" // /* MW 2 */ + 4557 "11111111" // /* MW 1 */ + 4558 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4559 "00111101" // /* MW 3 */ + 4560 "11100100" // /* MW 2 */ + 4561 "00001111" // /* MW 1 */ + 4562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4563 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 + 4564 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4565 "00100000" // /* MW 3 */ + 4566 "01011001" // /* MW 2 */ + 4567 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 241 95 first + 4568 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4569 "10011011" // /* MW 5 */ + 4570 "01110111" // /* MW 4 */ + 4571 "00110110" // /* MW 3 */ + 4572 "00110010" // /* MW 2 */ + 4573 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 first +.src_ref 2 "conv2d_bf16_params.h" 242 94 first + 4574 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4575 "00101111" // /* MW 5 */ + 4576 "11110010" // /* MW 4 */ + 4577 "01011110" // /* MW 3 */ + 4578 "11111001" // /* MW 2 */ + 4579 "01011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 20 first + 4580 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4581 "00101010" // /* MW 3 */ + 4582 "11001001" // /* MW 2 */ + 4583 "00000010" // /* MW 1 */ + 4584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4585 "00000000" // /* MW 1 */ + 4586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4587 "00000000" // /* MW 1 */ + 4588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4589 "00000000" // /* MW 1 */ + 4590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4591 "00000000" // /* MW 1 */ + 4592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4593 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 174 first + 4594 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4595 "11101100" // /* MW 3 */ + 4596 "01110111" // /* MW 2 */ + 4597 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 + 4598 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4599 "00110010" // /* MW 3 */ + 4600 "01011101" // /* MW 2 */ + 4601 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4602 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4603 "11001100" // /* MW 3 */ + 4604 "11110110" // /* MW 2 */ + 4605 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 241 95 first +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4606 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4607 "11001111" // /* MW 5 */ + 4608 "10110111" // /* MW 4 */ + 4609 "11101110" // /* MW 3 */ + 4610 "01110000" // /* MW 2 */ + 4611 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 100 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4612 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4613 "00011101" // /* MW 3 */ + 4614 "01111111" // /* MW 2 */ + 4615 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4616 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4617 "11100010" // /* MW 3 */ + 4618 "01011000" // /* MW 2 */ + 4619 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 98 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4620 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4621 "11000101" // /* MW 3 */ + 4622 "11111001" // /* MW 2 */ + 4623 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 151 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4624 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4625 "01100010" // /* MW 5 */ + 4626 "00111100" // /* MW 4 */ + 4627 "10011110" // /* MW 3 */ + 4628 "11111101" // /* MW 2 */ + 4629 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4630 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4631 "11000010" // /* MW 3 */ + 4632 "01111001" // /* MW 2 */ + 4633 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 80 first + 4634 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4635 "11001100" // /* MW 3 */ + 4636 "01111111" // /* MW 2 */ + 4637 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 117 first +.src_ref 2 "conv2d_bf16_params.h" 243 39 + 4638 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4639 "11010001" // /* MW 5 */ + 4640 "11110111" // /* MW 4 */ + 4641 "00111110" // /* MW 3 */ + 4642 "01111110" // /* MW 2 */ + 4643 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 first +.src_ref 2 "conv2d_bf16_params.h" 245 28 first + 4644 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4645 "00110001" // /* MW 5 */ + 4646 "10110010" // /* MW 4 */ + 4647 "01010100" // /* MW 3 */ + 4648 "01111001" // /* MW 2 */ + 4649 "01011101" // /* MW 1 */ + 4650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4651 "00000000" // /* MW 1 */ + 4652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4653 "00000000" // /* MW 1 */ + 4654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4655 "00000000" // /* MW 1 */ + 4656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4657 "00000000" // /* MW 1 */ + 4658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4659 "00000000" // /* MW 1 */ + 4660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4661 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 87 + 4662 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4663 "11111100" // /* MW 5 */ + 4664 "10111110" // /* MW 4 */ + 4665 "00011111" // /* MW 3 */ + 4666 "10101101" // /* MW 2 */ + 4667 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4668 "10000100" // JNZ r12, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4669 "00000001" // /* MW 5 */ + 4670 "01000000" // /* MW 4 */ + 4671 "01000000" // /* MW 3 */ + 4672 "00001001" // /* MW 2 */ + 4673 "01100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.delay_slot + 4674 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4675 "01001000" // /* MW 3 */ + 4676 "10010011" // /* MW 2 */ + 4677 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4678 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4679 "10010000" // /* MW 3 */ + 4680 "11111110" // /* MW 2 */ + 4681 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4682 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4683 "01100100" // /* MW 3 */ + 4684 "01101101" // /* MW 2 */ + 4685 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4686 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4687 "01111100" // /* MW 3 */ + 4688 "11101111" // /* MW 2 */ + 4689 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 132 +.delay_slot + 4690 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4691 "01100100" // /* MW 3 */ + 4692 "11100001" // /* MW 2 */ + 4693 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4694 "10000100" // JNZ r29, #4736 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4736 delay_slots=5 */ + 4695 "00000001" // /* MW 5 */ + 4696 "01000000" // /* MW 4 */ + 4697 "01000000" // /* MW 3 */ + 4698 "00001001" // /* MW 2 */ + 4699 "11101000" // /* MW 1 */ +.delay_slot + 4700 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4701 "00011101" // /* MW 3 */ + 4702 "11101011" // /* MW 2 */ + 4703 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4705 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4707 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4709 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4711 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 + 4712 "10111010" // MOVA r27, #1; J #4784 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4784 delay_slots=5 */ + 4713 "00100000" // /* MW 9 */ + 4714 "00000000" // /* MW 8 */ + 4715 "00000000" // /* MW 7 */ + 4716 "01010110" // /* MW 6 */ + 4717 "00000010" // /* MW 5 */ + 4718 "00000000" // /* MW 4 */ + 4719 "00000000" // /* MW 3 */ + 4720 "00111011" // /* MW 2 */ + 4721 "00000000" // /* MW 1 */ +.delay_slot + 4722 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4723 "10011100" // /* MW 3 */ + 4724 "00011001" // /* MW 2 */ + 4725 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1849 12 +.delay_slot + 4726 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4727 "00000101" // /* MW 3 */ + 4728 "00100110" // /* MW 2 */ + 4729 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4735 "00000000" // /* MW 1 */ +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 2 "conv2d_bf16_params.h" 250 71 first +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4736 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4737 "01111000" // /* MW 11 */ + 4738 "11001110" // /* MW 10 */ + 4739 "00001100" // /* MW 9 */ + 4740 "00111100" // /* MW 8 */ + 4741 "10111111" // /* MW 7 */ + 4742 "10101011" // /* MW 6 */ + 4743 "00011101" // /* MW 5 */ + 4744 "11101011" // /* MW 4 */ + 4745 "00000111" // /* MW 3 */ + 4746 "10010101" // /* MW 2 */ + 4747 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4748 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4749 "01011101" // /* MW 3 */ + 4750 "10101011" // /* MW 2 */ + 4751 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 + 4752 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4753 "10010010" // /* MW 3 */ + 4754 "01101011" // /* MW 2 */ + 4755 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4756 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4757 "11100111" // /* MW 3 */ + 4758 "11110111" // /* MW 2 */ + 4759 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4760 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4761 "01000001" // /* MW 5 */ + 4762 "10110000" // /* MW 4 */ + 4763 "01001101" // /* MW 3 */ + 4764 "11110010" // /* MW 2 */ + 4765 "10101100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4766 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4767 "00110010" // /* MW 3 */ + 4768 "01100111" // /* MW 2 */ + 4769 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 first + 4770 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4771 "01000100" // /* MW 3 */ + 4772 "00101001" // /* MW 2 */ + 4773 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4774 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4775 "11110000" // /* MW 3 */ + 4776 "00110110" // /* MW 2 */ + 4777 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 152 first + 4778 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4779 "10001011" // /* MW 5 */ + 4780 "11001111" // /* MW 4 */ + 4781 "11111001" // /* MW 3 */ + 4782 "00101100" // /* MW 2 */ + 4783 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 +.src_ref 2 "conv2d_bf16_params.h" 258 8 first + 4784 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4832 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4832 delay_slots=5 */ + 4785 "01100000" // /* MW 11 */ + 4786 "00000000" // /* MW 10 */ + 4787 "00010000" // /* MW 9 */ + 4788 "01011100" // /* MW 8 */ + 4789 "00000010" // /* MW 7 */ + 4790 "10111010" // /* MW 6 */ + 4791 "01110001" // /* MW 5 */ + 4792 "01101111" // /* MW 4 */ + 4793 "10000010" // /* MW 3 */ + 4794 "10010000" // /* MW 2 */ + 4795 "00000001" // /* MW 1 */ +.delay_slot + 4796 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4797 "01100111" // /* MW 3 */ + 4798 "10001010" // /* MW 2 */ + 4799 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4801 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4803 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4807 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 + 4808 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4809 "11111110" // /* MW 5 */ + 4810 "00111111" // /* MW 4 */ + 4811 "11111010" // /* MW 3 */ + 4812 "11111111" // /* MW 2 */ + 4813 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 first + 4814 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4815 "01000100" // /* MW 3 */ + 4816 "10100101" // /* MW 2 */ + 4817 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4818 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4819 "00011100" // /* MW 13 */ + 4820 "00000000" // /* MW 12 */ + 4821 "00000000" // /* MW 11 */ + 4822 "01010111" // /* MW 10 */ + 4823 "00011010" // /* MW 9 */ + 4824 "01000000" // /* MW 8 */ + 4825 "00000000" // /* MW 7 */ + 4826 "00000000" // /* MW 6 */ + 4827 "10100011" // /* MW 5 */ + 4828 "11101100" // /* MW 4 */ + 4829 "11110110" // /* MW 3 */ + 4830 "00101100" // /* MW 2 */ + 4831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.src_ref 2 "conv2d_bf16.h" 1841 65 first +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16.h" 1849 12 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4832 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4833 "01011000" // /* MW 9 */ + 4834 "11111101" // /* MW 8 */ + 4835 "11001111" // /* MW 7 */ + 4836 "10000010" // /* MW 6 */ + 4837 "01000100" // /* MW 5 */ + 4838 "00100111" // /* MW 4 */ + 4839 "11010000" // /* MW 3 */ + 4840 "11010010" // /* MW 2 */ + 4841 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1841 34 +.src_ref 2 "conv2d_bf16.h" 1842 36 +.src_ref 2 "conv2d_bf16.h" 1842 67 +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 4842 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4843 "01011000" // /* MW 9 */ + 4844 "00100100" // /* MW 8 */ + 4845 "00000000" // /* MW 7 */ + 4846 "11111010" // /* MW 6 */ + 4847 "01011111" // /* MW 5 */ + 4848 "00101001" // /* MW 4 */ + 4849 "00000000" // /* MW 3 */ + 4850 "01010010" // /* MW 2 */ + 4851 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 67 first +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4852 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4853 "01011000" // /* MW 11 */ + 4854 "11001100" // /* MW 10 */ + 4855 "00000111" // /* MW 9 */ + 4856 "00100110" // /* MW 8 */ + 4857 "01101011" // /* MW 7 */ + 4858 "10101011" // /* MW 6 */ + 4859 "00101101" // /* MW 5 */ + 4860 "11010000" // /* MW 4 */ + 4861 "11010111" // /* MW 3 */ + 4862 "01011010" // /* MW 2 */ + 4863 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1845 80 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4864 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4865 "01011000" // /* MW 11 */ + 4866 "11000100" // /* MW 10 */ + 4867 "00000000" // /* MW 9 */ + 4868 "11101010" // /* MW 8 */ + 4869 "00110111" // /* MW 7 */ + 4870 "10111111" // /* MW 6 */ + 4871 "11010101" // /* MW 5 */ + 4872 "11011110" // /* MW 4 */ + 4873 "11010111" // /* MW 3 */ + 4874 "01011110" // /* MW 2 */ + 4875 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 63 first + 4876 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4877 "10110110" // /* MW 3 */ + 4878 "11111111" // /* MW 2 */ + 4879 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 first + 4880 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4881 "11110110" // /* MW 3 */ + 4882 "10001011" // /* MW 2 */ + 4883 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4884 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4885 "10110110" // /* MW 3 */ + 4886 "00000110" // /* MW 2 */ + 4887 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1841 34 first + 4888 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4889 "01011011" // /* MW 5 */ + 4890 "00100110" // /* MW 4 */ + 4891 "11011010" // /* MW 3 */ + 4892 "11010010" // /* MW 2 */ + 4893 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 4894 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4895 "11010110" // /* MW 3 */ + 4896 "00000111" // /* MW 2 */ + 4897 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first + 4898 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4899 "00101101" // /* MW 3 */ + 4900 "10101101" // /* MW 2 */ + 4901 "00010101" // /* MW 1 */ + 4902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4903 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 80 first + 4904 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4905 "00111110" // /* MW 3 */ + 4906 "01100111" // /* MW 2 */ + 4907 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 first + 4908 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4909 "00011000" // /* MW 3 */ + 4910 "11100011" // /* MW 2 */ + 4911 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 12 + 4912 "10000100" // JNZ r17, #5024 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5024 delay_slots=5 */ + 4913 "00000001" // /* MW 5 */ + 4914 "01000000" // /* MW 4 */ + 4915 "11010000" // /* MW 3 */ + 4916 "00001001" // /* MW 2 */ + 4917 "10001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first +.src_ref 2 "conv2d_bf16.h" 1842 75 first +.delay_slot + 4918 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4919 "10110010" // /* MW 5 */ + 4920 "10110101" // /* MW 4 */ + 4921 "10111010" // /* MW 3 */ + 4922 "10100101" // /* MW 2 */ + 4923 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4924 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4925 "10110010" // /* MW 5 */ + 4926 "10010101" // /* MW 4 */ + 4927 "10110000" // /* MW 3 */ + 4928 "01100101" // /* MW 2 */ + 4929 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1841 34 first +.delay_slot + 4930 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4931 "10100000" // /* MW 7 */ + 4932 "01101000" // /* MW 6 */ + 4933 "11001010" // /* MW 5 */ + 4934 "00000001" // /* MW 4 */ + 4935 "10110000" // /* MW 3 */ + 4936 "10000100" // /* MW 2 */ + 4937 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 4938 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4939 "10000000" // /* MW 3 */ + 4940 "11010000" // /* MW 2 */ + 4941 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 4942 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4943 "11111001" // /* MW 3 */ + 4944 "01101010" // /* MW 2 */ + 4945 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4946 "01000100" // MOVXM p7, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4947 "11010000" // /* MW 5 */ + 4948 "11001000" // /* MW 4 */ + 4949 "11001110" // /* MW 3 */ + 4950 "00000111" // /* MW 2 */ + 4951 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 first + 4952 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4953 "10000000" // /* MW 5 */ + 4954 "10110100" // /* MW 4 */ + 4955 "01010000" // /* MW 3 */ + 4956 "11000100" // /* MW 2 */ + 4957 "11100000" // /* MW 1 */ + 4958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4959 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4960 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4961 "00000000" // /* MW 5 */ + 4962 "00100000" // /* MW 4 */ + 4963 "00001010" // /* MW 3 */ + 4964 "01111111" // /* MW 2 */ + 4965 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 4966 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4967 "10010001" // /* MW 3 */ + 4968 "00000010" // /* MW 2 */ + 4969 "00011000" // /* MW 1 */ + 4970 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4971 "11100000" // /* MW 3 */ + 4972 "00010101" // /* MW 2 */ + 4973 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 4974 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4975 "01011111" // /* MW 3 */ + 4976 "01101010" // /* MW 2 */ + 4977 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 4978 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4979 "00100101" // /* MW 5 */ + 4980 "00000001" // /* MW 4 */ + 4981 "11100000" // /* MW 3 */ + 4982 "11000110" // /* MW 2 */ + 4983 "11100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4984 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4985 "10000000" // /* MW 3 */ + 4986 "01111010" // /* MW 2 */ + 4987 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4988 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4989 "00010110" // /* MW 3 */ + 4990 "01000000" // /* MW 2 */ + 4991 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 4992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4993 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4994 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4995 "00000001" // /* MW 3 */ + 4996 "01000001" // /* MW 2 */ + 4997 "00011100" // /* MW 1 */ + 4998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4999 "00000000" // /* MW 1 */ + 5000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5001 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5002 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5003 "00110010" // /* MW 3 */ + 5004 "00000110" // /* MW 2 */ + 5005 "00000111" // /* MW 1 */ + 5006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5007 "00000000" // /* MW 1 */ + 5008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5009 "00000000" // /* MW 1 */ + 5010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5011 "00000000" // /* MW 1 */ + 5012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5013 "00000000" // /* MW 1 */ + 5014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5015 "00000000" // /* MW 1 */ + 5016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5017 "00000000" // /* MW 1 */ + 5018 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5019 "01101011" // /* MW 5 */ + 5020 "10100100" // /* MW 4 */ + 5021 "11111111" // /* MW 3 */ + 5022 "00101100" // /* MW 2 */ + 5023 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.src_ref 2 "conv2d_bf16.h" 881 76 +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5024 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509032 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5025 "00010000" // /* MW 11 */ + 5026 "00110100" // /* MW 10 */ + 5027 "10110010" // /* MW 9 */ + 5028 "11110001" // /* MW 8 */ + 5029 "00000001" // /* MW 7 */ + 5030 "00000000" // /* MW 6 */ + 5031 "00001011" // /* MW 5 */ + 5032 "10001110" // /* MW 4 */ + 5033 "10000001" // /* MW 3 */ + 5034 "10010000" // /* MW 2 */ + 5035 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.src_ref 2 "conv2d_bf16.h" 876 51 first +.src_ref 2 "conv2d_bf16.h" 881 76 first +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5036 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5037 "01011000" // /* MW 11 */ + 5038 "00001011" // /* MW 10 */ + 5039 "01101000" // /* MW 9 */ + 5040 "10010010" // /* MW 8 */ + 5041 "00011001" // /* MW 7 */ + 5042 "00110011" // /* MW 6 */ + 5043 "10001011" // /* MW 5 */ + 5044 "10000100" // /* MW 4 */ + 5045 "01010000" // /* MW 3 */ + 5046 "01000101" // /* MW 2 */ + 5047 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5048 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5049 "01111000" // /* MW 9 */ + 5050 "01100000" // /* MW 8 */ + 5051 "10101010" // /* MW 7 */ + 5052 "01100101" // /* MW 6 */ + 5053 "10111001" // /* MW 5 */ + 5054 "00111001" // /* MW 4 */ + 5055 "00000000" // /* MW 3 */ + 5056 "10010110" // /* MW 2 */ + 5057 "01100001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 883 4 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5058 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5059 "01100111" // /* MW 3 */ + 5060 "00000110" // /* MW 2 */ + 5061 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5063 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 884 4 first +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5064 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5065 "00000001" // /* MW 5 */ + 5066 "00000000" // /* MW 4 */ + 5067 "00110000" // /* MW 3 */ + 5068 "00001000" // /* MW 2 */ + 5069 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5070 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5071 "00101101" // /* MW 3 */ + 5072 "01101011" // /* MW 2 */ + 5073 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.delay_slot + 5074 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5075 "11111001" // /* MW 3 */ + 5076 "01101010" // /* MW 2 */ + 5077 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 first +.delay_slot + 5078 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5079 "00010001" // /* MW 3 */ + 5080 "01100011" // /* MW 2 */ + 5081 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.delay_slot + 5082 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5083 "00110101" // /* MW 5 */ + 5084 "00101100" // /* MW 4 */ + 5085 "10111010" // /* MW 3 */ + 5086 "01100101" // /* MW 2 */ + 5087 "10001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.delay_slot + 5088 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5089 "00000000" // /* MW 15 */ + 5090 "00000000" // /* MW 14 */ + 5091 "10101000" // /* MW 13 */ + 5092 "11100010" // /* MW 12 */ + 5093 "10001011" // /* MW 11 */ + 5094 "00010001" // /* MW 10 */ + 5095 "10011010" // /* MW 9 */ + 5096 "00101100" // /* MW 8 */ + 5097 "01011011" // /* MW 7 */ + 5098 "00000001" // /* MW 6 */ + 5099 "00100000" // /* MW 5 */ + 5100 "00000000" // /* MW 4 */ + 5101 "11110000" // /* MW 3 */ + 5102 "00101100" // /* MW 2 */ + 5103 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.return_address + 5104 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5105 "10011001" // /* MW 3 */ + 5106 "11010100" // /* MW 2 */ + 5107 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 first +.no_stack_arguments + 5108 "00000100" // JL #4192 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4192 delay_slots=5 */ + 5109 "00000001" // /* MW 5 */ + 5110 "00000000" // /* MW 4 */ + 5111 "00110000" // /* MW 3 */ + 5112 "00001000" // /* MW 2 */ + 5113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5117 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.delay_slot + 5118 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5119 "10010000" // /* MW 3 */ + 5120 "01010110" // /* MW 2 */ + 5121 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5122 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5123 "10100000" // /* MW 3 */ + 5124 "01100110" // /* MW 2 */ + 5125 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5126 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5127 "00000000" // /* MW 9 */ + 5128 "00000000" // /* MW 8 */ + 5129 "00000000" // /* MW 7 */ + 5130 "00000000" // /* MW 6 */ + 5131 "00001011" // /* MW 5 */ + 5132 "10001111" // /* MW 4 */ + 5133 "11110000" // /* MW 3 */ + 5134 "00101100" // /* MW 2 */ + 5135 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.return_address + 5136 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5137 "00001000" // /* MW 9 */ + 5138 "01100011" // /* MW 8 */ + 5139 "00110011" // /* MW 7 */ + 5140 "11101010" // /* MW 6 */ + 5141 "00110111" // /* MW 5 */ + 5142 "00000001" // /* MW 4 */ + 5143 "10000000" // /* MW 3 */ + 5144 "10011010" // /* MW 2 */ + 5145 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 886 4 +.src_ref 2 "conv2d_bf16.h" 896 23 first +.src_ref 2 "conv2d_bf16.h" 1123 71 + 5146 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5147 "01100010" // /* MW 5 */ + 5148 "00110100" // /* MW 4 */ + 5149 "11010000" // /* MW 3 */ + 5150 "10000100" // /* MW 2 */ + 5151 "10000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5152 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5153 "01000110" // /* MW 3 */ + 5154 "00011100" // /* MW 2 */ + 5155 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5156 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5157 "00100110" // /* MW 3 */ + 5158 "00011110" // /* MW 2 */ + 5159 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5160 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5161 "01000110" // /* MW 3 */ + 5162 "00011110" // /* MW 2 */ + 5163 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5164 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5165 "00000110" // /* MW 3 */ + 5166 "00011100" // /* MW 2 */ + 5167 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5168 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5169 "01100110" // /* MW 3 */ + 5170 "00011100" // /* MW 2 */ + 5171 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5172 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5173 "01100110" // /* MW 3 */ + 5174 "00011110" // /* MW 2 */ + 5175 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 first + 5176 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5177 "11010110" // /* MW 3 */ + 5178 "00011110" // /* MW 2 */ + 5179 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5180 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5181 "00110110" // /* MW 3 */ + 5182 "00011110" // /* MW 2 */ + 5183 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5184 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5185 "10010110" // /* MW 3 */ + 5186 "00011111" // /* MW 2 */ + 5187 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5188 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5189 "10110110" // /* MW 3 */ + 5190 "00011110" // /* MW 2 */ + 5191 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5192 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5193 "11110110" // /* MW 3 */ + 5194 "00011110" // /* MW 2 */ + 5195 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5196 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5197 "10011110" // /* MW 3 */ + 5198 "00011101" // /* MW 2 */ + 5199 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5200 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5201 "00100110" // /* MW 3 */ + 5202 "00011101" // /* MW 2 */ + 5203 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5204 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5205 "10100110" // /* MW 3 */ + 5206 "00011100" // /* MW 2 */ + 5207 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5208 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5209 "11000110" // /* MW 3 */ + 5210 "00011100" // /* MW 2 */ + 5211 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5212 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5213 "10100110" // /* MW 3 */ + 5214 "00011110" // /* MW 2 */ + 5215 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5216 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5217 "11010110" // /* MW 3 */ + 5218 "00011111" // /* MW 2 */ + 5219 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5220 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5221 "10110110" // /* MW 3 */ + 5222 "00011111" // /* MW 2 */ + 5223 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5224 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5225 "11100110" // /* MW 3 */ + 5226 "00011100" // /* MW 2 */ + 5227 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5228 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5229 "01001010" // /* MW 3 */ + 5230 "11000010" // /* MW 2 */ + 5231 "00000100" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 + 5232 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5233 "10010001" // /* MW 3 */ + 5234 "11010010" // /* MW 2 */ + 5235 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5236 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5237 "01010110" // /* MW 3 */ + 5238 "00000100" // /* MW 2 */ + 5239 "00000100" // /* MW 1 */ + 5240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5241 "00000000" // /* MW 1 */ + 5242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5243 "00000000" // /* MW 1 */ + 5244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5245 "00000000" // /* MW 1 */ + 5246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5247 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5248 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5249 "00101100" // /* MW 3 */ + 5250 "11100111" // /* MW 2 */ + 5251 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 12 + 5252 "10000100" // JNZ r19, #6176 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6176 delay_slots=5 */ + 5253 "00000001" // /* MW 5 */ + 5254 "01000000" // /* MW 4 */ + 5255 "00010000" // /* MW 3 */ + 5256 "00001100" // /* MW 2 */ + 5257 "10011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 +.delay_slot + 5258 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5259 "11010000" // /* MW 5 */ + 5260 "11001000" // /* MW 4 */ + 5261 "11000100" // /* MW 3 */ + 5262 "00000111" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 first +.delay_slot + 5264 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "10100111" // /* MW 3 */ + 5266 "00000101" // /* MW 2 */ + 5267 "00000010" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 5268 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5269 "01110010" // /* MW 3 */ + 5270 "11010001" // /* MW 2 */ + 5271 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 71 first + 5276 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5277 "01011000" // /* MW 9 */ + 5278 "10000100" // /* MW 8 */ + 5279 "10000000" // /* MW 7 */ + 5280 "00111111" // /* MW 6 */ + 5281 "10111001" // /* MW 5 */ + 5282 "00011011" // /* MW 4 */ + 5283 "00100000" // /* MW 3 */ + 5284 "01000011" // /* MW 2 */ + 5285 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 +.src_ref 2 "conv2d_bf16.h" 1154 80 + 5286 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5287 "01011000" // /* MW 9 */ + 5288 "00111100" // /* MW 8 */ + 5289 "00000000" // /* MW 7 */ + 5290 "00001010" // /* MW 6 */ + 5291 "00100000" // /* MW 5 */ + 5292 "00111101" // /* MW 4 */ + 5293 "00000000" // /* MW 3 */ + 5294 "00010011" // /* MW 2 */ + 5295 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5296 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5297 "01111000" // /* MW 9 */ + 5298 "11010000" // /* MW 8 */ + 5299 "11100100" // /* MW 7 */ + 5300 "00001011" // /* MW 6 */ + 5301 "10100000" // /* MW 5 */ + 5302 "00000001" // /* MW 4 */ + 5303 "10000000" // /* MW 3 */ + 5304 "00010100" // /* MW 2 */ + 5305 "11111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 746 83 + 5306 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5307 "01111000" // /* MW 11 */ + 5308 "11000000" // /* MW 10 */ + 5309 "10100111" // /* MW 9 */ + 5310 "00000001" // /* MW 8 */ + 5311 "11010100" // /* MW 7 */ + 5312 "00011011" // /* MW 6 */ + 5313 "01001011" // /* MW 5 */ + 5314 "00011100" // /* MW 4 */ + 5315 "10000010" // /* MW 3 */ + 5316 "10011000" // /* MW 2 */ + 5317 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.src_ref 2 "conv2d_bf16.h" 1199 26 +.src_ref 2 "conv2d_bf16.h" 1200 26 +.src_ref 2 "conv2d_bf16.h" 1201 26 +.src_ref 2 "conv2d_bf16.h" 1202 26 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5318 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5319 "01011000" // /* MW 11 */ + 5320 "00000111" // /* MW 10 */ + 5321 "11101000" // /* MW 9 */ + 5322 "10001001" // /* MW 8 */ + 5323 "11110111" // /* MW 7 */ + 5324 "00000001" // /* MW 6 */ + 5325 "01001011" // /* MW 5 */ + 5326 "00011100" // /* MW 4 */ + 5327 "00100110" // /* MW 3 */ + 5328 "10010110" // /* MW 2 */ + 5329 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 12 +.src_ref 2 "conv2d_bf16.h" 1218 20 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5330 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5331 "00010000" // /* MW 9 */ + 5332 "10100000" // /* MW 8 */ + 5333 "00110010" // /* MW 7 */ + 5334 "00000101" // /* MW 6 */ + 5335 "00000000" // /* MW 5 */ + 5336 "00000000" // /* MW 4 */ + 5337 "00100000" // /* MW 3 */ + 5338 "11001010" // /* MW 2 */ + 5339 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 749 26 +.src_ref 2 "conv2d_bf16.h" 750 26 +.src_ref 2 "conv2d_bf16.h" 751 26 +.src_ref 2 "conv2d_bf16.h" 752 26 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5340 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5341 "01011000" // /* MW 9 */ + 5342 "00001100" // /* MW 8 */ + 5343 "10001011" // /* MW 7 */ + 5344 "00010010" // /* MW 6 */ + 5345 "01101001" // /* MW 5 */ + 5346 "00110100" // /* MW 4 */ + 5347 "00100000" // /* MW 3 */ + 5348 "00110110" // /* MW 2 */ + 5349 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1873 + 5350 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5351 "01011000" // /* MW 11 */ + 5352 "00000000" // /* MW 10 */ + 5353 "00001000" // /* MW 9 */ + 5354 "00001011" // /* MW 8 */ + 5355 "10010000" // /* MW 7 */ + 5356 "00000001" // /* MW 6 */ + 5357 "00100000" // /* MW 5 */ + 5358 "11010111" // /* MW 4 */ + 5359 "00101001" // /* MW 3 */ + 5360 "10000111" // /* MW 2 */ + 5361 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5362 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5363 "00010110" // /* MW 3 */ + 5364 "10001000" // /* MW 2 */ + 5365 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5366 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5367 "00100110" // /* MW 3 */ + 5368 "10101011" // /* MW 2 */ + 5369 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5370 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5371 "01110110" // /* MW 3 */ + 5372 "00101111" // /* MW 2 */ + 5373 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 80 first + 5374 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5375 "10000110" // /* MW 3 */ + 5376 "00011110" // /* MW 2 */ + 5377 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 80 first + 5378 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5379 "11000110" // /* MW 3 */ + 5380 "10001010" // /* MW 2 */ + 5381 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 87 first + 5382 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5383 "00000110" // /* MW 3 */ + 5384 "10011110" // /* MW 2 */ + 5385 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 83 first + 5386 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5387 "00110110" // /* MW 3 */ + 5388 "00011100" // /* MW 2 */ + 5389 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 83 first +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5390 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5391 "00000010" // /* MW 5 */ + 5392 "00000110" // /* MW 4 */ + 5393 "11011101" // /* MW 3 */ + 5394 "00000010" // /* MW 2 */ + 5395 "10011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 66 first + 5396 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "01110110" // /* MW 3 */ + 5398 "00010100" // /* MW 2 */ + 5399 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1206 63 first + 5400 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5401 "10010110" // /* MW 3 */ + 5402 "00000100" // /* MW 2 */ + 5403 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 + 5404 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5405 "00000000" // /* MW 3 */ + 5406 "11011010" // /* MW 2 */ + 5407 "00011001" // /* MW 1 */ + 5408 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "10011001" // /* MW 3 */ + 5410 "10000011" // /* MW 2 */ + 5411 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 + 5412 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5413 "00000000" // /* MW 3 */ + 5414 "00011011" // /* MW 2 */ + 5415 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5416 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5417 "10011001" // /* MW 3 */ + 5418 "00001101" // /* MW 2 */ + 5419 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 first + 5420 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5421 "11100000" // /* MW 3 */ + 5422 "00000011" // /* MW 2 */ + 5423 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 first + 5424 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5425 "11000000" // /* MW 5 */ + 5426 "00010000" // /* MW 4 */ + 5427 "11101110" // /* MW 3 */ + 5428 "11111111" // /* MW 2 */ + 5429 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5430 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5431 "01111110" // /* MW 9 */ + 5432 "10000000" // /* MW 8 */ + 5433 "10000010" // /* MW 7 */ + 5434 "00000000" // /* MW 6 */ + 5435 "00010000" // /* MW 5 */ + 5436 "00000000" // /* MW 4 */ + 5437 "11110000" // /* MW 3 */ + 5438 "00101100" // /* MW 2 */ + 5439 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1147 31 first +.src_ref 2 "conv2d_bf16.h" 1187 40 first +.loop_nesting 1 + 5440 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5441 "01111000" // /* MW 11 */ + 5442 "10010000" // /* MW 10 */ + 5443 "00110011" // /* MW 9 */ + 5444 "11101100" // /* MW 8 */ + 5445 "11100111" // /* MW 7 */ + 5446 "00000100" // /* MW 6 */ + 5447 "00001011" // /* MW 5 */ + 5448 "10000101" // /* MW 4 */ + 5449 "01110001" // /* MW 3 */ + 5450 "10000101" // /* MW 2 */ + 5451 "11000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1188 50 first + 5452 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5453 "10100000" // /* MW 11 */ + 5454 "10011000" // /* MW 10 */ + 5455 "00110011" // /* MW 9 */ + 5456 "00000010" // /* MW 8 */ + 5457 "01001011" // /* MW 7 */ + 5458 "00001110" // /* MW 6 */ + 5459 "00101011" // /* MW 5 */ + 5460 "00101000" // /* MW 4 */ + 5461 "01111000" // /* MW 3 */ + 5462 "10000001" // /* MW 2 */ + 5463 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first + 5464 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5465 "01110000" // /* MW 11 */ + 5466 "10000000" // /* MW 10 */ + 5467 "11000110" // /* MW 9 */ + 5468 "00000011" // /* MW 8 */ + 5469 "01001011" // /* MW 7 */ + 5470 "01011010" // /* MW 6 */ + 5471 "00101111" // /* MW 5 */ + 5472 "00101000" // /* MW 4 */ + 5473 "01111000" // /* MW 3 */ + 5474 "00111001" // /* MW 2 */ + 5475 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1149 31 first + 5476 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5477 "01110000" // /* MW 11 */ + 5478 "00000000" // /* MW 10 */ + 5479 "10000010" // /* MW 9 */ + 5480 "00000001" // /* MW 8 */ + 5481 "00001011" // /* MW 7 */ + 5482 "01010011" // /* MW 6 */ + 5483 "00101011" // /* MW 5 */ + 5484 "00000011" // /* MW 4 */ + 5485 "01110100" // /* MW 3 */ + 5486 "00001101" // /* MW 2 */ + 5487 "11011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 743 30 first + 5488 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5489 "01011110" // /* MW 9 */ + 5490 "00000000" // /* MW 8 */ + 5491 "11000000" // /* MW 7 */ + 5492 "00000001" // /* MW 6 */ + 5493 "11010100" // /* MW 5 */ + 5494 "00010010" // /* MW 4 */ + 5495 "01110100" // /* MW 3 */ + 5496 "01000001" // /* MW 2 */ + 5497 "01110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1152 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first + 5498 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5760 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5499 "00010000" // /* MW 11 */ + 5500 "01000000" // /* MW 10 */ + 5501 "10111011" // /* MW 9 */ + 5502 "00000101" // /* MW 8 */ + 5503 "00000000" // /* MW 7 */ + 5504 "00000000" // /* MW 6 */ + 5505 "00101000" // /* MW 5 */ + 5506 "00101000" // /* MW 4 */ + 5507 "01111000" // /* MW 3 */ + 5508 "10010101" // /* MW 2 */ + 5509 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 1154 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 + 5510 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5712 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5511 "00010000" // /* MW 11 */ + 5512 "00101000" // /* MW 10 */ + 5513 "01111011" // /* MW 9 */ + 5514 "00000100" // /* MW 8 */ + 5515 "00000000" // /* MW 7 */ + 5516 "00000000" // /* MW 6 */ + 5517 "00101000" // /* MW 5 */ + 5518 "00101000" // /* MW 4 */ + 5519 "01111000" // /* MW 3 */ + 5520 "00011101" // /* MW 2 */ + 5521 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first + 5522 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5523 "00101000" // /* MW 5 */ + 5524 "00000001" // /* MW 4 */ + 5525 "01110100" // /* MW 3 */ + 5526 "10110101" // /* MW 2 */ + 5527 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1157 31 first + 5528 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5529 "00101000" // /* MW 5 */ + 5530 "00100010" // /* MW 4 */ + 5531 "01111000" // /* MW 3 */ + 5532 "10100101" // /* MW 2 */ + 5533 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1159 31 first + 5534 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5535 "00101000" // /* MW 5 */ + 5536 "00101000" // /* MW 4 */ + 5537 "01111000" // /* MW 3 */ + 5538 "00101101" // /* MW 2 */ + 5539 "11011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5540 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5541 "00101000" // /* MW 5 */ + 5542 "00101000" // /* MW 4 */ + 5543 "01111000" // /* MW 3 */ + 5544 "10000001" // /* MW 2 */ + 5545 "00100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1192 29 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5546 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5547 "00101000" // /* MW 5 */ + 5548 "00000001" // /* MW 4 */ + 5549 "01110100" // /* MW 3 */ + 5550 "10111101" // /* MW 2 */ + 5551 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5552 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5553 "11101110" // /* MW 9 */ + 5554 "11000011" // /* MW 8 */ + 5555 "10011010" // /* MW 7 */ + 5556 "00000010" // /* MW 6 */ + 5557 "00010100" // /* MW 5 */ + 5558 "00010001" // /* MW 4 */ + 5559 "01110100" // /* MW 3 */ + 5560 "11001101" // /* MW 2 */ + 5561 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1162 81 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5562 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5563 "11100000" // /* MW 11 */ + 5564 "11000001" // /* MW 10 */ + 5565 "10011010" // /* MW 9 */ + 5566 "00000001" // /* MW 8 */ + 5567 "10001011" // /* MW 7 */ + 5568 "10011000" // /* MW 6 */ + 5569 "00101100" // /* MW 5 */ + 5570 "00101000" // /* MW 4 */ + 5571 "01111000" // /* MW 3 */ + 5572 "11000101" // /* MW 2 */ + 5573 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5574 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5575 "11101001" // /* MW 9 */ + 5576 "00010100" // /* MW 8 */ + 5577 "01001000" // /* MW 7 */ + 5578 "00011101" // /* MW 6 */ + 5579 "01010100" // /* MW 5 */ + 5580 "00000000" // /* MW 4 */ + 5581 "01110011" // /* MW 3 */ + 5582 "10000001" // /* MW 2 */ + 5583 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5584 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5585 "11101001" // /* MW 13 */ + 5586 "00101100" // /* MW 12 */ + 5587 "01001001" // /* MW 11 */ + 5588 "00000111" // /* MW 10 */ + 5589 "01011000" // /* MW 9 */ + 5590 "01011100" // /* MW 8 */ + 5591 "00000000" // /* MW 7 */ + 5592 "00000000" // /* MW 6 */ + 5593 "10010110" // /* MW 5 */ + 5594 "10010100" // /* MW 4 */ + 5595 "01110110" // /* MW 3 */ + 5596 "00110101" // /* MW 2 */ + 5597 "11001111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1162 81 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5598 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5599 "00001001" // /* MW 13 */ + 5600 "01010101" // /* MW 12 */ + 5601 "01001010" // /* MW 11 */ + 5602 "00111110" // /* MW 10 */ + 5603 "10010000" // /* MW 9 */ + 5604 "01001100" // /* MW 8 */ + 5605 "00000000" // /* MW 7 */ + 5606 "00000000" // /* MW 6 */ + 5607 "10010110" // /* MW 5 */ + 5608 "00111000" // /* MW 4 */ + 5609 "01111010" // /* MW 3 */ + 5610 "10111101" // /* MW 2 */ + 5611 "10000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1199 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5612 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5613 "00111101" // /* MW 13 */ + 5614 "01100000" // /* MW 12 */ + 5615 "11111000" // /* MW 11 */ + 5616 "00011110" // /* MW 10 */ + 5617 "10010000" // /* MW 9 */ + 5618 "01010100" // /* MW 8 */ + 5619 "00000000" // /* MW 7 */ + 5620 "00000000" // /* MW 6 */ + 5621 "10010110" // /* MW 5 */ + 5622 "10011000" // /* MW 4 */ + 5623 "01110100" // /* MW 3 */ + 5624 "00000001" // /* MW 2 */ + 5625 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1200 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5626 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5627 "00111101" // /* MW 7 */ + 5628 "01100100" // /* MW 6 */ + 5629 "11111001" // /* MW 5 */ + 5630 "00000100" // /* MW 4 */ + 5631 "01110000" // /* MW 3 */ + 5632 "10000001" // /* MW 2 */ + 5633 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1201 26 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5634 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5635 "00111101" // /* MW 7 */ + 5636 "10001000" // /* MW 6 */ + 5637 "11111010" // /* MW 5 */ + 5638 "00000100" // /* MW 4 */ + 5639 "01110000" // /* MW 3 */ + 5640 "00001001" // /* MW 2 */ + 5641 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5642 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5643 "00001001" // /* MW 7 */ + 5644 "01101101" // /* MW 6 */ + 5645 "01001011" // /* MW 5 */ + 5646 "00000100" // /* MW 4 */ + 5647 "01110000" // /* MW 3 */ + 5648 "00000001" // /* MW 2 */ + 5649 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5650 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5651 "00101000" // /* MW 5 */ + 5652 "00000001" // /* MW 4 */ + 5653 "01110100" // /* MW 3 */ + 5654 "10000001" // /* MW 2 */ + 5655 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5656 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5657 "00010100" // /* MW 3 */ + 5658 "00010001" // /* MW 2 */ + 5659 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1202 26 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5660 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5661 "00111101" // /* MW 11 */ + 5662 "10001100" // /* MW 10 */ + 5663 "11111011" // /* MW 9 */ + 5664 "10000010" // /* MW 8 */ + 5665 "01111101" // /* MW 7 */ + 5666 "01110010" // /* MW 6 */ + 5667 "00101101" // /* MW 5 */ + 5668 "00101000" // /* MW 4 */ + 5669 "01111000" // /* MW 3 */ + 5670 "00001001" // /* MW 2 */ + 5671 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5672 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5673 "00101001" // /* MW 9 */ + 5674 "00000110" // /* MW 8 */ + 5675 "10100000" // /* MW 7 */ + 5676 "00011101" // /* MW 6 */ + 5677 "00010100" // /* MW 5 */ + 5678 "00010100" // /* MW 4 */ + 5679 "01110100" // /* MW 3 */ + 5680 "00000001" // /* MW 2 */ + 5681 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5682 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5683 "00001001" // /* MW 13 */ + 5684 "01000110" // /* MW 12 */ + 5685 "10100010" // /* MW 11 */ + 5686 "00001111" // /* MW 10 */ + 5687 "10101010" // /* MW 9 */ + 5688 "01011000" // /* MW 8 */ + 5689 "00000000" // /* MW 7 */ + 5690 "00000000" // /* MW 6 */ + 5691 "00101000" // /* MW 5 */ + 5692 "00000001" // /* MW 4 */ + 5693 "01110100" // /* MW 3 */ + 5694 "10000001" // /* MW 2 */ + 5695 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5696 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5697 "01010001" // /* MW 15 */ + 5698 "00001001" // /* MW 14 */ + 5699 "11101101" // /* MW 13 */ + 5700 "00000011" // /* MW 12 */ + 5701 "11001001" // /* MW 11 */ + 5702 "00000000" // /* MW 10 */ + 5703 "00000000" // /* MW 9 */ + 5704 "00000000" // /* MW 8 */ + 5705 "01011011" // /* MW 7 */ + 5706 "00000001" // /* MW 6 */ + 5707 "00101000" // /* MW 5 */ + 5708 "00100010" // /* MW 4 */ + 5709 "11111000" // /* MW 3 */ + 5710 "00101100" // /* MW 2 */ + 5711 "00000000" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5712 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5713 "01010000" // /* MW 15 */ + 5714 "00011011" // /* MW 14 */ + 5715 "11101101" // /* MW 13 */ + 5716 "00000001" // /* MW 12 */ + 5717 "01001001" // /* MW 11 */ + 5718 "00000001" // /* MW 10 */ + 5719 "00000000" // /* MW 9 */ + 5720 "00000000" // /* MW 8 */ + 5721 "01011011" // /* MW 7 */ + 5722 "00000001" // /* MW 6 */ + 5723 "00101000" // /* MW 5 */ + 5724 "00101000" // /* MW 4 */ + 5725 "01111000" // /* MW 3 */ + 5726 "00001001" // /* MW 2 */ + 5727 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5728 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5729 "00110001" // /* MW 15 */ + 5730 "00000000" // /* MW 14 */ + 5731 "01111101" // /* MW 13 */ + 5732 "10100101" // /* MW 12 */ + 5733 "00000001" // /* MW 11 */ + 5734 "00000000" // /* MW 10 */ + 5735 "00000000" // /* MW 9 */ + 5736 "00000000" // /* MW 8 */ + 5737 "01011011" // /* MW 7 */ + 5738 "00000001" // /* MW 6 */ + 5739 "00101000" // /* MW 5 */ + 5740 "00101000" // /* MW 4 */ + 5741 "01111000" // /* MW 3 */ + 5742 "00000001" // /* MW 2 */ + 5743 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5745 "00110000" // /* MW 15 */ + 5746 "00010010" // /* MW 14 */ + 5747 "01111101" // /* MW 13 */ + 5748 "10100101" // /* MW 12 */ + 5749 "00000001" // /* MW 11 */ + 5750 "00000000" // /* MW 10 */ + 5751 "00000000" // /* MW 9 */ + 5752 "00000000" // /* MW 8 */ + 5753 "01011011" // /* MW 7 */ + 5754 "00000001" // /* MW 6 */ + 5755 "00101000" // /* MW 5 */ + 5756 "00000001" // /* MW 4 */ + 5757 "01110100" // /* MW 3 */ + 5758 "10000001" // /* MW 2 */ + 5759 "00100010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5760 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5761 "01010001" // /* MW 15 */ + 5762 "00001001" // /* MW 14 */ + 5763 "11101101" // /* MW 13 */ + 5764 "00000011" // /* MW 12 */ + 5765 "11001001" // /* MW 11 */ + 5766 "00000000" // /* MW 10 */ + 5767 "00000000" // /* MW 9 */ + 5768 "00000000" // /* MW 8 */ + 5769 "01011011" // /* MW 7 */ + 5770 "00000001" // /* MW 6 */ + 5771 "00101000" // /* MW 5 */ + 5772 "00100010" // /* MW 4 */ + 5773 "11111000" // /* MW 3 */ + 5774 "00101100" // /* MW 2 */ + 5775 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5776 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5777 "00001001" // /* MW 13 */ + 5778 "01101010" // /* MW 12 */ + 5779 "10100011" // /* MW 11 */ + 5780 "00011110" // /* MW 10 */ + 5781 "10010000" // /* MW 9 */ + 5782 "01010100" // /* MW 8 */ + 5783 "00000000" // /* MW 7 */ + 5784 "00000000" // /* MW 6 */ + 5785 "10010110" // /* MW 5 */ + 5786 "10111100" // /* MW 4 */ + 5787 "01111100" // /* MW 3 */ + 5788 "00001001" // /* MW 2 */ + 5789 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5790 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5791 "00101001" // /* MW 13 */ + 5792 "00000110" // /* MW 12 */ + 5793 "10100000" // /* MW 11 */ + 5794 "00000111" // /* MW 10 */ + 5795 "00111000" // /* MW 9 */ + 5796 "01111100" // /* MW 8 */ + 5797 "00000000" // /* MW 7 */ + 5798 "00000000" // /* MW 6 */ + 5799 "10010110" // /* MW 5 */ + 5800 "00011100" // /* MW 4 */ + 5801 "01111110" // /* MW 3 */ + 5802 "00000001" // /* MW 2 */ + 5803 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5804 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5805 "00001001" // /* MW 9 */ + 5806 "01000110" // /* MW 8 */ + 5807 "10100010" // /* MW 7 */ + 5808 "11100100" // /* MW 6 */ + 5809 "00000000" // /* MW 5 */ + 5810 "01010101" // /* MW 4 */ + 5811 "01100001" // /* MW 3 */ + 5812 "10010001" // /* MW 2 */ + 5813 "01100001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5814 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5815 "00101001" // /* MW 9 */ + 5816 "00101010" // /* MW 8 */ + 5817 "10100001" // /* MW 7 */ + 5818 "11000100" // /* MW 6 */ + 5819 "00000111" // /* MW 5 */ + 5820 "10010010" // /* MW 4 */ + 5821 "01100001" // /* MW 3 */ + 5822 "11000001" // /* MW 2 */ + 5823 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5824 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5825 "00001001" // /* MW 9 */ + 5826 "01101010" // /* MW 8 */ + 5827 "10100011" // /* MW 7 */ + 5828 "11000100" // /* MW 6 */ + 5829 "00000011" // /* MW 5 */ + 5830 "10010010" // /* MW 4 */ + 5831 "01100010" // /* MW 3 */ + 5832 "10000001" // /* MW 2 */ + 5833 "11101011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1285 32 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5834 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5835 "00101001" // /* MW 11 */ + 5836 "00000110" // /* MW 10 */ + 5837 "10100000" // /* MW 9 */ + 5838 "11100110" // /* MW 8 */ + 5839 "00000000" // /* MW 7 */ + 5840 "10001111" // /* MW 6 */ + 5841 "00100010" // /* MW 5 */ + 5842 "01010111" // /* MW 4 */ + 5843 "01101111" // /* MW 3 */ + 5844 "10010001" // /* MW 2 */ + 5845 "10110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5846 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5847 "00001001" // /* MW 9 */ + 5848 "01000110" // /* MW 8 */ + 5849 "10100010" // /* MW 7 */ + 5850 "11100100" // /* MW 6 */ + 5851 "00000000" // /* MW 5 */ + 5852 "00000110" // /* MW 4 */ + 5853 "01100010" // /* MW 3 */ + 5854 "10010001" // /* MW 2 */ + 5855 "10010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5857 "00101001" // /* MW 7 */ + 5858 "00101010" // /* MW 6 */ + 5859 "10100001" // /* MW 5 */ + 5860 "11000110" // /* MW 4 */ + 5861 "00000011" // /* MW 3 */ + 5862 "10010010" // /* MW 2 */ + 5863 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5864 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5865 "00001001" // /* MW 7 */ + 5866 "01101010" // /* MW 6 */ + 5867 "10100011" // /* MW 5 */ + 5868 "11000110" // /* MW 4 */ + 5869 "00000111" // /* MW 3 */ + 5870 "10010010" // /* MW 2 */ + 5871 "00000001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 + 5872 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5873 "00000000" // /* MW 3 */ + 5874 "10001011" // /* MW 2 */ + 5875 "00011111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 750 26 first + 5876 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5877 "00101001" // /* MW 7 */ + 5878 "00101010" // /* MW 6 */ + 5879 "10100001" // /* MW 5 */ + 5880 "11100110" // /* MW 4 */ + 5881 "10100000" // /* MW 3 */ + 5882 "00001011" // /* MW 2 */ + 5883 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 749 26 first + 5884 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5885 "00101001" // /* MW 7 */ + 5886 "00000110" // /* MW 6 */ + 5887 "10100000" // /* MW 5 */ + 5888 "11100110" // /* MW 4 */ + 5889 "10100000" // /* MW 3 */ + 5890 "10001000" // /* MW 2 */ + 5891 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 + 5892 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5893 "00001001" // /* MW 9 */ + 5894 "01101010" // /* MW 8 */ + 5895 "10100011" // /* MW 7 */ + 5896 "11100110" // /* MW 6 */ + 5897 "00000000" // /* MW 5 */ + 5898 "00000101" // /* MW 4 */ + 5899 "00100011" // /* MW 3 */ + 5900 "11110111" // /* MW 2 */ + 5901 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 first + 5902 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5903 "00001001" // /* MW 11 */ + 5904 "01000110" // /* MW 10 */ + 5905 "10100010" // /* MW 9 */ + 5906 "11100110" // /* MW 8 */ + 5907 "10100000" // /* MW 7 */ + 5908 "10000010" // /* MW 6 */ + 5909 "00100101" // /* MW 5 */ + 5910 "11010111" // /* MW 4 */ + 5911 "01101110" // /* MW 3 */ + 5912 "10001001" // /* MW 2 */ + 5913 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 + 5914 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5915 "01110000" // /* MW 7 */ + 5916 "10000000" // /* MW 6 */ + 5917 "11000101" // /* MW 5 */ + 5918 "00000011" // /* MW 4 */ + 5919 "01100000" // /* MW 3 */ + 5920 "10001001" // /* MW 2 */ + 5921 "01100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5922 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5923 "01110000" // /* MW 7 */ + 5924 "00000000" // /* MW 6 */ + 5925 "10000001" // /* MW 5 */ + 5926 "00000001" // /* MW 4 */ + 5927 "01100000" // /* MW 3 */ + 5928 "01000001" // /* MW 2 */ + 5929 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 5930 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5931 "01110000" // /* MW 7 */ + 5932 "01010000" // /* MW 6 */ + 5933 "10000111" // /* MW 5 */ + 5934 "00000000" // /* MW 4 */ + 5935 "11000000" // /* MW 3 */ + 5936 "00010010" // /* MW 2 */ + 5937 "10110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 5938 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5939 "01110000" // /* MW 7 */ + 5940 "10010000" // /* MW 6 */ + 5941 "11000111" // /* MW 5 */ + 5942 "00000010" // /* MW 4 */ + 5943 "11000000" // /* MW 3 */ + 5944 "00000010" // /* MW 2 */ + 5945 "10100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5946 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5947 "01110110" // /* MW 9 */ + 5948 "01100000" // /* MW 8 */ + 5949 "11001000" // /* MW 7 */ + 5950 "00000001" // /* MW 6 */ + 5951 "10010000" // /* MW 5 */ + 5952 "00111011" // /* MW 4 */ + 5953 "01100001" // /* MW 3 */ + 5954 "10010001" // /* MW 2 */ + 5955 "00010011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5956 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5957 "01110000" // /* MW 7 */ + 5958 "00000000" // /* MW 6 */ + 5959 "10000011" // /* MW 5 */ + 5960 "00000000" // /* MW 4 */ + 5961 "11000000" // /* MW 3 */ + 5962 "00001010" // /* MW 2 */ + 5963 "01100010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1218 20 first +.src_ref 2 "conv2d_bf16.h" 1287 37 first + 5964 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6096 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6096 delay_slots=5 */ + 5965 "01100000" // /* MW 11 */ + 5966 "00000000" // /* MW 10 */ + 5967 "00000000" // /* MW 9 */ + 5968 "11111010" // /* MW 8 */ + 5969 "00000010" // /* MW 7 */ + 5970 "00100100" // /* MW 6 */ + 5971 "00100000" // /* MW 5 */ + 5972 "01010111" // /* MW 4 */ + 5973 "11000000" // /* MW 3 */ + 5974 "00100010" // /* MW 2 */ + 5975 "01010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 738 8 +.delay_slot + 5976 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5977 "01110000" // /* MW 7 */ + 5978 "01100000" // /* MW 6 */ + 5979 "10101001" // /* MW 5 */ + 5980 "00000000" // /* MW 4 */ + 5981 "11000000" // /* MW 3 */ + 5982 "00011010" // /* MW 2 */ + 5983 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5984 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5985 "01110000" // /* MW 7 */ + 5986 "11000000" // /* MW 6 */ + 5987 "10100111" // /* MW 5 */ + 5988 "00000011" // /* MW 4 */ + 5989 "11000000" // /* MW 3 */ + 5990 "00110010" // /* MW 2 */ + 5991 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 5992 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5993 "01110110" // /* MW 9 */ + 5994 "01100000" // /* MW 8 */ + 5995 "10110101" // /* MW 7 */ + 5996 "00000000" // /* MW 6 */ + 5997 "10010000" // /* MW 5 */ + 5998 "00101011" // /* MW 4 */ + 5999 "11000101" // /* MW 3 */ + 6000 "00111010" // /* MW 2 */ + 6001 "00010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.delay_slot + 6002 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6003 "01110000" // /* MW 7 */ + 6004 "10000000" // /* MW 6 */ + 6005 "11000010" // /* MW 5 */ + 6006 "00000010" // /* MW 4 */ + 6007 "11000000" // /* MW 3 */ + 6008 "00101010" // /* MW 2 */ + 6009 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.delay_slot + 6010 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6011 "01110000" // /* MW 7 */ + 6012 "11000000" // /* MW 6 */ + 6013 "01001101" // /* MW 5 */ + 6014 "00000000" // /* MW 4 */ + 6015 "01100000" // /* MW 3 */ + 6016 "10001001" // /* MW 2 */ + 6017 "11100001" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6018 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6019 "11101100" // /* MW 3 */ + 6020 "11011100" // /* MW 2 */ + 6021 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6022 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6023 "11101100" // /* MW 3 */ + 6024 "10111100" // /* MW 2 */ + 6025 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6026 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6027 "01110000" // /* MW 7 */ + 6028 "01110110" // /* MW 6 */ + 6029 "10101010" // /* MW 5 */ + 6030 "00000010" // /* MW 4 */ + 6031 "01100000" // /* MW 3 */ + 6032 "01011010" // /* MW 2 */ + 6033 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6034 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6035 "01110000" // /* MW 7 */ + 6036 "01110110" // /* MW 6 */ + 6037 "11011010" // /* MW 5 */ + 6038 "00000001" // /* MW 4 */ + 6039 "01100000" // /* MW 3 */ + 6040 "10111010" // /* MW 2 */ + 6041 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6042 "00111010" // VST x10, [p1]; J #6128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6128 delay_slots=5 */ + 6043 "00100001" // /* MW 9 */ + 6044 "00000000" // /* MW 8 */ + 6045 "00000000" // /* MW 7 */ + 6046 "11111110" // /* MW 6 */ + 6047 "00000010" // /* MW 5 */ + 6048 "00000000" // /* MW 4 */ + 6049 "01100000" // /* MW 3 */ + 6050 "11010010" // /* MW 2 */ + 6051 "00100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6052 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6053 "01110000" // /* MW 7 */ + 6054 "01110110" // /* MW 6 */ + 6055 "10100010" // /* MW 5 */ + 6056 "00000010" // /* MW 4 */ + 6057 "01100000" // /* MW 3 */ + 6058 "10111010" // /* MW 2 */ + 6059 "00100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6060 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6061 "11101100" // /* MW 3 */ + 6062 "10001100" // /* MW 2 */ + 6063 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6064 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6065 "01110000" // /* MW 7 */ + 6066 "01110110" // /* MW 6 */ + 6067 "10010110" // /* MW 5 */ + 6068 "00000010" // /* MW 4 */ + 6069 "01100000" // /* MW 3 */ + 6070 "11010010" // /* MW 2 */ + 6071 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6072 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6073 "01110000" // /* MW 7 */ + 6074 "01110110" // /* MW 6 */ + 6075 "10001010" // /* MW 5 */ + 6076 "00000000" // /* MW 4 */ + 6077 "01100000" // /* MW 3 */ + 6078 "10111010" // /* MW 2 */ + 6079 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6080 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6081 "00000000" // /* MW 15 */ + 6082 "00000000" // /* MW 14 */ + 6083 "01111000" // /* MW 13 */ + 6084 "10100101" // /* MW 12 */ + 6085 "00000001" // /* MW 11 */ + 6086 "00000000" // /* MW 10 */ + 6087 "00000000" // /* MW 9 */ + 6088 "00000000" // /* MW 8 */ + 6089 "10010011" // /* MW 7 */ + 6090 "10100010" // /* MW 6 */ + 6091 "00100100" // /* MW 5 */ + 6092 "00000000" // /* MW 4 */ + 6093 "11110000" // /* MW 3 */ + 6094 "00101100" // /* MW 2 */ + 6095 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 +.src_ref 4 "vector.hpp" 1152 43 + 6096 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6097 "10100011" // /* MW 3 */ + 6098 "11100000" // /* MW 2 */ + 6099 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6100 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6101 "11100011" // /* MW 3 */ + 6102 "00010100" // /* MW 2 */ + 6103 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6104 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6105 "00100011" // /* MW 3 */ + 6106 "00000100" // /* MW 2 */ + 6107 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6108 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6109 "01100011" // /* MW 3 */ + 6110 "00010100" // /* MW 2 */ + 6111 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6112 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6113 "00010011" // /* MW 3 */ + 6114 "00000110" // /* MW 2 */ + 6115 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6116 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6117 "11100011" // /* MW 3 */ + 6118 "00010101" // /* MW 2 */ + 6119 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6120 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6121 "01110000" // /* MW 7 */ + 6122 "10100101" // /* MW 6 */ + 6123 "00000001" // /* MW 5 */ + 6124 "00000000" // /* MW 4 */ + 6125 "01100000" // /* MW 3 */ + 6126 "00100100" // /* MW 2 */ + 6127 "10010100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1143 12 first + 6128 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6129 "01011000" // /* MW 11 */ + 6130 "00000000" // /* MW 10 */ + 6131 "01000000" // /* MW 9 */ + 6132 "00000001" // /* MW 8 */ + 6133 "00110101" // /* MW 7 */ + 6134 "00000110" // /* MW 6 */ + 6135 "00100000" // /* MW 5 */ + 6136 "01010111" // /* MW 4 */ + 6137 "01101111" // /* MW 3 */ + 6138 "10010010" // /* MW 2 */ + 6139 "11100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.delay_slot + 6140 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6141 "10000000" // /* MW 3 */ + 6142 "01000100" // /* MW 2 */ + 6143 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.delay_slot + 6144 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6145 "10100000" // /* MW 3 */ + 6146 "01001001" // /* MW 2 */ + 6147 "00011010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 6148 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6149 "00000001" // /* MW 5 */ + 6150 "00011110" // /* MW 4 */ + 6151 "00000101" // /* MW 3 */ + 6152 "01110010" // /* MW 2 */ + 6153 "11101011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.delay_slot + 6154 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6155 "10000000" // /* MW 3 */ + 6156 "01001110" // /* MW 2 */ + 6157 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6159 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6160 "10000100" // J #6832 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6832 delay_slots=5 */ + 6161 "00000000" // /* MW 5 */ + 6162 "00000000" // /* MW 4 */ + 6163 "01011000" // /* MW 3 */ + 6164 "00001101" // /* MW 2 */ + 6165 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6173 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6175 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 1364 80 +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6176 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6177 "01111000" // /* MW 11 */ + 6178 "10010000" // /* MW 10 */ + 6179 "10110011" // /* MW 9 */ + 6180 "00001000" // /* MW 8 */ + 6181 "11100001" // /* MW 7 */ + 6182 "00000100" // /* MW 6 */ + 6183 "10001011" // /* MW 5 */ + 6184 "00001100" // /* MW 4 */ + 6185 "00100010" // /* MW 3 */ + 6186 "01111110" // /* MW 2 */ + 6187 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1369 80 + 6188 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6189 "01111000" // /* MW 11 */ + 6190 "01000000" // /* MW 10 */ + 6191 "01100010" // /* MW 9 */ + 6192 "00000011" // /* MW 8 */ + 6193 "11010100" // /* MW 7 */ + 6194 "00011011" // /* MW 6 */ + 6195 "00001011" // /* MW 5 */ + 6196 "01010110" // /* MW 4 */ + 6197 "10000010" // /* MW 3 */ + 6198 "10010000" // /* MW 2 */ + 6199 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 807 26 +.src_ref 2 "conv2d_bf16.h" 808 26 +.src_ref 2 "conv2d_bf16.h" 809 26 +.src_ref 2 "conv2d_bf16.h" 810 26 +.src_ref 2 "conv2d_bf16.h" 1436 26 +.src_ref 2 "conv2d_bf16.h" 1437 26 +.src_ref 2 "conv2d_bf16.h" 1438 26 +.src_ref 2 "conv2d_bf16.h" 1439 26 + 6200 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6201 "01111000" // /* MW 9 */ + 6202 "11010000" // /* MW 8 */ + 6203 "00000101" // /* MW 7 */ + 6204 "10001001" // /* MW 6 */ + 6205 "00110001" // /* MW 5 */ + 6206 "00011001" // /* MW 4 */ + 6207 "00000000" // /* MW 3 */ + 6208 "10010100" // /* MW 2 */ + 6209 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 802 83 +.src_ref 2 "conv2d_bf16.h" 1428 39 + 6210 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6211 "01111000" // /* MW 11 */ + 6212 "10010000" // /* MW 10 */ + 6213 "11000111" // /* MW 9 */ + 6214 "11001010" // /* MW 8 */ + 6215 "00100000" // /* MW 7 */ + 6216 "00000001" // /* MW 6 */ + 6217 "00001011" // /* MW 5 */ + 6218 "01011100" // /* MW 4 */ + 6219 "10000110" // /* MW 3 */ + 6220 "10011000" // /* MW 2 */ + 6221 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 794 8 + 6222 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6223 "01111000" // /* MW 11 */ + 6224 "01010000" // /* MW 10 */ + 6225 "10000111" // /* MW 9 */ + 6226 "00001000" // /* MW 8 */ + 6227 "10010000" // /* MW 7 */ + 6228 "00000001" // /* MW 6 */ + 6229 "00001011" // /* MW 5 */ + 6230 "00000010" // /* MW 4 */ + 6231 "00100101" // /* MW 3 */ + 6232 "10000011" // /* MW 2 */ + 6233 "11111010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 794 8 +.src_ref 2 "conv2d_bf16.h" 1455 20 + 6234 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6235 "01111000" // /* MW 9 */ + 6236 "01010000" // /* MW 8 */ + 6237 "01000101" // /* MW 7 */ + 6238 "00001011" // /* MW 6 */ + 6239 "10000000" // /* MW 5 */ + 6240 "00000001" // /* MW 4 */ + 6241 "00100000" // /* MW 3 */ + 6242 "11010110" // /* MW 2 */ + 6243 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 12 + 6244 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6320 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6245 "00010000" // /* MW 9 */ + 6246 "01011000" // /* MW 8 */ + 6247 "00110100" // /* MW 7 */ + 6248 "00000101" // /* MW 6 */ + 6249 "00000000" // /* MW 5 */ + 6250 "00000000" // /* MW 4 */ + 6251 "00100000" // /* MW 3 */ + 6252 "00110110" // /* MW 2 */ + 6253 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 first +.src_ref 2 "conv2d_bf16.h" 1873 + 6254 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6255 "01110010" // /* MW 5 */ + 6256 "11011111" // /* MW 4 */ + 6257 "00100110" // /* MW 3 */ + 6258 "10000111" // /* MW 2 */ + 6259 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6260 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "11000110" // /* MW 3 */ + 6262 "00011101" // /* MW 2 */ + 6263 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 80 first + 6264 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00000110" // /* MW 3 */ + 6266 "10001010" // /* MW 2 */ + 6267 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 799 87 first + 6268 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "10000110" // /* MW 3 */ + 6270 "10011110" // /* MW 2 */ + 6271 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 83 first + 6272 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "11010110" // /* MW 3 */ + 6274 "00011110" // /* MW 2 */ + 6275 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 83 first + 6276 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11110110" // /* MW 3 */ + 6278 "11001010" // /* MW 2 */ + 6279 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 66 first + 6280 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6281 "10110110" // /* MW 3 */ + 6282 "00010111" // /* MW 2 */ + 6283 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1443 71 first + 6284 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6285 "10010110" // /* MW 3 */ + 6286 "00000111" // /* MW 2 */ + 6287 "00000011" // /* MW 1 */ + 6288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6289 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1369 89 + 6290 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6291 "00000000" // /* MW 3 */ + 6292 "10011000" // /* MW 2 */ + 6293 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 +.src_ref 2 "conv2d_bf16.h" 1518 37 + 6294 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6295 "00000000" // /* MW 3 */ + 6296 "00000111" // /* MW 2 */ + 6297 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 + 6298 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6299 "00000000" // /* MW 3 */ + 6300 "11011100" // /* MW 2 */ + 6301 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 first + 6302 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6303 "11100000" // /* MW 3 */ + 6304 "00001111" // /* MW 2 */ + 6305 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 89 first + 6306 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6307 "11000000" // /* MW 5 */ + 6308 "00011110" // /* MW 4 */ + 6309 "11101110" // /* MW 3 */ + 6310 "01111111" // /* MW 2 */ + 6311 "11101111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 + 6312 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6313 "01110000" // /* MW 7 */ + 6314 "10010000" // /* MW 6 */ + 6315 "11000111" // /* MW 5 */ + 6316 "00000011" // /* MW 4 */ + 6317 "01100000" // /* MW 3 */ + 6318 "00101011" // /* MW 2 */ + 6319 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1362 31 first +.src_ref 2 "conv2d_bf16.h" 1429 50 +.src_ref 2 "conv2d_bf16.h" 1443 16 first +.loop_nesting 1 + 6320 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6496 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6321 "01100000" // /* MW 13 */ + 6322 "10000001" // /* MW 12 */ + 6323 "01110001" // /* MW 11 */ + 6324 "00000010" // /* MW 10 */ + 6325 "10010110" // /* MW 9 */ + 6326 "10001111" // /* MW 8 */ + 6327 "00000000" // /* MW 7 */ + 6328 "00000000" // /* MW 6 */ + 6329 "00101000" // /* MW 5 */ + 6330 "00101000" // /* MW 4 */ + 6331 "01111010" // /* MW 3 */ + 6332 "10000101" // /* MW 2 */ + 6333 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1364 31 first +.src_ref 2 "conv2d_bf16.h" 1443 16 + 6334 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6544 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6335 "00010000" // /* MW 11 */ + 6336 "11001000" // /* MW 10 */ + 6337 "10111100" // /* MW 9 */ + 6338 "00000101" // /* MW 8 */ + 6339 "00000000" // /* MW 7 */ + 6340 "00000000" // /* MW 6 */ + 6341 "00101000" // /* MW 5 */ + 6342 "00101000" // /* MW 4 */ + 6343 "01111010" // /* MW 3 */ + 6344 "00001101" // /* MW 2 */ + 6345 "11001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1428 39 first +.src_ref 2 "conv2d_bf16.h" 1443 16 first + 6346 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6347 "01001000" // /* MW 11 */ + 6348 "00111111" // /* MW 10 */ + 6349 "10111111" // /* MW 9 */ + 6350 "01101110" // /* MW 8 */ + 6351 "11101001" // /* MW 7 */ + 6352 "00000101" // /* MW 6 */ + 6353 "00101000" // /* MW 5 */ + 6354 "00000101" // /* MW 4 */ + 6355 "01110110" // /* MW 3 */ + 6356 "10000001" // /* MW 2 */ + 6357 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6358 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6359 "01111110" // /* MW 9 */ + 6360 "10010000" // /* MW 8 */ + 6361 "01000111" // /* MW 7 */ + 6362 "00000001" // /* MW 6 */ + 6363 "00010100" // /* MW 5 */ + 6364 "00000001" // /* MW 4 */ + 6365 "01110011" // /* MW 3 */ + 6366 "01011001" // /* MW 2 */ + 6367 "01010101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1367 31 first + 6368 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6369 "00101000" // /* MW 5 */ + 6370 "00000001" // /* MW 4 */ + 6371 "01110110" // /* MW 3 */ + 6372 "10010101" // /* MW 2 */ + 6373 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1369 31 first + 6374 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6375 "10101000" // /* MW 5 */ + 6376 "00100001" // /* MW 4 */ + 6377 "01111010" // /* MW 3 */ + 6378 "00011101" // /* MW 2 */ + 6379 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1372 31 first + 6380 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6381 "00101000" // /* MW 5 */ + 6382 "00101000" // /* MW 4 */ + 6383 "01111010" // /* MW 3 */ + 6384 "10100101" // /* MW 2 */ + 6385 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1374 31 first + 6386 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6387 "00101000" // /* MW 5 */ + 6388 "00101000" // /* MW 4 */ + 6389 "01111010" // /* MW 3 */ + 6390 "00101101" // /* MW 2 */ + 6391 "11001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1377 31 first + 6392 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6393 "10101000" // /* MW 5 */ + 6394 "00000000" // /* MW 4 */ + 6395 "01110110" // /* MW 3 */ + 6396 "10110101" // /* MW 2 */ + 6397 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1379 31 first + 6398 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6399 "00101000" // /* MW 5 */ + 6400 "00000011" // /* MW 4 */ + 6401 "01110110" // /* MW 3 */ + 6402 "00111101" // /* MW 2 */ + 6403 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 first + 6404 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6405 "10101000" // /* MW 5 */ + 6406 "00000011" // /* MW 4 */ + 6407 "01110110" // /* MW 3 */ + 6408 "01000101" // /* MW 2 */ + 6409 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6410 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6411 "11101110" // /* MW 9 */ + 6412 "00101101" // /* MW 8 */ + 6413 "01101001" // /* MW 7 */ + 6414 "00000001" // /* MW 6 */ + 6415 "00010100" // /* MW 5 */ + 6416 "00010010" // /* MW 4 */ + 6417 "01110101" // /* MW 3 */ + 6418 "01001101" // /* MW 2 */ + 6419 "01101000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6420 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6421 "11101110" // /* MW 9 */ + 6422 "00101111" // /* MW 8 */ + 6423 "10101001" // /* MW 7 */ + 6424 "00000010" // /* MW 6 */ + 6425 "00010100" // /* MW 5 */ + 6426 "00010100" // /* MW 4 */ + 6427 "01110101" // /* MW 3 */ + 6428 "10000001" // /* MW 2 */ + 6429 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6430 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6431 "01101001" // /* MW 11 */ + 6432 "00001011" // /* MW 10 */ + 6433 "01001000" // /* MW 9 */ + 6434 "11000010" // /* MW 8 */ + 6435 "11011011" // /* MW 7 */ + 6436 "00010001" // /* MW 6 */ + 6437 "00101010" // /* MW 5 */ + 6438 "00101000" // /* MW 4 */ + 6439 "01111010" // /* MW 3 */ + 6440 "00000001" // /* MW 2 */ + 6441 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6442 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6443 "01101001" // /* MW 9 */ + 6444 "00110101" // /* MW 8 */ + 6445 "01001001" // /* MW 7 */ + 6446 "11000010" // /* MW 6 */ + 6447 "11011111" // /* MW 5 */ + 6448 "00010001" // /* MW 4 */ + 6449 "01110101" // /* MW 3 */ + 6450 "10000001" // /* MW 2 */ + 6451 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6452 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6453 "01101001" // /* MW 3 */ + 6454 "01001001" // /* MW 2 */ + 6455 "01001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6456 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6457 "01101001" // /* MW 3 */ + 6458 "01110101" // /* MW 2 */ + 6459 "01001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.src_ref 2 "conv2d_bf16.h" 1437 26 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6460 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6461 "00111101" // /* MW 9 */ + 6462 "10000100" // /* MW 8 */ + 6463 "10100001" // /* MW 7 */ + 6464 "11000110" // /* MW 6 */ + 6465 "01011111" // /* MW 5 */ + 6466 "10001011" // /* MW 4 */ + 6467 "10101010" // /* MW 3 */ + 6468 "00000000" // /* MW 2 */ + 6469 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1436 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6470 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6471 "00111101" // /* MW 7 */ + 6472 "10000000" // /* MW 6 */ + 6473 "10100000" // /* MW 5 */ + 6474 "00000000" // /* MW 4 */ + 6475 "10010100" // /* MW 3 */ + 6476 "00000001" // /* MW 2 */ + 6477 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1438 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6478 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6479 "00111101" // /* MW 7 */ + 6480 "10001000" // /* MW 6 */ + 6481 "10100010" // /* MW 5 */ + 6482 "00000000" // /* MW 4 */ + 6483 "11010100" // /* MW 3 */ + 6484 "00000001" // /* MW 2 */ + 6485 "00000011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1439 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6486 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6487 "00111101" // /* MW 9 */ + 6488 "10001100" // /* MW 8 */ + 6489 "10100011" // /* MW 7 */ + 6490 "00011101" // /* MW 6 */ + 6491 "00010100" // /* MW 5 */ + 6492 "00010010" // /* MW 4 */ + 6493 "01110101" // /* MW 3 */ + 6494 "00000001" // /* MW 2 */ + 6495 "01010101" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 6496 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6497 "10110111" // /* MW 5 */ + 6498 "00010110" // /* MW 4 */ + 6499 "10000010" // /* MW 3 */ + 6500 "10000010" // /* MW 2 */ + 6501 "10100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6502 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6503 "00001001" // /* MW 9 */ + 6504 "00101010" // /* MW 8 */ + 6505 "10011001" // /* MW 7 */ + 6506 "11000110" // /* MW 6 */ + 6507 "01011111" // /* MW 5 */ + 6508 "00111100" // /* MW 4 */ + 6509 "00101010" // /* MW 3 */ + 6510 "00101000" // /* MW 2 */ + 6511 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6512 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6513 "00001001" // /* MW 9 */ + 6514 "00000100" // /* MW 8 */ + 6515 "10011000" // /* MW 7 */ + 6516 "11000110" // /* MW 6 */ + 6517 "01011011" // /* MW 5 */ + 6518 "10111100" // /* MW 4 */ + 6519 "10101001" // /* MW 3 */ + 6520 "00000000" // /* MW 2 */ + 6521 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6522 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6523 "00001001" // /* MW 7 */ + 6524 "01101000" // /* MW 6 */ + 6525 "10011011" // /* MW 5 */ + 6526 "00000000" // /* MW 4 */ + 6527 "10010100" // /* MW 3 */ + 6528 "00000001" // /* MW 2 */ + 6529 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6530 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6531 "00001001" // /* MW 13 */ + 6532 "01000110" // /* MW 12 */ + 6533 "10011010" // /* MW 11 */ + 6534 "01101100" // /* MW 10 */ + 6535 "00000101" // /* MW 9 */ + 6536 "00000000" // /* MW 8 */ + 6537 "00000000" // /* MW 7 */ + 6538 "00000000" // /* MW 6 */ + 6539 "10101000" // /* MW 5 */ + 6540 "00000011" // /* MW 4 */ + 6541 "01110110" // /* MW 3 */ + 6542 "10000001" // /* MW 2 */ + 6543 "00000010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6544 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6545 "00000000" // /* MW 15 */ + 6546 "00000000" // /* MW 14 */ + 6547 "11101000" // /* MW 13 */ + 6548 "10101111" // /* MW 12 */ + 6549 "01000101" // /* MW 11 */ + 6550 "00000001" // /* MW 10 */ + 6551 "00000000" // /* MW 9 */ + 6552 "00000000" // /* MW 8 */ + 6553 "01011011" // /* MW 7 */ + 6554 "00000001" // /* MW 6 */ + 6555 "00101000" // /* MW 5 */ + 6556 "00100100" // /* MW 4 */ + 6557 "01111010" // /* MW 3 */ + 6558 "00000001" // /* MW 2 */ + 6559 "01010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 801 30 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 6560 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6561 "11100000" // /* MW 11 */ + 6562 "10101101" // /* MW 10 */ + 6563 "10000101" // /* MW 9 */ + 6564 "00000000" // /* MW 8 */ + 6565 "10001011" // /* MW 7 */ + 6566 "10011100" // /* MW 6 */ + 6567 "00100101" // /* MW 5 */ + 6568 "10010111" // /* MW 4 */ + 6569 "11111111" // /* MW 3 */ + 6570 "00001100" // /* MW 2 */ + 6571 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.src_ref 2 "conv2d_bf16.h" 1517 32 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6572 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6573 "00001001" // /* MW 11 */ + 6574 "00101010" // /* MW 10 */ + 6575 "10011001" // /* MW 9 */ + 6576 "11000110" // /* MW 8 */ + 6577 "01011111" // /* MW 7 */ + 6578 "00111100" // /* MW 6 */ + 6579 "00100010" // /* MW 5 */ + 6580 "00010111" // /* MW 4 */ + 6581 "01101111" // /* MW 3 */ + 6582 "10010001" // /* MW 2 */ + 6583 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.src_ref 2 "conv2d_bf16.h" 1518 37 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6584 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6585 "00001001" // /* MW 11 */ + 6586 "00000100" // /* MW 10 */ + 6587 "10011000" // /* MW 9 */ + 6588 "11000110" // /* MW 8 */ + 6589 "01011011" // /* MW 7 */ + 6590 "10111100" // /* MW 6 */ + 6591 "00100001" // /* MW 5 */ + 6592 "10010111" // /* MW 4 */ + 6593 "01101111" // /* MW 3 */ + 6594 "10010001" // /* MW 2 */ + 6595 "01110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6596 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6597 "00001001" // /* MW 7 */ + 6598 "01101000" // /* MW 6 */ + 6599 "10011011" // /* MW 5 */ + 6600 "11100110" // /* MW 4 */ + 6601 "10100000" // /* MW 3 */ + 6602 "10001000" // /* MW 2 */ + 6603 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.src_ref 2 "conv2d_bf16.h" 1428 39 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6604 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6605 "00001001" // /* MW 9 */ + 6606 "01000110" // /* MW 8 */ + 6607 "10011010" // /* MW 7 */ + 6608 "11100110" // /* MW 6 */ + 6609 "10000000" // /* MW 5 */ + 6610 "10011011" // /* MW 4 */ + 6611 "00100000" // /* MW 3 */ + 6612 "10110111" // /* MW 2 */ + 6613 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 30 first + 6614 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6615 "01011011" // /* MW 3 */ + 6616 "00001011" // /* MW 2 */ + 6617 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6618 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6619 "01011111" // /* MW 3 */ + 6620 "10001011" // /* MW 2 */ + 6621 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6622 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6623 "00001001" // /* MW 7 */ + 6624 "00000100" // /* MW 6 */ + 6625 "10011000" // /* MW 5 */ + 6626 "11000110" // /* MW 4 */ + 6627 "01011011" // /* MW 3 */ + 6628 "10111100" // /* MW 2 */ + 6629 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6630 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6631 "00001001" // /* MW 7 */ + 6632 "00101010" // /* MW 6 */ + 6633 "10011001" // /* MW 5 */ + 6634 "11000110" // /* MW 4 */ + 6635 "01011111" // /* MW 3 */ + 6636 "00111100" // /* MW 2 */ + 6637 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6638 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6639 "00001001" // /* MW 3 */ + 6640 "01000110" // /* MW 2 */ + 6641 "10011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first + 6642 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6643 "00001001" // /* MW 3 */ + 6644 "01101000" // /* MW 2 */ + 6645 "10011011" // /* MW 1 */ + 6646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6647 "00000000" // /* MW 1 */ + 6648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6649 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6650 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6651 "00010110" // /* MW 3 */ + 6652 "00010000" // /* MW 2 */ + 6653 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6654 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6655 "10010110" // /* MW 3 */ + 6656 "10010000" // /* MW 2 */ + 6657 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1455 20 first + 6658 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6768 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6768 delay_slots=5 */ + 6659 "01100001" // /* MW 9 */ + 6660 "00000000" // /* MW 8 */ + 6661 "00000000" // /* MW 7 */ + 6662 "01001110" // /* MW 6 */ + 6663 "00000011" // /* MW 5 */ + 6664 "00101010" // /* MW 4 */ + 6665 "11000000" // /* MW 3 */ + 6666 "00011010" // /* MW 2 */ + 6667 "00010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.delay_slot + 6668 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6669 "01010110" // /* MW 3 */ + 6670 "00010000" // /* MW 2 */ + 6671 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6672 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6673 "10010110" // /* MW 3 */ + 6674 "00010001" // /* MW 2 */ + 6675 "00001001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6676 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6677 "11010110" // /* MW 3 */ + 6678 "10010001" // /* MW 2 */ + 6679 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6680 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6681 "00010110" // /* MW 3 */ + 6682 "10010001" // /* MW 2 */ + 6683 "00001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6684 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6685 "01010110" // /* MW 3 */ + 6686 "00010001" // /* MW 2 */ + 6687 "00001100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6688 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6689 "11101100" // /* MW 3 */ + 6690 "11011100" // /* MW 2 */ + 6691 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6692 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6693 "11101100" // /* MW 3 */ + 6694 "10001100" // /* MW 2 */ + 6695 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6696 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6697 "01110000" // /* MW 7 */ + 6698 "01110110" // /* MW 6 */ + 6699 "10101010" // /* MW 5 */ + 6700 "00000010" // /* MW 4 */ + 6701 "01100000" // /* MW 3 */ + 6702 "01011010" // /* MW 2 */ + 6703 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6704 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6705 "01110000" // /* MW 7 */ + 6706 "01110110" // /* MW 6 */ + 6707 "01011010" // /* MW 5 */ + 6708 "00000000" // /* MW 4 */ + 6709 "01100000" // /* MW 3 */ + 6710 "10001010" // /* MW 2 */ + 6711 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6712 "00111010" // VST x10, [p5]; J #6800 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6800 delay_slots=5 */ + 6713 "00100001" // /* MW 9 */ + 6714 "00000000" // /* MW 8 */ + 6715 "00000000" // /* MW 7 */ + 6716 "01010010" // /* MW 6 */ + 6717 "00000011" // /* MW 5 */ + 6718 "00000000" // /* MW 4 */ + 6719 "01100000" // /* MW 3 */ + 6720 "11010010" // /* MW 2 */ + 6721 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6722 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6723 "01110000" // /* MW 7 */ + 6724 "01110110" // /* MW 6 */ + 6725 "10001010" // /* MW 5 */ + 6726 "00000010" // /* MW 4 */ + 6727 "01100000" // /* MW 3 */ + 6728 "10001010" // /* MW 2 */ + 6729 "10100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6730 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6731 "11101100" // /* MW 3 */ + 6732 "10111100" // /* MW 2 */ + 6733 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6734 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6735 "01110000" // /* MW 7 */ + 6736 "01110110" // /* MW 6 */ + 6737 "10010110" // /* MW 5 */ + 6738 "00000010" // /* MW 4 */ + 6739 "01100000" // /* MW 3 */ + 6740 "01010010" // /* MW 2 */ + 6741 "01101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6742 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6743 "01110010" // /* MW 9 */ + 6744 "01110110" // /* MW 8 */ + 6745 "00100010" // /* MW 7 */ + 6746 "00000010" // /* MW 6 */ + 6747 "01010011" // /* MW 5 */ + 6748 "00010100" // /* MW 4 */ + 6749 "11110111" // /* MW 3 */ + 6750 "00101100" // /* MW 2 */ + 6751 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6752 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6753 "00000000" // /* MW 15 */ + 6754 "00000000" // /* MW 14 */ + 6755 "01111000" // /* MW 13 */ + 6756 "10100101" // /* MW 12 */ + 6757 "00000001" // /* MW 11 */ + 6758 "00000000" // /* MW 10 */ + 6759 "00000000" // /* MW 9 */ + 6760 "00000000" // /* MW 8 */ + 6761 "10010011" // /* MW 7 */ + 6762 "11100010" // /* MW 6 */ + 6763 "00100100" // /* MW 5 */ + 6764 "00000000" // /* MW 4 */ + 6765 "11110000" // /* MW 3 */ + 6766 "00101100" // /* MW 2 */ + 6767 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 +.src_ref 4 "vector.hpp" 1152 43 + 6768 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6769 "10100011" // /* MW 3 */ + 6770 "01100000" // /* MW 2 */ + 6771 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6772 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6773 "11100011" // /* MW 3 */ + 6774 "00010100" // /* MW 2 */ + 6775 "00001100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6776 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6777 "00100011" // /* MW 3 */ + 6778 "00000100" // /* MW 2 */ + 6779 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6780 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6781 "01100011" // /* MW 3 */ + 6782 "00010100" // /* MW 2 */ + 6783 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6784 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6785 "10100011" // /* MW 3 */ + 6786 "01100001" // /* MW 2 */ + 6787 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6788 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6789 "11100011" // /* MW 3 */ + 6790 "00010101" // /* MW 2 */ + 6791 "00001111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6792 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6793 "01110000" // /* MW 7 */ + 6794 "10100101" // /* MW 6 */ + 6795 "00000001" // /* MW 5 */ + 6796 "00000000" // /* MW 4 */ + 6797 "01100000" // /* MW 3 */ + 6798 "00100100" // /* MW 2 */ + 6799 "10011100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1337 12 first + 6800 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6801 "01000000" // /* MW 5 */ + 6802 "11110101" // /* MW 4 */ + 6803 "01101110" // /* MW 3 */ + 6804 "11000010" // /* MW 2 */ + 6805 "01100010" // /* MW 1 */ +.delay_slot + 6806 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6807 "10010000" // /* MW 3 */ + 6808 "10001011" // /* MW 2 */ + 6809 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6811 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6813 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6815 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6816 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6817 "00000000" // /* MW 15 */ + 6818 "00000000" // /* MW 14 */ + 6819 "01111000" // /* MW 13 */ + 6820 "10100101" // /* MW 12 */ + 6821 "00000001" // /* MW 11 */ + 6822 "00000000" // /* MW 10 */ + 6823 "00000000" // /* MW 9 */ + 6824 "00000000" // /* MW 8 */ + 6825 "01011011" // /* MW 7 */ + 6826 "00000001" // /* MW 6 */ + 6827 "00100000" // /* MW 5 */ + 6828 "00000000" // /* MW 4 */ + 6829 "11110000" // /* MW 3 */ + 6830 "00101100" // /* MW 2 */ + 6831 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6832 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "11110001" // /* MW 3 */ + 6834 "11101101" // /* MW 2 */ + 6835 "00000111" // /* MW 1 */ + 6836 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "10010001" // /* MW 3 */ + 6838 "11110001" // /* MW 2 */ + 6839 "00000111" // /* MW 1 */ + 6840 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6841 "00110001" // /* MW 3 */ + 6842 "11110101" // /* MW 2 */ + 6843 "00000111" // /* MW 1 */ + 6844 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6845 "00011001" // /* MW 3 */ + 6846 "11101011" // /* MW 2 */ + 6847 "00000111" // /* MW 1 */ + 6848 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "10011001" // /* MW 3 */ + 6850 "11111011" // /* MW 2 */ + 6851 "00000111" // /* MW 1 */ + 6852 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "11010001" // /* MW 3 */ + 6854 "11111101" // /* MW 2 */ + 6855 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 first + 6856 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 6857 "00000000" // /* MW 3 */ + 6858 "00101000" // /* MW 2 */ + 6859 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 +.delay_slot + 6860 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6861 "00000001" // /* MW 5 */ + 6862 "00000000" // /* MW 4 */ + 6863 "00000000" // /* MW 3 */ + 6864 "11110000" // /* MW 2 */ + 6865 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6871 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + 6873 "00000000" // /* MW 1 */ +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 74 first +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 81 4 +.function_start + 6880 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6881 "00010000" // /* MW 9 */ + 6882 "00100000" // /* MW 8 */ + 6883 "00110010" // /* MW 7 */ + 6884 "11110010" // /* MW 6 */ + 6885 "00000001" // /* MW 5 */ + 6886 "00000000" // /* MW 4 */ + 6887 "00000000" // /* MW 3 */ + 6888 "00100000" // /* MW 2 */ + 6889 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 first +.src_ref 7 "superkernels.cpp" 81 4 + 6890 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6891 "01111000" // /* MW 9 */ + 6892 "11010000" // /* MW 8 */ + 6893 "01001011" // /* MW 7 */ + 6894 "00001000" // /* MW 6 */ + 6895 "00010000" // /* MW 5 */ + 6896 "00000000" // /* MW 4 */ + 6897 "11010000" // /* MW 3 */ + 6898 "11000010" // /* MW 2 */ + 6899 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 74 + 6900 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6901 "00000001" // /* MW 5 */ + 6902 "00000000" // /* MW 4 */ + 6903 "00000000" // /* MW 3 */ + 6904 "00001000" // /* MW 2 */ + 6905 "00000000" // /* MW 1 */ + 6906 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6907 "01010101" // /* MW 3 */ + 6908 "11110000" // /* MW 2 */ + 6909 "00001111" // /* MW 1 */ + 6910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6911 "00000000" // /* MW 1 */ + 6912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6913 "00000000" // /* MW 1 */ + 6914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6915 "00000000" // /* MW 1 */ + 6916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6917 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 79 16 + 6918 "10000100" // JNZ r16, #7088 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7088 delay_slots=5 */ + 6919 "00000001" // /* MW 5 */ + 6920 "01000000" // /* MW 4 */ + 6921 "11011000" // /* MW 3 */ + 6922 "00001101" // /* MW 2 */ + 6923 "10000000" // /* MW 1 */ +.delay_slot + 6924 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6925 "10011101" // /* MW 3 */ + 6926 "11111011" // /* MW 2 */ + 6927 "00001111" // /* MW 1 */ +.delay_slot + 6928 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "00011101" // /* MW 3 */ + 6930 "11111111" // /* MW 2 */ + 6931 "00001111" // /* MW 1 */ +.delay_slot + 6932 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "10011101" // /* MW 3 */ + 6934 "11101101" // /* MW 2 */ + 6935 "00001111" // /* MW 1 */ +.delay_slot + 6936 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6937 "00111101" // /* MW 3 */ + 6938 "11110100" // /* MW 2 */ + 6939 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6940 "01000100" // MOVXM r15, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6941 "10000000" // /* MW 5 */ + 6942 "10101011" // /* MW 4 */ + 6943 "11000111" // /* MW 3 */ + 6944 "00000111" // /* MW 2 */ + 6945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 6946 "00111010" // MOVS p6, p1; MOVXM p7, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6947 "00010001" // /* MW 9 */ + 6948 "00110100" // /* MW 8 */ + 6949 "10110010" // /* MW 7 */ + 6950 "11110011" // /* MW 6 */ + 6951 "00000001" // /* MW 5 */ + 6952 "00000000" // /* MW 4 */ + 6953 "01100000" // /* MW 3 */ + 6954 "10010001" // /* MW 2 */ + 6955 "11010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 6956 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6957 "00010000" // /* MW 11 */ + 6958 "00110010" // /* MW 10 */ + 6959 "10110010" // /* MW 9 */ + 6960 "11110011" // /* MW 8 */ + 6961 "00000001" // /* MW 7 */ + 6962 "00000000" // /* MW 6 */ + 6963 "00001011" // /* MW 5 */ + 6964 "10001111" // /* MW 4 */ + 6965 "11100001" // /* MW 3 */ + 6966 "11000000" // /* MW 2 */ + 6967 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6969 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 6972 "00000100" // JL #2752 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2752 delay_slots=5 */ + 6973 "00000001" // /* MW 5 */ + 6974 "00000000" // /* MW 4 */ + 6975 "01100000" // /* MW 3 */ + 6976 "00000101" // /* MW 2 */ + 6977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 6978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6979 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6980 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6981 "00110001" // /* MW 3 */ + 6982 "00100000" // /* MW 2 */ + 6983 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 6984 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6985 "00000101" // /* MW 3 */ + 6986 "00100000" // /* MW 2 */ + 6987 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 6988 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6989 "01110000" // /* MW 7 */ + 6990 "01100000" // /* MW 6 */ + 6991 "10110000" // /* MW 5 */ + 6992 "00000011" // /* MW 4 */ + 6993 "00110000" // /* MW 3 */ + 6994 "11000010" // /* MW 2 */ + 6995 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 6996 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6997 "01110000" // /* MW 11 */ + 6998 "01100000" // /* MW 10 */ + 6999 "00110010" // /* MW 9 */ + 7000 "00000000" // /* MW 8 */ + 7001 "01011011" // /* MW 7 */ + 7002 "00000001" // /* MW 6 */ + 7003 "00100000" // /* MW 5 */ + 7004 "00000000" // /* MW 4 */ + 7005 "11110000" // /* MW 3 */ + 7006 "00101100" // /* MW 2 */ + 7007 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.return_address + 7008 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7009 "10000101" // /* MW 3 */ + 7010 "01100111" // /* MW 2 */ + 7011 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 +.src_ref 7 "superkernels.cpp" 87 35 first + 7012 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7013 "00010000" // /* MW 9 */ + 7014 "00100010" // /* MW 8 */ + 7015 "10110010" // /* MW 7 */ + 7016 "11110000" // /* MW 6 */ + 7017 "00000001" // /* MW 5 */ + 7018 "00000000" // /* MW 4 */ + 7019 "01010000" // /* MW 3 */ + 7020 "11000001" // /* MW 2 */ + 7021 "01001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 37 first +.src_ref 7 "superkernels.cpp" 89 13 + 7022 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7023 "00010000" // /* MW 9 */ + 7024 "00110000" // /* MW 8 */ + 7025 "00110010" // /* MW 7 */ + 7026 "11110000" // /* MW 6 */ + 7027 "00000001" // /* MW 5 */ + 7028 "00000000" // /* MW 4 */ + 7029 "01010000" // /* MW 3 */ + 7030 "11001111" // /* MW 2 */ + 7031 "01000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 73 + 7032 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7033 "00111010" // /* MW 3 */ + 7034 "00000110" // /* MW 2 */ + 7035 "00000010" // /* MW 1 */ + 7036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7037 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 110 + 7038 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7039 "01011010" // /* MW 3 */ + 7040 "00010110" // /* MW 2 */ + 7041 "00000010" // /* MW 1 */ + 7042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7043 "00000000" // /* MW 1 */ + 7044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7045 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 first +.src_ref 7 "superkernels.cpp" 113 2 + 7046 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7047 "01110000" // /* MW 7 */ + 7048 "01100000" // /* MW 6 */ + 7049 "10110110" // /* MW 5 */ + 7050 "00000000" // /* MW 4 */ + 7051 "00110000" // /* MW 3 */ + 7052 "11000010" // /* MW 2 */ + 7053 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 57 first + 7054 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7055 "00001111" // /* MW 3 */ + 7056 "11100001" // /* MW 2 */ + 7057 "00010100" // /* MW 1 */ + 7058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7059 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 94 + 7060 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7061 "00001111" // /* MW 3 */ + 7062 "01100001" // /* MW 2 */ + 7063 "00010100" // /* MW 1 */ + 7064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7065 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 28 first + 7066 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7067 "00001111" // /* MW 3 */ + 7068 "10100001" // /* MW 2 */ + 7069 "00010100" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 13 +.src_ref 7 "superkernels.cpp" 113 2 + 7072 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7073 "00000000" // /* MW 15 */ + 7074 "00000000" // /* MW 14 */ + 7075 "01111000" // /* MW 13 */ + 7076 "01100000" // /* MW 12 */ + 7077 "00110111" // /* MW 11 */ + 7078 "00000000" // /* MW 10 */ + 7079 "00000000" // /* MW 9 */ + 7080 "10000000" // /* MW 8 */ + 7081 "00010001" // /* MW 7 */ + 7082 "00000110" // /* MW 6 */ + 7083 "00100000" // /* MW 5 */ + 7084 "00000000" // /* MW 4 */ + 7085 "11110000" // /* MW 3 */ + 7086 "00101100" // /* MW 2 */ + 7087 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 106 12 +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 7 "superkernels.cpp" 117 6 +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 7088 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7089 "00010000" // /* MW 9 */ + 7090 "00100100" // /* MW 8 */ + 7091 "00110010" // /* MW 7 */ + 7092 "11110011" // /* MW 6 */ + 7093 "00000001" // /* MW 5 */ + 7094 "00000000" // /* MW 4 */ + 7095 "00100000" // /* MW 3 */ + 7096 "10111110" // /* MW 2 */ + 7097 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.src_ref 7 "superkernels.cpp" 108 13 + 7098 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7099 "00010000" // /* MW 9 */ + 7100 "00100110" // /* MW 8 */ + 7101 "00110010" // /* MW 7 */ + 7102 "11110001" // /* MW 6 */ + 7103 "00000001" // /* MW 5 */ + 7104 "00000000" // /* MW 4 */ + 7105 "11010000" // /* MW 3 */ + 7106 "11000010" // /* MW 2 */ + 7107 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 +.src_ref 7 "superkernels.cpp" 108 13 first +.src_ref 7 "superkernels.cpp" 139 6 +.src_ref 7 "superkernels.cpp" 140 14 + 7108 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7109 "00010000" // /* MW 9 */ + 7110 "00100000" // /* MW 8 */ + 7111 "10110010" // /* MW 7 */ + 7112 "11110011" // /* MW 6 */ + 7113 "00000001" // /* MW 5 */ + 7114 "00000000" // /* MW 4 */ + 7115 "11010000" // /* MW 3 */ + 7116 "11000110" // /* MW 2 */ + 7117 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first + 7118 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7119 "01010110" // /* MW 3 */ + 7120 "00000110" // /* MW 2 */ + 7121 "00000111" // /* MW 1 */ + 7122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7123 "00000000" // /* MW 1 */ + 7124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7125 "00000000" // /* MW 1 */ + 7126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7127 "00000000" // /* MW 1 */ + 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7129 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 110 6 first +.src_ref 7 "superkernels.cpp" 110 17 first + 7130 "10000100" // JNZ r16, #7216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7216 delay_slots=5 */ + 7131 "00000001" // /* MW 5 */ + 7132 "01000000" // /* MW 4 */ + 7133 "00011000" // /* MW 3 */ + 7134 "00001110" // /* MW 2 */ + 7135 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 108 13 first +.delay_slot + 7136 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7137 "00000111" // /* MW 3 */ + 7138 "01100010" // /* MW 2 */ + 7139 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.src_ref 7 "superkernels.cpp" 108 13 +.delay_slot + 7140 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7141 "00001110" // /* MW 5 */ + 7142 "01000100" // /* MW 4 */ + 7143 "00111001" // /* MW 3 */ + 7144 "11000110" // /* MW 2 */ + 7145 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.delay_slot + 7146 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7147 "00000111" // /* MW 3 */ + 7148 "00100110" // /* MW 2 */ + 7149 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 +.delay_slot + 7150 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7151 "01110001" // /* MW 3 */ + 7152 "00000110" // /* MW 2 */ + 7153 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.delay_slot + 7154 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7155 "00110001" // /* MW 3 */ + 7156 "00000110" // /* MW 2 */ + 7157 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 7158 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7159 "10000110" // /* MW 3 */ + 7160 "01100111" // /* MW 2 */ + 7161 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 7162 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7163 "01110110" // /* MW 3 */ + 7164 "11111111" // /* MW 2 */ + 7165 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 7166 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7167 "00010110" // /* MW 3 */ + 7168 "11111110" // /* MW 2 */ + 7169 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 7170 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7171 "00110110" // /* MW 3 */ + 7172 "11111110" // /* MW 2 */ + 7173 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7175 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7176 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7177 "00010110" // /* MW 3 */ + 7178 "01000110" // /* MW 2 */ + 7179 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7181 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7183 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7185 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7187 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7188 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7189 "00000010" // /* MW 3 */ + 7190 "01100001" // /* MW 2 */ + 7191 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7192 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00010001" // /* MW 3 */ + 7194 "00000110" // /* MW 2 */ + 7195 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 7196 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7197 "11111101" // /* MW 3 */ + 7198 "11100010" // /* MW 2 */ + 7199 "00010111" // /* MW 1 */ + 7200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7201 "00000000" // /* MW 1 */ + 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7203 "00000000" // /* MW 1 */ + 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7205 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 7206 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7207 "00011000" // /* MW 9 */ + 7208 "00010011" // /* MW 8 */ + 7209 "00000100" // /* MW 7 */ + 7210 "00000000" // /* MW 6 */ + 7211 "01011011" // /* MW 5 */ + 7212 "00000001" // /* MW 4 */ + 7213 "11110000" // /* MW 3 */ + 7214 "00101100" // /* MW 2 */ + 7215 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.src_ref 7 "superkernels.cpp" 113 2 first +.no_stack_arguments + 7216 "00000100" // JL #4464 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4464 delay_slots=5 */ + 7217 "00000001" // /* MW 5 */ + 7218 "00000000" // /* MW 4 */ + 7219 "10111000" // /* MW 3 */ + 7220 "00001000" // /* MW 2 */ + 7221 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7222 "01000100" // MOVXM p3, #509376 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7223 "10000000" // /* MW 5 */ + 7224 "11001011" // /* MW 4 */ + 7225 "11000110" // /* MW 3 */ + 7226 "00000111" // /* MW 2 */ + 7227 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7229 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7233 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7234 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7235 "00011100" // /* MW 13 */ + 7236 "00000000" // /* MW 12 */ + 7237 "00000000" // /* MW 11 */ + 7238 "00000111" // /* MW 10 */ + 7239 "00111101" // /* MW 9 */ + 7240 "01010011" // /* MW 8 */ + 7241 "00000000" // /* MW 7 */ + 7242 "00000000" // /* MW 6 */ + 7243 "10110110" // /* MW 5 */ + 7244 "00000010" // /* MW 4 */ + 7245 "11110000" // /* MW 3 */ + 7246 "00101100" // /* MW 2 */ + 7247 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 first +.src_ref 7 "superkernels.cpp" 117 20 +.return_address + 7248 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7249 "00010000" // /* MW 9 */ + 7250 "00100010" // /* MW 8 */ + 7251 "10110010" // /* MW 7 */ + 7252 "11110000" // /* MW 6 */ + 7253 "00000001" // /* MW 5 */ + 7254 "00000000" // /* MW 4 */ + 7255 "11010000" // /* MW 3 */ + 7256 "11000010" // /* MW 2 */ + 7257 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 20 + 7258 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7259 "00110110" // /* MW 3 */ + 7260 "00000110" // /* MW 2 */ + 7261 "00000001" // /* MW 1 */ + 7262 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7263 "00010001" // /* MW 3 */ + 7264 "11110000" // /* MW 2 */ + 7265 "00000111" // /* MW 1 */ + 7266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7267 "00000000" // /* MW 1 */ + 7268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7269 "00000000" // /* MW 1 */ + 7270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7271 "00000000" // /* MW 1 */ + 7272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7273 "00000000" // /* MW 1 */ + 7274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7275 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 17 + 7276 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7277 "00001000" // /* MW 3 */ + 7278 "01100001" // /* MW 2 */ + 7279 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 + 7280 "10000100" // JNZ r16, #7360 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7360 delay_slots=5 */ + 7281 "00000001" // /* MW 5 */ + 7282 "01000000" // /* MW 4 */ + 7283 "01100000" // /* MW 3 */ + 7284 "00001110" // /* MW 2 */ + 7285 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 7 "superkernels.cpp" 140 14 +.delay_slot + 7286 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7287 "00000001" // /* MW 3 */ + 7288 "00110000" // /* MW 2 */ + 7289 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7290 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7291 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7292 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7293 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7297 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 7298 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7299 "00010100" // /* MW 5 */ + 7300 "11001111" // /* MW 4 */ + 7301 "10100010" // /* MW 3 */ + 7302 "00000000" // /* MW 2 */ + 7303 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 7304 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7305 "00110110" // /* MW 3 */ + 7306 "00000110" // /* MW 2 */ + 7307 "00000001" // /* MW 1 */ + 7308 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7309 "00000000" // /* MW 1 */ + 7310 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7311 "00000000" // /* MW 1 */ + 7312 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7313 "00000000" // /* MW 1 */ + 7314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7315 "00000000" // /* MW 1 */ + 7316 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7317 "00000000" // /* MW 1 */ + 7318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7319 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 7320 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7321 "00001000" // /* MW 3 */ + 7322 "01010001" // /* MW 2 */ + 7323 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 first +.src_ref 1 "io_buffer_main.h" 327 40 first + 7324 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7325 "00100011" // /* MW 5 */ + 7326 "00001110" // /* MW 4 */ + 7327 "11011100" // /* MW 3 */ + 7328 "11000110" // /* MW 2 */ + 7329 "00111100" // /* MW 1 */ + 7330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7331 "00000000" // /* MW 1 */ + 7332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7333 "00000000" // /* MW 1 */ + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ + 7336 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7337 "00000000" // /* MW 1 */ + 7338 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7339 "00000000" // /* MW 1 */ + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 7342 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7343 "00010001" // /* MW 3 */ + 7344 "00100001" // /* MW 2 */ + 7345 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 7346 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7347 "00011100" // /* MW 13 */ + 7348 "00000000" // /* MW 12 */ + 7349 "00000000" // /* MW 11 */ + 7350 "01010111" // /* MW 10 */ + 7351 "00011010" // /* MW 9 */ + 7352 "01000000" // /* MW 8 */ + 7353 "00000000" // /* MW 7 */ + 7354 "00000000" // /* MW 6 */ + 7355 "00100011" // /* MW 5 */ + 7356 "11001100" // /* MW 4 */ + 7357 "11110011" // /* MW 3 */ + 7358 "00101100" // /* MW 2 */ + 7359 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 +.src_ref 7 "superkernels.cpp" 139 6 first +.src_ref 7 "superkernels.cpp" 139 19 + 7360 "10111010" // LDA r16, [p7]; MOVXM p6, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7361 "00010000" // /* MW 9 */ + 7362 "00110000" // /* MW 8 */ + 7363 "00110010" // /* MW 7 */ + 7364 "11110011" // /* MW 6 */ + 7365 "00000001" // /* MW 5 */ + 7366 "00000000" // /* MW 4 */ + 7367 "11010000" // /* MW 3 */ + 7368 "11000010" // /* MW 2 */ + 7369 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 19 + 7370 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7371 "00110110" // /* MW 3 */ + 7372 "00000110" // /* MW 2 */ + 7373 "00000110" // /* MW 1 */ + 7374 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7375 "10011001" // /* MW 3 */ + 7376 "11111000" // /* MW 2 */ + 7377 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 + 7378 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7379 "00111001" // /* MW 3 */ + 7380 "11110100" // /* MW 2 */ + 7381 "00000111" // /* MW 1 */ + 7382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7383 "00000000" // /* MW 1 */ + 7384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7385 "00000000" // /* MW 1 */ + 7386 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7387 "00000000" // /* MW 1 */ + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 16 + 7390 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7391 "00001000" // /* MW 3 */ + 7392 "01100001" // /* MW 2 */ + 7393 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 6 + 7394 "10000100" // JNZ r16, #7424 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7424 delay_slots=5 */ + 7395 "00000001" // /* MW 5 */ + 7396 "01000000" // /* MW 4 */ + 7397 "10000000" // /* MW 3 */ + 7398 "00001110" // /* MW 2 */ + 7399 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7401 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7403 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7405 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7407 "00000000" // /* MW 1 */ +.delay_slot + 7408 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7409 "00100000" // /* MW 3 */ + 7410 "11010000" // /* MW 2 */ + 7411 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 140 14 first + 7412 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7413 "11000001" // /* MW 11 */ + 7414 "10001000" // /* MW 10 */ + 7415 "10000011" // /* MW 9 */ + 7416 "00000011" // /* MW 8 */ + 7417 "00000000" // /* MW 7 */ + 7418 "00000000" // /* MW 6 */ + 7419 "00100000" // /* MW 5 */ + 7420 "00000000" // /* MW 4 */ + 7421 "11110000" // /* MW 3 */ + 7422 "00101100" // /* MW 2 */ + 7423 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7424 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7425 "00011001" // /* MW 3 */ + 7426 "11111111" // /* MW 2 */ + 7427 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 first + 7428 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7429 "00000000" // /* MW 3 */ + 7430 "00101000" // /* MW 2 */ + 7431 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 +.delay_slot + 7432 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7433 "00000001" // /* MW 5 */ + 7434 "00000000" // /* MW 4 */ + 7435 "00000000" // /* MW 3 */ + 7436 "11111000" // /* MW 2 */ + 7437 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7443 "00000000" // /* MW 1 */ +.delay_slot + 7444 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7445 "10001011" // /* MW 3 */ + 7446 "10000100" // /* MW 2 */ +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 7447 "00001111" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 7456 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7457 "00000001" // /* MW 5 */ + 7458 "00100001" // /* MW 4 */ + 7459 "00000000" // /* MW 3 */ + 7460 "00000000" // /* MW 2 */ + 7461 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7462 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7463 "11000000" // /* MW 3 */ + 7464 "01010000" // /* MW 2 */ + 7465 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7466 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7467 "10010000" // /* MW 3 */ + 7468 "01100000" // /* MW 2 */ + 7469 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 7470 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7471 "00010001" // /* MW 3 */ + 7472 "00000100" // /* MW 2 */ + 7473 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 7474 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7475 "00010001" // /* MW 3 */ + 7476 "00010100" // /* MW 2 */ + 7477 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7479 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 7488 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7489 "00101110" // /* MW 3 */ + 7490 "00011100" // /* MW 2 */ + 7491 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 7492 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7493 "00000001" // /* MW 5 */ + 7494 "00000000" // /* MW 4 */ + 7495 "00000000" // /* MW 3 */ + 7496 "00001000" // /* MW 2 */ + 7497 "00000000" // /* MW 1 */ + 7498 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7499 "00111101" // /* MW 3 */ + 7500 "11111000" // /* MW 2 */ + 7501 "00001111" // /* MW 1 */ + 7502 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7503 "11110101" // /* MW 3 */ + 7504 "11111101" // /* MW 2 */ + 7505 "00001111" // /* MW 1 */ + 7506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7507 "00000000" // /* MW 1 */ + 7508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7509 "00000000" // /* MW 1 */ + 7510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7511 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 7512 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7513 "00101001" // /* MW 3 */ + 7514 "00011100" // /* MW 2 */ + 7515 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 7516 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7517 "00101110" // /* MW 3 */ + 7518 "00011100" // /* MW 2 */ + 7519 "00000001" // /* MW 1 */ + 7520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7521 "00000000" // /* MW 1 */ + 7522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7523 "00000000" // /* MW 1 */ + 7524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7525 "00000000" // /* MW 1 */ + 7526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7527 "00000000" // /* MW 1 */ + 7528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7529 "00000000" // /* MW 1 */ + 7530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7531 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 7532 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7533 "00101001" // /* MW 3 */ + 7534 "00011100" // /* MW 2 */ + 7535 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 7536 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7537 "00101110" // /* MW 3 */ + 7538 "00000100" // /* MW 2 */ + 7539 "00000001" // /* MW 1 */ + 7540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7541 "00000000" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ + 7550 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7551 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 7552 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7553 "00101001" // /* MW 3 */ + 7554 "00011100" // /* MW 2 */ + 7555 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 7556 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7557 "00101110" // /* MW 3 */ + 7558 "00010100" // /* MW 2 */ + 7559 "00000001" // /* MW 1 */ + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 7564 "00000100" // JL #7456 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7456 delay_slots=5 */ + 7565 "00000001" // /* MW 5 */ + 7566 "00000000" // /* MW 4 */ + 7567 "10010000" // /* MW 3 */ + 7568 "00001110" // /* MW 2 */ + 7569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7575 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 7576 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7577 "00101001" // /* MW 3 */ + 7578 "11011100" // /* MW 2 */ + 7579 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.delay_slot + 7580 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7581 "11000000" // /* MW 3 */ + 7582 "11010000" // /* MW 2 */ + 7583 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 7584 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7585 "00001000" // /* MW 9 */ + 7586 "11000100" // /* MW 8 */ + 7587 "00110011" // /* MW 7 */ + 7588 "01101000" // /* MW 6 */ + 7589 "00000000" // /* MW 5 */ + 7590 "00000001" // /* MW 4 */ + 7591 "00100000" // /* MW 3 */ + 7592 "00000111" // /* MW 2 */ + 7593 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 7594 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7595 "01011000" // /* MW 9 */ + 7596 "11111101" // /* MW 8 */ + 7597 "00000111" // /* MW 7 */ + 7598 "00001000" // /* MW 6 */ + 7599 "10000000" // /* MW 5 */ + 7600 "00000001" // /* MW 4 */ + 7601 "10000000" // /* MW 3 */ + 7602 "11100010" // /* MW 2 */ + 7603 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 7604 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7605 "00000001" // /* MW 9 */ + 7606 "10100000" // /* MW 8 */ + 7607 "00000111" // /* MW 7 */ + 7608 "10000000" // /* MW 6 */ + 7609 "00010001" // /* MW 5 */ + 7610 "00001010" // /* MW 4 */ + 7611 "00100000" // /* MW 3 */ + 7612 "10111110" // /* MW 2 */ + 7613 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 7614 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7615 "01001010" // /* MW 3 */ + 7616 "00000110" // /* MW 2 */ + 7617 "00000000" // /* MW 1 */ + 7618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7619 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7621 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7622 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "00010111" // /* MW 3 */ + 7624 "00000010" // /* MW 2 */ + 7625 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7626 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7627 "00000000" // /* MW 3 */ + 7628 "00101000" // /* MW 2 */ + 7629 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7630 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "00000101" // /* MW 3 */ + 7632 "00100010" // /* MW 2 */ + 7633 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7634 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7635 "00000001" // /* MW 5 */ + 7636 "00000000" // /* MW 4 */ + 7637 "00000000" // /* MW 3 */ + 7638 "11111000" // /* MW 2 */ + 7639 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7640 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7641 "00100111" // /* MW 3 */ + 7642 "01110111" // /* MW 2 */ + 7643 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7644 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7645 "10000010" // /* MW 3 */ + 7646 "00100001" // /* MW 2 */ + 7647 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7649 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 81 first +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 +.function_start + 7664 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7665 "01111000" // /* MW 9 */ + 7666 "01100000" // /* MW 8 */ + 7667 "00001000" // /* MW 7 */ + 7668 "11001000" // /* MW 6 */ + 7669 "00010000" // /* MW 5 */ + 7670 "00000000" // /* MW 4 */ + 7671 "10000000" // /* MW 3 */ + 7672 "10000000" // /* MW 2 */ + 7673 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 + 7674 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7675 "00001100" // /* MW 5 */ + 7676 "11000000" // /* MW 4 */ + 7677 "10100000" // /* MW 3 */ + 7678 "00000000" // /* MW 2 */ + 7679 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first + 7680 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7681 "01001010" // /* MW 3 */ + 7682 "00001000" // /* MW 2 */ + 7683 "00000000" // /* MW 1 */ + 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7685 "00000000" // /* MW 1 */ + 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7687 "00000000" // /* MW 1 */ + 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7689 "00000000" // /* MW 1 */ + 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7691 "00000000" // /* MW 1 */ + 7692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7693 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first + 7694 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7695 "00000000" // /* MW 3 */ + 7696 "00101000" // /* MW 2 */ + 7697 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.delay_slot + 7698 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7699 "00001000" // /* MW 3 */ + 7700 "10000000" // /* MW 2 */ + 7701 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first +.delay_slot + 7702 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7703 "00011101" // /* MW 3 */ + 7704 "00000000" // /* MW 2 */ + 7705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.src_ref 3 "elementwise_binary_broadcasting.h" 83 23 +.delay_slot + 7706 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7707 "11100000" // /* MW 5 */ + 7708 "00001101" // /* MW 4 */ + 7709 "00110001" // /* MW 3 */ + 7710 "10000010" // /* MW 2 */ + 7711 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.delay_slot + 7712 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00011101" // /* MW 3 */ + 7714 "11000100" // /* MW 2 */ + 7715 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 23 +.delay_slot + 7716 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "01010001" // /* MW 3 */ + 7718 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7719 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_broadcasting.h" 76 +.src_ref 3 "elementwise_binary_broadcasting.h" 76 first +.function_start + 7728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7729 "00000001" // /* MW 5 */ + 7730 "00000000" // /* MW 4 */ + 7731 "00000000" // /* MW 3 */ + 7732 "00001000" // /* MW 2 */ + 7733 "00000000" // /* MW 1 */ + 7734 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7735 "00111101" // /* MW 3 */ + 7736 "11111100" // /* MW 2 */ + 7737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first +.no_stack_arguments + 7738 "00000100" // JL #7488 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7488 delay_slots=5 */ + 7739 "00000001" // /* MW 5 */ + 7740 "00000000" // /* MW 4 */ + 7741 "10100000" // /* MW 3 */ + 7742 "00001110" // /* MW 2 */ + 7743 "00000000" // /* MW 1 */ +.delay_slot + 7744 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7745 "10011101" // /* MW 3 */ + 7746 "11111011" // /* MW 2 */ + 7747 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot + 7748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7749 "11000000" // /* MW 3 */ + 7750 "01100000" // /* MW 2 */ + 7751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7757 "01100111" // /* MW 3 */ + 7758 "00000001" // /* MW 2 */ + 7759 "00000000" // /* MW 1 */ +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7760 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7761 "10011001" // /* MW 3 */ + 7762 "11111011" // /* MW 2 */ + 7763 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7764 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7765 "00111001" // /* MW 3 */ + 7766 "11111100" // /* MW 2 */ + 7767 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7768 "10000100" // J #7664 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7664 delay_slots=5 */ + 7769 "00000000" // /* MW 5 */ + 7770 "00000000" // /* MW 4 */ + 7771 "11111000" // /* MW 3 */ + 7772 "00001110" // /* MW 2 */ + 7773 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7774 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7775 "11000000" // /* MW 3 */ + 7776 "01101110" // /* MW 2 */ + 7777 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first +.delay_slot + 7778 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7779 "00000001" // /* MW 5 */ + 7780 "00000000" // /* MW 4 */ + 7781 "00000000" // /* MW 3 */ + 7782 "11111000" // /* MW 2 */ + 7783 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7787 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7789 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 89 first +.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 +.function_start + 7792 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7793 "01010001" // /* MW 5 */ + 7794 "00000000" // /* MW 4 */ + 7795 "11010000" // /* MW 3 */ + 7796 "10000010" // /* MW 2 */ + 7797 "01100111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7798 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7799 "10000001" // /* MW 5 */ + 7800 "11001101" // /* MW 4 */ + 7801 "01011000" // /* MW 3 */ + 7802 "00000101" // /* MW 2 */ + 7803 "01100001" // /* MW 1 */ + 7804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7805 "00000000" // /* MW 1 */ + 7806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7807 "00000000" // /* MW 1 */ + 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7809 "00000000" // /* MW 1 */ + 7810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7811 "00000000" // /* MW 1 */ + 7812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7813 "00000000" // /* MW 1 */ + 7814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7815 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 12 +.src_ref 3 "elementwise_binary_broadcasting.h" 102 35 + 7816 "10000100" // JNZ r1, #7872 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7872 delay_slots=5 */ + 7817 "00000001" // /* MW 5 */ + 7818 "01000000" // /* MW 4 */ + 7819 "01100000" // /* MW 3 */ + 7820 "00001111" // /* MW 2 */ + 7821 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 +.delay_slot + 7822 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7823 "11101001" // /* MW 3 */ + 7824 "11000100" // /* MW 2 */ + 7825 "00010111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first +.delay_slot + 7826 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7827 "00101101" // /* MW 3 */ + 7828 "00000000" // /* MW 2 */ + 7829 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7835 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first + 7836 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7837 "00110010" // /* MW 3 */ + 7838 "00000100" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ + 7840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7841 "00000000" // /* MW 1 */ + 7842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7843 "00000000" // /* MW 1 */ + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ + 7846 "10000100" // J #7904 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=7904 delay_slots=5 */ + 7847 "00000000" // /* MW 5 */ + 7848 "00000000" // /* MW 4 */ + 7849 "01110000" // /* MW 3 */ + 7850 "00001111" // /* MW 2 */ + 7851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 7856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7857 "01110010" // /* MW 3 */ + 7858 "00000101" // /* MW 2 */ + 7859 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7861 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 7862 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7863 "00000000" // /* MW 9 */ + 7864 "00000000" // /* MW 8 */ + 7865 "00000000" // /* MW 7 */ + 7866 "00000000" // /* MW 6 */ + 7867 "00010011" // /* MW 5 */ + 7868 "00000100" // /* MW 4 */ + 7869 "11110000" // /* MW 3 */ + 7870 "00101100" // /* MW 2 */ + 7871 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 +.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first + 7872 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00110010" // /* MW 3 */ + 7874 "00000100" // /* MW 2 */ + 7875 "00000001" // /* MW 1 */ + 7876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7877 "00000000" // /* MW 1 */ + 7878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7879 "00000000" // /* MW 1 */ + 7880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7881 "00000000" // /* MW 1 */ + 7882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7883 "00000000" // /* MW 1 */ + 7884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7885 "00000000" // /* MW 1 */ + 7886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7887 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 7888 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7889 "01110010" // /* MW 3 */ + 7890 "00000101" // /* MW 2 */ + 7891 "00011000" // /* MW 1 */ + 7892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7893 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 7894 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7895 "00000000" // /* MW 9 */ + 7896 "00000000" // /* MW 8 */ + 7897 "00000000" // /* MW 7 */ + 7898 "00000000" // /* MW 6 */ + 7899 "00010011" // /* MW 5 */ + 7900 "00000100" // /* MW 4 */ + 7901 "11110001" // /* MW 3 */ + 7902 "00101100" // /* MW 2 */ + 7903 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first + 7904 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7905 "01001000" // /* MW 9 */ + 7906 "00111111" // /* MW 8 */ + 7907 "10111000" // /* MW 7 */ + 7908 "10001010" // /* MW 6 */ + 7909 "00000111" // /* MW 5 */ + 7910 "00000000" // /* MW 4 */ + 7911 "11010000" // /* MW 3 */ + 7912 "10000000" // /* MW 2 */ + 7913 "10001010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7914 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7915 "00010000" // /* MW 9 */ + 7916 "10101000" // /* MW 8 */ + 7917 "01111111" // /* MW 7 */ + 7918 "00000100" // /* MW 6 */ + 7919 "00000000" // /* MW 5 */ + 7920 "00000000" // /* MW 4 */ + 7921 "11010000" // /* MW 3 */ + 7922 "10010000" // /* MW 2 */ + 7923 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7924 "01000100" // MOVXM le, #8048 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7925 "11100000" // /* MW 5 */ + 7926 "11111110" // /* MW 4 */ + 7927 "00010110" // /* MW 3 */ + 7928 "00000000" // /* MW 2 */ + 7929 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7930 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7931 "11010000" // /* MW 5 */ + 7932 "11001000" // /* MW 4 */ + 7933 "11001000" // /* MW 3 */ + 7934 "00000111" // /* MW 2 */ + 7935 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7936 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7937 "00100010" // /* MW 3 */ + 7938 "00000100" // /* MW 2 */ + 7939 "00000100" // /* MW 1 */ + 7940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7941 "00000000" // /* MW 1 */ + 7942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first + 7944 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7945 "10101011" // /* MW 3 */ + 7946 "00001000" // /* MW 2 */ + 7947 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 189 20 first + 7948 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7949 "00101011" // /* MW 3 */ + 7950 "00101001" // /* MW 2 */ + 7951 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first + 7952 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7953 "00101011" // /* MW 3 */ + 7954 "00001000" // /* MW 2 */ + 7955 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7956 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7957 "00101011" // /* MW 3 */ + 7958 "00101010" // /* MW 2 */ + 7959 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7960 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7961 "00000000" // /* MW 5 */ + 7962 "11110101" // /* MW 4 */ + 7963 "01110000" // /* MW 3 */ + 7964 "00010101" // /* MW 2 */ + 7965 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7966 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7967 "00111101" // /* MW 7 */ + 7968 "00101000" // /* MW 6 */ + 7969 "00000011" // /* MW 5 */ + 7970 "00000100" // /* MW 4 */ + 7971 "01110000" // /* MW 3 */ + 7972 "00100101" // /* MW 2 */ + 7973 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7974 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7975 "00101011" // /* MW 3 */ + 7976 "00001000" // /* MW 2 */ + 7977 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7978 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7979 "00111101" // /* MW 7 */ + 7980 "00010000" // /* MW 6 */ + 7981 "00000100" // /* MW 5 */ + 7982 "00000100" // /* MW 4 */ + 7983 "01110000" // /* MW 3 */ + 7984 "01000101" // /* MW 2 */ + 7985 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7986 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "10101011" // /* MW 3 */ + 7988 "00001000" // /* MW 2 */ + 7989 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7990 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7991 "00111101" // /* MW 7 */ + 7992 "00101000" // /* MW 6 */ + 7993 "00000011" // /* MW 5 */ + 7994 "00000100" // /* MW 4 */ + 7995 "01110000" // /* MW 3 */ + 7996 "00100101" // /* MW 2 */ + 7997 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7998 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7999 "00101011" // /* MW 3 */ + 8000 "00001000" // /* MW 2 */ + 8001 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8002 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8003 "00111101" // /* MW 13 */ + 8004 "00010000" // /* MW 12 */ + 8005 "00000100" // /* MW 11 */ + 8006 "01010111" // /* MW 10 */ + 8007 "00011010" // /* MW 9 */ + 8008 "01000000" // /* MW 8 */ + 8009 "00000000" // /* MW 7 */ + 8010 "00000000" // /* MW 6 */ + 8011 "01000110" // /* MW 5 */ + 8012 "00111011" // /* MW 4 */ + 8013 "01110100" // /* MW 3 */ + 8014 "01000101" // /* MW 2 */ + 8015 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8016 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "10101011" // /* MW 3 */ + 8018 "00001000" // /* MW 2 */ + 8019 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8020 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8021 "00111101" // /* MW 11 */ + 8022 "00101000" // /* MW 10 */ + 8023 "00000011" // /* MW 9 */ + 8024 "10001110" // /* MW 8 */ + 8025 "00010001" // /* MW 7 */ + 8026 "00001111" // /* MW 6 */ + 8027 "00100001" // /* MW 5 */ + 8028 "00000000" // /* MW 4 */ + 8029 "01110000" // /* MW 3 */ + 8030 "00100101" // /* MW 2 */ + 8031 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8032 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8033 "00000000" // /* MW 15 */ + 8034 "00000000" // /* MW 14 */ + 8035 "01111000" // /* MW 13 */ + 8036 "10100101" // /* MW 12 */ + 8037 "00000001" // /* MW 11 */ + 8038 "00000000" // /* MW 10 */ + 8039 "00000000" // /* MW 9 */ + 8040 "00000000" // /* MW 8 */ + 8041 "01011011" // /* MW 7 */ + 8042 "00000001" // /* MW 6 */ + 8043 "00100000" // /* MW 5 */ + 8044 "00000000" // /* MW 4 */ + 8045 "01110000" // /* MW 3 */ + 8046 "00000101" // /* MW 2 */ + 8047 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8048 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8049 "10000001" // /* MW 15 */ + 8050 "00100000" // /* MW 14 */ + 8051 "01111000" // /* MW 13 */ + 8052 "10100101" // /* MW 12 */ + 8053 "00000001" // /* MW 11 */ + 8054 "00000000" // /* MW 10 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "10100011" // /* MW 7 */ + 8058 "00011101" // /* MW 6 */ + 8059 "00100010" // /* MW 5 */ + 8060 "00000000" // /* MW 4 */ + 8061 "01110000" // /* MW 3 */ + 8062 "01000101" // /* MW 2 */ + 8063 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8065 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8066 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8067 "00111101" // /* MW 7 */ + 8068 "00101000" // /* MW 6 */ + 8069 "00000011" // /* MW 5 */ + 8070 "00000010" // /* MW 4 */ + 8071 "01100000" // /* MW 3 */ + 8072 "11000100" // /* MW 2 */ + 8073 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8075 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8076 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8077 "00111101" // /* MW 7 */ + 8078 "00010000" // /* MW 6 */ + 8079 "00000100" // /* MW 5 */ + 8080 "00000010" // /* MW 4 */ + 8081 "01100000" // /* MW 3 */ + 8082 "10110100" // /* MW 2 */ + 8083 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8085 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8086 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8087 "00000000" // /* MW 5 */ + 8088 "01010000" // /* MW 4 */ + 8089 "01100000" // /* MW 3 */ + 8090 "11000100" // /* MW 2 */ + 8091 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8093 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 8094 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8095 "10100011" // /* MW 3 */ + 8096 "00011101" // /* MW 2 */ + 8097 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8099 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 8100 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8101 "00100011" // /* MW 3 */ + 8102 "00011110" // /* MW 2 */ + 8103 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8105 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first +.function_start + 8112 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8113 "00000001" // /* MW 5 */ + 8114 "00000000" // /* MW 4 */ + 8115 "00000000" // /* MW 3 */ + 8116 "00010000" // /* MW 2 */ + 8117 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 + 8118 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8119 "01110000" // /* MW 7 */ + 8120 "01100000" // /* MW 6 */ + 8121 "00001010" // /* MW 5 */ + 8122 "00000010" // /* MW 4 */ + 8123 "10110000" // /* MW 3 */ + 8124 "10000111" // /* MW 2 */ + 8125 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 + 8126 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8127 "00000000" // /* MW 7 */ + 8128 "00000011" // /* MW 6 */ + 8129 "10110100" // /* MW 5 */ + 8130 "00000001" // /* MW 4 */ + 8131 "01100000" // /* MW 3 */ + 8132 "10010001" // /* MW 2 */ + 8133 "01010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 + 8134 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8135 "10000001" // /* MW 5 */ + 8136 "00100001" // /* MW 4 */ + 8137 "01011000" // /* MW 3 */ + 8138 "11101101" // /* MW 2 */ + 8139 "01100101" // /* MW 1 */ + 8140 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8141 "11000001" // /* MW 5 */ + 8142 "10101011" // /* MW 4 */ + 8143 "01011000" // /* MW 3 */ + 8144 "11001010" // /* MW 2 */ + 8145 "01110011" // /* MW 1 */ + 8146 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8147 "11000000" // /* MW 3 */ + 8148 "01101000" // /* MW 2 */ + 8149 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 8150 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8151 "00101011" // /* MW 3 */ + 8152 "00000111" // /* MW 2 */ + 8153 "00001000" // /* MW 1 */ + 8154 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8155 "01010111" // /* MW 3 */ + 8156 "00000110" // /* MW 2 */ + 8157 "00000000" // /* MW 1 */ + 8158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8159 "00000000" // /* MW 1 */ + 8160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8161 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first +.no_stack_arguments + 8162 "00000100" // JL #7792 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7792 delay_slots=5 */ + 8163 "00000001" // /* MW 5 */ + 8164 "00000000" // /* MW 4 */ + 8165 "00111000" // /* MW 3 */ + 8166 "00001111" // /* MW 2 */ + 8167 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.delay_slot + 8168 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8169 "11000000" // /* MW 3 */ + 8170 "01010000" // /* MW 2 */ + 8171 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first +.delay_slot + 8174 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8175 "00010010" // /* MW 3 */ + 8176 "00100101" // /* MW 2 */ + 8177 "00010100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8178 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8179 "01000001" // /* MW 5 */ + 8180 "11010010" // /* MW 4 */ + 8181 "01000010" // /* MW 3 */ + 8182 "00100000" // /* MW 2 */ + 8183 "10001100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8184 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8185 "01110000" // /* MW 7 */ + 8186 "00010000" // /* MW 6 */ + 8187 "00110100" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "01100000" // /* MW 3 */ + 8190 "00101011" // /* MW 2 */ + 8191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.return_address + 8192 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8193 "00111001" // /* MW 3 */ + 8194 "11111100" // /* MW 2 */ + 8195 "00000111" // /* MW 1 */ + 8196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8197 "00000000" // /* MW 1 */ + 8198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8199 "00000000" // /* MW 1 */ + 8200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8201 "00000000" // /* MW 1 */ + 8202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8203 "00000000" // /* MW 1 */ + 8204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8205 "00000000" // /* MW 1 */ + 8206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8207 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first + 8208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8209 "00000000" // /* MW 3 */ + 8210 "00101000" // /* MW 2 */ + 8211 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.delay_slot + 8212 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8213 "00000001" // /* MW 5 */ + 8214 "00000000" // /* MW 4 */ + 8215 "00000000" // /* MW 3 */ + 8216 "11110000" // /* MW 2 */ + 8217 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8221 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8223 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8225 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 147 first +.src_ref 7 "superkernels.cpp" 152 6 +.function_start + 8240 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8241 "10000000" // /* MW 5 */ + 8242 "11001000" // /* MW 4 */ + 8243 "11000110" // /* MW 3 */ + 8244 "00000111" // /* MW 2 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 first + 8246 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8247 "11000001" // /* MW 5 */ + 8248 "10110101" // /* MW 4 */ + 8249 "11011000" // /* MW 3 */ + 8250 "11000010" // /* MW 2 */ + 8251 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 147 + 8252 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8253 "00000001" // /* MW 5 */ + 8254 "00000000" // /* MW 4 */ + 8255 "00000000" // /* MW 3 */ + 8256 "00001000" // /* MW 2 */ + 8257 "00000000" // /* MW 1 */ + 8258 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8259 "01110000" // /* MW 7 */ + 8260 "11010000" // /* MW 6 */ + 8261 "00001011" // /* MW 5 */ + 8262 "00000000" // /* MW 4 */ + 8263 "10110000" // /* MW 3 */ + 8264 "01100011" // /* MW 2 */ + 8265 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 + 8266 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8267 "00010001" // /* MW 9 */ + 8268 "00101000" // /* MW 8 */ + 8269 "00110010" // /* MW 7 */ + 8270 "11110011" // /* MW 6 */ + 8271 "00000001" // /* MW 5 */ + 8272 "00000000" // /* MW 4 */ + 8273 "10110000" // /* MW 3 */ + 8274 "10000010" // /* MW 2 */ + 8275 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 8276 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8277 "11000000" // /* MW 3 */ + 8278 "11010100" // /* MW 2 */ + 8279 "00011011" // /* MW 1 */ + 8280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8281 "00000000" // /* MW 1 */ + 8282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8283 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 +.src_ref 7 "superkernels.cpp" 152 16 + 8284 "10000100" // JNZ r16, #8448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8448 delay_slots=5 */ + 8285 "00000001" // /* MW 5 */ + 8286 "01000000" // /* MW 4 */ + 8287 "10000000" // /* MW 3 */ + 8288 "00010000" // /* MW 2 */ + 8289 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 22 first +.delay_slot + 8290 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8291 "10010000" // /* MW 3 */ + 8292 "01100010" // /* MW 2 */ + 8293 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 30 +.delay_slot + 8294 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8295 "11111011" // /* MW 3 */ + 8296 "01100011" // /* MW 2 */ + 8297 "00010100" // /* MW 1 */ +.delay_slot + 8298 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8299 "00111101" // /* MW 3 */ + 8300 "11110100" // /* MW 2 */ + 8301 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 8302 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8303 "01110000" // /* MW 7 */ + 8304 "01100000" // /* MW 6 */ + 8305 "00110000" // /* MW 5 */ + 8306 "00000011" // /* MW 4 */ + 8307 "00110000" // /* MW 3 */ + 8308 "11000110" // /* MW 2 */ + 8309 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 +.src_ref 7 "superkernels.cpp" 166 2 +.delay_slot + 8310 "01000100" // MOVXM p0, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8311 "10000000" // /* MW 5 */ + 8312 "11001001" // /* MW 4 */ + 8313 "11000000" // /* MW 3 */ + 8314 "00000111" // /* MW 2 */ + 8315 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8316 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8317 "11010000" // /* MW 5 */ + 8318 "11001000" // /* MW 4 */ + 8319 "11000100" // /* MW 3 */ + 8320 "00000111" // /* MW 2 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8322 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8323 "00010000" // /* MW 9 */ + 8324 "00110010" // /* MW 8 */ + 8325 "00110010" // /* MW 7 */ + 8326 "11110001" // /* MW 6 */ + 8327 "00000001" // /* MW 5 */ + 8328 "00000000" // /* MW 4 */ + 8329 "11100000" // /* MW 3 */ + 8330 "11000000" // /* MW 2 */ + 8331 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8333 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8334 "00000100" // JL #7728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7728 delay_slots=5 */ + 8335 "00000001" // /* MW 5 */ + 8336 "00000000" // /* MW 4 */ + 8337 "00011000" // /* MW 3 */ + 8338 "00001111" // /* MW 2 */ + 8339 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8341 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8343 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8344 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8345 "00110001" // /* MW 3 */ + 8346 "00100000" // /* MW 2 */ + 8347 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8348 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8349 "00000101" // /* MW 3 */ + 8350 "00100000" // /* MW 2 */ + 8351 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8352 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8353 "00000000" // /* MW 15 */ + 8354 "00000000" // /* MW 14 */ + 8355 "01111000" // /* MW 13 */ + 8356 "10100101" // /* MW 12 */ + 8357 "00000001" // /* MW 11 */ + 8358 "00000000" // /* MW 10 */ + 8359 "00000000" // /* MW 9 */ + 8360 "10000000" // /* MW 8 */ + 8361 "00010001" // /* MW 7 */ + 8362 "00000110" // /* MW 6 */ + 8363 "00100010" // /* MW 5 */ + 8364 "00000000" // /* MW 4 */ + 8365 "11110000" // /* MW 3 */ + 8366 "00101100" // /* MW 2 */ + 8367 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 +.return_address + 8368 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8369 "10100000" // /* MW 5 */ + 8370 "11001000" // /* MW 4 */ + 8371 "11000100" // /* MW 3 */ + 8372 "00000111" // /* MW 2 */ + 8373 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 first +.src_ref 7 "superkernels.cpp" 159 65 + 8374 "10111010" // LDA r16, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8375 "00010000" // /* MW 9 */ + 8376 "01100000" // /* MW 8 */ + 8377 "00110010" // /* MW 7 */ + 8378 "11110001" // /* MW 6 */ + 8379 "00000001" // /* MW 5 */ + 8380 "00000000" // /* MW 4 */ + 8381 "11010000" // /* MW 3 */ + 8382 "11000010" // /* MW 2 */ + 8383 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 +.src_ref 7 "superkernels.cpp" 159 65 +.src_ref 7 "superkernels.cpp" 166 2 + 8384 "10111010" // LDA r17, [p2]; MOVXM p2, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8385 "00010000" // /* MW 9 */ + 8386 "01100000" // /* MW 8 */ + 8387 "00110010" // /* MW 7 */ + 8388 "11110001" // /* MW 6 */ + 8389 "00000001" // /* MW 5 */ + 8390 "00000000" // /* MW 4 */ + 8391 "11010000" // /* MW 3 */ + 8392 "11000110" // /* MW 2 */ + 8393 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 first +.src_ref 7 "superkernels.cpp" 159 16 +.src_ref 7 "superkernels.cpp" 164 47 + 8394 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8395 "00010000" // /* MW 9 */ + 8396 "00101010" // /* MW 8 */ + 8397 "10110010" // /* MW 7 */ + 8398 "11110000" // /* MW 6 */ + 8399 "00000001" // /* MW 5 */ + 8400 "00000000" // /* MW 4 */ + 8401 "01010000" // /* MW 3 */ + 8402 "11001011" // /* MW 2 */ + 8403 "01001010" // /* MW 1 */ + 8404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8405 "00000000" // /* MW 1 */ + 8406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8407 "00000000" // /* MW 1 */ + 8408 "10000100" // J #8464 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8464 delay_slots=5 */ + 8409 "00000000" // /* MW 5 */ + 8410 "00000000" // /* MW 4 */ + 8411 "10001000" // /* MW 3 */ + 8412 "00010000" // /* MW 2 */ + 8413 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 +.delay_slot + 8414 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8415 "11000000" // /* MW 5 */ + 8416 "11001000" // /* MW 4 */ + 8417 "11000000" // /* MW 3 */ + 8418 "00000111" // /* MW 2 */ + 8419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8421 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 27 first +.delay_slot + 8422 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8423 "00001111" // /* MW 3 */ + 8424 "01100001" // /* MW 2 */ + 8425 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 first +.delay_slot + 8426 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8427 "10100011" // /* MW 5 */ + 8428 "00001100" // /* MW 4 */ + 8429 "11110000" // /* MW 3 */ + 8430 "00101100" // /* MW 2 */ + 8431 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 16 first +.delay_slot + 8432 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8433 "00000000" // /* MW 15 */ + 8434 "00000000" // /* MW 14 */ + 8435 "01111000" // /* MW 13 */ + 8436 "10100101" // /* MW 12 */ + 8437 "00000001" // /* MW 11 */ + 8438 "00000000" // /* MW 10 */ + 8439 "00000000" // /* MW 9 */ + 8440 "10000000" // /* MW 8 */ + 8441 "00010001" // /* MW 7 */ + 8442 "00000110" // /* MW 6 */ + 8443 "00100001" // /* MW 5 */ + 8444 "00000000" // /* MW 4 */ + 8445 "11110000" // /* MW 3 */ + 8446 "00101100" // /* MW 2 */ + 8447 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 164 47 +.src_ref 7 "superkernels.cpp" 166 2 + 8448 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8449 "00000000" // /* MW 15 */ + 8450 "00000000" // /* MW 14 */ + 8451 "00010000" // /* MW 13 */ + 8452 "00101010" // /* MW 12 */ + 8453 "10110010" // /* MW 11 */ + 8454 "11110000" // /* MW 10 */ + 8455 "00000001" // /* MW 9 */ + 8456 "00000000" // /* MW 8 */ + 8457 "10001011" // /* MW 7 */ + 8458 "10000000" // /* MW 6 */ + 8459 "00100010" // /* MW 5 */ + 8460 "00000000" // /* MW 4 */ + 8461 "11110000" // /* MW 3 */ + 8462 "00101100" // /* MW 2 */ + 8463 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 8464 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8465 "00000000" // /* MW 7 */ + 8466 "11000011" // /* MW 6 */ + 8467 "10110011" // /* MW 5 */ + 8468 "00000011" // /* MW 4 */ + 8469 "01100000" // /* MW 3 */ + 8470 "10010001" // /* MW 2 */ + 8471 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 8472 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8473 "00010000" // /* MW 9 */ + 8474 "00100000" // /* MW 8 */ + 8475 "00110010" // /* MW 7 */ + 8476 "11110000" // /* MW 6 */ + 8477 "00000001" // /* MW 5 */ + 8478 "00000000" // /* MW 4 */ + 8479 "11010000" // /* MW 3 */ + 8480 "11101110" // /* MW 2 */ + 8481 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8482 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8483 "00010110" // /* MW 3 */ + 8484 "11111110" // /* MW 2 */ + 8485 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8486 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8487 "00110110" // /* MW 3 */ + 8488 "11111110" // /* MW 2 */ + 8489 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 8490 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8491 "01010110" // /* MW 3 */ + 8492 "01000110" // /* MW 2 */ + 8493 "00000111" // /* MW 1 */ + 8494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8495 "00000000" // /* MW 1 */ + 8496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8497 "00000000" // /* MW 1 */ + 8498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8499 "00000000" // /* MW 1 */ + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ + 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8503 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 8504 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "00000010" // /* MW 3 */ + 8506 "01100001" // /* MW 2 */ + 8507 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 8508 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8509 "00010001" // /* MW 3 */ + 8510 "00000110" // /* MW 2 */ + 8511 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 8512 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8513 "11111101" // /* MW 3 */ + 8514 "11100000" // /* MW 2 */ + 8515 "00010111" // /* MW 1 */ + 8516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8517 "00000000" // /* MW 1 */ + 8518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8519 "00000000" // /* MW 1 */ + 8520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8521 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8522 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8523 "00001000" // /* MW 3 */ + 8524 "10010011" // /* MW 2 */ + 8525 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 + 8526 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8527 "10000001" // /* MW 5 */ + 8528 "10101101" // /* MW 4 */ + 8529 "10100111" // /* MW 3 */ + 8530 "00000000" // /* MW 2 */ + 8531 "00000100" // /* MW 1 */ + 8532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8533 "00000000" // /* MW 1 */ + 8534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8535 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first + 8536 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8537 "00110110" // /* MW 3 */ + 8538 "00000110" // /* MW 2 */ + 8539 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 8540 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8541 "10000001" // /* MW 5 */ + 8542 "11011101" // /* MW 4 */ + 8543 "11011100" // /* MW 3 */ + 8544 "11001010" // /* MW 2 */ + 8545 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 47 first + 8546 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8547 "01110110" // /* MW 3 */ + 8548 "00000110" // /* MW 2 */ + 8549 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8550 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8551 "10011110" // /* MW 3 */ + 8552 "01011100" // /* MW 2 */ + 8553 "00000111" // /* MW 1 */ + 8554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 166 2 first +.no_stack_arguments + 8556 "00000100" // JL #8112 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8112 delay_slots=5 */ + 8557 "00000001" // /* MW 5 */ + 8558 "00000000" // /* MW 4 */ + 8559 "11011000" // /* MW 3 */ + 8560 "00001111" // /* MW 2 */ + 8561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8563 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first +.delay_slot + 8564 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8565 "00000111" // /* MW 3 */ + 8566 "01100010" // /* MW 2 */ + 8567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.delay_slot + 8568 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8569 "00110001" // /* MW 3 */ + 8570 "00000110" // /* MW 2 */ + 8571 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 first +.delay_slot + 8572 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8573 "00001101" // /* MW 3 */ + 8574 "11100001" // /* MW 2 */ + 8575 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 +.delay_slot + 8576 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8577 "00000000" // /* MW 15 */ + 8578 "00000000" // /* MW 14 */ + 8579 "10101000" // /* MW 13 */ + 8580 "10100000" // /* MW 12 */ + 8581 "00110100" // /* MW 11 */ + 8582 "00000000" // /* MW 10 */ + 8583 "00000000" // /* MW 9 */ + 8584 "00000000" // /* MW 8 */ + 8585 "01011011" // /* MW 7 */ + 8586 "00000001" // /* MW 6 */ + 8587 "00100000" // /* MW 5 */ + 8588 "00000000" // /* MW 4 */ + 8589 "11110000" // /* MW 3 */ + 8590 "00101100" // /* MW 2 */ + 8591 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 +.src_ref 7 "superkernels.cpp" 169 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 8592 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8593 "00010000" // /* MW 9 */ + 8594 "00100000" // /* MW 8 */ + 8595 "00110010" // /* MW 7 */ + 8596 "11110011" // /* MW 6 */ + 8597 "00000001" // /* MW 5 */ + 8598 "00000000" // /* MW 4 */ + 8599 "11010000" // /* MW 3 */ + 8600 "11000110" // /* MW 2 */ + 8601 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 8602 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8603 "00000101" // /* MW 3 */ + 8604 "00100000" // /* MW 2 */ + 8605 "00010000" // /* MW 1 */ + 8606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8607 "00000000" // /* MW 1 */ + 8608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8609 "00000000" // /* MW 1 */ + 8610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8611 "00000000" // /* MW 1 */ + 8612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8613 "00000000" // /* MW 1 */ + 8614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8615 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 8616 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8617 "00001000" // /* MW 3 */ + 8618 "01010001" // /* MW 2 */ + 8619 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 8620 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8621 "00010000" // /* MW 9 */ + 8622 "00110000" // /* MW 8 */ + 8623 "00110010" // /* MW 7 */ + 8624 "11110001" // /* MW 6 */ + 8625 "00000001" // /* MW 5 */ + 8626 "00000000" // /* MW 4 */ + 8627 "11010000" // /* MW 3 */ + 8628 "11001110" // /* MW 2 */ + 8629 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 first + 8630 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8631 "00110110" // /* MW 3 */ + 8632 "00000110" // /* MW 2 */ + 8633 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 + 8634 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8635 "01010110" // /* MW 3 */ + 8636 "00000110" // /* MW 2 */ + 8637 "00000010" // /* MW 1 */ + 8638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8639 "00000000" // /* MW 1 */ + 8640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8641 "00000000" // /* MW 1 */ + 8642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8643 "00000000" // /* MW 1 */ + 8644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8645 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 8646 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00110001" // /* MW 3 */ + 8648 "00100001" // /* MW 2 */ + 8649 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 8650 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8651 "00010001" // /* MW 3 */ + 8652 "11100110" // /* MW 2 */ + 8653 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 16 first + 8654 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8655 "00101000" // /* MW 3 */ + 8656 "01100001" // /* MW 2 */ + 8657 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 + 8658 "10000100" // JNZ r16, #8688 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8688 delay_slots=5 */ + 8659 "00000001" // /* MW 5 */ + 8660 "01000000" // /* MW 4 */ + 8661 "11111000" // /* MW 3 */ + 8662 "00010000" // /* MW 2 */ + 8663 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8665 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8671 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8673 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 + 8674 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8675 "00000001" // /* MW 3 */ + 8676 "00100000" // /* MW 2 */ + 8677 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 first + 8678 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8679 "00000000" // /* MW 9 */ + 8680 "00000000" // /* MW 8 */ + 8681 "00000000" // /* MW 7 */ + 8682 "10000000" // /* MW 6 */ + 8683 "00010001" // /* MW 5 */ + 8684 "00000110" // /* MW 4 */ + 8685 "11110110" // /* MW 3 */ + 8686 "00101100" // /* MW 2 */ + 8687 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 171 + 8688 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8689 "00111001" // /* MW 3 */ + 8690 "11110100" // /* MW 2 */ + 8691 "00000111" // /* MW 1 */ + 8692 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8693 "00011001" // /* MW 3 */ + 8694 "11111011" // /* MW 2 */ + 8695 "00000111" // /* MW 1 */ + 8696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8697 "00000000" // /* MW 1 */ + 8698 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8699 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8701 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8702 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8703 "11110001" // /* MW 3 */ + 8704 "11111101" // /* MW 2 */ + 8705 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8707 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8708 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8709 "00000000" // /* MW 3 */ + 8710 "00101000" // /* MW 2 */ + 8711 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8712 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8713 "10100000" // /* MW 3 */ + 8714 "01100111" // /* MW 2 */ + 8715 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 +.delay_slot + 8716 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8717 "00000001" // /* MW 5 */ + 8718 "00000000" // /* MW 4 */ + 8719 "00000000" // /* MW 3 */ + 8720 "11111000" // /* MW 2 */ + 8721 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8725 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 8727 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 3 "elementwise_unary.h" 124 first +.src_ref 3 "elementwise_unary.h" 126 24 first +.function_start + 8736 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8737 "00101110" // /* MW 3 */ + 8738 "00011100" // /* MW 2 */ + 8739 "00000001" // /* MW 1 */ + 8740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8741 "00000000" // /* MW 1 */ + 8742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8743 "00000000" // /* MW 1 */ + 8744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8745 "00000000" // /* MW 1 */ + 8746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8747 "00000000" // /* MW 1 */ + 8748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8749 "00000000" // /* MW 1 */ + 8750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8751 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 126 22 first + 8752 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8753 "00101001" // /* MW 3 */ + 8754 "00011100" // /* MW 2 */ + 8755 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 24 first + 8756 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8757 "00101110" // /* MW 3 */ + 8758 "00011100" // /* MW 2 */ + 8759 "00000001" // /* MW 1 */ + 8760 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8761 "00000000" // /* MW 1 */ + 8762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8763 "00000000" // /* MW 1 */ + 8764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8765 "00000000" // /* MW 1 */ + 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8767 "00000000" // /* MW 1 */ + 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8769 "00000000" // /* MW 1 */ + 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8771 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 22 + 8772 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8773 "00101001" // /* MW 3 */ + 8774 "00011100" // /* MW 2 */ + 8775 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 24 first + 8776 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8777 "00101110" // /* MW 3 */ + 8778 "01101100" // /* MW 2 */ + 8779 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8780 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8781 "00010010" // /* MW 3 */ + 8782 "00000100" // /* MW 2 */ + 8783 "00000001" // /* MW 1 */ + 8784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8785 "00000000" // /* MW 1 */ + 8786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8787 "00000000" // /* MW 1 */ + 8788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8789 "00000000" // /* MW 1 */ + 8790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8791 "00000000" // /* MW 1 */ + 8792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8793 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 22 first + 8794 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8795 "00101001" // /* MW 3 */ + 8796 "01101100" // /* MW 2 */ + 8797 "00001000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8798 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8799 "00010111" // /* MW 3 */ + 8800 "00000100" // /* MW 2 */ + 8801 "00000000" // /* MW 1 */ + 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8803 "00000000" // /* MW 1 */ + 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8805 "00000000" // /* MW 1 */ + 8806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8807 "00000000" // /* MW 1 */ + 8808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8809 "00000000" // /* MW 1 */ + 8810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8811 "00000000" // /* MW 1 */ + 8812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8813 "00000000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 first + 8814 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "00010010" // /* MW 3 */ + 8816 "00100100" // /* MW 2 */ + 8817 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 + 8818 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8819 "00010111" // /* MW 3 */ + 8820 "00010100" // /* MW 2 */ + 8821 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 130 4 first + 8822 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8823 "00000000" // /* MW 3 */ + 8824 "00101000" // /* MW 2 */ + 8825 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8829 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 8835 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 136 first +.src_ref 3 "elementwise_unary.h" 142 37 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 171 19 +.function_start + 8848 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #8976 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8849 "00010000" // /* MW 11 */ + 8850 "10001000" // /* MW 10 */ + 8851 "01111001" // /* MW 9 */ + 8852 "00001000" // /* MW 8 */ + 8853 "00000000" // /* MW 7 */ + 8854 "00000000" // /* MW 6 */ + 8855 "01101000" // /* MW 5 */ + 8856 "00111010" // /* MW 4 */ + 8857 "10000000" // /* MW 3 */ + 8858 "11000010" // /* MW 2 */ + 8859 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 142 78 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 190 19 first + 8860 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8861 "00010000" // /* MW 11 */ + 8862 "10100000" // /* MW 10 */ + 8863 "10111001" // /* MW 9 */ + 8864 "00001001" // /* MW 8 */ + 8865 "00000000" // /* MW 7 */ + 8866 "00000000" // /* MW 6 */ + 8867 "01101000" // /* MW 5 */ + 8868 "00111001" // /* MW 4 */ + 8869 "00000000" // /* MW 3 */ + 8870 "01010001" // /* MW 2 */ + 8871 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 + 8872 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8873 "11000000" // /* MW 3 */ + 8874 "00010100" // /* MW 2 */ + 8875 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 first + 8876 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8877 "00010000" // /* MW 3 */ + 8878 "01100000" // /* MW 2 */ + 8879 "00011010" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 103 16 first + 8880 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8881 "01010010" // /* MW 3 */ + 8882 "00011100" // /* MW 2 */ + 8883 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 142 37 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8884 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8885 "00010110" // /* MW 3 */ + 8886 "00000000" // /* MW 2 */ + 8887 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 8 "clip_impl.h" 104 16 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8888 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8889 "01101000" // /* MW 5 */ + 8890 "00111010" // /* MW 4 */ + 8891 "01010000" // /* MW 3 */ + 8892 "10000110" // /* MW 2 */ + 8893 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8895 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8897 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8899 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8900 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8901 "10110100" // /* MW 3 */ + 8902 "00011100" // /* MW 2 */ + 8903 "00111000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8904 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8905 "01110010" // /* MW 3 */ + 8906 "00001001" // /* MW 2 */ + 8907 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 142 78 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8908 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8909 "01111000" // /* MW 9 */ + 8910 "00110110" // /* MW 8 */ + 8911 "01010000" // /* MW 7 */ + 8912 "11101101" // /* MW 6 */ + 8913 "00011000" // /* MW 5 */ + 8914 "00000001" // /* MW 4 */ + 8915 "01101000" // /* MW 3 */ + 8916 "00111010" // /* MW 2 */ + 8917 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 154 8 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8918 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8919 "11111110" // /* MW 3 */ + 8920 "01111000" // /* MW 2 */ + 8921 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8922 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8923 "01110010" // /* MW 3 */ + 8924 "10000101" // /* MW 2 */ + 8925 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8926 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8927 "10101100" // /* MW 3 */ + 8928 "10101000" // /* MW 2 */ + 8929 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8930 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8931 "01100000" // /* MW 13 */ + 8932 "00101011" // /* MW 12 */ + 8933 "00000000" // /* MW 11 */ + 8934 "11001111" // /* MW 10 */ + 8935 "00000110" // /* MW 9 */ + 8936 "00110001" // /* MW 8 */ + 8937 "00000000" // /* MW 7 */ + 8938 "00000000" // /* MW 6 */ + 8939 "01101000" // /* MW 5 */ + 8940 "00111001" // /* MW 4 */ + 8941 "11110000" // /* MW 3 */ + 8942 "00101100" // /* MW 2 */ + 8943 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8944 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8945 "00000000" // /* MW 15 */ + 8946 "00000000" // /* MW 14 */ + 8947 "01111000" // /* MW 13 */ + 8948 "01010110" // /* MW 12 */ + 8949 "11011000" // /* MW 11 */ + 8950 "00000001" // /* MW 10 */ + 8951 "00000000" // /* MW 9 */ + 8952 "00000000" // /* MW 8 */ + 8953 "11010011" // /* MW 7 */ + 8954 "00011100" // /* MW 6 */ + 8955 "00100001" // /* MW 5 */ + 8956 "00000000" // /* MW 4 */ + 8957 "11110000" // /* MW 3 */ + 8958 "00101100" // /* MW 2 */ + 8959 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8960 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8961 "00000000" // /* MW 15 */ + 8962 "00000000" // /* MW 14 */ + 8963 "01111000" // /* MW 13 */ + 8964 "00110110" // /* MW 12 */ + 8965 "01010000" // /* MW 11 */ + 8966 "00000001" // /* MW 10 */ + 8967 "00000000" // /* MW 9 */ + 8968 "00000000" // /* MW 8 */ + 8969 "01011011" // /* MW 7 */ + 8970 "00000001" // /* MW 6 */ + 8971 "00100000" // /* MW 5 */ + 8972 "00000000" // /* MW 4 */ + 8973 "11110000" // /* MW 3 */ + 8974 "00101100" // /* MW 2 */ + 8975 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8976 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8977 "00000000" // /* MW 15 */ + 8978 "00000000" // /* MW 14 */ + 8979 "01111000" // /* MW 13 */ + 8980 "01010110" // /* MW 12 */ + 8981 "11010100" // /* MW 11 */ + 8982 "00000000" // /* MW 10 */ + 8983 "00000000" // /* MW 9 */ + 8984 "00000000" // /* MW 8 */ + 8985 "11010011" // /* MW 7 */ + 8986 "00011101" // /* MW 6 */ + 8987 "01101001" // /* MW 5 */ + 8988 "00111010" // /* MW 4 */ + 8989 "11110000" // /* MW 3 */ + 8990 "00101100" // /* MW 2 */ + 8991 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8992 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8993 "00000000" // /* MW 15 */ + 8994 "00000000" // /* MW 14 */ + 8995 "01111000" // /* MW 13 */ + 8996 "00110110" // /* MW 12 */ + 8997 "10001000" // /* MW 11 */ + 8998 "00000001" // /* MW 10 */ + 8999 "00000000" // /* MW 9 */ + 9000 "00000000" // /* MW 8 */ + 9001 "01011011" // /* MW 7 */ + 9002 "00000001" // /* MW 6 */ + 9003 "01101000" // /* MW 5 */ + 9004 "00111001" // /* MW 4 */ + 9005 "11110000" // /* MW 3 */ + 9006 "00101100" // /* MW 2 */ + 9007 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9008 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9009 "00000000" // /* MW 15 */ + 9010 "00000000" // /* MW 14 */ + 9011 "01111000" // /* MW 13 */ + 9012 "01010110" // /* MW 12 */ + 9013 "11011000" // /* MW 11 */ + 9014 "00000001" // /* MW 10 */ + 9015 "00000000" // /* MW 9 */ + 9016 "00000000" // /* MW 8 */ + 9017 "11010011" // /* MW 7 */ + 9018 "00011100" // /* MW 6 */ + 9019 "00100001" // /* MW 5 */ + 9020 "00000000" // /* MW 4 */ + 9021 "11110000" // /* MW 3 */ + 9022 "00101100" // /* MW 2 */ + 9023 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.src_ref 4 "max_min.hpp" 20 104 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9024 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9025 "00000000" // /* MW 15 */ + 9026 "00000000" // /* MW 14 */ + 9027 "01111000" // /* MW 13 */ + 9028 "00110110" // /* MW 12 */ + 9029 "01010000" // /* MW 11 */ + 9030 "00000001" // /* MW 10 */ + 9031 "00000000" // /* MW 9 */ + 9032 "00000000" // /* MW 8 */ + 9033 "01011011" // /* MW 7 */ + 9034 "00000001" // /* MW 6 */ + 9035 "00100000" // /* MW 5 */ + 9036 "00000000" // /* MW 4 */ + 9037 "11110000" // /* MW 3 */ + 9038 "00101100" // /* MW 2 */ + 9039 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9040 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9041 "01110000" // /* MW 7 */ + 9042 "01010110" // /* MW 6 */ + 9043 "11010100" // /* MW 5 */ + 9044 "00000000" // /* MW 4 */ + 9045 "01100000" // /* MW 3 */ + 9046 "10111010" // /* MW 2 */ + 9047 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9048 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9049 "01101100" // /* MW 3 */ + 9050 "00010000" // /* MW 2 */ + 9051 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first + 9052 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9053 "01110000" // /* MW 7 */ + 9054 "01010110" // /* MW 6 */ + 9055 "11011000" // /* MW 5 */ + 9056 "00000001" // /* MW 4 */ + 9057 "01100000" // /* MW 3 */ + 9058 "10011010" // /* MW 2 */ + 9059 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 158 4 first + 9060 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9061 "11011001" // /* MW 5 */ + 9062 "01000000" // /* MW 4 */ + 9063 "00000101" // /* MW 3 */ + 9064 "00000000" // /* MW 2 */ + 9065 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9066 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9067 "01110000" // /* MW 7 */ + 9068 "01010110" // /* MW 6 */ + 9069 "11010100" // /* MW 5 */ + 9070 "00000000" // /* MW 4 */ + 9071 "01100000" // /* MW 3 */ + 9072 "10111010" // /* MW 2 */ + 9073 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 9074 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9075 "01101100" // /* MW 3 */ + 9076 "00010000" // /* MW 2 */ + 9077 "00011011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.delay_slot + 9078 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9079 "10101100" // /* MW 3 */ + 9080 "10110000" // /* MW 2 */ + 9081 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.delay_slot + 9082 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9083 "11010011" // /* MW 3 */ + 9084 "00011100" // /* MW 2 */ + 9085 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9086 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9087 "11010011" // /* MW 3 */ + 9088 "00011101" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 9089 "00001001" // /* MW 1 */ +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 176 first +.src_ref 7 "superkernels.cpp" 181 6 +.function_start + 9104 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9105 "10000000" // /* MW 5 */ + 9106 "11001000" // /* MW 4 */ + 9107 "11000110" // /* MW 3 */ + 9108 "00000111" // /* MW 2 */ + 9109 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 first + 9110 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9111 "11000001" // /* MW 5 */ + 9112 "10110101" // /* MW 4 */ + 9113 "11011000" // /* MW 3 */ + 9114 "11000010" // /* MW 2 */ + 9115 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 176 + 9116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9117 "00000001" // /* MW 5 */ + 9118 "00000000" // /* MW 4 */ + 9119 "00000000" // /* MW 3 */ + 9120 "00001000" // /* MW 2 */ + 9121 "00000000" // /* MW 1 */ + 9122 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9123 "01110000" // /* MW 7 */ + 9124 "11010000" // /* MW 6 */ + 9125 "00001011" // /* MW 5 */ + 9126 "00000000" // /* MW 4 */ + 9127 "10110000" // /* MW 3 */ + 9128 "01100011" // /* MW 2 */ + 9129 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 + 9130 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9131 "00010001" // /* MW 9 */ + 9132 "00101000" // /* MW 8 */ + 9133 "00110010" // /* MW 7 */ + 9134 "11110011" // /* MW 6 */ + 9135 "00000001" // /* MW 5 */ + 9136 "00000000" // /* MW 4 */ + 9137 "10110000" // /* MW 3 */ + 9138 "10000010" // /* MW 2 */ + 9139 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 9140 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9141 "11000000" // /* MW 3 */ + 9142 "11010100" // /* MW 2 */ + 9143 "00011011" // /* MW 1 */ + 9144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9145 "00000000" // /* MW 1 */ + 9146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9147 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 +.src_ref 7 "superkernels.cpp" 181 16 + 9148 "10000100" // JNZ r16, #9312 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9312 delay_slots=5 */ + 9149 "00000001" // /* MW 5 */ + 9150 "01000000" // /* MW 4 */ + 9151 "00110000" // /* MW 3 */ + 9152 "00010010" // /* MW 2 */ + 9153 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 22 first +.delay_slot + 9154 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9155 "10010000" // /* MW 3 */ + 9156 "01100010" // /* MW 2 */ + 9157 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 30 +.delay_slot + 9158 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9159 "11111011" // /* MW 3 */ + 9160 "01100011" // /* MW 2 */ + 9161 "00010100" // /* MW 1 */ +.delay_slot + 9162 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9163 "00111101" // /* MW 3 */ + 9164 "11110100" // /* MW 2 */ + 9165 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 9166 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9167 "01110000" // /* MW 7 */ + 9168 "01100000" // /* MW 6 */ + 9169 "00110000" // /* MW 5 */ + 9170 "00000011" // /* MW 4 */ + 9171 "00110000" // /* MW 3 */ + 9172 "11000110" // /* MW 2 */ + 9173 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 +.src_ref 7 "superkernels.cpp" 195 2 +.delay_slot + 9174 "01000100" // MOVXM p0, #509312 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9175 "00000000" // /* MW 5 */ + 9176 "11001011" // /* MW 4 */ + 9177 "11000000" // /* MW 3 */ + 9178 "00000111" // /* MW 2 */ + 9179 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9180 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9181 "11010000" // /* MW 5 */ + 9182 "11001000" // /* MW 4 */ + 9183 "11000100" // /* MW 3 */ + 9184 "00000111" // /* MW 2 */ + 9185 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9186 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9187 "00010000" // /* MW 9 */ + 9188 "00110010" // /* MW 8 */ + 9189 "00110010" // /* MW 7 */ + 9190 "11110001" // /* MW 6 */ + 9191 "00000001" // /* MW 5 */ + 9192 "00000000" // /* MW 4 */ + 9193 "11100000" // /* MW 3 */ + 9194 "11000000" // /* MW 2 */ + 9195 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9197 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9198 "00000100" // JL #8736 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8736 delay_slots=5 */ + 9199 "00000001" // /* MW 5 */ + 9200 "00000000" // /* MW 4 */ + 9201 "00010000" // /* MW 3 */ + 9202 "00010001" // /* MW 2 */ + 9203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9205 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9207 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "00110001" // /* MW 3 */ + 9210 "00100000" // /* MW 2 */ + 9211 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9212 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9213 "00000101" // /* MW 3 */ + 9214 "00100000" // /* MW 2 */ + 9215 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 9216 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9217 "00000000" // /* MW 15 */ + 9218 "00000000" // /* MW 14 */ + 9219 "01111000" // /* MW 13 */ + 9220 "10100101" // /* MW 12 */ + 9221 "00000001" // /* MW 11 */ + 9222 "00000000" // /* MW 10 */ + 9223 "00000000" // /* MW 9 */ + 9224 "10000000" // /* MW 8 */ + 9225 "00010001" // /* MW 7 */ + 9226 "00000110" // /* MW 6 */ + 9227 "00100010" // /* MW 5 */ + 9228 "00000000" // /* MW 4 */ + 9229 "11110000" // /* MW 3 */ + 9230 "00101100" // /* MW 2 */ + 9231 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 +.return_address + 9232 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9233 "10100000" // /* MW 5 */ + 9234 "11001000" // /* MW 4 */ + 9235 "11000100" // /* MW 3 */ + 9236 "00000111" // /* MW 2 */ + 9237 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 first +.src_ref 7 "superkernels.cpp" 188 43 + 9238 "10111010" // LDA r16, [p2]; MOVXM p2, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9239 "00010000" // /* MW 9 */ + 9240 "11000000" // /* MW 8 */ + 9241 "00110010" // /* MW 7 */ + 9242 "11110001" // /* MW 6 */ + 9243 "00000001" // /* MW 5 */ + 9244 "00000000" // /* MW 4 */ + 9245 "11010000" // /* MW 3 */ + 9246 "11000010" // /* MW 2 */ + 9247 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 +.src_ref 7 "superkernels.cpp" 188 43 +.src_ref 7 "superkernels.cpp" 195 2 + 9248 "10111010" // LDA r17, [p2]; MOVXM p2, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9249 "00010000" // /* MW 9 */ + 9250 "11000000" // /* MW 8 */ + 9251 "00110010" // /* MW 7 */ + 9252 "11110001" // /* MW 6 */ + 9253 "00000001" // /* MW 5 */ + 9254 "00000000" // /* MW 4 */ + 9255 "11010000" // /* MW 3 */ + 9256 "11000110" // /* MW 2 */ + 9257 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 first +.src_ref 7 "superkernels.cpp" 188 16 +.src_ref 7 "superkernels.cpp" 193 47 + 9258 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9259 "00010000" // /* MW 9 */ + 9260 "00101010" // /* MW 8 */ + 9261 "10110010" // /* MW 7 */ + 9262 "11110000" // /* MW 6 */ + 9263 "00000001" // /* MW 5 */ + 9264 "00000000" // /* MW 4 */ + 9265 "01010000" // /* MW 3 */ + 9266 "11001011" // /* MW 2 */ + 9267 "01001000" // /* MW 1 */ + 9268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9269 "00000000" // /* MW 1 */ + 9270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9271 "00000000" // /* MW 1 */ + 9272 "10000100" // J #9328 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9328 delay_slots=5 */ + 9273 "00000000" // /* MW 5 */ + 9274 "00000000" // /* MW 4 */ + 9275 "00111000" // /* MW 3 */ + 9276 "00010010" // /* MW 2 */ + 9277 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 +.delay_slot + 9278 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9279 "11000000" // /* MW 5 */ + 9280 "11001000" // /* MW 4 */ + 9281 "11000000" // /* MW 3 */ + 9282 "00000111" // /* MW 2 */ + 9283 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9285 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 27 first +.delay_slot + 9286 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9287 "00001111" // /* MW 3 */ + 9288 "01100001" // /* MW 2 */ + 9289 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 first +.delay_slot + 9290 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9291 "10100011" // /* MW 5 */ + 9292 "00001100" // /* MW 4 */ + 9293 "11110000" // /* MW 3 */ + 9294 "00101100" // /* MW 2 */ + 9295 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 16 first +.delay_slot + 9296 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9297 "00000000" // /* MW 15 */ + 9298 "00000000" // /* MW 14 */ + 9299 "01111000" // /* MW 13 */ + 9300 "10100101" // /* MW 12 */ + 9301 "00000001" // /* MW 11 */ + 9302 "00000000" // /* MW 10 */ + 9303 "00000000" // /* MW 9 */ + 9304 "10000000" // /* MW 8 */ + 9305 "00010001" // /* MW 7 */ + 9306 "00000110" // /* MW 6 */ + 9307 "00100001" // /* MW 5 */ + 9308 "00000000" // /* MW 4 */ + 9309 "11110000" // /* MW 3 */ + 9310 "00101100" // /* MW 2 */ + 9311 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 193 47 +.src_ref 7 "superkernels.cpp" 195 2 + 9312 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9313 "00000000" // /* MW 15 */ + 9314 "00000000" // /* MW 14 */ + 9315 "00010000" // /* MW 13 */ + 9316 "00101010" // /* MW 12 */ + 9317 "10110010" // /* MW 11 */ + 9318 "11110000" // /* MW 10 */ + 9319 "00000001" // /* MW 9 */ + 9320 "00000000" // /* MW 8 */ + 9321 "10001011" // /* MW 7 */ + 9322 "10000000" // /* MW 6 */ + 9323 "00100010" // /* MW 5 */ + 9324 "00000000" // /* MW 4 */ + 9325 "11110000" // /* MW 3 */ + 9326 "00101100" // /* MW 2 */ + 9327 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 9328 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9329 "00000000" // /* MW 7 */ + 9330 "11000011" // /* MW 6 */ + 9331 "10110011" // /* MW 5 */ + 9332 "00000011" // /* MW 4 */ + 9333 "01100000" // /* MW 3 */ + 9334 "10010001" // /* MW 2 */ + 9335 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 9336 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9337 "00010000" // /* MW 9 */ + 9338 "00100000" // /* MW 8 */ + 9339 "00110010" // /* MW 7 */ + 9340 "11110000" // /* MW 6 */ + 9341 "00000001" // /* MW 5 */ + 9342 "00000000" // /* MW 4 */ + 9343 "11010000" // /* MW 3 */ + 9344 "11101110" // /* MW 2 */ + 9345 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 9346 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9347 "00010110" // /* MW 3 */ + 9348 "11111110" // /* MW 2 */ + 9349 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 9350 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9351 "00110110" // /* MW 3 */ + 9352 "11111110" // /* MW 2 */ + 9353 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 9354 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9355 "01010110" // /* MW 3 */ + 9356 "01000110" // /* MW 2 */ + 9357 "00000111" // /* MW 1 */ + 9358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9359 "00000000" // /* MW 1 */ + 9360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9361 "00000000" // /* MW 1 */ + 9362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9363 "00000000" // /* MW 1 */ + 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9365 "00000000" // /* MW 1 */ + 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9367 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 9368 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9369 "00000010" // /* MW 3 */ + 9370 "01100001" // /* MW 2 */ + 9371 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 9372 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9373 "00010001" // /* MW 3 */ + 9374 "00000110" // /* MW 2 */ + 9375 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 9376 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9377 "11111101" // /* MW 3 */ + 9378 "11100000" // /* MW 2 */ + 9379 "00010111" // /* MW 1 */ + 9380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9381 "00000000" // /* MW 1 */ + 9382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9383 "00000000" // /* MW 1 */ + 9384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9385 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 9386 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9387 "00001000" // /* MW 3 */ + 9388 "10010011" // /* MW 2 */ + 9389 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 + 9390 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9391 "10000001" // /* MW 5 */ + 9392 "10101101" // /* MW 4 */ + 9393 "10100111" // /* MW 3 */ + 9394 "00000000" // /* MW 2 */ + 9395 "00000100" // /* MW 1 */ + 9396 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9397 "00000000" // /* MW 1 */ + 9398 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9399 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first + 9400 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9401 "00110110" // /* MW 3 */ + 9402 "00000110" // /* MW 2 */ + 9403 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 9404 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9405 "10000001" // /* MW 5 */ + 9406 "11011101" // /* MW 4 */ + 9407 "11011100" // /* MW 3 */ + 9408 "11001010" // /* MW 2 */ + 9409 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 47 first + 9410 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9411 "01110110" // /* MW 3 */ + 9412 "00000110" // /* MW 2 */ + 9413 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 9414 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9415 "10011110" // /* MW 3 */ + 9416 "01011100" // /* MW 2 */ + 9417 "00000111" // /* MW 1 */ + 9418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9419 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 195 2 first +.no_stack_arguments + 9420 "00000100" // JL #8848 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8848 delay_slots=5 */ + 9421 "00000001" // /* MW 5 */ + 9422 "00000000" // /* MW 4 */ + 9423 "01001000" // /* MW 3 */ + 9424 "00010001" // /* MW 2 */ + 9425 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9427 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first +.delay_slot + 9428 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9429 "00000111" // /* MW 3 */ + 9430 "01100010" // /* MW 2 */ + 9431 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.delay_slot + 9432 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9433 "00110001" // /* MW 3 */ + 9434 "00000110" // /* MW 2 */ + 9435 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 first +.delay_slot + 9436 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9437 "00001101" // /* MW 3 */ + 9438 "11100001" // /* MW 2 */ + 9439 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 +.delay_slot + 9440 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9441 "00000000" // /* MW 15 */ + 9442 "00000000" // /* MW 14 */ + 9443 "10101000" // /* MW 13 */ + 9444 "10100000" // /* MW 12 */ + 9445 "00110100" // /* MW 11 */ + 9446 "00000000" // /* MW 10 */ + 9447 "00000000" // /* MW 9 */ + 9448 "00000000" // /* MW 8 */ + 9449 "01011011" // /* MW 7 */ + 9450 "00000001" // /* MW 6 */ + 9451 "00100000" // /* MW 5 */ + 9452 "00000000" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 +.src_ref 7 "superkernels.cpp" 198 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 9456 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9457 "00010000" // /* MW 9 */ + 9458 "00100000" // /* MW 8 */ + 9459 "00110010" // /* MW 7 */ + 9460 "11110011" // /* MW 6 */ + 9461 "00000001" // /* MW 5 */ + 9462 "00000000" // /* MW 4 */ + 9463 "11010000" // /* MW 3 */ + 9464 "11000110" // /* MW 2 */ + 9465 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 9466 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9467 "00000101" // /* MW 3 */ + 9468 "00100000" // /* MW 2 */ + 9469 "00010000" // /* MW 1 */ + 9470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9471 "00000000" // /* MW 1 */ + 9472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9473 "00000000" // /* MW 1 */ + 9474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9475 "00000000" // /* MW 1 */ + 9476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9477 "00000000" // /* MW 1 */ + 9478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9479 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9481 "00001000" // /* MW 3 */ + 9482 "01010001" // /* MW 2 */ + 9483 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9484 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9485 "00010000" // /* MW 9 */ + 9486 "00110000" // /* MW 8 */ + 9487 "00110010" // /* MW 7 */ + 9488 "11110001" // /* MW 6 */ + 9489 "00000001" // /* MW 5 */ + 9490 "00000000" // /* MW 4 */ + 9491 "11010000" // /* MW 3 */ + 9492 "11001110" // /* MW 2 */ + 9493 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 first + 9494 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9495 "00110110" // /* MW 3 */ + 9496 "00000110" // /* MW 2 */ + 9497 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 + 9498 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9499 "01010110" // /* MW 3 */ + 9500 "00000110" // /* MW 2 */ + 9501 "00000010" // /* MW 1 */ + 9502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9503 "00000000" // /* MW 1 */ + 9504 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9505 "00000000" // /* MW 1 */ + 9506 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9507 "00000000" // /* MW 1 */ + 9508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9509 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 9510 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00110001" // /* MW 3 */ + 9512 "00100001" // /* MW 2 */ + 9513 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9514 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9515 "00010001" // /* MW 3 */ + 9516 "11100110" // /* MW 2 */ + 9517 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 16 first + 9518 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9519 "00101000" // /* MW 3 */ + 9520 "01100001" // /* MW 2 */ + 9521 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 + 9522 "10000100" // JNZ r16, #9552 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9552 delay_slots=5 */ + 9523 "00000001" // /* MW 5 */ + 9524 "01000000" // /* MW 4 */ + 9525 "10101000" // /* MW 3 */ + 9526 "00010010" // /* MW 2 */ + 9527 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9529 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9531 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9533 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9537 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 + 9538 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9539 "00000001" // /* MW 3 */ + 9540 "00100000" // /* MW 2 */ + 9541 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 first + 9542 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9543 "00000000" // /* MW 9 */ + 9544 "00000000" // /* MW 8 */ + 9545 "00000000" // /* MW 7 */ + 9546 "10000000" // /* MW 6 */ + 9547 "00010001" // /* MW 5 */ + 9548 "00000110" // /* MW 4 */ + 9549 "11110110" // /* MW 3 */ + 9550 "00101100" // /* MW 2 */ + 9551 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 200 + 9552 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9553 "00111001" // /* MW 3 */ + 9554 "11110100" // /* MW 2 */ + 9555 "00000111" // /* MW 1 */ + 9556 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9557 "00011001" // /* MW 3 */ + 9558 "11111011" // /* MW 2 */ + 9559 "00000111" // /* MW 1 */ + 9560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9561 "00000000" // /* MW 1 */ + 9562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9563 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9565 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9566 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9567 "11110001" // /* MW 3 */ + 9568 "11111101" // /* MW 2 */ + 9569 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9571 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9572 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9573 "00000000" // /* MW 3 */ + 9574 "00101000" // /* MW 2 */ + 9575 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9576 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9577 "10100000" // /* MW 3 */ + 9578 "01100111" // /* MW 2 */ + 9579 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 +.delay_slot + 9580 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9581 "00000001" // /* MW 5 */ + 9582 "00000000" // /* MW 4 */ + 9583 "00000000" // /* MW 3 */ + 9584 "11111000" // /* MW 2 */ + 9585 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9587 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9589 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9591 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 9600 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9601 "01011000" // /* MW 9 */ + 9602 "00000000" // /* MW 8 */ + 9603 "00001000" // /* MW 7 */ + 9604 "00001011" // /* MW 6 */ + 9605 "00100000" // /* MW 5 */ + 9606 "00001000" // /* MW 4 */ + 9607 "11010000" // /* MW 3 */ + 9608 "10000101" // /* MW 2 */ + 9609 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 9610 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9611 "00000001" // /* MW 3 */ + 9612 "10000000" // /* MW 2 */ + 9613 "00010111" // /* MW 1 */ + 9614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9615 "00000000" // /* MW 1 */ + 9616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9617 "00000000" // /* MW 1 */ + 9618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9619 "00000000" // /* MW 1 */ + 9620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9621 "00000000" // /* MW 1 */ + 9622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9623 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 9624 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9625 "00101001" // /* MW 3 */ + 9626 "00011100" // /* MW 2 */ + 9627 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 9628 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9629 "00101110" // /* MW 3 */ + 9630 "00011100" // /* MW 2 */ + 9631 "00000001" // /* MW 1 */ + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ + 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9637 "00000000" // /* MW 1 */ + 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9639 "00000000" // /* MW 1 */ + 9640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9641 "00000000" // /* MW 1 */ + 9642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9643 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 9644 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9645 "00101001" // /* MW 3 */ + 9646 "00011100" // /* MW 2 */ + 9647 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 9648 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9649 "00101110" // /* MW 3 */ + 9650 "00000100" // /* MW 2 */ + 9651 "00000001" // /* MW 1 */ + 9652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9653 "00000000" // /* MW 1 */ + 9654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9655 "00000000" // /* MW 1 */ + 9656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9657 "00000000" // /* MW 1 */ + 9658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9659 "00000000" // /* MW 1 */ + 9660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9661 "00000000" // /* MW 1 */ + 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9663 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 9664 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9665 "00101001" // /* MW 3 */ + 9666 "00011100" // /* MW 2 */ + 9667 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 9668 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9669 "01110110" // /* MW 3 */ + 9670 "00010100" // /* MW 2 */ + 9671 "00000001" // /* MW 1 */ + 9672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9673 "00000000" // /* MW 1 */ + 9674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9675 "00000000" // /* MW 1 */ + 9676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9677 "00000000" // /* MW 1 */ + 9678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9679 "00000000" // /* MW 1 */ + 9680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9681 "00000000" // /* MW 1 */ + 9682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9683 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9684 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9685 "01110001" // /* MW 3 */ + 9686 "01001100" // /* MW 2 */ + 9687 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9688 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9689 "00010111" // /* MW 3 */ + 9690 "00000100" // /* MW 2 */ + 9691 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9692 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9693 "00000000" // /* MW 3 */ + 9694 "00101000" // /* MW 2 */ + 9695 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9696 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9697 "00000000" // /* MW 5 */ + 9698 "10111110" // /* MW 4 */ + 9699 "11110000" // /* MW 3 */ + 9700 "00000000" // /* MW 2 */ + 9701 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9702 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9703 "00010100" // /* MW 3 */ + 9704 "11000010" // /* MW 2 */ + 9705 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9706 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9707 "00100111" // /* MW 3 */ + 9708 "01110110" // /* MW 2 */ + 9709 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9710 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9711 "10000010" // /* MW 3 */ + 9712 "00000001" // /* MW 2 */ + 9713 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9715 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 9728 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9729 "00000001" // /* MW 5 */ + 9730 "00000000" // /* MW 4 */ + 9731 "00000000" // /* MW 3 */ + 9732 "00001000" // /* MW 2 */ + 9733 "00000000" // /* MW 1 */ + 9734 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9735 "00111101" // /* MW 3 */ + 9736 "11111000" // /* MW 2 */ + 9737 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 9738 "00000100" // JL #9600 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9600 delay_slots=5 */ + 9739 "00000001" // /* MW 5 */ + 9740 "00000000" // /* MW 4 */ + 9741 "11000000" // /* MW 3 */ + 9742 "00010010" // /* MW 2 */ + 9743 "00000000" // /* MW 1 */ +.delay_slot + 9744 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9745 "10011101" // /* MW 3 */ + 9746 "11111111" // /* MW 2 */ + 9747 "00001111" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot + 9748 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9749 "11000000" // /* MW 3 */ + 9750 "01100000" // /* MW 2 */ + 9751 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9753 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9755 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9756 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9757 "01100111" // /* MW 3 */ + 9758 "00000001" // /* MW 2 */ + 9759 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.return_address + 9760 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9761 "00111001" // /* MW 3 */ + 9762 "11111000" // /* MW 2 */ + 9763 "00000111" // /* MW 1 */ + 9764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9765 "00000000" // /* MW 1 */ + 9766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9767 "00000000" // /* MW 1 */ + 9768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9769 "00000000" // /* MW 1 */ + 9770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9771 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9773 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9774 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9775 "10011001" // /* MW 3 */ + 9776 "11111111" // /* MW 2 */ + 9777 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9778 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9779 "00000000" // /* MW 3 */ + 9780 "00101000" // /* MW 2 */ + 9781 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9785 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9787 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9788 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9789 "00001001" // /* MW 3 */ + 9790 "00100000" // /* MW 2 */ + 9791 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "mul_impl.h" 193 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9792 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9793 "01110001" // /* MW 9 */ + 9794 "00000000" // /* MW 8 */ + 9795 "00000000" // /* MW 7 */ + 9796 "00000000" // /* MW 6 */ + 9797 "11111110" // /* MW 5 */ + 9798 "00111111" // /* MW 4 */ + 9799 "00110000" // /* MW 3 */ + 9800 "11000010" // /* MW 2 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9801 "11101000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 3 "elementwise_binary_shared.h" 107 first +.src_ref 3 "elementwise_binary_shared.h" 119 37 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.function_start + 9808 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9809 "11000000" // /* MW 3 */ + 9810 "00010110" // /* MW 2 */ + 9811 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first + 9812 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9813 "00000111" // /* MW 3 */ + 9814 "01100000" // /* MW 2 */ + 9815 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 122 22 first + 9816 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9817 "01010010" // /* MW 3 */ + 9818 "00011100" // /* MW 2 */ + 9819 "00000011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 15 first + 9820 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9821 "10010110" // /* MW 3 */ + 9822 "00000100" // /* MW 2 */ + 9823 "00000011" // /* MW 1 */ + 9824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9825 "00000000" // /* MW 1 */ + 9826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9827 "00000000" // /* MW 1 */ + 9828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9829 "00000000" // /* MW 1 */ + 9830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9831 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9832 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9833 "00001001" // /* MW 3 */ + 9834 "00000110" // /* MW 2 */ + 9835 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 107 + 9836 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9837 "00000001" // /* MW 5 */ + 9838 "00000000" // /* MW 4 */ + 9839 "00000000" // /* MW 3 */ + 9840 "00010000" // /* MW 2 */ + 9841 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9842 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9843 "01001100" // /* MW 3 */ + 9844 "11000110" // /* MW 2 */ + 9845 "00010000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 +.src_ref 3 "elementwise_binary_shared.h" 124 8 + 9846 "10111010" // MOVA r1, #0; JNZ r3, #10000 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10000 delay_slots=5 */ + 9847 "01100000" // /* MW 9 */ + 9848 "00000000" // /* MW 8 */ + 9849 "00010000" // /* MW 7 */ + 9850 "11100010" // /* MW 6 */ + 9851 "00000100" // /* MW 5 */ + 9852 "00000110" // /* MW 4 */ + 9853 "00000000" // /* MW 3 */ + 9854 "00000001" // /* MW 2 */ + 9855 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 first +.delay_slot + 9856 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9857 "01110010" // /* MW 3 */ + 9858 "00000101" // /* MW 2 */ + 9859 "00011000" // /* MW 1 */ +.delay_slot + 9860 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9861 "11000000" // /* MW 3 */ + 9862 "01011110" // /* MW 2 */ + 9863 "00011000" // /* MW 1 */ +.delay_slot + 9864 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9865 "11100000" // /* MW 3 */ + 9866 "01100101" // /* MW 2 */ + 9867 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 9868 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9869 "10000001" // /* MW 5 */ + 9870 "11011101" // /* MW 4 */ + 9871 "00001010" // /* MW 3 */ + 9872 "11110010" // /* MW 2 */ + 9873 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first +.delay_slot + 9874 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9875 "00010011" // /* MW 3 */ + 9876 "00000100" // /* MW 2 */ + 9877 "00001111" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.src_ref 3 "elementwise_binary_shared.h" 131 19 + 9878 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9879 "01110010" // /* MW 9 */ + 9880 "10111001" // /* MW 8 */ + 9881 "00000100" // /* MW 7 */ + 9882 "00000000" // /* MW 6 */ + 9883 "00001011" // /* MW 5 */ + 9884 "10000000" // /* MW 4 */ + 9885 "10000100" // /* MW 3 */ + 9886 "10000010" // /* MW 2 */ + 9887 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 126 34 first +.src_ref 3 "elementwise_binary_shared.h" 131 19 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9888 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000001" // /* MW 5 */ + 9890 "00000001" // /* MW 4 */ + 9891 "01010100" // /* MW 3 */ + 9892 "00000001" // /* MW 2 */ + 9893 "10000000" // /* MW 1 */ + 9894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9895 "00000000" // /* MW 1 */ + 9896 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9897 "00000000" // /* MW 1 */ + 9898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9899 "00000000" // /* MW 1 */ + 9900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9901 "00000000" // /* MW 1 */ + 9902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9903 "00000000" // /* MW 1 */ + 9904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9905 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 131 12 +.src_ref 3 "elementwise_binary_shared.h" 131 35 + 9906 "10000100" // JNZ r0, #9952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9952 delay_slots=5 */ + 9907 "00000001" // /* MW 5 */ + 9908 "01000000" // /* MW 4 */ + 9909 "01110000" // /* MW 3 */ + 9910 "00010011" // /* MW 2 */ + 9911 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9912 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9913 "00000000" // /* MW 3 */ + 9914 "00000000" // /* MW 2 */ + 9915 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 9916 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9917 "11010000" // /* MW 5 */ + 9918 "11001000" // /* MW 4 */ + 9919 "11001000" // /* MW 3 */ + 9920 "00000111" // /* MW 2 */ + 9921 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9923 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9928 "10111010" // MOVA m1, #0; J #9968 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 9929 "00100000" // /* MW 9 */ + 9930 "00000000" // /* MW 8 */ + 9931 "00000000" // /* MW 7 */ + 9932 "11011110" // /* MW 6 */ + 9933 "00000100" // /* MW 5 */ + 9934 "00000000" // /* MW 4 */ + 9935 "10000000" // /* MW 3 */ + 9936 "00000100" // /* MW 2 */ + 9937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 9946 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9947 "00100110" // /* MW 5 */ + 9948 "00001000" // /* MW 4 */ + 9949 "11110000" // /* MW 3 */ + 9950 "00101100" // /* MW 2 */ + 9951 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 9952 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9953 "10000000" // /* MW 3 */ + 9954 "00000000" // /* MW 2 */ + 9955 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 9956 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9957 "01010000" // /* MW 11 */ + 9958 "00000000" // /* MW 10 */ + 9959 "00000000" // /* MW 9 */ + 9960 "00000001" // /* MW 8 */ + 9961 "00010011" // /* MW 7 */ + 9962 "00000100" // /* MW 6 */ + 9963 "00100001" // /* MW 5 */ + 9964 "00000000" // /* MW 4 */ + 9965 "11110000" // /* MW 3 */ + 9966 "00101100" // /* MW 2 */ + 9967 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 9968 "10000100" // J #10128 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */ + 9969 "00000000" // /* MW 5 */ + 9970 "00000000" // /* MW 4 */ + 9971 "11001000" // /* MW 3 */ + 9972 "00010011" // /* MW 2 */ + 9973 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 9974 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9975 "01110000" // /* MW 7 */ + 9976 "01100000" // /* MW 6 */ + 9977 "10110000" // /* MW 5 */ + 9978 "00000011" // /* MW 4 */ + 9979 "01100000" // /* MW 3 */ + 9980 "10010001" // /* MW 2 */ + 9981 "00010011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9988 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9989 "10000001" // /* MW 11 */ + 9990 "10101101" // /* MW 10 */ + 9991 "00000000" // /* MW 9 */ + 9992 "00000000" // /* MW 8 */ + 9993 "00000000" // /* MW 7 */ + 9994 "00000000" // /* MW 6 */ + 9995 "00100000" // /* MW 5 */ + 9996 "00000000" // /* MW 4 */ + 9997 "11110000" // /* MW 3 */ + 9998 "00101100" // /* MW 2 */ + 9999 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 +.src_ref 3 "elementwise_binary_shared.h" 150 97 + 10000 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10001 "00001101" // /* MW 3 */ + 10002 "00000100" // /* MW 2 */ + 10003 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 97 first + 10004 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10005 "01000111" // /* MW 3 */ + 10006 "10000100" // /* MW 2 */ + 10007 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10008 "10000100" // JNZ r2, #10048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10048 delay_slots=5 */ + 10009 "00000001" // /* MW 5 */ + 10010 "01000000" // /* MW 4 */ + 10011 "10100000" // /* MW 3 */ + 10012 "00010011" // /* MW 2 */ + 10013 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10014 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10015 "00000000" // /* MW 5 */ + 10016 "00100000" // /* MW 4 */ + 10017 "00000000" // /* MW 3 */ + 10018 "10000000" // /* MW 2 */ + 10019 "00111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10020 "01000100" // MOVXM p4, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10021 "11010000" // /* MW 5 */ + 10022 "11001000" // /* MW 4 */ + 10023 "11001000" // /* MW 3 */ + 10024 "00000111" // /* MW 2 */ + 10025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10031 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10032 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10033 "00000000" // /* MW 15 */ + 10034 "00000000" // /* MW 14 */ + 10035 "00010000" // /* MW 13 */ + 10036 "00000000" // /* MW 12 */ + 10037 "00001000" // /* MW 11 */ + 10038 "00000000" // /* MW 10 */ + 10039 "11100000" // /* MW 9 */ + 10040 "00101111" // /* MW 8 */ + 10041 "01011011" // /* MW 7 */ + 10042 "00000001" // /* MW 6 */ + 10043 "00100000" // /* MW 5 */ + 10044 "00000000" // /* MW 4 */ + 10045 "11110000" // /* MW 3 */ + 10046 "00101100" // /* MW 2 */ + 10047 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10048 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10049 "01011000" // /* MW 9 */ + 10050 "10111110" // /* MW 8 */ + 10051 "01000111" // /* MW 7 */ + 10052 "00000000" // /* MW 6 */ + 10053 "11010010" // /* MW 5 */ + 10054 "00000010" // /* MW 4 */ + 10055 "01010000" // /* MW 3 */ + 10056 "10000000" // /* MW 2 */ + 10057 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10058 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10059 "10000000" // /* MW 3 */ + 10060 "00000000" // /* MW 2 */ + 10061 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10062 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10063 "00000000" // /* MW 3 */ + 10064 "00000000" // /* MW 2 */ + 10065 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 171 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10066 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10067 "10000000" // /* MW 3 */ + 10068 "00000000" // /* MW 2 */ + 10069 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10071 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10072 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10073 "00010001" // /* MW 3 */ + 10074 "00000000" // /* MW 2 */ + 10075 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10076 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10077 "00100101" // /* MW 5 */ + 10078 "00000001" // /* MW 4 */ + 10079 "11100010" // /* MW 3 */ + 10080 "00000010" // /* MW 2 */ + 10081 "10100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10082 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10083 "10000000" // /* MW 3 */ + 10084 "00111010" // /* MW 2 */ + 10085 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10086 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10087 "10010110" // /* MW 3 */ + 10088 "01000000" // /* MW 2 */ + 10089 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10091 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10092 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10093 "00000001" // /* MW 3 */ + 10094 "00000001" // /* MW 2 */ + 10095 "00011000" // /* MW 1 */ + 10096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10097 "00000000" // /* MW 1 */ + 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10099 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10100 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10101 "00010010" // /* MW 3 */ + 10102 "00000000" // /* MW 2 */ + 10103 "00000101" // /* MW 1 */ + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ + 10106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10107 "00000000" // /* MW 1 */ + 10108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10109 "00000000" // /* MW 1 */ + 10110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10111 "00000000" // /* MW 1 */ + 10112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10113 "00000000" // /* MW 1 */ + 10114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10115 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 10116 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10117 "01110010" // /* MW 3 */ + 10118 "00000001" // /* MW 2 */ + 10119 "00011000" // /* MW 1 */ + 10120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10121 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 10122 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10123 "01100110" // /* MW 5 */ + 10124 "11111000" // /* MW 4 */ + 10125 "11111111" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 166 4 first +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first + 10128 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10240 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10129 "00010000" // /* MW 11 */ + 10130 "00000000" // /* MW 10 */ + 10131 "01111100" // /* MW 9 */ + 10132 "00001000" // /* MW 8 */ + 10133 "00000000" // /* MW 7 */ + 10134 "00000000" // /* MW 6 */ + 10135 "11101000" // /* MW 5 */ + 10136 "01010000" // /* MW 4 */ + 10137 "11011110" // /* MW 3 */ + 10138 "10001010" // /* MW 2 */ + 10139 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10140 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10288 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10141 "00010000" // /* MW 11 */ + 10142 "00011000" // /* MW 10 */ + 10143 "10111100" // /* MW 9 */ + 10144 "00001001" // /* MW 8 */ + 10145 "00000000" // /* MW 7 */ + 10146 "00000000" // /* MW 6 */ + 10147 "01101000" // /* MW 5 */ + 10148 "10010000" // /* MW 4 */ + 10149 "00000010" // /* MW 3 */ + 10150 "01100011" // /* MW 2 */ + 10151 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 177 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10152 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10153 "11110001" // /* MW 7 */ + 10154 "00000000" // /* MW 6 */ + 10155 "11101000" // /* MW 5 */ + 10156 "01010000" // /* MW 4 */ + 10157 "01111110" // /* MW 3 */ + 10158 "00000101" // /* MW 2 */ + 10159 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10160 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10161 "01101000" // /* MW 5 */ + 10162 "10010000" // /* MW 4 */ + 10163 "01010010" // /* MW 3 */ + 10164 "10010000" // /* MW 2 */ + 10165 "10000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10167 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10168 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10169 "00101011" // /* MW 3 */ + 10170 "00001000" // /* MW 2 */ + 10171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10173 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10174 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10175 "00111101" // /* MW 3 */ + 10176 "10000100" // /* MW 2 */ + 10177 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10178 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10179 "00000001" // /* MW 7 */ + 10180 "00000010" // /* MW 6 */ + 10181 "00000001" // /* MW 5 */ + 10182 "10000110" // /* MW 4 */ + 10183 "01111110" // /* MW 3 */ + 10184 "01110001" // /* MW 2 */ + 10185 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10186 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10187 "11101000" // /* MW 5 */ + 10188 "01010000" // /* MW 4 */ + 10189 "01111110" // /* MW 3 */ + 10190 "00000011" // /* MW 2 */ + 10191 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "01111000" // /* MW 13 */ + 10196 "10100101" // /* MW 12 */ + 10197 "00000001" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "11010100" // /* MW 9 */ + 10200 "00001001" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "00100000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "01110000" // /* MW 3 */ + 10206 "00000101" // /* MW 2 */ + 10207 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10208 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10209 "00000000" // /* MW 15 */ + 10210 "00000000" // /* MW 14 */ + 10211 "01111000" // /* MW 13 */ + 10212 "10100101" // /* MW 12 */ + 10213 "00000001" // /* MW 11 */ + 10214 "00000000" // /* MW 10 */ + 10215 "00000000" // /* MW 9 */ + 10216 "00000000" // /* MW 8 */ + 10217 "01011011" // /* MW 7 */ + 10218 "00000001" // /* MW 6 */ + 10219 "00100000" // /* MW 5 */ + 10220 "00000000" // /* MW 4 */ + 10221 "11110000" // /* MW 3 */ + 10222 "00101100" // /* MW 2 */ + 10223 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10224 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10225 "00010000" // /* MW 15 */ + 10226 "00001000" // /* MW 14 */ + 10227 "01111000" // /* MW 13 */ + 10228 "10100101" // /* MW 12 */ + 10229 "00000001" // /* MW 11 */ + 10230 "00000000" // /* MW 10 */ + 10231 "00000000" // /* MW 9 */ + 10232 "00000000" // /* MW 8 */ + 10233 "01011011" // /* MW 7 */ + 10234 "00000001" // /* MW 6 */ + 10235 "00100000" // /* MW 5 */ + 10236 "00000000" // /* MW 4 */ + 10237 "11110000" // /* MW 3 */ + 10238 "00101100" // /* MW 2 */ + 10239 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10240 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10241 "00000000" // /* MW 15 */ + 10242 "00000000" // /* MW 14 */ + 10243 "01111000" // /* MW 13 */ + 10244 "10100101" // /* MW 12 */ + 10245 "00000001" // /* MW 11 */ + 10246 "00000000" // /* MW 10 */ + 10247 "00000000" // /* MW 9 */ + 10248 "00000000" // /* MW 8 */ + 10249 "01011011" // /* MW 7 */ + 10250 "00000001" // /* MW 6 */ + 10251 "11101000" // /* MW 5 */ + 10252 "01010000" // /* MW 4 */ + 10253 "01111110" // /* MW 3 */ + 10254 "00000011" // /* MW 2 */ + 10255 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10256 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10257 "00000000" // /* MW 15 */ + 10258 "00000000" // /* MW 14 */ + 10259 "01111000" // /* MW 13 */ + 10260 "10100101" // /* MW 12 */ + 10261 "00000001" // /* MW 11 */ + 10262 "00000000" // /* MW 10 */ + 10263 "00000000" // /* MW 9 */ + 10264 "00000000" // /* MW 8 */ + 10265 "10100011" // /* MW 7 */ + 10266 "00011100" // /* MW 6 */ + 10267 "00100010" // /* MW 5 */ + 10268 "00000000" // /* MW 4 */ + 10269 "01110000" // /* MW 3 */ + 10270 "00000101" // /* MW 2 */ + 10271 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10272 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10273 "00000000" // /* MW 15 */ + 10274 "00000000" // /* MW 14 */ + 10275 "01111000" // /* MW 13 */ + 10276 "10100101" // /* MW 12 */ + 10277 "00000001" // /* MW 11 */ + 10278 "00000000" // /* MW 10 */ + 10279 "00000000" // /* MW 9 */ + 10280 "00000000" // /* MW 8 */ + 10281 "01011011" // /* MW 7 */ + 10282 "00000001" // /* MW 6 */ + 10283 "00100000" // /* MW 5 */ + 10284 "00000000" // /* MW 4 */ + 10285 "11110000" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10288 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10289 "00010000" // /* MW 15 */ + 10290 "00001000" // /* MW 14 */ + 10291 "01111000" // /* MW 13 */ + 10292 "10100101" // /* MW 12 */ + 10293 "00000001" // /* MW 11 */ + 10294 "00000000" // /* MW 10 */ + 10295 "00000000" // /* MW 9 */ + 10296 "00000000" // /* MW 8 */ + 10297 "01011011" // /* MW 7 */ + 10298 "00000001" // /* MW 6 */ + 10299 "00100000" // /* MW 5 */ + 10300 "00000000" // /* MW 4 */ + 10301 "11110000" // /* MW 3 */ + 10302 "00101100" // /* MW 2 */ + 10303 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10304 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10305 "00000001" // /* MW 5 */ + 10306 "00000000" // /* MW 4 */ + 10307 "00000000" // /* MW 3 */ + 10308 "11110000" // /* MW 2 */ + 10309 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10310 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10311 "10100011" // /* MW 3 */ + 10312 "00011100" // /* MW 2 */ + 10313 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10314 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10315 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10316 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10317 "00000001" // /* MW 3 */ + 10318 "00000010" // /* MW 2 */ + 10319 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10321 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10322 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10323 "00000000" // /* MW 3 */ + 10324 "00101000" // /* MW 2 */ + 10325 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10326 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10327 "10100011" // /* MW 3 */ + 10328 "00011100" // /* MW 2 */ + 10329 "00001010" // /* MW 1 */ +.delay_slot + 10330 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10331 "10100000" // /* MW 3 */ + 10332 "01100000" // /* MW 2 */ + 10333 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10335 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.delay_slot + 10336 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10337 "10100011" // /* MW 3 */ + 10338 "00011100" // /* MW 2 */ + 10339 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 10341 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 4 "vector.hpp" 538 13 +.src_ref 3 "elementwise_binary_shared.h" 237 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.function_start + 10352 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10353 "01110010" // /* MW 9 */ + 10354 "11110000" // /* MW 8 */ + 10355 "01100000" // /* MW 7 */ + 10356 "00000000" // /* MW 6 */ + 10357 "10001011" // /* MW 5 */ + 10358 "10001000" // /* MW 4 */ + 10359 "10000011" // /* MW 3 */ + 10360 "10000010" // /* MW 2 */ + 10361 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 first +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 + 10362 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10363 "10000001" // /* MW 5 */ + 10364 "11000101" // /* MW 4 */ + 10365 "01010100" // /* MW 3 */ + 10366 "00000001" // /* MW 2 */ + 10367 "01000000" // /* MW 1 */ + 10368 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10369 "00000000" // /* MW 1 */ + 10370 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10371 "00000000" // /* MW 1 */ + 10372 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10373 "00000000" // /* MW 1 */ + 10374 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10375 "00000000" // /* MW 1 */ + 10376 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10377 "00000000" // /* MW 1 */ + 10378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10379 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 244 12 +.src_ref 3 "elementwise_binary_shared.h" 244 35 + 10380 "10000100" // JZ r0, #10448 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10448 delay_slots=5 */ + 10381 "00000001" // /* MW 5 */ + 10382 "00000000" // /* MW 4 */ + 10383 "01101000" // /* MW 3 */ + 10384 "00010100" // /* MW 2 */ + 10385 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 237 +.delay_slot + 10386 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10387 "00000001" // /* MW 5 */ + 10388 "00000000" // /* MW 4 */ + 10389 "00000000" // /* MW 3 */ + 10390 "00001000" // /* MW 2 */ + 10391 "00000000" // /* MW 1 */ +.delay_slot + 10392 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10393 "11100000" // /* MW 3 */ + 10394 "01010101" // /* MW 2 */ + 10395 "00011000" // /* MW 1 */ +.delay_slot + 10396 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10397 "11100000" // /* MW 3 */ + 10398 "01100000" // /* MW 2 */ + 10399 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 10400 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10401 "00101011" // /* MW 3 */ + 10402 "00000111" // /* MW 2 */ + 10403 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10405 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 247 12 first +.no_stack_arguments + 10406 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10407 "00000001" // /* MW 5 */ + 10408 "00000000" // /* MW 4 */ + 10409 "00101000" // /* MW 3 */ + 10410 "00010011" // /* MW 2 */ + 10411 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10420 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10421 "10000001" // /* MW 11 */ + 10422 "10101101" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "00000000" // /* MW 7 */ + 10426 "00000000" // /* MW 6 */ + 10427 "00100000" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "11110000" // /* MW 3 */ + 10430 "00101100" // /* MW 2 */ + 10431 "00000000" // /* MW 1 */ +.return_address + 10432 "10000100" // J #10480 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10480 delay_slots=5 */ + 10433 "00000000" // /* MW 5 */ + 10434 "00000000" // /* MW 4 */ + 10435 "01111000" // /* MW 3 */ + 10436 "00010100" // /* MW 2 */ + 10437 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10439 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10441 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10445 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10446 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10447 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 3 "elementwise_binary_shared.h" 245 12 first +.no_stack_arguments + 10448 "00000100" // JL #9808 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9808 delay_slots=5 */ + 10449 "00000001" // /* MW 5 */ + 10450 "00000000" // /* MW 4 */ + 10451 "00101000" // /* MW 3 */ + 10452 "00010011" // /* MW 2 */ + 10453 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.delay_slot + 10454 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10455 "01110000" // /* MW 7 */ + 10456 "01100000" // /* MW 6 */ + 10457 "10110000" // /* MW 5 */ + 10458 "00000000" // /* MW 4 */ + 10459 "01100000" // /* MW 3 */ + 10460 "10010001" // /* MW 2 */ + 10461 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10462 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10463 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10464 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10465 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10466 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10467 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10468 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10469 "10000001" // /* MW 11 */ + 10470 "10101101" // /* MW 10 */ + 10471 "00000000" // /* MW 9 */ + 10472 "00000000" // /* MW 8 */ + 10473 "00000000" // /* MW 7 */ + 10474 "00000000" // /* MW 6 */ + 10475 "00100000" // /* MW 5 */ + 10476 "00000000" // /* MW 4 */ + 10477 "11110000" // /* MW 3 */ + 10478 "00101100" // /* MW 2 */ + 10479 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.return_address + 10480 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10481 "10000000" // /* MW 3 */ + 10482 "01110001" // /* MW 2 */ + 10483 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 first + 10484 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10485 "00000000" // /* MW 3 */ + 10486 "00101000" // /* MW 2 */ + 10487 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.delay_slot + 10488 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10489 "00000001" // /* MW 5 */ + 10490 "00000000" // /* MW 4 */ + 10491 "00000000" // /* MW 3 */ + 10492 "11111000" // /* MW 2 */ + 10493 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10495 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10497 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 10501 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 205 first +.src_ref 7 "superkernels.cpp" 210 6 +.function_start + 10512 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10513 "10000000" // /* MW 5 */ + 10514 "11001000" // /* MW 4 */ + 10515 "11000110" // /* MW 3 */ + 10516 "00000111" // /* MW 2 */ + 10517 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 first + 10518 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10519 "11000001" // /* MW 5 */ + 10520 "10110101" // /* MW 4 */ + 10521 "11011000" // /* MW 3 */ + 10522 "11000010" // /* MW 2 */ + 10523 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 205 + 10524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10525 "00000001" // /* MW 5 */ + 10526 "00000000" // /* MW 4 */ + 10527 "00000000" // /* MW 3 */ + 10528 "00001000" // /* MW 2 */ + 10529 "00000000" // /* MW 1 */ + 10530 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10531 "01110000" // /* MW 7 */ + 10532 "11010000" // /* MW 6 */ + 10533 "00001011" // /* MW 5 */ + 10534 "00000000" // /* MW 4 */ + 10535 "10110000" // /* MW 3 */ + 10536 "01100011" // /* MW 2 */ + 10537 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 + 10538 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10539 "00010001" // /* MW 9 */ + 10540 "00101000" // /* MW 8 */ + 10541 "00110010" // /* MW 7 */ + 10542 "11110011" // /* MW 6 */ + 10543 "00000001" // /* MW 5 */ + 10544 "00000000" // /* MW 4 */ + 10545 "10110000" // /* MW 3 */ + 10546 "10000010" // /* MW 2 */ + 10547 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 10548 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10549 "11000000" // /* MW 3 */ + 10550 "11010100" // /* MW 2 */ + 10551 "00011011" // /* MW 1 */ + 10552 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10553 "00000000" // /* MW 1 */ + 10554 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10555 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 +.src_ref 7 "superkernels.cpp" 210 16 + 10556 "10000100" // JNZ r16, #10720 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10720 delay_slots=5 */ + 10557 "00000001" // /* MW 5 */ + 10558 "01000000" // /* MW 4 */ + 10559 "11110000" // /* MW 3 */ + 10560 "00010100" // /* MW 2 */ + 10561 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 22 first +.delay_slot + 10562 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10563 "10010000" // /* MW 3 */ + 10564 "01100010" // /* MW 2 */ + 10565 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 30 +.delay_slot + 10566 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10567 "11111011" // /* MW 3 */ + 10568 "01100011" // /* MW 2 */ + 10569 "00010100" // /* MW 1 */ +.delay_slot + 10570 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10571 "00111101" // /* MW 3 */ + 10572 "11110100" // /* MW 2 */ + 10573 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 10574 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10575 "01110000" // /* MW 7 */ + 10576 "01100000" // /* MW 6 */ + 10577 "00110000" // /* MW 5 */ + 10578 "00000011" // /* MW 4 */ + 10579 "00110000" // /* MW 3 */ + 10580 "11000110" // /* MW 2 */ + 10581 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 +.src_ref 7 "superkernels.cpp" 224 2 +.delay_slot + 10582 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10583 "00000000" // /* MW 5 */ + 10584 "11001010" // /* MW 4 */ + 10585 "11000000" // /* MW 3 */ + 10586 "00000111" // /* MW 2 */ + 10587 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10588 "01000100" // MOVXM p2, #509032 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10589 "11010000" // /* MW 5 */ + 10590 "11001000" // /* MW 4 */ + 10591 "11000100" // /* MW 3 */ + 10592 "00000111" // /* MW 2 */ + 10593 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10594 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10595 "00010000" // /* MW 9 */ + 10596 "00110010" // /* MW 8 */ + 10597 "00110010" // /* MW 7 */ + 10598 "11110001" // /* MW 6 */ + 10599 "00000001" // /* MW 5 */ + 10600 "00000000" // /* MW 4 */ + 10601 "11100000" // /* MW 3 */ + 10602 "11000000" // /* MW 2 */ + 10603 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10606 "00000100" // JL #9728 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9728 delay_slots=5 */ + 10607 "00000001" // /* MW 5 */ + 10608 "00000000" // /* MW 4 */ + 10609 "00000000" // /* MW 3 */ + 10610 "00010011" // /* MW 2 */ + 10611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10615 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10617 "00110001" // /* MW 3 */ + 10618 "00100000" // /* MW 2 */ + 10619 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 10620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10621 "00000101" // /* MW 3 */ + 10622 "00100000" // /* MW 2 */ + 10623 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 10624 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10625 "00000000" // /* MW 15 */ + 10626 "00000000" // /* MW 14 */ + 10627 "01111000" // /* MW 13 */ + 10628 "10100101" // /* MW 12 */ + 10629 "00000001" // /* MW 11 */ + 10630 "00000000" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "10000000" // /* MW 8 */ + 10633 "00010001" // /* MW 7 */ + 10634 "00000110" // /* MW 6 */ + 10635 "00100010" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 +.return_address + 10640 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10641 "10100000" // /* MW 5 */ + 10642 "11001000" // /* MW 4 */ + 10643 "11000100" // /* MW 3 */ + 10644 "00000111" // /* MW 2 */ + 10645 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 first +.src_ref 7 "superkernels.cpp" 217 65 + 10646 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10647 "00010000" // /* MW 9 */ + 10648 "10000000" // /* MW 8 */ + 10649 "00110010" // /* MW 7 */ + 10650 "11110001" // /* MW 6 */ + 10651 "00000001" // /* MW 5 */ + 10652 "00000000" // /* MW 4 */ + 10653 "11010000" // /* MW 3 */ + 10654 "11000010" // /* MW 2 */ + 10655 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 +.src_ref 7 "superkernels.cpp" 217 65 +.src_ref 7 "superkernels.cpp" 224 2 + 10656 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10657 "00010000" // /* MW 9 */ + 10658 "10000000" // /* MW 8 */ + 10659 "00110010" // /* MW 7 */ + 10660 "11110001" // /* MW 6 */ + 10661 "00000001" // /* MW 5 */ + 10662 "00000000" // /* MW 4 */ + 10663 "11010000" // /* MW 3 */ + 10664 "11000110" // /* MW 2 */ + 10665 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 first +.src_ref 7 "superkernels.cpp" 217 16 +.src_ref 7 "superkernels.cpp" 222 47 + 10666 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10667 "00010000" // /* MW 9 */ + 10668 "00101010" // /* MW 8 */ + 10669 "10110010" // /* MW 7 */ + 10670 "11110000" // /* MW 6 */ + 10671 "00000001" // /* MW 5 */ + 10672 "00000000" // /* MW 4 */ + 10673 "01010000" // /* MW 3 */ + 10674 "11001011" // /* MW 2 */ + 10675 "01001010" // /* MW 1 */ + 10676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10677 "00000000" // /* MW 1 */ + 10678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10679 "00000000" // /* MW 1 */ + 10680 "10000100" // J #10736 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10736 delay_slots=5 */ + 10681 "00000000" // /* MW 5 */ + 10682 "00000000" // /* MW 4 */ + 10683 "11111000" // /* MW 3 */ + 10684 "00010100" // /* MW 2 */ + 10685 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 +.delay_slot + 10686 "01000100" // MOVXM p0, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10687 "11000000" // /* MW 5 */ + 10688 "11001000" // /* MW 4 */ + 10689 "11000000" // /* MW 3 */ + 10690 "00000111" // /* MW 2 */ + 10691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10693 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 27 first +.delay_slot + 10694 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10695 "00001111" // /* MW 3 */ + 10696 "01100001" // /* MW 2 */ + 10697 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 first +.delay_slot + 10698 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10699 "10100011" // /* MW 5 */ + 10700 "00001100" // /* MW 4 */ + 10701 "11110000" // /* MW 3 */ + 10702 "00101100" // /* MW 2 */ + 10703 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 16 first +.delay_slot + 10704 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10705 "00000000" // /* MW 15 */ + 10706 "00000000" // /* MW 14 */ + 10707 "01111000" // /* MW 13 */ + 10708 "10100101" // /* MW 12 */ + 10709 "00000001" // /* MW 11 */ + 10710 "00000000" // /* MW 10 */ + 10711 "00000000" // /* MW 9 */ + 10712 "10000000" // /* MW 8 */ + 10713 "00010001" // /* MW 7 */ + 10714 "00000110" // /* MW 6 */ + 10715 "00100001" // /* MW 5 */ + 10716 "00000000" // /* MW 4 */ + 10717 "11110000" // /* MW 3 */ + 10718 "00101100" // /* MW 2 */ + 10719 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 222 47 +.src_ref 7 "superkernels.cpp" 224 2 + 10720 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10721 "00000000" // /* MW 15 */ + 10722 "00000000" // /* MW 14 */ + 10723 "00010000" // /* MW 13 */ + 10724 "00101010" // /* MW 12 */ + 10725 "10110010" // /* MW 11 */ + 10726 "11110000" // /* MW 10 */ + 10727 "00000001" // /* MW 9 */ + 10728 "00000000" // /* MW 8 */ + 10729 "10001011" // /* MW 7 */ + 10730 "10000000" // /* MW 6 */ + 10731 "00100010" // /* MW 5 */ + 10732 "00000000" // /* MW 4 */ + 10733 "11110000" // /* MW 3 */ + 10734 "00101100" // /* MW 2 */ + 10735 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 10736 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10737 "00000000" // /* MW 7 */ + 10738 "11000011" // /* MW 6 */ + 10739 "10110011" // /* MW 5 */ + 10740 "00000011" // /* MW 4 */ + 10741 "01100000" // /* MW 3 */ + 10742 "10010001" // /* MW 2 */ + 10743 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 10744 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10745 "00010000" // /* MW 9 */ + 10746 "00100000" // /* MW 8 */ + 10747 "00110010" // /* MW 7 */ + 10748 "11110000" // /* MW 6 */ + 10749 "00000001" // /* MW 5 */ + 10750 "00000000" // /* MW 4 */ + 10751 "11010000" // /* MW 3 */ + 10752 "11101110" // /* MW 2 */ + 10753 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 10754 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10755 "00010110" // /* MW 3 */ + 10756 "11111110" // /* MW 2 */ + 10757 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 10758 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10759 "00110110" // /* MW 3 */ + 10760 "11111110" // /* MW 2 */ + 10761 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 10762 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10763 "01010110" // /* MW 3 */ + 10764 "01000110" // /* MW 2 */ + 10765 "00000111" // /* MW 1 */ + 10766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10767 "00000000" // /* MW 1 */ + 10768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10769 "00000000" // /* MW 1 */ + 10770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10771 "00000000" // /* MW 1 */ + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 10776 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00000010" // /* MW 3 */ + 10778 "01100001" // /* MW 2 */ + 10779 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 10780 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00010001" // /* MW 3 */ + 10782 "00000110" // /* MW 2 */ + 10783 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 10784 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10785 "11111101" // /* MW 3 */ + 10786 "11100000" // /* MW 2 */ + 10787 "00010111" // /* MW 1 */ + 10788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10789 "00000000" // /* MW 1 */ + 10790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10791 "00000000" // /* MW 1 */ + 10792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10793 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 10794 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10795 "00001000" // /* MW 3 */ + 10796 "10010011" // /* MW 2 */ + 10797 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 + 10798 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10799 "10000001" // /* MW 5 */ + 10800 "10101101" // /* MW 4 */ + 10801 "10100111" // /* MW 3 */ + 10802 "00000000" // /* MW 2 */ + 10803 "00000100" // /* MW 1 */ + 10804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10805 "00000000" // /* MW 1 */ + 10806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10807 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first + 10808 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10809 "00110110" // /* MW 3 */ + 10810 "00000110" // /* MW 2 */ + 10811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 10812 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10813 "10000001" // /* MW 5 */ + 10814 "11011101" // /* MW 4 */ + 10815 "11011100" // /* MW 3 */ + 10816 "11001010" // /* MW 2 */ + 10817 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 47 first + 10818 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10819 "01110110" // /* MW 3 */ + 10820 "00000110" // /* MW 2 */ + 10821 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 10822 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10823 "10011110" // /* MW 3 */ + 10824 "01011100" // /* MW 2 */ + 10825 "00000111" // /* MW 1 */ + 10826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10827 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 224 2 first +.no_stack_arguments + 10828 "00000100" // JL #10352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10352 delay_slots=5 */ + 10829 "00000001" // /* MW 5 */ + 10830 "00000000" // /* MW 4 */ + 10831 "00111000" // /* MW 3 */ + 10832 "00010100" // /* MW 2 */ + 10833 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10835 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first +.delay_slot + 10836 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10837 "00000111" // /* MW 3 */ + 10838 "01100010" // /* MW 2 */ + 10839 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.delay_slot + 10840 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10841 "00110001" // /* MW 3 */ + 10842 "00000110" // /* MW 2 */ + 10843 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 first +.delay_slot + 10844 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10845 "00001101" // /* MW 3 */ + 10846 "11100001" // /* MW 2 */ + 10847 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 +.delay_slot + 10848 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10849 "00000000" // /* MW 15 */ + 10850 "00000000" // /* MW 14 */ + 10851 "10101000" // /* MW 13 */ + 10852 "10100000" // /* MW 12 */ + 10853 "00110100" // /* MW 11 */ + 10854 "00000000" // /* MW 10 */ + 10855 "00000000" // /* MW 9 */ + 10856 "00000000" // /* MW 8 */ + 10857 "01011011" // /* MW 7 */ + 10858 "00000001" // /* MW 6 */ + 10859 "00100000" // /* MW 5 */ + 10860 "00000000" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 +.src_ref 7 "superkernels.cpp" 227 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 10864 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10865 "00010000" // /* MW 9 */ + 10866 "00100000" // /* MW 8 */ + 10867 "00110010" // /* MW 7 */ + 10868 "11110011" // /* MW 6 */ + 10869 "00000001" // /* MW 5 */ + 10870 "00000000" // /* MW 4 */ + 10871 "11010000" // /* MW 3 */ + 10872 "11000110" // /* MW 2 */ + 10873 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 10874 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10875 "00000101" // /* MW 3 */ + 10876 "00100000" // /* MW 2 */ + 10877 "00010000" // /* MW 1 */ + 10878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10879 "00000000" // /* MW 1 */ + 10880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10881 "00000000" // /* MW 1 */ + 10882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10883 "00000000" // /* MW 1 */ + 10884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10885 "00000000" // /* MW 1 */ + 10886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10887 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 10888 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10889 "00001000" // /* MW 3 */ + 10890 "01010001" // /* MW 2 */ + 10891 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 10892 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10893 "00010000" // /* MW 9 */ + 10894 "00110000" // /* MW 8 */ + 10895 "00110010" // /* MW 7 */ + 10896 "11110001" // /* MW 6 */ + 10897 "00000001" // /* MW 5 */ + 10898 "00000000" // /* MW 4 */ + 10899 "11010000" // /* MW 3 */ + 10900 "11001110" // /* MW 2 */ + 10901 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 first + 10902 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10903 "00110110" // /* MW 3 */ + 10904 "00000110" // /* MW 2 */ + 10905 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 + 10906 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10907 "01010110" // /* MW 3 */ + 10908 "00000110" // /* MW 2 */ + 10909 "00000010" // /* MW 1 */ + 10910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10911 "00000000" // /* MW 1 */ + 10912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10913 "00000000" // /* MW 1 */ + 10914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10915 "00000000" // /* MW 1 */ + 10916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10917 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 10918 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00110001" // /* MW 3 */ + 10920 "00100001" // /* MW 2 */ + 10921 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 10922 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "00010001" // /* MW 3 */ + 10924 "11100110" // /* MW 2 */ + 10925 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 16 first + 10926 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10927 "00101000" // /* MW 3 */ + 10928 "01100001" // /* MW 2 */ + 10929 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 + 10930 "10000100" // JNZ r16, #10960 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10960 delay_slots=5 */ + 10931 "00000001" // /* MW 5 */ + 10932 "01000000" // /* MW 4 */ + 10933 "01101000" // /* MW 3 */ + 10934 "00010101" // /* MW 2 */ + 10935 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10939 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10940 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10941 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10945 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 + 10946 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10947 "00000001" // /* MW 3 */ + 10948 "00100000" // /* MW 2 */ + 10949 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 first + 10950 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10951 "00000000" // /* MW 9 */ + 10952 "00000000" // /* MW 8 */ + 10953 "00000000" // /* MW 7 */ + 10954 "10000000" // /* MW 6 */ + 10955 "00010001" // /* MW 5 */ + 10956 "00000110" // /* MW 4 */ + 10957 "11110110" // /* MW 3 */ + 10958 "00101100" // /* MW 2 */ + 10959 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 229 + 10960 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10961 "00111001" // /* MW 3 */ + 10962 "11110100" // /* MW 2 */ + 10963 "00000111" // /* MW 1 */ + 10964 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10965 "00011001" // /* MW 3 */ + 10966 "11111011" // /* MW 2 */ + 10967 "00000111" // /* MW 1 */ + 10968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10969 "00000000" // /* MW 1 */ + 10970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10971 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10973 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10974 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10975 "11110001" // /* MW 3 */ + 10976 "11111101" // /* MW 2 */ + 10977 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10979 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10980 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10981 "00000000" // /* MW 3 */ + 10982 "00101000" // /* MW 2 */ + 10983 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10984 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10985 "10100000" // /* MW 3 */ + 10986 "01100111" // /* MW 2 */ + 10987 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 +.delay_slot + 10988 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10989 "00000001" // /* MW 5 */ + 10990 "00000000" // /* MW 4 */ + 10991 "00000000" // /* MW 3 */ + 10992 "11111000" // /* MW 2 */ + 10993 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10997 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 10999 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 11008 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11009 "00000001" // /* MW 5 */ + 11010 "00100001" // /* MW 4 */ + 11011 "00000000" // /* MW 3 */ + 11012 "00000000" // /* MW 2 */ + 11013 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11014 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11015 "11000000" // /* MW 3 */ + 11016 "01010000" // /* MW 2 */ + 11017 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 11018 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11019 "10010000" // /* MW 3 */ + 11020 "01100000" // /* MW 2 */ + 11021 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 11022 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11023 "00010001" // /* MW 3 */ + 11024 "00000100" // /* MW 2 */ + 11025 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 11026 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11027 "00010001" // /* MW 3 */ + 11028 "00010100" // /* MW 2 */ + 11029 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 11031 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 11040 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11041 "00101110" // /* MW 3 */ + 11042 "00011100" // /* MW 2 */ + 11043 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 11044 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11045 "00000001" // /* MW 5 */ + 11046 "00000000" // /* MW 4 */ + 11047 "00000000" // /* MW 3 */ + 11048 "00001000" // /* MW 2 */ + 11049 "00000000" // /* MW 1 */ + 11050 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11051 "00111101" // /* MW 3 */ + 11052 "11111100" // /* MW 2 */ + 11053 "00001111" // /* MW 1 */ + 11054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11055 "00000000" // /* MW 1 */ + 11056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11057 "00000000" // /* MW 1 */ + 11058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11059 "00000000" // /* MW 1 */ + 11060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11061 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 11062 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11063 "00101001" // /* MW 3 */ + 11064 "00011100" // /* MW 2 */ + 11065 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 11066 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11067 "00101110" // /* MW 3 */ + 11068 "00011100" // /* MW 2 */ + 11069 "00000001" // /* MW 1 */ + 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11071 "00000000" // /* MW 1 */ + 11072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11073 "00000000" // /* MW 1 */ + 11074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11075 "00000000" // /* MW 1 */ + 11076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11077 "00000000" // /* MW 1 */ + 11078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11079 "00000000" // /* MW 1 */ + 11080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11081 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 11082 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11083 "00101001" // /* MW 3 */ + 11084 "00011100" // /* MW 2 */ + 11085 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 11086 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11087 "00101110" // /* MW 3 */ + 11088 "00000100" // /* MW 2 */ + 11089 "00000001" // /* MW 1 */ + 11090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11091 "00000000" // /* MW 1 */ + 11092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11093 "00000000" // /* MW 1 */ + 11094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11095 "00000000" // /* MW 1 */ + 11096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11097 "00000000" // /* MW 1 */ + 11098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11099 "00000000" // /* MW 1 */ + 11100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11101 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 11102 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11103 "00101001" // /* MW 3 */ + 11104 "00011100" // /* MW 2 */ + 11105 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 11106 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11107 "00101110" // /* MW 3 */ + 11108 "00010100" // /* MW 2 */ + 11109 "00000001" // /* MW 1 */ + 11110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11111 "00000000" // /* MW 1 */ + 11112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11113 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 11114 "00000100" // JL #11008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11008 delay_slots=5 */ + 11115 "00000001" // /* MW 5 */ + 11116 "00000000" // /* MW 4 */ + 11117 "10000000" // /* MW 3 */ + 11118 "00010101" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ +.delay_slot + 11120 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11121 "10011101" // /* MW 3 */ + 11122 "11111011" // /* MW 2 */ + 11123 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11127 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 11128 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11129 "00101001" // /* MW 3 */ + 11130 "11011100" // /* MW 2 */ + 11131 "00001000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot + 11132 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11133 "11000000" // /* MW 3 */ + 11134 "01100000" // /* MW 2 */ + 11135 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.return_address + 11136 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11137 "00111001" // /* MW 3 */ + 11138 "11111100" // /* MW 2 */ + 11139 "00000111" // /* MW 1 */ + 11140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11141 "00000000" // /* MW 1 */ + 11142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11143 "00000000" // /* MW 1 */ + 11144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11145 "00000000" // /* MW 1 */ + 11146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11147 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11149 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11150 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11151 "10011001" // /* MW 3 */ + 11152 "11111011" // /* MW 2 */ + 11153 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11154 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11155 "00000000" // /* MW 3 */ + 11156 "00101000" // /* MW 2 */ + 11157 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11161 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11163 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11164 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11165 "00000001" // /* MW 3 */ + 11166 "00100000" // /* MW 2 */ + 11167 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "mul_impl.h" 134 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11168 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11169 "01110001" // /* MW 9 */ + 11170 "00000000" // /* MW 8 */ + 11171 "00000000" // /* MW 7 */ + 11172 "00000000" // /* MW 6 */ + 11173 "11111110" // /* MW 5 */ + 11174 "00111111" // /* MW 4 */ + 11175 "00110000" // /* MW 3 */ + 11176 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 11177 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 149 first +.src_ref 3 "elementwise_binary.h" 156 37 +.src_ref 3 "elementwise_binary.h" 168 8 first +.function_start + 11184 "10111010" // MOVA m0, #32; MOVXM ls, #11360 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11185 "00010000" // /* MW 9 */ + 11186 "00110000" // /* MW 8 */ + 11187 "01111110" // /* MW 7 */ + 11188 "00001000" // /* MW 6 */ + 11189 "00000000" // /* MW 5 */ + 11190 "00000000" // /* MW 4 */ + 11191 "10000000" // /* MW 3 */ + 11192 "00000000" // /* MW 2 */ + 11193 "00000100" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 37 first +.src_ref 3 "elementwise_binary.h" 168 8 first + 11194 "10111010" // LDA r3, [p3], m0; MOVXM le, #11376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11195 "00010000" // /* MW 9 */ + 11196 "00111000" // /* MW 8 */ + 11197 "10111110" // /* MW 7 */ + 11198 "00001001" // /* MW 6 */ + 11199 "00000000" // /* MW 5 */ + 11200 "00000000" // /* MW 4 */ + 11201 "11010000" // /* MW 3 */ + 11202 "00001110" // /* MW 2 */ + 11203 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11204 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11205 "01011000" // /* MW 9 */ + 11206 "00111100" // /* MW 8 */ + 11207 "00001011" // /* MW 7 */ + 11208 "01001000" // /* MW 6 */ + 11209 "00010111" // /* MW 5 */ + 11210 "00111110" // /* MW 4 */ + 11211 "11010000" // /* MW 3 */ + 11212 "10010000" // /* MW 2 */ + 11213 "01100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 11214 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11215 "00010000" // /* MW 9 */ + 11216 "00110100" // /* MW 8 */ + 11217 "00110010" // /* MW 7 */ + 11218 "11110010" // /* MW 6 */ + 11219 "00000001" // /* MW 5 */ + 11220 "00000000" // /* MW 4 */ + 11221 "11010000" // /* MW 3 */ + 11222 "10000000" // /* MW 2 */ + 11223 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11224 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11225 "01000010" // /* MW 3 */ + 11226 "00000100" // /* MW 2 */ + 11227 "00000100" // /* MW 1 */ + 11228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11229 "00000000" // /* MW 1 */ + 11230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11231 "00000000" // /* MW 1 */ + 11232 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11233 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 11234 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11235 "00011101" // /* MW 3 */ + 11236 "11000010" // /* MW 2 */ + 11237 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 168 8 +.src_ref 3 "elementwise_binary.h" 187 20 first + 11238 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11239 "11111001" // /* MW 5 */ + 11240 "11100001" // /* MW 4 */ + 11241 "10001010" // /* MW 3 */ + 11242 "00001110" // /* MW 2 */ + 11243 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11244 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11245 "01101000" // /* MW 5 */ + 11246 "01010000" // /* MW 4 */ + 11247 "01110000" // /* MW 3 */ + 11248 "00010011" // /* MW 2 */ + 11249 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11250 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11251 "10000000" // /* MW 7 */ + 11252 "10111010" // /* MW 6 */ + 11253 "11101000" // /* MW 5 */ + 11254 "01010000" // /* MW 4 */ + 11255 "01110000" // /* MW 3 */ + 11256 "00011011" // /* MW 2 */ + 11257 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11258 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11259 "01101000" // /* MW 5 */ + 11260 "01010000" // /* MW 4 */ + 11261 "01110000" // /* MW 3 */ + 11262 "00010011" // /* MW 2 */ + 11263 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11264 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11265 "11101000" // /* MW 5 */ + 11266 "01010000" // /* MW 4 */ + 11267 "01110000" // /* MW 3 */ + 11268 "00011011" // /* MW 2 */ + 11269 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11270 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11271 "10011011" // /* MW 3 */ + 11272 "00001000" // /* MW 2 */ + 11273 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11274 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11275 "01101000" // /* MW 5 */ + 11276 "01010000" // /* MW 4 */ + 11277 "01110000" // /* MW 3 */ + 11278 "00011011" // /* MW 2 */ + 11279 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11280 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11281 "11101000" // /* MW 5 */ + 11282 "01010000" // /* MW 4 */ + 11283 "01110000" // /* MW 3 */ + 11284 "00010011" // /* MW 2 */ + 11285 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11286 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11287 "01000001" // /* MW 9 */ + 11288 "11100010" // /* MW 8 */ + 11289 "00000000" // /* MW 7 */ + 11290 "00011101" // /* MW 6 */ + 11291 "00110100" // /* MW 5 */ + 11292 "00101000" // /* MW 4 */ + 11293 "01110000" // /* MW 3 */ + 11294 "00011011" // /* MW 2 */ + 11295 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11296 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11297 "01100001" // /* MW 9 */ + 11298 "11100000" // /* MW 8 */ + 11299 "00000001" // /* MW 7 */ + 11300 "00011101" // /* MW 6 */ + 11301 "01110100" // /* MW 5 */ + 11302 "00101000" // /* MW 4 */ + 11303 "01110000" // /* MW 3 */ + 11304 "00010011" // /* MW 2 */ + 11305 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11306 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11307 "01000001" // /* MW 9 */ + 11308 "11100010" // /* MW 8 */ + 11309 "00000000" // /* MW 7 */ + 11310 "00011101" // /* MW 6 */ + 11311 "00110100" // /* MW 5 */ + 11312 "00101000" // /* MW 4 */ + 11313 "01110000" // /* MW 3 */ + 11314 "00011011" // /* MW 2 */ + 11315 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11316 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11317 "01100001" // /* MW 9 */ + 11318 "11100000" // /* MW 8 */ + 11319 "00000001" // /* MW 7 */ + 11320 "00011101" // /* MW 6 */ + 11321 "01110100" // /* MW 5 */ + 11322 "00101000" // /* MW 4 */ + 11323 "01110000" // /* MW 3 */ + 11324 "00010011" // /* MW 2 */ + 11325 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11326 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11327 "01000001" // /* MW 9 */ + 11328 "11100010" // /* MW 8 */ + 11329 "00000000" // /* MW 7 */ + 11330 "00011101" // /* MW 6 */ + 11331 "00110100" // /* MW 5 */ + 11332 "00101000" // /* MW 4 */ + 11333 "01110000" // /* MW 3 */ + 11334 "00011011" // /* MW 2 */ + 11335 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11336 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11337 "01100001" // /* MW 9 */ + 11338 "11100000" // /* MW 8 */ + 11339 "00000001" // /* MW 7 */ + 11340 "00011101" // /* MW 6 */ + 11341 "01110100" // /* MW 5 */ + 11342 "00101000" // /* MW 4 */ + 11343 "01110000" // /* MW 3 */ + 11344 "00010011" // /* MW 2 */ + 11345 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11346 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 11347 "01000001" // /* MW 13 */ + 11348 "11100010" // /* MW 12 */ + 11349 "00000000" // /* MW 11 */ + 11350 "10001100" // /* MW 10 */ + 11351 "01110000" // /* MW 9 */ + 11352 "00001000" // /* MW 8 */ + 11353 "00000000" // /* MW 7 */ + 11354 "00000000" // /* MW 6 */ + 11355 "01101000" // /* MW 5 */ + 11356 "01010000" // /* MW 4 */ + 11357 "01110000" // /* MW 3 */ + 11358 "00011011" // /* MW 2 */ + 11359 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 11360 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11361 "00000011" // /* MW 15 */ + 11362 "00001111" // /* MW 14 */ + 11363 "01111000" // /* MW 13 */ + 11364 "10100101" // /* MW 12 */ + 11365 "00000001" // /* MW 11 */ + 11366 "00000000" // /* MW 10 */ + 11367 "00000000" // /* MW 9 */ + 11368 "00000000" // /* MW 8 */ + 11369 "10100011" // /* MW 7 */ + 11370 "00011100" // /* MW 6 */ + 11371 "11101010" // /* MW 5 */ + 11372 "01010000" // /* MW 4 */ + 11373 "01110000" // /* MW 3 */ + 11374 "00010011" // /* MW 2 */ + 11375 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11376 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11377 "00010010" // /* MW 15 */ + 11378 "00000111" // /* MW 14 */ + 11379 "01111000" // /* MW 13 */ + 11380 "10100101" // /* MW 12 */ + 11381 "00000001" // /* MW 11 */ + 11382 "00000000" // /* MW 10 */ + 11383 "00000000" // /* MW 9 */ + 11384 "00000000" // /* MW 8 */ + 11385 "00100011" // /* MW 7 */ + 11386 "00011100" // /* MW 6 */ + 11387 "01101010" // /* MW 5 */ + 11388 "01010000" // /* MW 4 */ + 11389 "01110000" // /* MW 3 */ + 11390 "00011011" // /* MW 2 */ + 11391 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 11392 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11393 "01100001" // /* MW 7 */ + 11394 "11100000" // /* MW 6 */ + 11395 "00000001" // /* MW 5 */ + 11396 "00000010" // /* MW 4 */ + 11397 "01100000" // /* MW 3 */ + 11398 "10010100" // /* MW 2 */ + 11399 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11400 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11401 "01000001" // /* MW 7 */ + 11402 "11100010" // /* MW 6 */ + 11403 "00000000" // /* MW 5 */ + 11404 "00000010" // /* MW 4 */ + 11405 "01100000" // /* MW 3 */ + 11406 "10000100" // /* MW 2 */ + 11407 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11408 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11409 "01100001" // /* MW 7 */ + 11410 "11100000" // /* MW 6 */ + 11411 "00000001" // /* MW 5 */ + 11412 "00000010" // /* MW 4 */ + 11413 "01100000" // /* MW 3 */ + 11414 "10010100" // /* MW 2 */ + 11415 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11416 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11417 "01000001" // /* MW 7 */ + 11418 "11100010" // /* MW 6 */ + 11419 "00000000" // /* MW 5 */ + 11420 "00000010" // /* MW 4 */ + 11421 "01100000" // /* MW 3 */ + 11422 "10000100" // /* MW 2 */ + 11423 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11424 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11425 "01100001" // /* MW 7 */ + 11426 "11100000" // /* MW 6 */ + 11427 "00000001" // /* MW 5 */ + 11428 "00000010" // /* MW 4 */ + 11429 "01100000" // /* MW 3 */ + 11430 "10010100" // /* MW 2 */ + 11431 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11432 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11433 "01000001" // /* MW 7 */ + 11434 "11100010" // /* MW 6 */ + 11435 "00000000" // /* MW 5 */ + 11436 "00000010" // /* MW 4 */ + 11437 "01100000" // /* MW 3 */ + 11438 "10000100" // /* MW 2 */ + 11439 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11440 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11441 "01100001" // /* MW 7 */ + 11442 "11100000" // /* MW 6 */ + 11443 "00000001" // /* MW 5 */ + 11444 "00000010" // /* MW 4 */ + 11445 "01100000" // /* MW 3 */ + 11446 "10010100" // /* MW 2 */ + 11447 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11448 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11449 "00100011" // /* MW 3 */ + 11450 "00011100" // /* MW 2 */ + 11451 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 172 4 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11452 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 11453 "00000000" // /* MW 5 */ + 11454 "01010000" // /* MW 4 */ + 11455 "01100000" // /* MW 3 */ + 11456 "10010100" // /* MW 2 */ + 11457 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11458 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11459 "00100011" // /* MW 3 */ + 11460 "00011100" // /* MW 2 */ + 11461 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11462 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11463 "10100011" // /* MW 3 */ + 11464 "00011100" // /* MW 2 */ + 11465 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 11466 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11467 "00100011" // /* MW 3 */ + 11468 "00011100" // /* MW 2 */ + 11469 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 11470 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11471 "10100011" // /* MW 3 */ + 11472 "00011100" // /* MW 2 */ + 11473 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 11475 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 369 first +.src_ref 7 "superkernels.cpp" 374 6 +.function_start + 11488 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11489 "10000000" // /* MW 5 */ + 11490 "11001000" // /* MW 4 */ + 11491 "11001000" // /* MW 3 */ + 11492 "00000111" // /* MW 2 */ + 11493 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first + 11494 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11495 "11000001" // /* MW 5 */ + 11496 "10110101" // /* MW 4 */ + 11497 "11011000" // /* MW 3 */ + 11498 "11000010" // /* MW 2 */ + 11499 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 369 + 11500 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11501 "00000001" // /* MW 5 */ + 11502 "00000000" // /* MW 4 */ + 11503 "00000000" // /* MW 3 */ + 11504 "00001000" // /* MW 2 */ + 11505 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 11506 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11507 "01111001" // /* MW 9 */ + 11508 "01100000" // /* MW 8 */ + 11509 "11001010" // /* MW 7 */ + 11510 "10000001" // /* MW 6 */ + 11511 "00010100" // /* MW 5 */ + 11512 "00100011" // /* MW 4 */ + 11513 "10110000" // /* MW 3 */ + 11514 "00111010" // /* MW 2 */ + 11515 "11111111" // /* MW 1 */ + 11516 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11517 "01110000" // /* MW 7 */ + 11518 "11010000" // /* MW 6 */ + 11519 "00001011" // /* MW 5 */ + 11520 "00000000" // /* MW 4 */ + 11521 "10110000" // /* MW 3 */ + 11522 "10000011" // /* MW 2 */ + 11523 "11111101" // /* MW 1 */ + 11524 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11525 "00010101" // /* MW 3 */ + 11526 "11111100" // /* MW 2 */ + 11527 "00001111" // /* MW 1 */ + 11528 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11529 "00111101" // /* MW 3 */ + 11530 "11110000" // /* MW 2 */ + 11531 "00001111" // /* MW 1 */ + 11532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11533 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first +.src_ref 7 "superkernels.cpp" 374 16 first + 11534 "10000100" // JNZ r16, #11680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11680 delay_slots=5 */ + 11535 "00000001" // /* MW 5 */ + 11536 "01000000" // /* MW 4 */ + 11537 "11010000" // /* MW 3 */ + 11538 "00010110" // /* MW 2 */ + 11539 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 30 first +.delay_slot + 11540 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "11111011" // /* MW 3 */ + 11542 "01100011" // /* MW 2 */ + 11543 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11544 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11545 "10100000" // /* MW 5 */ + 11546 "11001000" // /* MW 4 */ + 11547 "11000100" // /* MW 3 */ + 11548 "00000111" // /* MW 2 */ + 11549 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 11550 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11551 "01110000" // /* MW 7 */ + 11552 "01100000" // /* MW 6 */ + 11553 "00110111" // /* MW 5 */ + 11554 "00000001" // /* MW 4 */ + 11555 "00110000" // /* MW 3 */ + 11556 "11000110" // /* MW 2 */ + 11557 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 11558 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11559 "11000000" // /* MW 3 */ + 11560 "11010110" // /* MW 2 */ + 11561 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 7 "superkernels.cpp" 379 28 +.src_ref 7 "superkernels.cpp" 381 42 +.src_ref 7 "superkernels.cpp" 393 2 +.delay_slot + 11562 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11563 "00010001" // /* MW 9 */ + 11564 "10100000" // /* MW 8 */ + 11565 "10110010" // /* MW 7 */ + 11566 "11110011" // /* MW 6 */ + 11567 "00000001" // /* MW 5 */ + 11568 "00000000" // /* MW 4 */ + 11569 "10110000" // /* MW 3 */ + 11570 "10100011" // /* MW 2 */ + 11571 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11572 "00111010" // MOVS p0, p7; MOVXM p2, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "00010001" // /* MW 9 */ + 11574 "00110100" // /* MW 8 */ + 11575 "00110010" // /* MW 7 */ + 11576 "11110001" // /* MW 6 */ + 11577 "00000001" // /* MW 5 */ + 11578 "00000000" // /* MW 4 */ + 11579 "01100000" // /* MW 3 */ + 11580 "10010001" // /* MW 2 */ + 11581 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11582 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11583 "00010000" // /* MW 9 */ + 11584 "00110010" // /* MW 8 */ + 11585 "00110010" // /* MW 7 */ + 11586 "11110001" // /* MW 6 */ + 11587 "00000001" // /* MW 5 */ + 11588 "00000000" // /* MW 4 */ + 11589 "11100000" // /* MW 3 */ + 11590 "11000000" // /* MW 2 */ + 11591 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11593 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11594 "00000100" // JL #11040 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11040 delay_slots=5 */ + 11595 "00000001" // /* MW 5 */ + 11596 "00000000" // /* MW 4 */ + 11597 "10010000" // /* MW 3 */ + 11598 "00010101" // /* MW 2 */ + 11599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11603 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11604 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11605 "00110001" // /* MW 3 */ + 11606 "00100000" // /* MW 2 */ + 11607 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11608 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11609 "00000101" // /* MW 3 */ + 11610 "00100000" // /* MW 2 */ + 11611 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11612 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11613 "00010001" // /* MW 3 */ + 11614 "00000110" // /* MW 2 */ + 11615 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 381 42 first +.return_address + 11616 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11617 "00010000" // /* MW 9 */ + 11618 "00101000" // /* MW 8 */ + 11619 "10110010" // /* MW 7 */ + 11620 "11110000" // /* MW 6 */ + 11621 "00000001" // /* MW 5 */ + 11622 "00000000" // /* MW 4 */ + 11623 "11010000" // /* MW 3 */ + 11624 "11000010" // /* MW 2 */ + 11625 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 390 48 + 11626 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11627 "00010000" // /* MW 9 */ + 11628 "00101010" // /* MW 8 */ + 11629 "10110010" // /* MW 7 */ + 11630 "11110001" // /* MW 6 */ + 11631 "00000001" // /* MW 5 */ + 11632 "00000000" // /* MW 4 */ + 11633 "11010000" // /* MW 3 */ + 11634 "11000110" // /* MW 2 */ + 11635 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 28 first +.src_ref 7 "superkernels.cpp" 382 16 +.src_ref 7 "superkernels.cpp" 391 48 + 11636 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11637 "00010000" // /* MW 9 */ + 11638 "00101110" // /* MW 8 */ + 11639 "10110010" // /* MW 7 */ + 11640 "11110000" // /* MW 6 */ + 11641 "00000001" // /* MW 5 */ + 11642 "00000000" // /* MW 4 */ + 11643 "01010000" // /* MW 3 */ + 11644 "11001011" // /* MW 2 */ + 11645 "11101010" // /* MW 1 */ + 11646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11647 "00000000" // /* MW 1 */ + 11648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11649 "00000000" // /* MW 1 */ + 11650 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11651 "00000000" // /* MW 1 */ + 11652 "10000100" // J #11696 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11696 delay_slots=5 */ + 11653 "00000000" // /* MW 5 */ + 11654 "00000000" // /* MW 4 */ + 11655 "11011000" // /* MW 3 */ + 11656 "00010110" // /* MW 2 */ + 11657 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 +.delay_slot + 11658 "01000100" // MOVXM p2, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11659 "11000000" // /* MW 5 */ + 11660 "11001000" // /* MW 4 */ + 11661 "11000100" // /* MW 3 */ + 11662 "00000111" // /* MW 2 */ + 11663 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 27 first +.delay_slot + 11664 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11665 "00001111" // /* MW 3 */ + 11666 "01100001" // /* MW 2 */ + 11667 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 first +.delay_slot + 11668 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11669 "01010001" // /* MW 3 */ + 11670 "00000110" // /* MW 2 */ + 11671 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 first +.delay_slot + 11672 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11673 "00010001" // /* MW 3 */ + 11674 "00000110" // /* MW 2 */ + 11675 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 382 16 first +.delay_slot + 11676 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11677 "00010001" // /* MW 3 */ + 11678 "00000110" // /* MW 2 */ + 11679 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 390 48 + 11680 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11681 "10101000" // /* MW 5 */ + 11682 "11001000" // /* MW 4 */ + 11683 "11000110" // /* MW 3 */ + 11684 "00000111" // /* MW 2 */ + 11685 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 + 11686 "10111010" // NOPA; MOVXM p1, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11687 "00010000" // /* MW 9 */ + 11688 "00101110" // /* MW 8 */ + 11689 "10110010" // /* MW 7 */ + 11690 "11110000" // /* MW 6 */ + 11691 "00000001" // /* MW 5 */ + 11692 "00000000" // /* MW 4 */ + 11693 "11110000" // /* MW 3 */ + 11694 "00101100" // /* MW 2 */ + 11695 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 11696 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11697 "10000110" // /* MW 3 */ + 11698 "01100111" // /* MW 2 */ + 11699 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 11700 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11701 "00010000" // /* MW 9 */ + 11702 "00100000" // /* MW 8 */ + 11703 "00110010" // /* MW 7 */ + 11704 "11110001" // /* MW 6 */ + 11705 "00000001" // /* MW 5 */ + 11706 "00000000" // /* MW 4 */ + 11707 "11010000" // /* MW 3 */ + 11708 "11101110" // /* MW 2 */ + 11709 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11710 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11711 "00010110" // /* MW 3 */ + 11712 "11111110" // /* MW 2 */ + 11713 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11714 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11715 "00110110" // /* MW 3 */ + 11716 "11111110" // /* MW 2 */ + 11717 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first + 11718 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11719 "01010110" // /* MW 3 */ + 11720 "00000110" // /* MW 2 */ + 11721 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 11722 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11723 "01110110" // /* MW 3 */ + 11724 "01000110" // /* MW 2 */ + 11725 "00000000" // /* MW 1 */ + 11726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11727 "00000000" // /* MW 1 */ + 11728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11729 "00000000" // /* MW 1 */ + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11731 "00000000" // /* MW 1 */ + 11732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11733 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11734 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11735 "00000010" // /* MW 3 */ + 11736 "01100001" // /* MW 2 */ + 11737 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 11738 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11739 "00001110" // /* MW 5 */ + 11740 "01000000" // /* MW 4 */ + 11741 "00111001" // /* MW 3 */ + 11742 "11000010" // /* MW 2 */ + 11743 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 + 11744 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11745 "00010001" // /* MW 3 */ + 11746 "00000110" // /* MW 2 */ + 11747 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 11748 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11749 "11111101" // /* MW 3 */ + 11750 "11100000" // /* MW 2 */ + 11751 "00010111" // /* MW 1 */ + 11752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11753 "00000000" // /* MW 1 */ + 11754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11755 "00000000" // /* MW 1 */ + 11756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11757 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 11758 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11759 "00001000" // /* MW 3 */ + 11760 "11010011" // /* MW 2 */ + 11761 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 11762 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11763 "00000110" // /* MW 3 */ + 11764 "01100111" // /* MW 2 */ + 11765 "00011010" // /* MW 1 */ + 11766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11767 "00000000" // /* MW 1 */ + 11768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11769 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 11770 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11771 "01110110" // /* MW 3 */ + 11772 "11111111" // /* MW 2 */ + 11773 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11774 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11775 "00110110" // /* MW 3 */ + 11776 "11111110" // /* MW 2 */ + 11777 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11778 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11779 "01010110" // /* MW 3 */ + 11780 "11111110" // /* MW 2 */ + 11781 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 11782 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11783 "01110110" // /* MW 3 */ + 11784 "01010110" // /* MW 2 */ + 11785 "00000010" // /* MW 1 */ + 11786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11787 "00000000" // /* MW 1 */ + 11788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11789 "00000000" // /* MW 1 */ + 11790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11791 "00000000" // /* MW 1 */ + 11792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11793 "00000000" // /* MW 1 */ + 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11795 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11796 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11797 "00010010" // /* MW 3 */ + 11798 "10100011" // /* MW 2 */ + 11799 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 11800 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11801 "00110001" // /* MW 3 */ + 11802 "00000110" // /* MW 2 */ + 11803 "00001010" // /* MW 1 */ + 11804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11805 "00000000" // /* MW 1 */ + 11806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11807 "00000000" // /* MW 1 */ + 11808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11809 "00000000" // /* MW 1 */ + 11810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11811 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 11812 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00001000" // /* MW 3 */ + 11814 "11010011" // /* MW 2 */ + 11815 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 +.src_ref 7 "superkernels.cpp" 391 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 11816 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11817 "01111001" // /* MW 9 */ + 11818 "01100000" // /* MW 8 */ + 11819 "11001110" // /* MW 7 */ + 11820 "00101001" // /* MW 6 */ + 11821 "00000000" // /* MW 5 */ + 11822 "00000001" // /* MW 4 */ + 11823 "01100000" // /* MW 3 */ + 11824 "00010001" // /* MW 2 */ + 11825 "11010001" // /* MW 1 */ + 11826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11827 "00000000" // /* MW 1 */ + 11828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11829 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 11830 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11831 "00011001" // /* MW 3 */ + 11832 "11101110" // /* MW 2 */ + 11833 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 48 first + 11834 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11835 "00111011" // /* MW 5 */ + 11836 "11011000" // /* MW 4 */ + 11837 "11011111" // /* MW 3 */ + 11838 "11000110" // /* MW 2 */ + 11839 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 first +.src_ref 7 "superkernels.cpp" 393 2 + 11840 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11841 "10000001" // /* MW 5 */ + 11842 "11011101" // /* MW 4 */ + 11843 "11010110" // /* MW 3 */ + 11844 "11010010" // /* MW 2 */ + 11845 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11846 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11847 "01010110" // /* MW 3 */ + 11848 "01001110" // /* MW 2 */ + 11849 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11850 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11851 "00011110" // /* MW 3 */ + 11852 "01011101" // /* MW 2 */ + 11853 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11854 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11855 "11000000" // /* MW 3 */ + 11856 "01100000" // /* MW 2 */ + 11857 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11859 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11860 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11861 "01110110" // /* MW 3 */ + 11862 "00000110" // /* MW 2 */ + 11863 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11865 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 393 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11866 "00000100" // JL #11184 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11184 delay_slots=5 */ + 11867 "00000001" // /* MW 5 */ + 11868 "00000000" // /* MW 4 */ + 11869 "11011000" // /* MW 3 */ + 11870 "00010101" // /* MW 2 */ + 11871 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11872 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11873 "11000000" // /* MW 3 */ + 11874 "11010100" // /* MW 2 */ + 11875 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 11876 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11877 "00001101" // /* MW 3 */ + 11878 "01100011" // /* MW 2 */ + 11879 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 first +.delay_slot + 11880 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11881 "00001101" // /* MW 3 */ + 11882 "00100001" // /* MW 2 */ + 11883 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 +.delay_slot + 11884 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11885 "01000001" // /* MW 3 */ + 11886 "01101001" // /* MW 2 */ + 11887 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 11888 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11889 "00000000" // /* MW 15 */ + 11890 "00000000" // /* MW 14 */ + 11891 "10101000" // /* MW 13 */ + 11892 "11100010" // /* MW 12 */ + 11893 "00110100" // /* MW 11 */ + 11894 "00000000" // /* MW 10 */ + 11895 "00000000" // /* MW 9 */ + 11896 "00000000" // /* MW 8 */ + 11897 "01011011" // /* MW 7 */ + 11898 "00000001" // /* MW 6 */ + 11899 "00100000" // /* MW 5 */ + 11900 "00000000" // /* MW 4 */ + 11901 "11110000" // /* MW 3 */ + 11902 "00101100" // /* MW 2 */ + 11903 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 11904 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11905 "01111000" // /* MW 9 */ + 11906 "11010000" // /* MW 8 */ + 11907 "10110011" // /* MW 7 */ + 11908 "00101000" // /* MW 6 */ + 11909 "00000000" // /* MW 5 */ + 11910 "00000001" // /* MW 4 */ + 11911 "11010000" // /* MW 3 */ + 11912 "11000110" // /* MW 2 */ + 11913 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 + 11914 "01000100" // MOVXM p6, #509024 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11915 "11000000" // /* MW 5 */ + 11916 "11001000" // /* MW 4 */ + 11917 "11001100" // /* MW 3 */ + 11918 "00000111" // /* MW 2 */ + 11919 "00000000" // /* MW 1 */ + 11920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11921 "00000000" // /* MW 1 */ + 11922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11923 "00000000" // /* MW 1 */ + 11924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11925 "00000000" // /* MW 1 */ + 11926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11927 "00000000" // /* MW 1 */ + 11928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11929 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11930 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11931 "00001000" // /* MW 3 */ + 11932 "01010001" // /* MW 2 */ + 11933 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 11934 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11935 "00110110" // /* MW 3 */ + 11936 "11110110" // /* MW 2 */ + 11937 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 11938 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11939 "00011001" // /* MW 3 */ + 11940 "11101101" // /* MW 2 */ + 11941 "00000111" // /* MW 1 */ + 11942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11943 "00000000" // /* MW 1 */ + 11944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11945 "00000000" // /* MW 1 */ + 11946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11947 "00000000" // /* MW 1 */ + 11948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11949 "00000000" // /* MW 1 */ + 11950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11951 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 11952 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11953 "00010001" // /* MW 3 */ + 11954 "00100011" // /* MW 2 */ + 11955 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 11956 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11957 "01100011" // /* MW 5 */ + 11958 "11101100" // /* MW 4 */ + 11959 "11010011" // /* MW 3 */ + 11960 "11000110" // /* MW 2 */ + 11961 "01001010" // /* MW 1 */ + 11962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11963 "00000000" // /* MW 1 */ + 11964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11965 "00000000" // /* MW 1 */ + 11966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11967 "00000000" // /* MW 1 */ + 11968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11969 "00000000" // /* MW 1 */ + 11970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11971 "00000000" // /* MW 1 */ + 11972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11973 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11974 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11975 "00001000" // /* MW 3 */ + 11976 "01010001" // /* MW 2 */ + 11977 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 +.src_ref 7 "superkernels.cpp" 398 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 11978 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11979 "00010000" // /* MW 9 */ + 11980 "00100000" // /* MW 8 */ + 11981 "10110010" // /* MW 7 */ + 11982 "11110000" // /* MW 6 */ + 11983 "00000001" // /* MW 5 */ + 11984 "00000000" // /* MW 4 */ + 11985 "11010000" // /* MW 3 */ + 11986 "11001110" // /* MW 2 */ + 11987 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 first + 11988 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11989 "01010110" // /* MW 3 */ + 11990 "00000110" // /* MW 2 */ + 11991 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 11992 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11993 "00110110" // /* MW 3 */ + 11994 "00000110" // /* MW 2 */ + 11995 "00000001" // /* MW 1 */ + 11996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11997 "00000000" // /* MW 1 */ + 11998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11999 "00000000" // /* MW 1 */ + 12000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12001 "00000000" // /* MW 1 */ + 12002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12003 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 12004 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12005 "00110001" // /* MW 3 */ + 12006 "00100001" // /* MW 2 */ + 12007 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 12008 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12009 "00010001" // /* MW 3 */ + 12010 "11100110" // /* MW 2 */ + 12011 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 16 first + 12012 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12013 "00101000" // /* MW 3 */ + 12014 "01100001" // /* MW 2 */ + 12015 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 12016 "10000100" // JNZ r16, #12048 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12048 delay_slots=5 */ + 12017 "00000001" // /* MW 5 */ + 12018 "01000000" // /* MW 4 */ + 12019 "10001000" // /* MW 3 */ + 12020 "00010111" // /* MW 2 */ + 12021 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12031 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 + 12032 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12033 "00000001" // /* MW 3 */ + 12034 "00100000" // /* MW 2 */ + 12035 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 first + 12036 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12037 "11000001" // /* MW 11 */ + 12038 "00001000" // /* MW 10 */ + 12039 "10000011" // /* MW 9 */ + 12040 "00000000" // /* MW 8 */ + 12041 "00000000" // /* MW 7 */ + 12042 "00000000" // /* MW 6 */ + 12043 "00100000" // /* MW 5 */ + 12044 "00000000" // /* MW 4 */ + 12045 "11110000" // /* MW 3 */ + 12046 "00101100" // /* MW 2 */ + 12047 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 400 + 12048 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12049 "00111001" // /* MW 3 */ + 12050 "11110000" // /* MW 2 */ + 12051 "00000111" // /* MW 1 */ + 12052 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12053 "11110001" // /* MW 3 */ + 12054 "11111101" // /* MW 2 */ + 12055 "00000111" // /* MW 1 */ + 12056 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12057 "10011001" // /* MW 3 */ + 12058 "11110111" // /* MW 2 */ + 12059 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 12060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12061 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 12062 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12063 "11010001" // /* MW 3 */ + 12064 "11111001" // /* MW 2 */ + 12065 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12067 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12069 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12070 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12071 "00000000" // /* MW 3 */ + 12072 "00101000" // /* MW 2 */ + 12073 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12074 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12075 "00001011" // /* MW 3 */ + 12076 "10001110" // /* MW 2 */ + 12077 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 +.delay_slot + 12078 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12079 "00000001" // /* MW 5 */ + 12080 "00000000" // /* MW 4 */ + 12081 "00000000" // /* MW 3 */ + 12082 "11111000" // /* MW 2 */ + 12083 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12085 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12087 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 12089 "00000000" // /* MW 1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_begin0 +.function setup_conv2d_dw_params_bf16 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh +.src_ref 2 "conv2d_dw_bf16_params.h" 211 first +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.function_start + 12096 "10111010" // LDA el0, [p0], #4; MOVXM p1, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12097 "00010000" // /* MW 9 */ + 12098 "11000000" // /* MW 8 */ + 12099 "10110011" // /* MW 7 */ + 12100 "11110000" // /* MW 6 */ + 12101 "00000001" // /* MW 5 */ + 12102 "00000000" // /* MW 4 */ + 12103 "11010000" // /* MW 3 */ + 12104 "10000101" // /* MW 2 */ + 12105 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 first +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12106 "10111010" // LDA eh0, [p0], #4; MOVX r16, #2; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12107 "01011000" // /* MW 9 */ + 12108 "00000000" // /* MW 8 */ + 12109 "00001000" // /* MW 7 */ + 12110 "01001011" // /* MW 6 */ + 12111 "00000000" // /* MW 5 */ + 12112 "00000001" // /* MW 4 */ + 12113 "11010000" // /* MW 3 */ + 12114 "10000001" // /* MW 2 */ + 12115 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 211 + 12116 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12117 "00000001" // /* MW 5 */ + 12118 "00000000" // /* MW 4 */ + 12119 "00000000" // /* MW 3 */ + 12120 "00001000" // /* MW 2 */ + 12121 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 + 12122 "00111010" // ST p7, [sp, #-12]; MOVXM p7, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12123 "00010001" // /* MW 9 */ + 12124 "11000000" // /* MW 8 */ + 12125 "10110011" // /* MW 7 */ + 12126 "11110011" // /* MW 6 */ + 12127 "00000001" // /* MW 5 */ + 12128 "00000000" // /* MW 4 */ + 12129 "10110000" // /* MW 3 */ + 12130 "11110011" // /* MW 2 */ + 12131 "11111110" // /* MW 1 */ + 12132 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12133 "00111101" // /* MW 3 */ + 12134 "11111100" // /* MW 2 */ + 12135 "00001111" // /* MW 1 */ + 12136 "10011000" // ST r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12137 "11110101" // /* MW 3 */ + 12138 "11111001" // /* MW 2 */ + 12139 "00001111" // /* MW 1 */ + 12140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12141 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12142 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12143 "00101001" // /* MW 3 */ + 12144 "00011100" // /* MW 2 */ + 12145 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12146 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12147 "00001001" // /* MW 3 */ + 12148 "00011100" // /* MW 2 */ + 12149 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12150 "10011000" // LDA el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12151 "00101110" // /* MW 3 */ + 12152 "00000100" // /* MW 2 */ + 12153 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 17 + 12154 "10011000" // LDA eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12155 "00001110" // /* MW 3 */ + 12156 "00010100" // /* MW 2 */ + 12157 "00000000" // /* MW 1 */ + 12158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12159 "00000000" // /* MW 1 */ + 12160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12161 "00000000" // /* MW 1 */ + 12162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12163 "00000000" // /* MW 1 */ + 12164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12165 "00000000" // /* MW 1 */ + 12166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12167 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12168 "10011000" // ST el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12169 "00101001" // /* MW 3 */ + 12170 "00000100" // /* MW 2 */ + 12171 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 215 15 + 12172 "10011000" // ST eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12173 "00001001" // /* MW 3 */ + 12174 "00010100" // /* MW 2 */ + 12175 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 32 first + 12176 "10011000" // LDA.u8 r17, [p7], #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12177 "00101010" // /* MW 3 */ + 12178 "01011110" // /* MW 2 */ + 12179 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 52 + 12180 "10011000" // LDA.u8 r18, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12181 "01001010" // /* MW 3 */ + 12182 "11101110" // /* MW 2 */ + 12183 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 + 12184 "10011000" // LDA.u8 r1, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12185 "00101010" // /* MW 3 */ + 12186 "11101100" // /* MW 2 */ + 12187 "00000111" // /* MW 1 */ + 12188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12189 "00000000" // /* MW 1 */ + 12190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12191 "00000000" // /* MW 1 */ + 12192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12193 "00000000" // /* MW 1 */ + 12194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12195 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.no_stack_arguments + 12196 "00000100" // JL #14224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=14224 delay_slots=5 */ + 12197 "00000001" // /* MW 5 */ + 12198 "00000000" // /* MW 4 */ + 12199 "11001000" // /* MW 3 */ + 12200 "00011011" // /* MW 2 */ + 12201 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 38 +.delay_slot + 12202 "01011100" // ST r18, [sp, #-28]; SUB r15, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12203 "01000011" // /* MW 5 */ + 12204 "10111110" // /* MW 4 */ + 12205 "10111000" // /* MW 3 */ + 12206 "11001010" // /* MW 2 */ + 12207 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 80 +.delay_slot + 12208 "01011100" // ST r1, [sp, #-20]; NE r16, r1, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12209 "00010001" // /* MW 5 */ + 12210 "11000010" // /* MW 4 */ + 12211 "10110000" // /* MW 3 */ + 12212 "10000110" // /* MW 2 */ + 12213 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12214 "01011100" // ST r16, [sp, #-16]; LT r27, r15, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12215 "00010101" // /* MW 5 */ + 12216 "11101111" // /* MW 4 */ + 12217 "10110111" // /* MW 3 */ + 12218 "01000010" // /* MW 2 */ + 12219 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12220 "10011000" // SUB r17, r24, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12221 "11110001" // /* MW 3 */ + 12222 "00100010" // /* MW 2 */ + 12223 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.delay_slot + 12224 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r0, r15, r17, r27; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12225 "00000000" // /* MW 15 */ + 12226 "00000000" // /* MW 14 */ + 12227 "01111000" // /* MW 13 */ + 12228 "10100101" // /* MW 12 */ + 12229 "00000001" // /* MW 11 */ + 12230 "10010000" // /* MW 10 */ + 12231 "00001000" // /* MW 9 */ + 12232 "00011110" // /* MW 8 */ + 12233 "01011011" // /* MW 7 */ + 12234 "00000001" // /* MW 6 */ + 12235 "00100000" // /* MW 5 */ + 12236 "00000000" // /* MW 4 */ + 12237 "11110000" // /* MW 3 */ + 12238 "00101100" // /* MW 2 */ + 12239 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.return_address + 12240 "00101100" // LDA r20, [sp, #-20]; MOVX r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12241 "00000010" // /* MW 5 */ + 12242 "01000000" // /* MW 4 */ + 12243 "00100000" // /* MW 3 */ + 12244 "11010010" // /* MW 2 */ + 12245 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 32 first + 12246 "00101100" // LDA.u8 r17, [p7], #3; SUB r18, r16, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12247 "01000011" // /* MW 5 */ + 12248 "01001000" // /* MW 4 */ + 12249 "01011000" // /* MW 3 */ + 12250 "11000101" // /* MW 2 */ + 12251 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 52 + 12252 "10011000" // LDA.u8 r19, [p7], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12253 "01101010" // /* MW 3 */ + 12254 "11101110" // /* MW 2 */ + 12255 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12256 "00011000" // LDA r1, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12257 "00110001" // /* MW 3 */ + 12258 "11101100" // /* MW 2 */ + 12259 "00000111" // /* MW 1 */ + 12260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12261 "00000000" // /* MW 1 */ + 12262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12263 "00000000" // /* MW 1 */ + 12264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12265 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first + 12266 "10011000" // XOR r20, r15, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12267 "01000110" // /* MW 3 */ + 12268 "11101001" // /* MW 2 */ + 12269 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 + 12270 "10011000" // LT r27, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12271 "00001010" // /* MW 3 */ + 12272 "00110111" // /* MW 2 */ + 12273 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 38 first + 12274 "01011100" // ST r19, [sp, #-24]; SUB r17, r17, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12275 "01100011" // /* MW 5 */ + 12276 "11000110" // /* MW 4 */ + 12277 "10111000" // /* MW 3 */ + 12278 "01001110" // /* MW 2 */ + 12279 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.no_stack_arguments + 12280 "00111010" // ST r17, [sp, #-32]; JL #14224 /* MW 10 */ /* control_operation: words=10 call unconditional cycles_taken=1 direct absolute target_address=14224 delay_slots=5 */ + 12281 "01000001" // /* MW 9 */ + 12282 "00000000" // /* MW 8 */ + 12283 "00000000" // /* MW 7 */ + 12284 "11110010" // /* MW 6 */ + 12285 "00000110" // /* MW 5 */ + 12286 "00000000" // /* MW 4 */ + 12287 "10110000" // /* MW 3 */ + 12288 "01000110" // /* MW 2 */ + 12289 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12290 "00011000" // SEL.EQZ r20, r2, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12291 "00100010" // /* MW 3 */ + 12292 "10101001" // /* MW 2 */ + 12293 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12294 "10011000" // LT r27, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12295 "00001010" // /* MW 3 */ + 12296 "01110111" // /* MW 2 */ + 12297 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.delay_slot + 12298 "10011000" // SUB r18, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12299 "00010001" // /* MW 3 */ + 12300 "00100101" // /* MW 2 */ + 12301 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 66 first +.delay_slot + 12302 "00011000" // EXTEND.s16 r19, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12303 "01110000" // /* MW 3 */ + 12304 "00100110" // /* MW 2 */ + 12305 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 218 87 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first +.delay_slot + 12306 "01111110" // NOPA; NOPB; NOPS; SEL.EQZ r0, r17, r18, r27; ADD.NC r15, r19, #1 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12307 "01100000" // /* MW 13 */ + 12308 "00101011" // /* MW 12 */ + 12309 "00000000" // /* MW 11 */ + 12310 "00001001" // /* MW 10 */ + 12311 "10011000" // /* MW 9 */ + 12312 "00111101" // /* MW 8 */ + 12313 "00100010" // /* MW 7 */ + 12314 "01000001" // /* MW 6 */ + 12315 "00100100" // /* MW 5 */ + 12316 "00000000" // /* MW 4 */ + 12317 "11110000" // /* MW 3 */ + 12318 "00101100" // /* MW 2 */ + 12319 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 +.return_address + 12320 "10111010" // LDA r3, [sp, #-32]; MOVX r19, #-2; MOV m0, #66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12321 "01011000" // /* MW 9 */ + 12322 "01000010" // /* MW 8 */ + 12323 "00000000" // /* MW 7 */ + 12324 "11001000" // /* MW 6 */ + 12325 "00110111" // /* MW 5 */ + 12326 "00111111" // /* MW 4 */ + 12327 "00100000" // /* MW 3 */ + 12328 "00001110" // /* MW 2 */ + 12329 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12330 "10111010" // LDA r16, [sp, #-20]; MOVX r24, #0; MOV r1, #508 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12331 "01011000" // /* MW 9 */ + 12332 "11111100" // /* MW 8 */ + 12333 "00101001" // /* MW 7 */ + 12334 "00001000" // /* MW 6 */ + 12335 "10000000" // /* MW 5 */ + 12336 "00000001" // /* MW 4 */ + 12337 "00100000" // /* MW 3 */ + 12338 "11000010" // /* MW 2 */ + 12339 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 + 12340 "10111010" // LDA r22, [sp, #-28]; MOVX r6, #4; MOV r4, #2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12341 "01011000" // /* MW 9 */ + 12342 "00000010" // /* MW 8 */ + 12343 "10001000" // /* MW 7 */ + 12344 "10001000" // /* MW 6 */ + 12345 "01100000" // /* MW 5 */ + 12346 "00000000" // /* MW 4 */ + 12347 "00100000" // /* MW 3 */ + 12348 "11011010" // /* MW 2 */ + 12349 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 50 first +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 + 12350 "10111010" // LDA.u8 r17, [p7], m0; MOVX r5, #8; MOV r28, #23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12351 "01011000" // /* MW 9 */ + 12352 "00010111" // /* MW 8 */ + 12353 "10001000" // /* MW 7 */ + 12354 "00001011" // /* MW 6 */ + 12355 "01010001" // /* MW 5 */ + 12356 "00000000" // /* MW 4 */ + 12357 "01010000" // /* MW 3 */ + 12358 "01000101" // /* MW 2 */ + 12359 "11100001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 + 12360 "10111010" // LDA r21, [sp, #-24]; MOVX r18, #-6; MOV m1, #32 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12361 "01011000" // /* MW 9 */ + 12362 "00100000" // /* MW 8 */ + 12363 "10000000" // /* MW 7 */ + 12364 "01001000" // /* MW 6 */ + 12365 "00100111" // /* MW 5 */ + 12366 "00111111" // /* MW 4 */ + 12367 "00100000" // /* MW 3 */ + 12368 "01010110" // /* MW 2 */ + 12369 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12370 "10111010" // LDA r30, [sp, #-16]; MOVX r23, #6; MOV r26, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12371 "01011000" // /* MW 9 */ + 12372 "00000001" // /* MW 8 */ + 12373 "01001000" // /* MW 7 */ + 12374 "11001011" // /* MW 6 */ + 12375 "01110000" // /* MW 5 */ + 12376 "00000001" // /* MW 4 */ + 12377 "00100000" // /* MW 3 */ + 12378 "01111010" // /* MW 2 */ + 12379 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 + 12380 "10111010" // MOVA m0, #-178; MOVX r29, #128; MOV r31, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12381 "01011000" // /* MW 9 */ + 12382 "11000000" // /* MW 8 */ + 12383 "11101111" // /* MW 7 */ + 12384 "00001011" // /* MW 6 */ + 12385 "11010000" // /* MW 5 */ + 12386 "00000101" // /* MW 4 */ + 12387 "10000000" // /* MW 3 */ + 12388 "11000000" // /* MW 2 */ + 12389 "11101001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12390 "10011000" // SUB r20, r24, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12391 "00100001" // /* MW 3 */ + 12392 "00101000" // /* MW 2 */ + 12393 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 + 12394 "10011000" // XOR r3, r3, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12395 "00000110" // /* MW 3 */ + 12396 "11000111" // /* MW 2 */ + 12397 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 220 74 + 12398 "00100100" // LT r27, r3, r24; ADD.NC r0, r22, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12399 "00000010" // /* MW 5 */ + 12400 "00110110" // /* MW 4 */ + 12401 "01010000" // /* MW 3 */ + 12402 "11110001" // /* MW 2 */ + 12403 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 + 12404 "01100100" // SEL.EQZ r20, r2, r20, r27; MOV r22, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12405 "11110101" // /* MW 5 */ + 12406 "00111111" // /* MW 4 */ + 12407 "01001011" // /* MW 3 */ + 12408 "00101000" // /* MW 2 */ + 12409 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12410 "01100100" // MUL r3, r15, r16; MOV r2, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12411 "00011101" // /* MW 5 */ + 12412 "00100000" // /* MW 4 */ + 12413 "11110001" // /* MW 3 */ + 12414 "11100001" // /* MW 2 */ + 12415 "01111000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 67 first + 12416 "00011000" // EXTEND.s16 r20, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12417 "01110000" // /* MW 3 */ + 12418 "00101000" // /* MW 2 */ + 12419 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 84 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 + 12420 "00100100" // AND r0, r1, r0; ADD.NC r1, r0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12421 "00000001" // /* MW 5 */ + 12422 "10100000" // /* MW 4 */ + 12423 "10010000" // /* MW 3 */ + 12424 "00000000" // /* MW 2 */ + 12425 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 219 88 first +.src_ref 2 "conv2d_dw_bf16_params.h" 231 68 first + 12426 "00100100" // LSHL r19, r1, r19; ADD.NC r27, r20, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12427 "00000001" // /* MW 5 */ + 12428 "10110100" // /* MW 4 */ + 12429 "10111101" // /* MW 3 */ + 12430 "11100111" // /* MW 2 */ + 12431 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 220 44 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 53 first + 12432 "10100100" // LSHL r20, r15, r6; ADD.NC r1, r3, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12433 "00000010" // /* MW 5 */ + 12434 "10100011" // /* MW 4 */ + 12435 "10110000" // /* MW 3 */ + 12436 "00001101" // /* MW 2 */ + 12437 "01111101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 240 70 +.src_ref 2 "conv2d_dw_bf16_params.h" 246 52 first + 12438 "00100100" // LSHL r7, r1, r6; ADD.NC r0, r21, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12439 "11111111" // /* MW 5 */ + 12440 "00110101" // /* MW 4 */ + 12441 "10110000" // /* MW 3 */ + 12442 "11001101" // /* MW 2 */ + 12443 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 45 first + 12444 "10011000" // MUL r6, r27, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12445 "00001111" // /* MW 3 */ + 12446 "11001101" // /* MW 2 */ + 12447 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 226 22 first + 12448 "10011000" // MUL r15, r15, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12449 "00011111" // /* MW 3 */ + 12450 "11011111" // /* MW 2 */ + 12451 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 78 first +.src_ref 2 "conv2d_dw_bf16_params.h" 238 79 + 12452 "00100100" // MUL r21, r19, r21; ADD.NC r19, r19, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12453 "11111111" // /* MW 5 */ + 12454 "10110011" // /* MW 4 */ + 12455 "11111001" // /* MW 3 */ + 12456 "01101011" // /* MW 2 */ + 12457 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 64 first + 12458 "10011000" // EQ r27, r4, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12459 "00000111" // /* MW 3 */ + 12460 "00110111" // /* MW 2 */ + 12461 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 231 39 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 55 first + 12462 "01011100" // ST r21, [p7], #-4; MUL r4, r15, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12463 "11011111" // /* MW 5 */ + 12464 "10010000" // /* MW 4 */ + 12465 "00110111" // /* MW 3 */ + 12466 "11010110" // /* MW 2 */ + 12467 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first + 12468 "00011000" // SEL.EQZ r28, r28, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12469 "01010010" // /* MW 3 */ + 12470 "00111000" // /* MW 2 */ + 12471 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 232 76 first + 12472 "10011000" // LSHL r18, r4, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12473 "00101101" // /* MW 3 */ + 12474 "00100101" // /* MW 2 */ + 12475 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 227 22 first +.src_ref 2 "conv2d_dw_bf16_params.h" 232 39 + 12476 "01011100" // ST r18, [p7], m1; MUL r18, r17, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12477 "00111111" // /* MW 5 */ + 12478 "11001000" // /* MW 4 */ + 12479 "00111000" // /* MW 3 */ + 12480 "01001010" // /* MW 2 */ + 12481 "11100101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 235 50 first + 12482 "01011100" // ST r28, [p7], #-16; LSHL r28, r30, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12483 "11111011" // /* MW 5 */ + 12484 "01110010" // /* MW 4 */ + 12485 "00111111" // /* MW 3 */ + 12486 "11110010" // /* MW 2 */ + 12487 "11111001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 235 47 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 53 first + 12488 "01011100" // ST r28, [p7], #24; MUL r28, r18, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12489 "00011111" // /* MW 5 */ + 12490 "01110000" // /* MW 4 */ + 12491 "00111001" // /* MW 3 */ + 12492 "11110010" // /* MW 2 */ + 12493 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 238 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 63 first + 12494 "01011100" // ST r19, [p7], #4; LSHL r19, r19, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12495 "11111011" // /* MW 5 */ + 12496 "11001110" // /* MW 4 */ + 12497 "00111001" // /* MW 3 */ + 12498 "11001110" // /* MW 2 */ + 12499 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 71 +.src_ref 2 "conv2d_dw_bf16_params.h" 243 93 first + 12500 "10100100" // LSHL r28, r28, r26; ADD.NC r19, r19, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12501 "11101010" // /* MW 5 */ + 12502 "10110011" // /* MW 4 */ + 12503 "10111001" // /* MW 3 */ + 12504 "00110101" // /* MW 2 */ + 12505 "11100111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 239 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 242 16 first + 12506 "01011100" // ST r31, [p7], #4; LSHL r30, r18, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12507 "01011011" // /* MW 5 */ + 12508 "01111011" // /* MW 4 */ + 12509 "00111001" // /* MW 3 */ + 12510 "11111110" // /* MW 2 */ + 12511 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12512 "10100100" // MUL r16, r18, r16; ADD.NC r18, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12513 "11100010" // /* MW 5 */ + 12514 "00110011" // /* MW 4 */ + 12515 "11111001" // /* MW 3 */ + 12516 "00100001" // /* MW 2 */ + 12517 "10010100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 234 45 first +.src_ref 2 "conv2d_dw_bf16_params.h" 240 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 63 first + 12518 "01011100" // ST r0, [p7], #4; SEL.EQZ r28, r31, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12519 "00000100" // /* MW 5 */ + 12520 "11110011" // /* MW 4 */ + 12521 "00111111" // /* MW 3 */ + 12522 "10000010" // /* MW 2 */ + 12523 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 69 first + 12524 "10011000" // LSHL r31, r3, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12525 "01101101" // /* MW 3 */ + 12526 "11111111" // /* MW 2 */ + 12527 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 242 23 first +.src_ref 2 "conv2d_dw_bf16_params.h" 247 73 + 12528 "00100100" // SUB r1, r30, r19; ADD.NC r19, r31, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12529 "11111111" // /* MW 5 */ + 12530 "10111111" // /* MW 4 */ + 12531 "00111001" // /* MW 3 */ + 12532 "01100110" // /* MW 2 */ + 12533 "11110000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 241 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first + 12534 "01011100" // ST r1, [p7], #4; LSHL r17, r17, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12535 "11011011" // /* MW 5 */ + 12536 "11000110" // /* MW 4 */ + 12537 "00111000" // /* MW 3 */ + 12538 "10000110" // /* MW 2 */ + 12539 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 100 +.src_ref 2 "conv2d_dw_bf16_params.h" 245 77 first + 12540 "00100100" // SUB r22, r24, r18; ADD.NC r18, r17, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12541 "11111111" // /* MW 5 */ + 12542 "00110001" // /* MW 4 */ + 12543 "00111001" // /* MW 3 */ + 12544 "10100100" // /* MW 2 */ + 12545 "11000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 243 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12546 "01011100" // ST r22, [p7], #4; SUB r22, r7, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12547 "11000011" // /* MW 5 */ + 12548 "11011011" // /* MW 4 */ + 12549 "00110011" // /* MW 3 */ + 12550 "11011010" // /* MW 2 */ + 12551 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 245 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 53 first + 12552 "01011100" // ST r18, [p7], #4; LSHL r16, r16, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12553 "01011011" // /* MW 5 */ + 12554 "01000011" // /* MW 4 */ + 12555 "00111000" // /* MW 3 */ + 12556 "11001010" // /* MW 2 */ + 12557 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 246 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12558 "01011100" // ST r7, [p7], #4; LSHL r31, r19, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12559 "01011011" // /* MW 5 */ + 12560 "11111100" // /* MW 4 */ + 12561 "00111001" // /* MW 3 */ + 12562 "10011110" // /* MW 2 */ + 12563 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 247 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 248 72 first + 12564 "01011100" // ST r19, [p7], #4; ADD r22, r29, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12565 "11000001" // /* MW 5 */ + 12566 "11011010" // /* MW 4 */ + 12567 "00111110" // /* MW 3 */ + 12568 "11001110" // /* MW 2 */ + 12569 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 first + 12570 "10100100" // ADD r16, r7, r16; ADD.NC r29, r31, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12571 "11110010" // /* MW 5 */ + 12572 "10111111" // /* MW 4 */ + 12573 "00011110" // /* MW 3 */ + 12574 "00100000" // /* MW 2 */ + 12575 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 248 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 250 61 + 12576 "01011100" // ST r22, [p7], #4; SUB r16, r16, r29 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12577 "10100011" // /* MW 5 */ + 12578 "01000011" // /* MW 4 */ + 12579 "00111000" // /* MW 3 */ + 12580 "11011010" // /* MW 2 */ + 12581 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 249 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 + 12582 "00111010" // ST r16, [p7], #4; LSHL r22, r15, r26; MOV r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12583 "01011001" // /* MW 9 */ + 12584 "11111111" // /* MW 8 */ + 12585 "00001111" // /* MW 7 */ + 12586 "01101110" // /* MW 6 */ + 12587 "01101101" // /* MW 5 */ + 12588 "00011111" // /* MW 4 */ + 12589 "00110000" // /* MW 3 */ + 12590 "11000010" // /* MW 2 */ + 12591 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 252 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 253 60 first + 12592 "01011100" // ST r18, [p7], #4; ADD r26, r28, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12593 "10000001" // /* MW 5 */ + 12594 "01101010" // /* MW 4 */ + 12595 "00111110" // /* MW 3 */ + 12596 "11001010" // /* MW 2 */ + 12597 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 253 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 first + 12598 "01011100" // ST r26, [p7], #4; SUB r20, r20, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12599 "11000011" // /* MW 5 */ + 12600 "01010010" // /* MW 4 */ + 12601 "00111010" // /* MW 3 */ + 12602 "11101010" // /* MW 2 */ + 12603 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 254 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 255 73 +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 258 116 first +.src_ref 2 "conv2d_dw_bf16_params.h" 258 140 first +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 + 12604 "01110110" // MOVA r17, #64; ST r19, [p7], #4; MAC r16, r16, r21, r17; ADD.NC r19, r20, #64 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12605 "00001000" // /* MW 11 */ + 12606 "00010000" // /* MW 10 */ + 12607 "01101101" // /* MW 9 */ + 12608 "10110010" // /* MW 8 */ + 12609 "00001000" // /* MW 7 */ + 12610 "10101011" // /* MW 6 */ + 12611 "01110001" // /* MW 5 */ + 12612 "00011110" // /* MW 4 */ + 12613 "00000111" // /* MW 3 */ + 12614 "00010001" // /* MW 2 */ + 12615 "00001000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 255 43 first + 12616 "10011000" // ST r19, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12617 "01110001" // /* MW 3 */ + 12618 "00011110" // /* MW 2 */ + 12619 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 256 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 first + 12620 "01011100" // ST r17, [p7], #4; LSHL r20, r16, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12621 "11111011" // /* MW 5 */ + 12622 "01010010" // /* MW 4 */ + 12623 "00111000" // /* MW 3 */ + 12624 "11000110" // /* MW 2 */ + 12625 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 258 42 first +.src_ref 2 "conv2d_dw_bf16_params.h" 260 49 + 12626 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12627 "10000011" // /* MW 5 */ + 12628 "01000010" // /* MW 4 */ + 12629 "00111100" // /* MW 3 */ + 12630 "11000010" // /* MW 2 */ + 12631 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 259 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 first + 12632 "01011100" // ST r17, [p7], #4; LSHL r20, r18, r23 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12633 "11111011" // /* MW 5 */ + 12634 "01010010" // /* MW 4 */ + 12635 "00111001" // /* MW 3 */ + 12636 "11000110" // /* MW 2 */ + 12637 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 260 43 first +.src_ref 2 "conv2d_dw_bf16_params.h" 264 47 + 12638 "01011100" // ST r16, [p7], #4; SUB r16, r24, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12639 "10000011" // /* MW 5 */ + 12640 "01000010" // /* MW 4 */ + 12641 "00111100" // /* MW 3 */ + 12642 "11000010" // /* MW 2 */ + 12643 "11100011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 262 40 first + 12644 "10011000" // ST r18, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12645 "01010001" // /* MW 3 */ + 12646 "00011110" // /* MW 2 */ + 12647 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 263 41 first + 12648 "10011000" // ST r17, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12649 "00110001" // /* MW 3 */ + 12650 "00011110" // /* MW 2 */ + 12651 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 264 41 first + 12652 "10011000" // ST r16, [p7], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12653 "00010001" // /* MW 3 */ + 12654 "00001010" // /* MW 2 */ + 12655 "00001111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 54 first + 12656 "10011000" // LDA.u8 r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12657 "00001010" // /* MW 3 */ + 12658 "00000110" // /* MW 2 */ + 12659 "00000111" // /* MW 1 */ + 12660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12661 "00000000" // /* MW 1 */ + 12662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12663 "00000000" // /* MW 1 */ + 12664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12665 "00000000" // /* MW 1 */ + 12666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12667 "00000000" // /* MW 1 */ + 12668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12669 "00000000" // /* MW 1 */ + 12670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12671 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 58 + 12672 "10000100" // JZ r16, #12704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12704 delay_slots=5 */ + 12673 "00000001" // /* MW 5 */ + 12674 "00000000" // /* MW 4 */ + 12675 "11010000" // /* MW 3 */ + 12676 "00011000" // /* MW 2 */ + 12677 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12678 "11111000" // MOV vaddSign0, crMCDEn /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12679 "01100000" // /* MW 3 */ + 12680 "00111011" // /* MW 2 */ + 12681 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot + 12682 "01000100" // MOVXM r19, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12683 "00000000" // /* MW 5 */ + 12684 "10100000" // /* MW 4 */ + 12685 "00001001" // /* MW 3 */ + 12686 "01111111" // /* MW 2 */ + 12687 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12693 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12694 "01111010" // NOPA; NOPS; MOVX r19, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12695 "00000001" // /* MW 9 */ + 12696 "00100110" // /* MW 8 */ + 12697 "00000000" // /* MW 7 */ + 12698 "00000000" // /* MW 6 */ + 12699 "01011011" // /* MW 5 */ + 12700 "00000001" // /* MW 4 */ + 12701 "11110000" // /* MW 3 */ + 12702 "00101100" // /* MW 2 */ + 12703 "00000000" // /* MW 1 */ +.label TGT_F_ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh_608 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 + 12704 "10111010" // LDA lr, [sp, #-4]; MOVXM p0, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12705 "00010000" // /* MW 9 */ + 12706 "00110100" // /* MW 8 */ + 12707 "00110010" // /* MW 7 */ + 12708 "11110000" // /* MW 6 */ + 12709 "00000001" // /* MW 5 */ + 12710 "00000000" // /* MW 4 */ + 12711 "00100000" // /* MW 3 */ + 12712 "10000111" // /* MW 2 */ + 12713 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12714 "11010100" // LDA.s8 r16, [p0]; VINSERT.32 x0, x0, #0, r19 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12715 "11100010" // /* MW 5 */ + 12716 "00000100" // /* MW 4 */ + 12717 "01010000" // /* MW 3 */ + 12718 "11000000" // /* MW 2 */ + 12719 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 + 12720 "01010100" // LDA p0, [sp, #-12]; MOV dj0, #186 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12721 "11101001" // /* MW 5 */ + 12722 "00000010" // /* MW 4 */ + 12723 "00100001" // /* MW 3 */ + 12724 "10000011" // /* MW 2 */ + 12725 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 + 12726 "11010100" // LDA r15, [sp, #-8]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12727 "00100101" // /* MW 5 */ + 12728 "00000001" // /* MW 4 */ + 12729 "00100000" // /* MW 3 */ + 12730 "00111110" // /* MW 2 */ + 12731 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first + 12732 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12733 "00000001" // /* MW 5 */ + 12734 "00000000" // /* MW 4 */ + 12735 "00000000" // /* MW 3 */ + 12736 "11111000" // /* MW 2 */ + 12737 "11111111" // /* MW 1 */ + 12738 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12739 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12741 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 39 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12742 "00011000" // ST.s16 r16, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12743 "00010111" // /* MW 3 */ + 12744 "00000010" // /* MW 2 */ + 12745 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.src_ref 2 "conv2d_dw_bf16_params.h" 267 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12746 "11100100" // RET lr; MOV crRnd, r16 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 12747 "01000001" // /* MW 5 */ + 12748 "01110000" // /* MW 4 */ + 12749 "00001111" // /* MW 3 */ + 12750 "00000000" // /* MW 2 */ + 12751 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12752 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12753 "00010110" // /* MW 3 */ + 12754 "01000000" // /* MW 2 */ + 12755 "00001000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12756 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12757 "11000000" // /* MW 3 */ + 12758 "01100000" // /* MW 2 */ + 12759 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16_params.h" 266 41 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12760 "10111000" // VEXTRACT.16 r16, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12761 "00000001" // /* MW 3 */ + 12762 "00000001" // /* MW 2 */ + 12763 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12765 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh__end +.label __ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh___func_end0 + 12767 "00000000" // /* MW 1 */ +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_begin0 +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.function conv2d_dw<(unsigned char)'\x01', bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> > > _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 2 "conv2d_dw_bf16.h" 199 first +.function_start + 12768 "11111000" // MOV r17, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12769 "11000000" // /* MW 3 */ + 12770 "01010110" // /* MW 2 */ + 12771 "00011100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 2 "conv2d_dw_bf16.h" 204 82 + 12772 "01010100" // LDA p1, [p1]; MOV m7, #106 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12773 "10101001" // /* MW 5 */ + 12774 "00000001" // /* MW 4 */ + 12775 "11011110" // /* MW 3 */ + 12776 "10010011" // /* MW 2 */ + 12777 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 12 +.src_ref 1 "io_buffer_main.h" 125 25 + 12778 "00010100" // LDA p0, [p0]; ADD.NC p3, r17, #2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12779 "00000010" // /* MW 5 */ + 12780 "11010001" // /* MW 4 */ + 12781 "11010110" // /* MW 3 */ + 12782 "10000011" // /* MW 2 */ + 12783 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 204 82 first + 12784 "10011000" // LDA.u8 r4, [p3], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12785 "10001010" // /* MW 3 */ + 12786 "11101000" // /* MW 2 */ + 12787 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 first + 12788 "10011000" // LDA dj2, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12789 "01000110" // /* MW 3 */ + 12790 "11111101" // /* MW 2 */ + 12791 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 12792 "10011000" // LDA dn2, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12793 "00100110" // /* MW 3 */ + 12794 "00111101" // /* MW 2 */ + 12795 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 12796 "10011000" // LDA dj6, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12797 "01000110" // /* MW 3 */ + 12798 "11111111" // /* MW 2 */ + 12799 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 12800 "10011000" // LDA dn6, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12801 "00100110" // /* MW 3 */ + 12802 "00101111" // /* MW 2 */ + 12803 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 221 4 + 12804 "10011000" // LDA m2, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12805 "00000110" // /* MW 3 */ + 12806 "00101101" // /* MW 2 */ + 12807 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 first + 12808 "10011000" // LDA dj0, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12809 "01000110" // /* MW 3 */ + 12810 "11111100" // /* MW 2 */ + 12811 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 12812 "10011000" // LDA dn0, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12813 "00100110" // /* MW 3 */ + 12814 "00111100" // /* MW 2 */ + 12815 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 12816 "10011000" // LDA dj4, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12817 "01000110" // /* MW 3 */ + 12818 "11111110" // /* MW 2 */ + 12819 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 12820 "10011000" // LDA dn4, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12821 "00100110" // /* MW 3 */ + 12822 "00101110" // /* MW 2 */ + 12823 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 222 4 + 12824 "10011000" // LDA m0, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12825 "00000110" // /* MW 3 */ + 12826 "00101100" // /* MW 2 */ + 12827 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 first + 12828 "10011000" // LDA dj1, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12829 "11000110" // /* MW 3 */ + 12830 "11111100" // /* MW 2 */ + 12831 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 12832 "10011000" // LDA dn1, [p3], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12833 "10100110" // /* MW 3 */ + 12834 "00111100" // /* MW 2 */ + 12835 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 12836 "10011000" // LDA dj5, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12837 "11000110" // /* MW 3 */ + 12838 "11111110" // /* MW 2 */ + 12839 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 12840 "10011000" // LDA dn5, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12841 "10100110" // /* MW 3 */ + 12842 "00101110" // /* MW 2 */ + 12843 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 223 4 + 12844 "10011000" // LDA m1, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12845 "10000110" // /* MW 3 */ + 12846 "00101100" // /* MW 2 */ + 12847 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 first + 12848 "10011000" // LDA dj7, [p3], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12849 "11000110" // /* MW 3 */ + 12850 "11111111" // /* MW 2 */ + 12851 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 + 12852 "10011000" // LDA dn7, [p3], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12853 "10100110" // /* MW 3 */ + 12854 "00101111" // /* MW 2 */ + 12855 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 224 4 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 12856 "10111010" // LDA m7, [p3], #8; MOVXM p4, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12857 "00010000" // /* MW 9 */ + 12858 "00110100" // /* MW 8 */ + 12859 "00110010" // /* MW 7 */ + 12860 "11110010" // /* MW 6 */ + 12861 "00000001" // /* MW 5 */ + 12862 "00000000" // /* MW 4 */ + 12863 "11010000" // /* MW 3 */ + 12864 "11110000" // /* MW 2 */ + 12865 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 12866 "11010100" // LDA.s8 r6, [p4]; MOV p4, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12867 "10000001" // /* MW 5 */ + 12868 "11000101" // /* MW 4 */ + 12869 "01011000" // /* MW 3 */ + 12870 "10011000" // /* MW 2 */ + 12871 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 12872 "10111000" // MOV m3, #-120 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12873 "00010000" // /* MW 3 */ + 12874 "00001111" // /* MW 2 */ + 12875 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 204 43 + 12876 "10110110" // VLDA.CONV.fp32.bf16 cml0, [p4];VLDB x6, [p0], #64; MOVX r2, #3; MOV dc4, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12877 "01011000" // /* MW 11 */ + 12878 "00000000" // /* MW 10 */ + 12879 "01100000" // /* MW 9 */ + 12880 "01101010" // /* MW 8 */ + 12881 "00100000" // /* MW 7 */ + 12882 "00000000" // /* MW 6 */ + 12883 "01101000" // /* MW 5 */ + 12884 "00111011" // /* MW 4 */ + 12885 "01110000" // /* MW 3 */ + 12886 "10000101" // /* MW 2 */ + 12887 "10000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 first +.src_ref 2 "conv2d_dw_bf16.h" 225 4 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 12888 "01111110" // LDA dj3, [p3], #-4; VLDB x1, [p0], #64; MOVS dc3, dc4; LSHL r2, r4, r2; MOV m6, #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12889 "01100000" // /* MW 13 */ + 12890 "00001001" // /* MW 12 */ + 12891 "01100010" // /* MW 11 */ + 12892 "00001011" // /* MW 10 */ + 12893 "00010000" // /* MW 9 */ + 12894 "11100000" // /* MW 8 */ + 12895 "00101101" // /* MW 7 */ + 12896 "00000100" // /* MW 6 */ + 12897 "11101001" // /* MW 5 */ + 12898 "00111000" // /* MW 4 */ + 12899 "11010000" // /* MW 3 */ + 12900 "10111000" // /* MW 2 */ + 12901 "01111111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 12902 "10111010" // LDA dn3, [p3], #8; MOVS dc1, dc3; MOV m5, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12903 "01110010" // /* MW 9 */ + 12904 "10010000" // /* MW 8 */ + 12905 "10000000" // /* MW 7 */ + 12906 "00000010" // /* MW 6 */ + 12907 "01001011" // /* MW 5 */ + 12908 "00001100" // /* MW 4 */ + 12909 "11010001" // /* MW 3 */ + 12910 "10110100" // /* MW 2 */ + 12911 "01100101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 204 43 +.src_ref 2 "conv2d_dw_bf16.h" 225 4 + 12912 "10111010" // LDA m3, [p3], m3; PADDB [p1], m5; MOV dc7, dc1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12913 "01111110" // /* MW 9 */ + 12914 "11000000" // /* MW 8 */ + 12915 "11100001" // /* MW 7 */ + 12916 "00000011" // /* MW 6 */ + 12917 "10010000" // /* MW 5 */ + 12918 "10101011" // /* MW 4 */ + 12919 "11010001" // /* MW 3 */ + 12920 "00110000" // /* MW 2 */ + 12921 "01101101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 2 "conv2d_dw_bf16.h" 244 56 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first + 12922 "10111010" // LDA r2, [p3], m6; VLDB.2D x3, [p1], d7; MOV m4, #-112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12923 "01011110" // /* MW 9 */ + 12924 "10010000" // /* MW 8 */ + 12925 "00000111" // /* MW 7 */ + 12926 "00000010" // /* MW 6 */ + 12927 "11110100" // /* MW 5 */ + 12928 "11110000" // /* MW 4 */ + 12929 "11010001" // /* MW 3 */ + 12930 "00001010" // /* MW 2 */ + 12931 "01111001" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 + 12932 "00101100" // LDA.s16 r7, [p3], m4; MOVX r0, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12933 "10000010" // /* MW 5 */ + 12934 "00000000" // /* MW 4 */ + 12935 "01010000" // /* MW 3 */ + 12936 "00011110" // /* MW 2 */ + 12937 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first + 12938 "01110110" // LDA m4, [p3], #16; MOVS dc6, dc4; MOVXM ls, #13040 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12939 "00010000" // /* MW 11 */ + 12940 "01111000" // /* MW 10 */ + 12941 "01111001" // /* MW 9 */ + 12942 "00001100" // /* MW 8 */ + 12943 "00000000" // /* MW 7 */ + 12944 "00000000" // /* MW 6 */ + 12945 "01001011" // /* MW 5 */ + 12946 "00010000" // /* MW 4 */ + 12947 "11010110" // /* MW 3 */ + 12948 "11000000" // /* MW 2 */ + 12949 "01101001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 244 56 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 + 12950 "01110110" // LDA r4, [p3, #-28]; MOVS dc2, dc4; MOVXM le, #13136 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 12951 "00010000" // /* MW 11 */ + 12952 "10101000" // /* MW 10 */ + 12953 "10111001" // /* MW 9 */ + 12954 "00001101" // /* MW 8 */ + 12955 "00000000" // /* MW 7 */ + 12956 "00000000" // /* MW 6 */ + 12957 "01001011" // /* MW 5 */ + 12958 "00010000" // /* MW 4 */ + 12959 "11010010" // /* MW 3 */ + 12960 "10010010" // /* MW 2 */ + 12961 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first + 12962 "10110100" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12963 "00000101" // /* MW 5 */ + 12964 "01100001" // /* MW 4 */ + 12965 "10000100" // /* MW 3 */ + 12966 "00010110" // /* MW 2 */ + 12967 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 + 12968 "11111000" // VMOV cml3, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12969 "10001010" // /* MW 3 */ + 12970 "00000000" // /* MW 2 */ + 12971 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 244 4 + 12972 "10111010" // LDA r5, [p3]; MOVXM p3, #13200 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12973 "00010000" // /* MW 9 */ + 12974 "11001000" // /* MW 8 */ + 12975 "10110001" // /* MW 7 */ + 12976 "00001101" // /* MW 6 */ + 12977 "00000000" // /* MW 5 */ + 12978 "00000000" // /* MW 4 */ + 12979 "11010000" // /* MW 3 */ + 12980 "10010110" // /* MW 2 */ + 12981 "01100000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first + 12982 "10111010" // NOPA; MOVX r1, #32; VEXTBCST.128 x10, x3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12983 "10101000" // /* MW 9 */ + 12984 "00000001" // /* MW 8 */ + 12985 "10001110" // /* MW 7 */ + 12986 "00001010" // /* MW 6 */ + 12987 "00010100" // /* MW 5 */ + 12988 "00000000" // /* MW 4 */ + 12989 "11110000" // /* MW 3 */ + 12990 "00101100" // /* MW 2 */ + 12991 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.src_ref 2 "conv2d_dw_bf16.h" 271 12 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 +.src_ref 2 "conv2d_dw_bf16.h" 273 12 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 +.src_ref 2 "conv2d_dw_bf16.h" 275 12 +.src_ref 2 "conv2d_dw_bf16.h" 276 12 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 + 12992 "11100001" // MOVA r17, #60; NOPB; NOPS; MOVX r3, #48; VBCST.16 x0, r7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12993 "00000000" // /* MW 15 */ + 12994 "00000000" // /* MW 14 */ + 12995 "01111000" // /* MW 13 */ + 12996 "10111001" // /* MW 12 */ + 12997 "00001110" // /* MW 11 */ + 12998 "00001000" // /* MW 10 */ + 12999 "00110110" // /* MW 9 */ + 13000 "00000000" // /* MW 8 */ + 13001 "01011011" // /* MW 7 */ + 13002 "00000001" // /* MW 6 */ + 13003 "00100000" // /* MW 5 */ + 13004 "00000000" // /* MW 4 */ + 13005 "00000000" // /* MW 3 */ + 13006 "10010001" // /* MW 2 */ + 13007 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13008 "00001011" // NOPA; NOPB; MOVS dc0, dc4; MOVX crRnd, r6; VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13009 "01101010" // /* MW 15 */ + 13010 "01100011" // /* MW 14 */ + 13011 "10101100" // /* MW 13 */ + 13012 "00000011" // /* MW 12 */ + 13013 "00001110" // /* MW 11 */ + 13014 "00000010" // /* MW 10 */ + 13015 "11010100" // /* MW 9 */ + 13016 "00001101" // /* MW 8 */ + 13017 "01001011" // /* MW 7 */ + 13018 "00010000" // /* MW 6 */ + 13019 "00100000" // /* MW 5 */ + 13020 "00000000" // /* MW 4 */ + 13021 "11110000" // /* MW 3 */ + 13022 "00101100" // /* MW 2 */ + 13023 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13024 "00001011" // LDA p2, [p2]; NOPB; MOVS dc5, dc4; ADD r2, r2, #-2; ADD.NC lc, r4, #-1; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13025 "00011010" // /* MW 15 */ + 13026 "01001000" // /* MW 14 */ + 13027 "11001100" // /* MW 13 */ + 13028 "00111111" // /* MW 12 */ + 13029 "10111001" // /* MW 11 */ + 13030 "11011010" // /* MW 10 */ + 13031 "00101111" // /* MW 9 */ + 13032 "00000100" // /* MW 8 */ + 13033 "01001011" // /* MW 7 */ + 13034 "00010000" // /* MW 6 */ + 13035 "00100101" // /* MW 5 */ + 13036 "00000000" // /* MW 4 */ + 13037 "11010000" // /* MW 3 */ + 13038 "10100011" // /* MW 2 */ + 13039 "01000000" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_272 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.noswbrkpt +.loop_nesting 1 + 13040 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13041 "01101110" // /* MW 9 */ + 13042 "10000001" // /* MW 8 */ + 13043 "10000100" // /* MW 7 */ + 13044 "00000010" // /* MW 6 */ + 13045 "11110100" // /* MW 5 */ + 13046 "11110000" // /* MW 4 */ + 13047 "01110001" // /* MW 3 */ + 13048 "10110011" // /* MW 2 */ + 13049 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13050 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13051 "00000001" // /* MW 9 */ + 13052 "10001001" // /* MW 8 */ + 13053 "10001010" // /* MW 7 */ + 13054 "01000110" // /* MW 6 */ + 13055 "00001011" // /* MW 5 */ + 13056 "10011100" // /* MW 4 */ + 13057 "11101010" // /* MW 3 */ + 13058 "00111000" // /* MW 2 */ + 13059 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13060 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13061 "00000001" // /* MW 9 */ + 13062 "00110101" // /* MW 8 */ + 13063 "10001001" // /* MW 7 */ + 13064 "11000110" // /* MW 6 */ + 13065 "10000110" // /* MW 5 */ + 13066 "00110000" // /* MW 4 */ + 13067 "01101010" // /* MW 3 */ + 13068 "10110001" // /* MW 2 */ + 13069 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13070 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13071 "00000110" // /* MW 3 */ + 13072 "10001001" // /* MW 2 */ + 13073 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13074 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13075 "10100001" // /* MW 7 */ + 13076 "01001000" // /* MW 6 */ + 13077 "10001100" // /* MW 5 */ + 13078 "11000110" // /* MW 4 */ + 13079 "10001110" // /* MW 3 */ + 13080 "10110000" // /* MW 2 */ + 13081 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13082 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13083 "10100001" // /* MW 7 */ + 13084 "00110110" // /* MW 6 */ + 13085 "10001010" // /* MW 5 */ + 13086 "01000110" // /* MW 4 */ + 13087 "00001111" // /* MW 3 */ + 13088 "10011100" // /* MW 2 */ + 13089 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13090 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13091 "00001110" // /* MW 3 */ + 13092 "10001001" // /* MW 2 */ + 13093 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13094 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13095 "11100001" // /* MW 7 */ + 13096 "10010010" // /* MW 6 */ + 13097 "10001011" // /* MW 5 */ + 13098 "01000110" // /* MW 4 */ + 13099 "00000011" // /* MW 3 */ + 13100 "00011100" // /* MW 2 */ + 13101 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13102 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13103 "11100001" // /* MW 7 */ + 13104 "01010110" // /* MW 6 */ + 13105 "10001000" // /* MW 5 */ + 13106 "01000110" // /* MW 4 */ + 13107 "00000111" // /* MW 3 */ + 13108 "00011100" // /* MW 2 */ + 13109 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13110 "10111010" // NOPA; NOPB; VSHIFT x4, x6, x1, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13111 "01101110" // /* MW 9 */ + 13112 "01000001" // /* MW 8 */ + 13113 "00011000" // /* MW 7 */ + 13114 "00000001" // /* MW 6 */ + 13115 "00010000" // /* MW 5 */ + 13116 "00000000" // /* MW 4 */ + 13117 "11110000" // /* MW 3 */ + 13118 "00101100" // /* MW 2 */ + 13119 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13120 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm4, dm3, x6, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13121 "01101010" // /* MW 15 */ + 13122 "01100011" // /* MW 14 */ + 13123 "01111100" // /* MW 13 */ + 13124 "10100101" // /* MW 12 */ + 13125 "00000001" // /* MW 11 */ + 13126 "00000000" // /* MW 10 */ + 13127 "00000000" // /* MW 9 */ + 13128 "00000000" // /* MW 8 */ + 13129 "01011011" // /* MW 7 */ + 13130 "00000001" // /* MW 6 */ + 13131 "00100000" // /* MW 5 */ + 13132 "00000000" // /* MW 4 */ + 13133 "11110000" // /* MW 3 */ + 13134 "00101100" // /* MW 2 */ + 13135 "00000000" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_368 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 13136 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13137 "00011010" // /* MW 15 */ + 13138 "01001000" // /* MW 14 */ + 13139 "01111100" // /* MW 13 */ + 13140 "10100101" // /* MW 12 */ + 13141 "00000001" // /* MW 11 */ + 13142 "00000000" // /* MW 10 */ + 13143 "00000000" // /* MW 9 */ + 13144 "00000000" // /* MW 8 */ + 13145 "01011011" // /* MW 7 */ + 13146 "00000001" // /* MW 6 */ + 13147 "00100000" // /* MW 5 */ + 13148 "00000000" // /* MW 4 */ + 13149 "11110000" // /* MW 3 */ + 13150 "00101100" // /* MW 2 */ + 13151 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13152 "10111010" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13153 "01101110" // /* MW 9 */ + 13154 "10000001" // /* MW 8 */ + 13155 "10000100" // /* MW 7 */ + 13156 "00000010" // /* MW 6 */ + 13157 "10010000" // /* MW 5 */ + 13158 "01110011" // /* MW 4 */ + 13159 "11110100" // /* MW 3 */ + 13160 "00001100" // /* MW 2 */ + 13161 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13162 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13163 "00000001" // /* MW 7 */ + 13164 "10001001" // /* MW 6 */ + 13165 "10001010" // /* MW 5 */ + 13166 "01000110" // /* MW 4 */ + 13167 "00001011" // /* MW 3 */ + 13168 "10011100" // /* MW 2 */ + 13169 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13170 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13171 "00000001" // /* MW 7 */ + 13172 "00110101" // /* MW 6 */ + 13173 "10001001" // /* MW 5 */ + 13174 "11000110" // /* MW 4 */ + 13175 "10000110" // /* MW 3 */ + 13176 "00110000" // /* MW 2 */ + 13177 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13178 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13179 "00000110" // /* MW 3 */ + 13180 "10001001" // /* MW 2 */ + 13181 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13182 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13183 "10100001" // /* MW 7 */ + 13184 "01001000" // /* MW 6 */ + 13185 "10001100" // /* MW 5 */ + 13186 "01000110" // /* MW 4 */ + 13187 "00001111" // /* MW 3 */ + 13188 "10011100" // /* MW 2 */ + 13189 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13190 "01001010" // NOPA; VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13191 "10100001" // /* MW 9 */ + 13192 "00110110" // /* MW 8 */ + 13193 "10001010" // /* MW 7 */ + 13194 "11000010" // /* MW 6 */ + 13195 "10001110" // /* MW 5 */ + 13196 "10110000" // /* MW 4 */ + 13197 "11110100" // /* MW 3 */ + 13198 "00101100" // /* MW 2 */ + 13199 "00000000" // /* MW 1 */ +.label TGT_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_432 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13200 "10110100" // VLDB.2D x3, [p1], d7; VSHIFT x11, x1, x2, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13201 "00011101" // /* MW 5 */ + 13202 "00010010" // /* MW 4 */ + 13203 "10001011" // /* MW 3 */ + 13204 "00011110" // /* MW 2 */ + 13205 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13206 "01011010" // MOVXM le, #13376; VMAC.f dm3, dm4, x9, x7, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13207 "11100001" // /* MW 9 */ + 13208 "10010010" // /* MW 8 */ + 13209 "10001011" // /* MW 7 */ + 13210 "00000010" // /* MW 6 */ + 13211 "01000100" // /* MW 5 */ + 13212 "10110111" // /* MW 4 */ + 13213 "00000001" // /* MW 3 */ + 13214 "00000000" // /* MW 2 */ + 13215 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_dw_bf16.h" 250 8 +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13216 "01000110" // VLDA.CONV.fp32.bf16 cml0, [p4]; MOVXM ls, #13296; VMAC.f dm0, dm2, x11, x7, r17 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13217 "11100001" // /* MW 11 */ + 13218 "01010110" // /* MW 10 */ + 13219 "10001000" // /* MW 9 */ + 13220 "00000010" // /* MW 8 */ + 13221 "00111111" // /* MW 7 */ + 13222 "10001111" // /* MW 6 */ + 13223 "00000001" // /* MW 5 */ + 13224 "00000000" // /* MW 4 */ + 13225 "01110000" // /* MW 3 */ + 13226 "10000101" // /* MW 2 */ + 13227 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 250 8 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13228 "10011000" // ADD.NC lc, r4, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13229 "01111111" // /* MW 3 */ + 13230 "01110010" // /* MW 2 */ + 13231 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13232 "10011000" // VLDA x6, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13233 "10011011" // /* MW 3 */ + 13234 "00011101" // /* MW 2 */ + 13235 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13236 "00011000" // VLDB x1, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13237 "01110100" // /* MW 3 */ + 13238 "00011100" // /* MW 2 */ + 13239 "00111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13240 "00011000" // VLDB.3D x2, [p0], d2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13241 "10110100" // /* MW 3 */ + 13242 "01011000" // /* MW 2 */ + 13243 "00111000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13244 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13245 "10010110" // /* MW 3 */ + 13246 "00010001" // /* MW 2 */ + 13247 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13248 "00011000" // VCONV.bf16.fp32 x6, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13249 "00010110" // /* MW 3 */ + 13250 "00010000" // /* MW 2 */ + 13251 "00001011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13252 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13253 "01101100" // /* MW 3 */ + 13254 "01010000" // /* MW 2 */ + 13255 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13256 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13257 "00010100" // /* MW 3 */ + 13258 "01010011" // /* MW 2 */ + 13259 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13260 "00000010" // VST x8, [p2], m4; VMAX_LT.bf16 x10, r16, x10, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13261 "01110000" // /* MW 7 */ + 13262 "00110110" // /* MW 6 */ + 13263 "10101000" // /* MW 5 */ + 13264 "00000010" // /* MW 4 */ + 13265 "01100000" // /* MW 3 */ + 13266 "01000010" // /* MW 2 */ + 13267 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13268 "01011000" // VEXTBCST.128 x10, x3, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13269 "00000011" // /* MW 3 */ + 13270 "00011100" // /* MW 2 */ + 13271 "00011101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13272 "00000010" // VST.3D x10, [p2], d1; VMOV cml3, cml0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13273 "01110000" // /* MW 7 */ + 13274 "01000101" // /* MW 6 */ + 13275 "10000000" // /* MW 5 */ + 13276 "00000001" // /* MW 4 */ + 13277 "01100000" // /* MW 3 */ + 13278 "01010010" // /* MW 2 */ + 13279 "01000111" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13280 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm4, dm3, x6, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13281 "01000001" // /* MW 7 */ + 13282 "01101101" // /* MW 6 */ + 13283 "10001100" // /* MW 5 */ + 13284 "01000110" // /* MW 4 */ + 13285 "00000111" // /* MW 3 */ + 13286 "00011100" // /* MW 2 */ + 13287 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13288 "01100010" // VSHIFT x4, x6, x1, r0; VMAC.f dm1, dm0, x1, x10, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13289 "01000001" // /* MW 7 */ + 13290 "00000011" // /* MW 6 */ + 13291 "10001001" // /* MW 5 */ + 13292 "11000110" // /* MW 4 */ + 13293 "10000010" // /* MW 3 */ + 13294 "00110000" // /* MW 2 */ + 13295 "00000010" // /* MW 1 */ +.label ZLS_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_528 +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.noswbrkpt +.loop_nesting 2 + 13296 "10111010" // VLDA x6, [p0], #64; VLDB.2D x3, [p1], d7; VSHIFT x10, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13297 "01101110" // /* MW 9 */ + 13298 "10000001" // /* MW 8 */ + 13299 "10000100" // /* MW 7 */ + 13300 "00000010" // /* MW 6 */ + 13301 "11110100" // /* MW 5 */ + 13302 "11110000" // /* MW 4 */ + 13303 "01110001" // /* MW 3 */ + 13304 "10110011" // /* MW 2 */ + 13305 "00000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13306 "01001010" // VLDB x1, [p0], #64; VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13307 "00000001" // /* MW 9 */ + 13308 "10001001" // /* MW 8 */ + 13309 "10001010" // /* MW 7 */ + 13310 "01000110" // /* MW 6 */ + 13311 "00001011" // /* MW 5 */ + 13312 "10011100" // /* MW 4 */ + 13313 "11101010" // /* MW 3 */ + 13314 "00111000" // /* MW 2 */ + 13315 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13316 "01001010" // VLDB.3D x2, [p0], d2; VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13317 "00000001" // /* MW 9 */ + 13318 "00110101" // /* MW 8 */ + 13319 "10001001" // /* MW 7 */ + 13320 "11000110" // /* MW 6 */ + 13321 "10000110" // /* MW 5 */ + 13322 "00110000" // /* MW 4 */ + 13323 "01101010" // /* MW 3 */ + 13324 "10110001" // /* MW 2 */ + 13325 "00000000" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13326 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13327 "00000110" // /* MW 3 */ + 13328 "10001001" // /* MW 2 */ + 13329 "00011101" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13330 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13331 "10100001" // /* MW 7 */ + 13332 "01001000" // /* MW 6 */ + 13333 "10001100" // /* MW 5 */ + 13334 "11000110" // /* MW 4 */ + 13335 "10001110" // /* MW 3 */ + 13336 "10110000" // /* MW 2 */ + 13337 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 13338 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13339 "10100001" // /* MW 7 */ + 13340 "00110110" // /* MW 6 */ + 13341 "10001010" // /* MW 5 */ + 13342 "01000110" // /* MW 4 */ + 13343 "00001111" // /* MW 3 */ + 13344 "10011100" // /* MW 2 */ + 13345 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13346 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13347 "00001110" // /* MW 3 */ + 13348 "10001001" // /* MW 2 */ + 13349 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 268 12 first +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13350 "01100010" // VEXTBCST.128 x10, x3, #0; VMAC.f dm3, dm4, x9, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13351 "11100001" // /* MW 7 */ + 13352 "10010010" // /* MW 6 */ + 13353 "10001011" // /* MW 5 */ + 13354 "01000110" // /* MW 4 */ + 13355 "00000011" // /* MW 3 */ + 13356 "00011100" // /* MW 2 */ + 13357 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 265 12 first +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13358 "01100010" // VEXTBCST.128 x8, x3, #1; VMAC.f dm0, dm2, x11, x7, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13359 "11100001" // /* MW 7 */ + 13360 "01010110" // /* MW 6 */ + 13361 "10001000" // /* MW 5 */ + 13362 "01000110" // /* MW 4 */ + 13363 "00000111" // /* MW 3 */ + 13364 "00011100" // /* MW 2 */ + 13365 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first + 13366 "10010100" // NOPA; VSHIFT x4, x6, x1, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13367 "00000101" // /* MW 5 */ + 13368 "01100001" // /* MW 4 */ + 13369 "11110100" // /* MW 3 */ + 13370 "00101100" // /* MW 2 */ + 13371 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 270 12 first + 13372 "01001000" // VMAC.f dm4, dm3, x6, x10, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13373 "01000001" // /* MW 3 */ + 13374 "01101101" // /* MW 2 */ + 13375 "10001100" // /* MW 1 */ +.label ZLE_F_Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params_608 +.src_ref 2 "conv2d_dw_bf16.h" 274 12 first +.end_of_loop +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13376 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x10, r17 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13377 "00011010" // /* MW 15 */ + 13378 "01001000" // /* MW 14 */ + 13379 "01111100" // /* MW 13 */ + 13380 "10100101" // /* MW 12 */ + 13381 "00000001" // /* MW 11 */ + 13382 "00000000" // /* MW 10 */ + 13383 "00000000" // /* MW 9 */ + 13384 "00000000" // /* MW 8 */ + 13385 "01011011" // /* MW 7 */ + 13386 "00000001" // /* MW 6 */ + 13387 "00100000" // /* MW 5 */ + 13388 "00000000" // /* MW 4 */ + 13389 "11110000" // /* MW 3 */ + 13390 "00101100" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 81 15 first +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 244 4 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13392 "10110110" // PADDA.3D [p0], d0; PADDB.2D [p4], d3; JNZD r2, r2, p3; VSHIFT x10, x1, x2, r0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 13393 "01101000" // /* MW 11 */ + 13394 "10000001" // /* MW 10 */ + 13395 "10000100" // /* MW 9 */ + 13396 "00000010" // /* MW 8 */ + 13397 "00100111" // /* MW 7 */ + 13398 "00000100" // /* MW 6 */ + 13399 "00100000" // /* MW 5 */ + 13400 "11100111" // /* MW 4 */ + 13401 "11111000" // /* MW 3 */ + 13402 "00001100" // /* MW 2 */ + 13403 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 266 12 first +.src_ref 2 "conv2d_dw_bf16.h" 271 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13404 "01100010" // VEXTBCST.128 x5, x3, #2; VMAC.f dm2, dm4, x4, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13405 "00000001" // /* MW 7 */ + 13406 "10001001" // /* MW 6 */ + 13407 "10001010" // /* MW 5 */ + 13408 "01000110" // /* MW 4 */ + 13409 "00001011" // /* MW 3 */ + 13410 "10011100" // /* MW 2 */ + 13411 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 275 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13412 "01100010" // VSHIFT x4, x6, x1, r1; VMAC.f dm1, dm1, x10, x8, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13413 "00000001" // /* MW 7 */ + 13414 "00110101" // /* MW 6 */ + 13415 "10001001" // /* MW 5 */ + 13416 "11000110" // /* MW 4 */ + 13417 "10000110" // /* MW 3 */ + 13418 "00110000" // /* MW 2 */ + 13419 "00000010" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13420 "11011000" // VSHIFT x11, x1, x2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13421 "00000110" // /* MW 3 */ + 13422 "10001001" // /* MW 2 */ + 13423 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 267 12 first +.src_ref 2 "conv2d_dw_bf16.h" 272 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13424 "01100010" // VEXTBCST.128 x7, x3, #3; VMAC.f dm4, dm2, x4, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13425 "10100001" // /* MW 7 */ + 13426 "01001000" // /* MW 6 */ + 13427 "10001100" // /* MW 5 */ + 13428 "01000110" // /* MW 4 */ + 13429 "00001111" // /* MW 3 */ + 13430 "10011100" // /* MW 2 */ + 13431 "00000011" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 first +.src_ref 2 "conv2d_dw_bf16.h" 276 12 first +.delay_slot +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13432 "01100010" // VSHIFT x9, x6, x1, r3; VMAC.f dm2, dm1, x11, x5, r17 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13433 "10100001" // /* MW 7 */ + 13434 "00110110" // /* MW 6 */ + 13435 "10001010" // /* MW 5 */ + 13436 "11000110" // /* MW 4 */ + 13437 "10001110" // /* MW 3 */ + 13438 "10110000" // /* MW 2 */ + 13439 "00000100" // /* MW 1 */ +.src_ref 4 "shuffle.hpp" 142 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13440 "11011000" // VSHIFT x11, x1, x2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13441 "00001110" // /* MW 3 */ + 13442 "10001001" // /* MW 2 */ + 13443 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 273 12 first + 13444 "01001000" // VMAC.f dm3, dm4, x9, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13445 "11100001" // /* MW 3 */ + 13446 "10010010" // /* MW 2 */ + 13447 "10001011" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 277 12 first + 13448 "01001000" // VMAC.f dm0, dm2, x11, x7, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13449 "11100001" // /* MW 3 */ + 13450 "01010110" // /* MW 2 */ + 13451 "10001000" // /* MW 1 */ + 13452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13453 "00000000" // /* MW 1 */ + 13454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13455 "00000000" // /* MW 1 */ + 13456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13457 "00000000" // /* MW 1 */ + 13458 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13459 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 first + 13460 "00011000" // VCONV.bf16.fp32 x10, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13461 "10010110" // /* MW 3 */ + 13462 "00010001" // /* MW 2 */ + 13463 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_dw_bf16.h" 290 first + 13464 "01011100" // VCONV.bf16.fp32 x6, cml0; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 13465 "00000000" // /* MW 5 */ + 13466 "01010000" // /* MW 4 */ + 13467 "11000000" // /* MW 3 */ + 13468 "00000010" // /* MW 2 */ + 13469 "01100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13470 "11111000" // VMAX_LT.bf16 x8, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13471 "01101100" // /* MW 3 */ + 13472 "01010000" // /* MW 2 */ + 13473 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_dw_bf16.h" 286 17 first +.delay_slot + 13474 "01111000" // VSHUFFLE x10, x10, x6, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13475 "00010100" // /* MW 3 */ + 13476 "01010011" // /* MW 2 */ + 13477 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 13478 "11111000" // VMAX_LT.bf16 x10, r16, x10, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13479 "01101100" // /* MW 3 */ + 13480 "01010000" // /* MW 2 */ + 13481 "00011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 2 "conv2d_dw_bf16.h" 285 16 first +.delay_slot + 13482 "00011000" // VST x8, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13483 "00010011" // /* MW 3 */ + 13484 "10001010" // /* MW 2 */ + 13485 "00001010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1159 33 +.delay_slot + 13486 "00011000" // VST.3D x10, [p2], d1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13487 "10010011" // /* MW 3 */ + 13488 "00111010" // /* MW 2 */ +.label _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params__end +.label __Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params___func_end0 + 13489 "00001010" // /* MW 1 */ +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_conv2d_dwc _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 444 first +.src_ref 7 "superkernels.cpp" 449 6 +.function_start + 13504 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13505 "10000000" // /* MW 5 */ + 13506 "11001000" // /* MW 4 */ + 13507 "11001000" // /* MW 3 */ + 13508 "00000111" // /* MW 2 */ + 13509 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 first + 13510 "11010100" // LDA r16, [p4]; MOV r0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13511 "01000001" // /* MW 5 */ + 13512 "00101111" // /* MW 4 */ + 13513 "11010000" // /* MW 3 */ + 13514 "11000010" // /* MW 2 */ + 13515 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 444 + 13516 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13517 "00000001" // /* MW 5 */ + 13518 "00000000" // /* MW 4 */ + 13519 "00000000" // /* MW 3 */ + 13520 "00010000" // /* MW 2 */ + 13521 "00000000" // /* MW 1 */ + 13522 "00000010" // ST r14, [sp, #-8]; MOV r17, CORE_ID /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13523 "01110000" // /* MW 7 */ + 13524 "01110000" // /* MW 6 */ + 13525 "00101101" // /* MW 5 */ + 13526 "00000010" // /* MW 4 */ + 13527 "10110000" // /* MW 3 */ + 13528 "00111010" // /* MW 2 */ + 13529 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 + 13530 "00000010" // ST r13, [sp, #-4]; MOV r13, lr /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13531 "01110000" // /* MW 7 */ + 13532 "11110000" // /* MW 6 */ + 13533 "10101000" // /* MW 5 */ + 13534 "00000001" // /* MW 4 */ + 13535 "10110000" // /* MW 3 */ + 13536 "10110110" // /* MW 2 */ + 13537 "11111111" // /* MW 1 */ + 13538 "10011000" // ST p0, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13539 "00011101" // /* MW 3 */ + 13540 "11101100" // /* MW 2 */ + 13541 "00001111" // /* MW 1 */ + 13542 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13543 "10011101" // /* MW 3 */ + 13544 "11110111" // /* MW 2 */ + 13545 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 + 13546 "00000010" // ST r0, [sp, #-16]; MOV r14, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13547 "01110000" // /* MW 7 */ + 13548 "01100000" // /* MW 6 */ + 13549 "11001010" // /* MW 5 */ + 13550 "00000001" // /* MW 4 */ + 13551 "10110000" // /* MW 3 */ + 13552 "00000010" // /* MW 2 */ + 13553 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 449 6 +.src_ref 7 "superkernels.cpp" 449 16 + 13554 "10000100" // JNZ r16, #13680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13680 delay_slots=5 */ + 13555 "00000001" // /* MW 5 */ + 13556 "01000000" // /* MW 4 */ + 13557 "10111000" // /* MW 3 */ + 13558 "00011010" // /* MW 2 */ + 13559 "10000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 13560 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13561 "11000000" // /* MW 3 */ + 13562 "11010110" // /* MW 2 */ + 13563 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 22 first +.delay_slot + 13564 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13565 "10010000" // /* MW 3 */ + 13566 "01100010" // /* MW 2 */ + 13567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 30 +.delay_slot + 13568 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13569 "11111011" // /* MW 3 */ + 13570 "01100011" // /* MW 2 */ + 13571 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13572 "01000100" // MOVXM p3, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13573 "10100000" // /* MW 5 */ + 13574 "11001000" // /* MW 4 */ + 13575 "11000110" // /* MW 3 */ + 13576 "00000111" // /* MW 2 */ + 13577 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 446 11 +.delay_slot + 13578 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13579 "00110001" // /* MW 3 */ + 13580 "00000110" // /* MW 2 */ + 13581 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13582 "00111010" // MOVS p7, p1; MOVXM p1, #509032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13583 "00010001" // /* MW 9 */ + 13584 "00110100" // /* MW 8 */ + 13585 "10110010" // /* MW 7 */ + 13586 "11110000" // /* MW 6 */ + 13587 "00000001" // /* MW 5 */ + 13588 "00000000" // /* MW 4 */ + 13589 "01100000" // /* MW 3 */ + 13590 "10010001" // /* MW 2 */ + 13591 "11110000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13592 "01110110" // ST.s8 r16, [p1]; MOVS p0, p2; MOVXM p1, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13593 "00010000" // /* MW 11 */ + 13594 "00110010" // /* MW 10 */ + 13595 "10110010" // /* MW 9 */ + 13596 "11110000" // /* MW 8 */ + 13597 "00000001" // /* MW 7 */ + 13598 "00000000" // /* MW 6 */ + 13599 "10001011" // /* MW 5 */ + 13600 "10001000" // /* MW 4 */ + 13601 "11100000" // /* MW 3 */ + 13602 "11000000" // /* MW 2 */ + 13603 "00100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13605 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 451 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 13606 "00000100" // JL #12096 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12096 delay_slots=5 */ + 13607 "00000001" // /* MW 5 */ + 13608 "00000000" // /* MW 4 */ + 13609 "10100000" // /* MW 3 */ + 13610 "00010111" // /* MW 2 */ + 13611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13615 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13616 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13617 "00110001" // /* MW 3 */ + 13618 "00100000" // /* MW 2 */ + 13619 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 13620 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13621 "00000101" // /* MW 3 */ + 13622 "00100000" // /* MW 2 */ + 13623 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 13624 "00000010" // ST r16, [p1]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13625 "01110000" // /* MW 7 */ + 13626 "10100101" // /* MW 6 */ + 13627 "00000001" // /* MW 5 */ + 13628 "00000000" // /* MW 4 */ + 13629 "00110000" // /* MW 3 */ + 13630 "11000010" // /* MW 2 */ + 13631 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 44 +.src_ref 7 "superkernels.cpp" 461 2 +.return_address + 13632 "00000010" // MOVS p1, p7; ADD.NC p2, r14, #8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13633 "00000000" // /* MW 7 */ + 13634 "10000010" // /* MW 6 */ + 13635 "00110011" // /* MW 5 */ + 13636 "00000001" // /* MW 4 */ + 13637 "01100000" // /* MW 3 */ + 13638 "10010001" // /* MW 2 */ + 13639 "00110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 17 first + 13640 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13641 "00111010" // /* MW 3 */ + 13642 "00000110" // /* MW 2 */ + 13643 "00000010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 +.src_ref 7 "superkernels.cpp" 453 15 first + 13644 "10111010" // LDA.u16 r16, [p2, #4]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13645 "00010000" // /* MW 9 */ + 13646 "00110000" // /* MW 8 */ + 13647 "00110010" // /* MW 7 */ + 13648 "11110001" // /* MW 6 */ + 13649 "00000001" // /* MW 5 */ + 13650 "00000000" // /* MW 4 */ + 13651 "01010000" // /* MW 3 */ + 13652 "11000011" // /* MW 2 */ + 13653 "01000100" // /* MW 1 */ + 13654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13655 "00000000" // /* MW 1 */ + 13656 "10000100" // J #13696 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=13696 delay_slots=5 */ + 13657 "00000000" // /* MW 5 */ + 13658 "00000000" // /* MW 4 */ + 13659 "11000000" // /* MW 3 */ + 13660 "00011010" // /* MW 2 */ + 13661 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 +.src_ref 7 "superkernels.cpp" 457 26 +.delay_slot + 13662 "01000100" // MOVXM p3, #509016 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13663 "10110000" // /* MW 5 */ + 13664 "11001000" // /* MW 4 */ + 13665 "11000110" // /* MW 3 */ + 13666 "00000111" // /* MW 2 */ + 13667 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13669 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13671 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 452 15 first +.delay_slot + 13672 "10011000" // ST r17, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13673 "00110001" // /* MW 3 */ + 13674 "00000110" // /* MW 2 */ + 13675 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 453 13 first +.delay_slot + 13676 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13677 "00010001" // /* MW 3 */ + 13678 "00000110" // /* MW 2 */ + 13679 "00001010" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_176 +.src_ref 7 "superkernels.cpp" 457 26 + 13680 "11100001" // NOPA; NOPB; NOPS; MOVXM p3, #509016; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13681 "00000000" // /* MW 15 */ + 13682 "00000000" // /* MW 14 */ + 13683 "00010000" // /* MW 13 */ + 13684 "00101100" // /* MW 12 */ + 13685 "10110010" // /* MW 11 */ + 13686 "11110001" // /* MW 10 */ + 13687 "00000001" // /* MW 9 */ + 13688 "00000000" // /* MW 8 */ + 13689 "01011011" // /* MW 7 */ + 13690 "00000001" // /* MW 6 */ + 13691 "00100000" // /* MW 5 */ + 13692 "00000000" // /* MW 4 */ + 13693 "11110000" // /* MW 3 */ + 13694 "00101100" // /* MW 2 */ + 13695 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_192 +.src_ref 1 "io_buffer_main.h" 218 49 first + 13696 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13697 "10000110" // /* MW 3 */ + 13698 "01100111" // /* MW 2 */ + 13699 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 +.src_ref 1 "io_buffer_main.h" 218 49 + 13700 "10111010" // LDA r27, [p2], #-4; MOVXM p4, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13701 "00010000" // /* MW 9 */ + 13702 "00101000" // /* MW 8 */ + 13703 "00110010" // /* MW 7 */ + 13704 "11110010" // /* MW 6 */ + 13705 "00000001" // /* MW 5 */ + 13706 "00000000" // /* MW 4 */ + 13707 "11010000" // /* MW 3 */ + 13708 "11101110" // /* MW 2 */ + 13709 "01011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 13710 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13711 "00010110" // /* MW 3 */ + 13712 "11111110" // /* MW 2 */ + 13713 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 13714 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13715 "00110110" // /* MW 3 */ + 13716 "11111110" // /* MW 2 */ + 13717 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 13718 "10011000" // LDA r18, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13719 "01010110" // /* MW 3 */ + 13720 "01000110" // /* MW 2 */ + 13721 "00000010" // /* MW 1 */ + 13722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13723 "00000000" // /* MW 1 */ + 13724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13725 "00000000" // /* MW 1 */ + 13726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13727 "00000000" // /* MW 1 */ + 13728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13729 "00000000" // /* MW 1 */ + 13730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13731 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 13732 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13733 "00000010" // /* MW 3 */ + 13734 "01100001" // /* MW 2 */ + 13735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 13736 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13737 "00010001" // /* MW 3 */ + 13738 "00000110" // /* MW 2 */ + 13739 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 13740 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13741 "11111101" // /* MW 3 */ + 13742 "11100000" // /* MW 2 */ + 13743 "00010111" // /* MW 1 */ + 13744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13745 "00000000" // /* MW 1 */ + 13746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13747 "00000000" // /* MW 1 */ + 13748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13749 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 13750 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13751 "00001000" // /* MW 3 */ + 13752 "10010011" // /* MW 2 */ + 13753 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 +.src_ref 7 "superkernels.cpp" 459 47 +.src_ref 7 "superkernels.cpp" 464 6 +.src_ref 7 "superkernels.cpp" 465 16 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 13754 "10111010" // MOVA r15, #1; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13755 "00010000" // /* MW 9 */ + 13756 "00100000" // /* MW 8 */ + 13757 "10110010" // /* MW 7 */ + 13758 "11110011" // /* MW 6 */ + 13759 "00000001" // /* MW 5 */ + 13760 "00000000" // /* MW 4 */ + 13761 "00000000" // /* MW 3 */ + 13762 "00101111" // /* MW 2 */ + 13763 "00000000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 + 13764 "11100100" // MOVX r24, #0; MOV r16, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13765 "11000001" // /* MW 5 */ + 13766 "00101011" // /* MW 4 */ + 13767 "00101000" // /* MW 3 */ + 13768 "00000000" // /* MW 2 */ + 13769 "00000110" // /* MW 1 */ + 13770 "00011000" // ADD.NC p0, r16, #-76 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13771 "01011010" // /* MW 3 */ + 13772 "01101000" // /* MW 2 */ + 13773 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 324 51 + 13774 "11010100" // LDA p5, [sp, #-20]; MOV r14, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13775 "10000001" // /* MW 5 */ + 13776 "00101001" // /* MW 4 */ + 13777 "00100111" // /* MW 3 */ + 13778 "11010011" // /* MW 2 */ + 13779 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 15 first + 13780 "10011000" // LDA r17, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13781 "00110110" // /* MW 3 */ + 13782 "00000110" // /* MW 2 */ + 13783 "00000100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 26 +.src_ref 7 "superkernels.cpp" 461 2 + 13784 "10111010" // LDA r16, [p3]; MOVXM p3, #509824 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13785 "00010000" // /* MW 9 */ + 13786 "11000000" // /* MW 8 */ + 13787 "10110011" // /* MW 7 */ + 13788 "11110001" // /* MW 6 */ + 13789 "00000001" // /* MW 5 */ + 13790 "00000000" // /* MW 4 */ + 13791 "11010000" // /* MW 3 */ + 13792 "11000010" // /* MW 2 */ + 13793 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 13794 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13795 "01010110" // /* MW 3 */ + 13796 "00000110" // /* MW 2 */ + 13797 "00000111" // /* MW 1 */ + 13798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13799 "00000000" // /* MW 1 */ + 13800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13801 "00000000" // /* MW 1 */ + 13802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13803 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 13804 "10011000" // LDA r19, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13805 "01110110" // /* MW 3 */ + 13806 "00000110" // /* MW 2 */ + 13807 "00000101" // /* MW 1 */ + 13808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13809 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 457 24 first + 13810 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13811 "00001111" // /* MW 3 */ + 13812 "01100001" // /* MW 2 */ + 13813 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first + 13814 "00011000" // ADD r17, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13815 "00000111" // /* MW 3 */ + 13816 "10100010" // /* MW 2 */ + 13817 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first + 13818 "10011000" // LSHL r16, r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13819 "11111101" // /* MW 3 */ + 13820 "00100000" // /* MW 2 */ + 13821 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 461 2 first +.no_stack_arguments + 13822 "00000100" // JL #12768 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12768 delay_slots=5 */ + 13823 "00000001" // /* MW 5 */ + 13824 "00000000" // /* MW 4 */ + 13825 "11110000" // /* MW 3 */ + 13826 "00011000" // /* MW 2 */ + 13827 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 456 11 first +.delay_slot + 13828 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13829 "00110001" // /* MW 3 */ + 13830 "00000110" // /* MW 2 */ + 13831 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 459 47 first +.delay_slot + 13832 "01011000" // ADD.NC dn0, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13833 "11000001" // /* MW 3 */ + 13834 "01001001" // /* MW 2 */ + 13835 "00011000" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 201 10 first +.delay_slot + 13836 "10011000" // ST dn0, [sp, #-76] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13837 "00100101" // /* MW 3 */ + 13838 "10110100" // /* MW 2 */ + 13839 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 first +.delay_slot + 13840 "10011000" // ST r24, [sp, #-72] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13841 "00010101" // /* MW 3 */ + 13842 "10111011" // /* MW 2 */ + 13843 "00001111" // /* MW 1 */ +.src_ref 9 "io_buffer_impl.h" 52 16 +.delay_slot + 13844 "00110110" // NOPA; NOPB; ST r24, [sp, #-68]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13845 "11000001" // /* MW 11 */ + 13846 "10001010" // /* MW 10 */ + 13847 "11011111" // /* MW 9 */ + 13848 "00000011" // /* MW 8 */ + 13849 "00000000" // /* MW 7 */ + 13850 "00000000" // /* MW 6 */ + 13851 "00100000" // /* MW 5 */ + 13852 "00000000" // /* MW 4 */ + 13853 "11110000" // /* MW 3 */ + 13854 "00101100" // /* MW 2 */ + 13855 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 +.return_address + 13856 "00011000" // ADD.NC p2, r14, #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13857 "00001010" // /* MW 3 */ + 13858 "01100111" // /* MW 2 */ + 13859 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first + 13860 "10011000" // LDA r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13861 "00010110" // /* MW 3 */ + 13862 "00000110" // /* MW 2 */ + 13863 "00000010" // /* MW 1 */ + 13864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13865 "00000000" // /* MW 1 */ + 13866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13867 "00000000" // /* MW 1 */ + 13868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13869 "00000000" // /* MW 1 */ + 13870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13871 "00000000" // /* MW 1 */ + 13872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13873 "00000000" // /* MW 1 */ + 13874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13875 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 13876 "00011000" // REL r16, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13877 "11111000" // /* MW 3 */ + 13878 "00010000" // /* MW 2 */ + 13879 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 13880 "10111010" // LDA r16, [p2, #-8]; MOVXM p1, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13881 "00010000" // /* MW 9 */ + 13882 "00110000" // /* MW 8 */ + 13883 "10110010" // /* MW 7 */ + 13884 "11110000" // /* MW 6 */ + 13885 "00000001" // /* MW 5 */ + 13886 "00000000" // /* MW 4 */ + 13887 "11010000" // /* MW 3 */ + 13888 "11000010" // /* MW 2 */ + 13889 "01011100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 19 first + 13890 "10011000" // LDA r18, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13891 "01010110" // /* MW 3 */ + 13892 "00000110" // /* MW 2 */ + 13893 "00000001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 13894 "10011000" // LDA r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13895 "00110110" // /* MW 3 */ + 13896 "00000110" // /* MW 2 */ + 13897 "00000111" // /* MW 1 */ + 13898 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13899 "10011001" // /* MW 3 */ + 13900 "11110100" // /* MW 2 */ + 13901 "00000111" // /* MW 1 */ + 13902 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13903 "11010001" // /* MW 3 */ + 13904 "11111001" // /* MW 2 */ + 13905 "00000111" // /* MW 1 */ + 13906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13907 "00000000" // /* MW 1 */ + 13908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 13910 "10011000" // SUB r16, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13911 "00000001" // /* MW 3 */ + 13912 "11100001" // /* MW 2 */ + 13913 "00010011" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 13914 "10011000" // ST r16, [p2, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13915 "00010001" // /* MW 3 */ + 13916 "11100110" // /* MW 2 */ + 13917 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 16 first + 13918 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13919 "00101000" // /* MW 3 */ + 13920 "01100001" // /* MW 2 */ + 13921 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 464 6 + 13922 "10000100" // JNZ r16, #13952 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13952 delay_slots=5 */ + 13923 "00000001" // /* MW 5 */ + 13924 "01000000" // /* MW 4 */ + 13925 "01000000" // /* MW 3 */ + 13926 "00011011" // /* MW 2 */ + 13927 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 +.delay_slot + 13928 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13929 "00000001" // /* MW 3 */ + 13930 "00110000" // /* MW 2 */ + 13931 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13939 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 465 16 first + 13940 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13941 "11000001" // /* MW 11 */ + 13942 "10001000" // /* MW 10 */ + 13943 "10000011" // /* MW 9 */ + 13944 "00000011" // /* MW 8 */ + 13945 "00000000" // /* MW 7 */ + 13946 "00000000" // /* MW 6 */ + 13947 "00100000" // /* MW 5 */ + 13948 "00000000" // /* MW 4 */ + 13949 "11110000" // /* MW 3 */ + 13950 "00101100" // /* MW 2 */ + 13951 "00000000" // /* MW 1 */ +.label TGT_F_Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 467 + 13952 "11010100" // LDA r13, [sp, #-4]; MOV lr, r13 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13953 "01000001" // /* MW 5 */ + 13954 "11101101" // /* MW 4 */ + 13955 "00101110" // /* MW 3 */ + 13956 "10110110" // /* MW 2 */ + 13957 "11111111" // /* MW 1 */ + 13958 "00011000" // LDA r15, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13959 "11110001" // /* MW 3 */ + 13960 "11110001" // /* MW 2 */ + 13961 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 first + 13962 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13963 "00000000" // /* MW 3 */ + 13964 "00101000" // /* MW 2 */ + 13965 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 467 +.delay_slot + 13966 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13967 "00000001" // /* MW 5 */ + 13968 "00000000" // /* MW 4 */ + 13969 "00000000" // /* MW 3 */ + 13970 "11110000" // /* MW 2 */ + 13971 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13977 "00000000" // /* MW 1 */ +.delay_slot + 13978 "11111000" // MOV p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13979 "11000000" // /* MW 3 */ + 13980 "01100010" // /* MW 2 */ +.label _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 13981 "00011111" // /* MW 1 */ +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function _b896_wrapper _Z13_b896_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 20 first +.src_ref 0 "0_0_reloadable3.cc" 22 79 +.function_start + 13984 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13985 "11000000" // /* MW 3 */ + 13986 "01100000" // /* MW 2 */ + 13987 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 22 79 first + 13988 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13989 "00011110" // /* MW 3 */ + 13990 "00011100" // /* MW 2 */ + 13991 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 23 79 first + 13992 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13993 "10011110" // /* MW 3 */ + 13994 "00101100" // /* MW 2 */ + 13995 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 25 81 first + 13996 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13997 "10011110" // /* MW 3 */ + 13998 "11110101" // /* MW 2 */ + 13999 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 24 47 first + 14000 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14001 "00011110" // /* MW 3 */ + 14002 "00000101" // /* MW 2 */ + 14003 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 21 4 first +.tail_call + 14004 "10000100" // J #6880 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=6880 delay_slots=5 */ + 14005 "00000000" // /* MW 5 */ + 14006 "00000000" // /* MW 4 */ + 14007 "01110000" // /* MW 3 */ + 14008 "00001101" // /* MW 2 */ + 14009 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14010 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14015 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14017 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + 14019 "00000000" // /* MW 1 */ +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function _b901_wrapper _Z13_b901_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 29 first +.src_ref 0 "0_0_reloadable3.cc" 31 79 +.function_start + 14032 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14033 "11000000" // /* MW 3 */ + 14034 "01100000" // /* MW 2 */ + 14035 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 31 79 first + 14036 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14037 "00011110" // /* MW 3 */ + 14038 "00101100" // /* MW 2 */ + 14039 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 33 81 first + 14040 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14041 "00011110" // /* MW 3 */ + 14042 "11110101" // /* MW 2 */ + 14043 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 32 47 first + 14044 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14045 "10011110" // /* MW 3 */ + 14046 "00000100" // /* MW 2 */ + 14047 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 30 4 first +.tail_call + 14048 "10000100" // J #8240 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8240 delay_slots=5 */ + 14049 "00000000" // /* MW 5 */ + 14050 "00000000" // /* MW 4 */ + 14051 "00011000" // /* MW 3 */ + 14052 "00010000" // /* MW 2 */ + 14053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14057 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14059 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 + 14063 "00000000" // /* MW 1 */ +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function _b906_wrapper _Z13_b906_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 37 first +.src_ref 0 "0_0_reloadable3.cc" 39 79 +.function_start + 14064 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14065 "11000000" // /* MW 3 */ + 14066 "01100000" // /* MW 2 */ + 14067 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 39 79 first + 14068 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14069 "00011110" // /* MW 3 */ + 14070 "00101100" // /* MW 2 */ + 14071 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 41 81 first + 14072 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14073 "00011110" // /* MW 3 */ + 14074 "11110101" // /* MW 2 */ + 14075 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 40 47 first + 14076 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14077 "10011110" // /* MW 3 */ + 14078 "00000100" // /* MW 2 */ + 14079 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 38 4 first +.tail_call + 14080 "10000100" // J #9104 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9104 delay_slots=5 */ + 14081 "00000000" // /* MW 5 */ + 14082 "00000000" // /* MW 4 */ + 14083 "11001000" // /* MW 3 */ + 14084 "00010001" // /* MW 2 */ + 14085 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14087 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14089 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14091 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14093 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 + 14095 "00000000" // /* MW 1 */ +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function _b881_wrapper _Z13_b881_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 45 first +.src_ref 0 "0_0_reloadable3.cc" 47 79 +.function_start + 14096 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14097 "11000000" // /* MW 3 */ + 14098 "01100000" // /* MW 2 */ + 14099 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 47 79 first + 14100 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14101 "00011110" // /* MW 3 */ + 14102 "00101100" // /* MW 2 */ + 14103 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 49 81 first + 14104 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14105 "00011110" // /* MW 3 */ + 14106 "11110101" // /* MW 2 */ + 14107 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 48 47 first + 14108 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14109 "10011110" // /* MW 3 */ + 14110 "00000100" // /* MW 2 */ + 14111 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 46 4 first +.tail_call + 14112 "10000100" // J #10512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */ + 14113 "00000000" // /* MW 5 */ + 14114 "00000000" // /* MW 4 */ + 14115 "10001000" // /* MW 3 */ + 14116 "00010100" // /* MW 2 */ + 14117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14119 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14121 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14123 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14125 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14126 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 + 14127 "00000000" // /* MW 1 */ +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function _b891_wrapper _Z13_b891_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 53 first +.src_ref 0 "0_0_reloadable3.cc" 55 79 +.function_start + 14128 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14129 "11000000" // /* MW 3 */ + 14130 "01100000" // /* MW 2 */ + 14131 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 55 79 first + 14132 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14133 "00011110" // /* MW 3 */ + 14134 "00111100" // /* MW 2 */ + 14135 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 56 47 first + 14136 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14137 "10011110" // /* MW 3 */ + 14138 "11101100" // /* MW 2 */ + 14139 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 58 81 first + 14140 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14141 "10011110" // /* MW 3 */ + 14142 "00010101" // /* MW 2 */ + 14143 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 57 80 first + 14144 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14145 "00011110" // /* MW 3 */ + 14146 "00000101" // /* MW 2 */ + 14147 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 54 4 first +.tail_call + 14148 "10000100" // J #11488 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11488 delay_slots=5 */ + 14149 "00000000" // /* MW 5 */ + 14150 "00000000" // /* MW 4 */ + 14151 "01110000" // /* MW 3 */ + 14152 "00010110" // /* MW 2 */ + 14153 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14155 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14161 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + 14163 "00000000" // /* MW 1 */ +.label __Z13_b919_wrapperPPv___func_begin0 +.label _Z13_b919_wrapperPPv +.function _b919_wrapper _Z13_b919_wrapperPPv +.src_ref 0 "0_0_reloadable3.cc" 62 first +.src_ref 0 "0_0_reloadable3.cc" 64 79 +.function_start + 14176 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14177 "11000000" // /* MW 3 */ + 14178 "01100000" // /* MW 2 */ + 14179 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 64 79 first + 14180 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14181 "00011110" // /* MW 3 */ + 14182 "00011100" // /* MW 2 */ + 14183 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 65 79 first + 14184 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14185 "10011110" // /* MW 3 */ + 14186 "00101100" // /* MW 2 */ + 14187 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 67 81 first + 14188 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14189 "10011110" // /* MW 3 */ + 14190 "11110101" // /* MW 2 */ + 14191 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 66 47 first + 14192 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14193 "00011110" // /* MW 3 */ + 14194 "00000101" // /* MW 2 */ + 14195 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable3.cc" 63 4 first +.tail_call + 14196 "10000100" // J #13504 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13504 delay_slots=5 */ + 14197 "00000000" // /* MW 5 */ + 14198 "00000000" // /* MW 4 */ + 14199 "01100000" // /* MW 3 */ + 14200 "00011010" // /* MW 2 */ + 14201 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14203 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14205 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14207 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14209 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b919_wrapperPPv__end +.label __Z13_b919_wrapperPPv___func_end0 + 14211 "00000000" // /* MW 1 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0_ +.function udiv_dstep _ZN12me_primitive10udiv_dstepEjjRjS0_ +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 108 19 +.src_ref 10 "me_div.c" 115 4 first +.function_start + 14224 "11100100" // MOVX r3, #0; MOV r31, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 14225 "01000001" // /* MW 5 */ + 14226 "10100000" // /* MW 4 */ + 14227 "00101111" // /* MW 3 */ + 14228 "11000000" // /* MW 2 */ + 14229 "00000000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14230 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14231 "00011100" // /* MW 3 */ + 14232 "11000110" // /* MW 2 */ + 14233 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14234 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14235 "00011100" // /* MW 3 */ + 14236 "11000110" // /* MW 2 */ + 14237 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14238 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14239 "00011100" // /* MW 3 */ + 14240 "11000110" // /* MW 2 */ + 14241 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14242 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14243 "00011100" // /* MW 3 */ + 14244 "11000110" // /* MW 2 */ + 14245 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14246 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14247 "00011100" // /* MW 3 */ + 14248 "11000110" // /* MW 2 */ + 14249 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14250 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14251 "00011100" // /* MW 3 */ + 14252 "11000110" // /* MW 2 */ + 14253 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14254 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14255 "00011100" // /* MW 3 */ + 14256 "11000110" // /* MW 2 */ + 14257 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14258 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14259 "00011100" // /* MW 3 */ + 14260 "11000110" // /* MW 2 */ + 14261 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14262 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14263 "00011100" // /* MW 3 */ + 14264 "11000110" // /* MW 2 */ + 14265 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14266 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14267 "00011100" // /* MW 3 */ + 14268 "11000110" // /* MW 2 */ + 14269 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14270 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14271 "00011100" // /* MW 3 */ + 14272 "11000110" // /* MW 2 */ + 14273 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14274 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14275 "00011100" // /* MW 3 */ + 14276 "11000110" // /* MW 2 */ + 14277 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14278 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14279 "00011100" // /* MW 3 */ + 14280 "11000110" // /* MW 2 */ + 14281 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14282 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14283 "00011100" // /* MW 3 */ + 14284 "11000110" // /* MW 2 */ + 14285 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14286 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14287 "00011100" // /* MW 3 */ + 14288 "11000110" // /* MW 2 */ + 14289 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14290 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14291 "00011100" // /* MW 3 */ + 14292 "11000110" // /* MW 2 */ + 14293 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14294 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14295 "00011100" // /* MW 3 */ + 14296 "11000110" // /* MW 2 */ + 14297 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14298 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14299 "00011100" // /* MW 3 */ + 14300 "11000110" // /* MW 2 */ + 14301 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14302 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14303 "00011100" // /* MW 3 */ + 14304 "11000110" // /* MW 2 */ + 14305 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14306 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14307 "00011100" // /* MW 3 */ + 14308 "11000110" // /* MW 2 */ + 14309 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14310 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14311 "00011100" // /* MW 3 */ + 14312 "11000110" // /* MW 2 */ + 14313 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14314 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14315 "00011100" // /* MW 3 */ + 14316 "11000110" // /* MW 2 */ + 14317 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14318 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14319 "00011100" // /* MW 3 */ + 14320 "11000110" // /* MW 2 */ + 14321 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14322 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14323 "00011100" // /* MW 3 */ + 14324 "11000110" // /* MW 2 */ + 14325 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14326 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14327 "00011100" // /* MW 3 */ + 14328 "11000110" // /* MW 2 */ + 14329 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14330 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14331 "00011100" // /* MW 3 */ + 14332 "11000110" // /* MW 2 */ + 14333 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14334 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14335 "00011100" // /* MW 3 */ + 14336 "11000110" // /* MW 2 */ + 14337 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 + 14338 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14339 "00011100" // /* MW 3 */ + 14340 "11000110" // /* MW 2 */ + 14341 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 119 first + 14342 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 14343 "00000000" // /* MW 3 */ + 14344 "00101000" // /* MW 2 */ + 14345 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 first +.delay_slot + 14346 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14347 "00011100" // /* MW 3 */ + 14348 "11000110" // /* MW 2 */ + 14349 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 14350 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14351 "00011100" // /* MW 3 */ + 14352 "11000110" // /* MW 2 */ + 14353 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 14354 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14355 "00011100" // /* MW 3 */ + 14356 "11000110" // /* MW 2 */ + 14357 "00010000" // /* MW 1 */ +.src_ref 10 "me_div.c" 108 19 +.delay_slot + 14358 "00011000" // DIVS r3, r31, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14359 "00011100" // /* MW 3 */ + 14360 "11000110" // /* MW 2 */ + 14361 "00010000" // /* MW 1 */ +.delay_slot + 14362 "11111000" // MOV r2, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14363 "10100000" // /* MW 3 */ + 14364 "10011111" // /* MW 2 */ +.label _ZN12me_primitive10udiv_dstepEjjRjS0___end + 14365 "00011000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 9 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail" +.dir 10 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.txt new file mode 100644 index 0000000000000000000000000000000000000000..f228402b070aee7a7253fb79fa4b7ba5dbc4d5a3 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/Release/0_2_reloadable9.txt @@ -0,0 +1,4968 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 205 0x2580 x +elementwise_binary_shared.h 211 0x2580 1 x +elementwise_binary_shared.h 216 0x2580 2 +elementwise_binary_shared.h 216 0x2580 3 +elementwise_binary_shared.h 216 0x258a +elementwise_binary_shared.h 211 0x2598 x +elementwise_binary_shared.h 212 0x259c x +elementwise_binary_shared.h 212 0x25ac +elementwise_binary_shared.h 213 0x25b0 x +elementwise_binary_shared.h 213 0x25c0 +elementwise_binary_shared.h 214 0x25c4 x +elementwise_binary_shared.h 214 0x25d4 +elementwise_binary_shared.h 216 0x25d8 x +elementwise_binary_shared.h 217 0x25dc x +elementwise_binary_shared.h 216 0x25e0 +elementwise_binary_shared.h 216 0x25e6 x +elementwise_binary_shared.h 216 0x25ea +elementwise_binary_shared.h 216 0x25ee +elementwise_binary_shared.h 107 0x2650 x +elementwise_binary_shared.h 119 0x2650 1 +elementwise_binary_shared.h 126 0x2650 2 +elementwise_binary_shared.h 131 0x2650 3 +elementwise_binary_shared.h 119 0x2654 x +elementwise_binary_shared.h 122 0x2658 x +elementwise_binary_shared.h 124 0x265c x +elementwise_binary_shared.h 124 0x2668 +elementwise_binary_shared.h 107 0x266c +elementwise_binary_shared.h 124 0x2672 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 65 0x2676 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 124 0x2676 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 65 0x2680 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 150 0x268c +elementwise_binary_shared.h 119 0x2692 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x2696 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 126 0x2696 1 +elementwise_binary_shared.h 126 0x2696 2 +elementwise_binary_shared.h 131 0x2696 3 +elementwise_binary_shared.h 131 0x2696 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26a0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 126 0x26a0 1 x +elementwise_binary_shared.h 131 0x26a0 2 x +elementwise_binary_shared.h 171 0x26a0 3 +elementwise_binary_shared.h 131 0x26b2 +elementwise_binary_shared.h 131 0x26b2 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26b8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x26b8 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x26b8 2 +elementwise_binary_shared.h 166 0x26bc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26c8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x26c8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x26da x +vector.hpp 1139 0x26e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x26e0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26e4 +vector.hpp 1159 0x26e4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 171 0x26e4 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x26f6 +vector.hpp 1139 0x26f6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x26f6 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x26f6 3 +elementwise_binary_shared.h 173 0x26f6 4 +elementwise_binary_shared.h 150 0x2710 +elementwise_binary_shared.h 150 0x2714 x +elementwise_binary_shared.h 150 0x2718 +elementwise_binary_shared.h 150 0x271e +elementwise_binary_shared.h 150 0x2724 +elementwise_binary_shared.h 166 0x2724 1 +elementwise_binary_shared.h 150 0x2730 +elementwise_binary_shared.h 150 0x2740 +elementwise_binary_shared.h 150 0x2740 1 +elementwise_binary_shared.h 150 0x2740 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x274a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x274a 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x274a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x274e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x274e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2752 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 171 0x2752 1 +elementwise_binary_shared.h 150 0x2758 +elementwise_binary_shared.h 150 0x275c +elementwise_binary_shared.h 150 0x275c 1 +elementwise_binary_shared.h 150 0x2762 +elementwise_binary_shared.h 150 0x2766 +elementwise_binary_shared.h 150 0x276c +elementwise_binary_shared.h 150 0x2774 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x2784 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x278a x +vector.hpp 1139 0x2790 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x2790 1 x +elementwise_binary_shared.h 166 0x2790 2 x +elementwise_binary_shared.h 169 0x2790 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x279c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x279c 1 +elementwise_binary_shared.h 166 0x279c 2 +elementwise_binary_shared.h 171 0x279c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27a8 x +vector.hpp 1139 0x27a8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27a8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x27a8 3 x +elementwise_binary_shared.h 173 0x27a8 4 x +elementwise_binary_shared.h 177 0x27a8 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27b0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x27b0 1 x +elementwise_binary_shared.h 171 0x27b0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27b8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27b8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x27b8 2 x +elementwise_binary_shared.h 166 0x27be x +elementwise_binary_shared.h 166 0x27c2 +elementwise_binary_shared.h 177 0x27c2 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27ca x +vector.hpp 1139 0x27ca 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x27ca 2 x +elementwise_binary_shared.h 171 0x27ca 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27d0 +vector.hpp 1159 0x27d0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27d0 2 x +accum.hpp 1110 0x27d0 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x27d0 4 x +elementwise_binary_shared.h 185 0x27d0 5 +elementwise_binary_shared.h 177 0x27f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2800 x +vector.hpp 1139 0x2800 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x2800 2 x +elementwise_binary_shared.h 171 0x2800 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2810 +vector.hpp 1159 0x2810 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2810 2 x +accum.hpp 1110 0x2810 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x2810 4 x +elementwise_binary_shared.h 185 0x2810 5 x +elementwise_binary_shared.h 177 0x2830 x +elementwise_binary_shared.h 187 0x2840 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2846 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2846 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2846 2 x +elementwise_binary_shared.h 177 0x284c x +elementwise_binary_shared.h 187 0x2852 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2856 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2856 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2856 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2860 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2860 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2860 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x31e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 199 0x31e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x31e4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x31e4 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x31ea +io_buffer_main.h 125 0x31ea 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x31f0 x +conv2d_dw_bf16.h 221 0x31f4 x +conv2d_dw_bf16.h 221 0x31f8 +conv2d_dw_bf16.h 221 0x31fc +conv2d_dw_bf16.h 221 0x3200 +conv2d_dw_bf16.h 221 0x3204 +conv2d_dw_bf16.h 222 0x3208 x +conv2d_dw_bf16.h 222 0x320c +conv2d_dw_bf16.h 222 0x3210 +conv2d_dw_bf16.h 222 0x3214 +conv2d_dw_bf16.h 222 0x3218 +conv2d_dw_bf16.h 223 0x321c x +conv2d_dw_bf16.h 223 0x3220 +conv2d_dw_bf16.h 223 0x3224 +conv2d_dw_bf16.h 223 0x3228 +conv2d_dw_bf16.h 223 0x322c +conv2d_dw_bf16.h 224 0x3230 x +conv2d_dw_bf16.h 224 0x3234 +conv2d_dw_bf16.h 224 0x3238 +conv2d_dw_bf16.h 244 0x3238 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3242 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3242 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x3242 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3242 3 x +conv2d_dw_bf16.h 225 0x3248 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x324c +aie_core.h 81 0x324c 1 +aie_core.h 100 0x324c 2 +aie_core.h 100 0x324c 3 +aie_core.h 100 0x324c 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x324c 5 +vector.hpp 1139 0x324c 6 +vector.hpp 1139 0x324c 7 x +vector.hpp 1139 0x324c 8 x +vector.hpp 1159 0x324c 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x324c 10 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x324c 11 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3258 +aie_core.h 81 0x3258 1 +aie_core.h 100 0x3258 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3258 3 +vector.hpp 1139 0x3258 4 +vector.hpp 1159 0x3258 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x3258 6 x +conv2d_dw_bf16.h 225 0x3258 7 x +conv2d_dw_bf16.h 244 0x3258 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3266 +aie_core.h 100 0x3266 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3266 2 +vector.hpp 1159 0x3266 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x3266 4 +conv2d_dw_bf16.h 225 0x3266 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3270 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3270 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 204 0x3270 2 +conv2d_dw_bf16.h 225 0x3270 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x327a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x327a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x327a 2 +conv2d_dw_bf16.h 244 0x327a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3284 +shuffle.hpp 142 0x3284 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3284 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x328a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x328a 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x328a 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x3296 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3296 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3296 2 x +conv2d_dw_bf16.h 250 0x3296 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x32a2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x32a2 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x32a2 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x32a8 +conv2d_dw_bf16.h 244 0x32ac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x32b6 +shuffle.hpp 142 0x32b6 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 268 0x32b6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x32c0 +shuffle.hpp 142 0x32c0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x32c0 2 +conv2d_dw_bf16.h 271 0x32c0 3 +conv2d_dw_bf16.h 272 0x32c0 4 +conv2d_dw_bf16.h 273 0x32c0 5 +conv2d_dw_bf16.h 274 0x32c0 6 +conv2d_dw_bf16.h 275 0x32c0 7 +conv2d_dw_bf16.h 276 0x32c0 8 +conv2d_dw_bf16.h 277 0x32c0 9 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x32d0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x32d0 1 +accum.hpp 1110 0x32d0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 265 0x32d0 3 x +conv2d_dw_bf16.h 270 0x32d0 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x32e0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x32e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x32e0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x32e0 3 x +conv2d_dw_bf16.h 274 0x32e0 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x32f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x32f0 1 x +vector.hpp 1139 0x32f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x32f0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x32fa + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 266 0x32fa 1 x +conv2d_dw_bf16.h 271 0x32fa 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x3304 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3304 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3304 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3304 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x330e +shuffle.hpp 142 0x3312 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 272 0x3312 1 x +conv2d_dw_bf16.h 267 0x331a x +conv2d_dw_bf16.h 276 0x331a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3322 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 268 0x3326 x +conv2d_dw_bf16.h 273 0x3326 1 x +conv2d_dw_bf16.h 265 0x332e x +conv2d_dw_bf16.h 277 0x332e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3336 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x3340 x +conv2d_dw_bf16.h 274 0x3350 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3360 x +aie_core.h 100 0x3360 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3360 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 266 0x336a x +conv2d_dw_bf16.h 271 0x336a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3372 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3372 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x337a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 267 0x337e x +conv2d_dw_bf16.h 272 0x337e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3386 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 276 0x3386 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3390 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3390 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3390 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x3396 x +conv2d_dw_bf16.h 273 0x3396 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33a0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x33a0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 250 0x33a0 2 +conv2d_dw_bf16.h 277 0x33a0 3 x +conv2d_dw_bf16.h 250 0x33ac x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33b0 x +vector.hpp 1139 0x33b4 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x33b8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33b8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x33bc x +accum.hpp 1110 0x33c0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x33c4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 286 0x33c8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x33cc x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x33cc 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 285 0x33cc 2 x +conv2d_dw_bf16.h 268 0x33d4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x33d8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x33d8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x33d8 2 +conv2d_dw_bf16.h 265 0x33e0 x +conv2d_dw_bf16.h 270 0x33e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33e8 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 274 0x33e8 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x33f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33f0 1 x +vector.hpp 1139 0x33f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x33f0 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33fa + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 266 0x33fa 1 x +conv2d_dw_bf16.h 271 0x33fa 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x3404 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3404 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3404 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3404 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x340e +shuffle.hpp 142 0x3412 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 272 0x3412 1 x +conv2d_dw_bf16.h 267 0x341a x +conv2d_dw_bf16.h 276 0x341a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3422 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 268 0x3426 x +conv2d_dw_bf16.h 273 0x3426 1 x +conv2d_dw_bf16.h 265 0x342e x +conv2d_dw_bf16.h 277 0x342e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3436 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 270 0x343c x +conv2d_dw_bf16.h 274 0x3440 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 81 0x3450 x +aie_core.h 100 0x3450 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3450 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 244 0x3450 3 x +conv2d_dw_bf16.h 266 0x345c x +conv2d_dw_bf16.h 271 0x345c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3464 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 275 0x3464 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x346c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 267 0x3470 x +conv2d_dw_bf16.h 272 0x3470 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3478 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 276 0x3478 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/shuffle.hpp: +shuffle.hpp 142 0x3480 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 273 0x3484 x +conv2d_dw_bf16.h 277 0x3488 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3494 x +accum.hpp 1110 0x3498 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 290 0x3498 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x349e x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 286 0x34a2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x34a6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34aa x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16.h: +conv2d_dw_bf16.h 285 0x34aa 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x34ae x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34ae 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 432 0xac0 x +conv2d_bf16_params.h 438 0xac0 1 x +conv2d_bf16_params.h 452 0xac0 2 +conv2d_bf16_params.h 453 0xac0 3 +conv2d_bf16_params.h 458 0xac0 4 +conv2d_bf16_params.h 470 0xac0 5 +conv2d_bf16_params.h 438 0xaca +conv2d_bf16_params.h 438 0xaca 1 x +conv2d_bf16_params.h 452 0xaca 2 +conv2d_bf16_params.h 462 0xaca 3 +conv2d_bf16_params.h 432 0xad4 +conv2d_bf16_params.h 444 0xad4 1 +conv2d_bf16_params.h 453 0xade +conv2d_bf16_params.h 458 0xade 1 +conv2d_bf16_params.h 458 0xade 2 +conv2d_bf16_params.h 444 0xaea +conv2d_bf16_params.h 470 0xaea 1 +conv2d_bf16_params.h 477 0xaea 2 +conv2d_bf16_params.h 557 0xaea 3 +conv2d_bf16_params.h 452 0xaf6 +conv2d_bf16_params.h 458 0xaf6 1 +conv2d_bf16_params.h 462 0xaf6 2 +conv2d_bf16_params.h 438 0xafe +conv2d_bf16_params.h 438 0xb02 +conv2d_bf16_params.h 438 0xb06 +conv2d_bf16_params.h 438 0xb0a +conv2d_bf16_params.h 438 0xb18 +conv2d_bf16_params.h 438 0xb1c +conv2d_bf16_params.h 438 0xb20 +conv2d_bf16_params.h 438 0xb24 +conv2d_bf16_params.h 438 0xb32 +conv2d_bf16_params.h 438 0xb36 +conv2d_bf16_params.h 438 0xb3a +conv2d_bf16_params.h 438 0xb3e +conv2d_bf16_params.h 438 0xb4c +conv2d_bf16_params.h 438 0xb50 +conv2d_bf16_params.h 444 0xb54 x +conv2d_bf16_params.h 447 0xb58 x +conv2d_bf16_params.h 448 0xb5c x +conv2d_bf16_params.h 452 0xb60 x +conv2d_bf16_params.h 453 0xb64 x +conv2d_bf16_params.h 458 0xb68 x +conv2d_bf16_params.h 444 0xb6e x +conv2d_bf16_params.h 458 0xb72 x +conv2d_bf16_params.h 462 0xb72 1 x +conv2d_bf16_params.h 462 0xb78 +conv2d_bf16_params.h 452 0xb7c x +conv2d_bf16_params.h 452 0xb80 +conv2d_bf16_params.h 462 0xb80 1 x +conv2d_bf16_params.h 557 0xb80 2 +conv2d_bf16_params.h 462 0xb86 +conv2d_bf16_params.h 458 0xb8a x +conv2d_bf16_params.h 458 0xb8e +conv2d_bf16_params.h 458 0xb92 +conv2d_bf16_params.h 477 0xb92 1 +conv2d_bf16_params.h 557 0xb92 2 x +conv2d_bf16_params.h 458 0xb98 x +conv2d_bf16_params.h 458 0xb9e +conv2d_bf16_params.h 477 0xb9e 1 x +conv2d_bf16_params.h 458 0xba4 x +conv2d_bf16_params.h 444 0xba8 x +conv2d_bf16_params.h 462 0xbac x +conv2d_bf16_params.h 470 0xbb0 x +conv2d_bf16_params.h 470 0xbb4 +conv2d_bf16_params.h 477 0xbb4 1 x +conv2d_bf16_params.h 477 0xbb8 +conv2d_bf16_params.h 491 0xbc8 +conv2d_bf16_params.h 492 0xbc8 1 +conv2d_bf16_params.h 495 0xbc8 2 +conv2d_bf16_params.h 502 0xbc8 3 +conv2d_bf16_params.h 533 0xbc8 4 +conv2d_bf16_params.h 539 0xbc8 5 +conv2d_bf16_params.h 557 0xbc8 6 +conv2d_bf16_params.h 621 0xbc8 7 +conv2d_bf16_params.h 645 0xbc8 8 +conv2d_bf16_params.h 709 0xbc8 9 +conv2d_bf16_params.h 477 0xbd2 +conv2d_bf16_params.h 481 0xbd2 1 +conv2d_bf16_params.h 500 0xbd2 2 +conv2d_bf16_params.h 506 0xbd2 3 +conv2d_bf16_params.h 507 0xbd2 4 +conv2d_bf16_params.h 524 0xbd2 5 +conv2d_bf16_params.h 539 0xbd2 6 +conv2d_bf16_params.h 655 0xbd2 7 +conv2d_bf16_params.h 477 0xbdc +conv2d_bf16_params.h 504 0xbdc 1 +conv2d_bf16_params.h 510 0xbdc 2 +conv2d_bf16_params.h 520 0xbdc 3 +conv2d_bf16_params.h 700 0xbdc 4 +conv2d_bf16_params.h 477 0xbe2 +conv2d_bf16_params.h 539 0xbe2 1 +conv2d_bf16_params.h 578 0xbe2 2 +conv2d_bf16_params.h 642 0xbe2 3 +conv2d_bf16_params.h 529 0xbe6 +conv2d_bf16_params.h 642 0xbe6 1 +conv2d_bf16_params.h 642 0xbe6 2 +conv2d_bf16_params.h 655 0xbea +conv2d_bf16_params.h 453 0xbf0 +conv2d_bf16_params.h 453 0xbf0 1 +conv2d_bf16_params.h 477 0xbf0 2 +conv2d_bf16_params.h 504 0xbf0 3 +conv2d_bf16_params.h 655 0xbf0 4 +conv2d_bf16_params.h 453 0xbfc x +conv2d_bf16_params.h 477 0xbfc 1 +conv2d_bf16_params.h 481 0xbfc 2 +conv2d_bf16_params.h 500 0xbfc 3 +conv2d_bf16_params.h 506 0xbfc 4 +conv2d_bf16_params.h 507 0xbfc 5 +conv2d_bf16_params.h 524 0xbfc 6 +conv2d_bf16_params.h 539 0xbfc 7 +conv2d_bf16_params.h 491 0xc06 +conv2d_bf16_params.h 492 0xc06 1 +conv2d_bf16_params.h 495 0xc06 2 +conv2d_bf16_params.h 502 0xc06 3 +conv2d_bf16_params.h 510 0xc06 4 +conv2d_bf16_params.h 520 0xc06 5 +conv2d_bf16_params.h 533 0xc06 6 +conv2d_bf16_params.h 539 0xc06 7 +conv2d_bf16_params.h 557 0xc06 8 +conv2d_bf16_params.h 621 0xc06 9 +conv2d_bf16_params.h 645 0xc06 10 +conv2d_bf16_params.h 655 0xc06 11 +conv2d_bf16_params.h 700 0xc06 12 +conv2d_bf16_params.h 709 0xc06 13 +conv2d_bf16_params.h 477 0xc10 +conv2d_bf16_params.h 529 0xc10 1 +conv2d_bf16_params.h 539 0xc10 2 +conv2d_bf16_params.h 578 0xc10 3 +conv2d_bf16_params.h 642 0xc10 4 +conv2d_bf16_params.h 642 0xc10 5 +conv2d_bf16_params.h 642 0xc10 6 +conv2d_bf16_params.h 477 0xc20 x +conv2d_bf16_params.h 495 0xc20 1 x +conv2d_bf16_params.h 495 0xc20 2 +conv2d_bf16_params.h 682 0xc20 3 +conv2d_bf16_params.h 477 0xc2a +conv2d_bf16_params.h 481 0xc2a 1 x +conv2d_bf16_params.h 495 0xc2a 2 +conv2d_bf16_params.h 495 0xc2a 3 +conv2d_bf16_params.h 477 0xc34 x +conv2d_bf16_params.h 496 0xc34 1 +conv2d_bf16_params.h 504 0xc34 2 +conv2d_bf16_params.h 539 0xc34 3 +conv2d_bf16_params.h 578 0xc34 4 +conv2d_bf16_params.h 496 0xc3e +conv2d_bf16_params.h 499 0xc3e 1 +conv2d_bf16_params.h 504 0xc3e 2 x +conv2d_bf16_params.h 509 0xc3e 3 +conv2d_bf16_params.h 519 0xc3e 4 +conv2d_bf16_params.h 700 0xc3e 5 +conv2d_bf16_params.h 492 0xc48 x +conv2d_bf16_params.h 497 0xc48 1 +conv2d_bf16_params.h 509 0xc48 2 +conv2d_bf16_params.h 500 0xc52 +conv2d_bf16_params.h 520 0xc52 1 x +conv2d_bf16_params.h 502 0xc58 +conv2d_bf16_params.h 520 0xc58 1 +conv2d_bf16_params.h 502 0xc62 +conv2d_bf16_params.h 507 0xc62 1 x +conv2d_bf16_params.h 495 0xc68 x +conv2d_bf16_params.h 495 0xc6c +conv2d_bf16_params.h 495 0xc6c 1 +conv2d_bf16_params.h 610 0xc6c 2 +conv2d_bf16_params.h 709 0xc6c 3 +conv2d_bf16_params.h 507 0xc72 x +conv2d_bf16_params.h 495 0xc76 x +conv2d_bf16_params.h 495 0xc7a +conv2d_bf16_params.h 506 0xc7a 1 +conv2d_bf16_params.h 519 0xc7a 2 x +conv2d_bf16_params.h 496 0xc84 x +conv2d_bf16_params.h 504 0xc84 1 x +conv2d_bf16_params.h 522 0xc84 2 +conv2d_bf16_params.h 509 0xc8e x +conv2d_bf16_params.h 496 0xc94 x +conv2d_bf16_params.h 520 0xc94 1 x +conv2d_bf16_params.h 529 0xc94 2 +conv2d_bf16_params.h 497 0xc9e x +conv2d_bf16_params.h 509 0xc9e 1 x +conv2d_bf16_params.h 533 0xc9e 2 +conv2d_bf16_params.h 539 0xca8 x +conv2d_bf16_params.h 499 0xcac x +conv2d_bf16_params.h 499 0xcb0 +conv2d_bf16_params.h 529 0xcb4 x +conv2d_bf16_params.h 507 0xcb8 x +conv2d_bf16_params.h 511 0xcb8 1 +conv2d_bf16_params.h 491 0xcbe x +conv2d_bf16_params.h 507 0xcbe 1 +conv2d_bf16_params.h 500 0xcc8 x +conv2d_bf16_params.h 511 0xcc8 1 x +conv2d_bf16_params.h 500 0xcce +conv2d_bf16_params.h 534 0xcce 1 +conv2d_bf16_params.h 502 0xcd6 x +conv2d_bf16_params.h 509 0xcd6 1 x +conv2d_bf16_params.h 642 0xcd6 2 +conv2d_bf16_params.h 510 0xce2 x +conv2d_bf16_params.h 506 0xce6 x +conv2d_bf16_params.h 527 0xcea x +conv2d_bf16_params.h 502 0xcf4 x +conv2d_bf16_params.h 502 0xcf8 +conv2d_bf16_params.h 506 0xcfc x +conv2d_bf16_params.h 506 0xd0c +conv2d_bf16_params.h 506 0xd10 +conv2d_bf16_params.h 510 0xd14 x +conv2d_bf16_params.h 510 0xd18 +conv2d_bf16_params.h 510 0xd1e +conv2d_bf16_params.h 510 0xd22 +conv2d_bf16_params.h 510 0xd28 +conv2d_bf16_params.h 539 0xd28 1 +conv2d_bf16_params.h 642 0xd28 2 +conv2d_bf16_params.h 511 0xd2e x +conv2d_bf16_params.h 524 0xd2e 1 +conv2d_bf16_params.h 539 0xd2e 2 +conv2d_bf16_params.h 512 0xd34 x +conv2d_bf16_params.h 524 0xd34 1 x +conv2d_bf16_params.h 524 0xd3a +conv2d_bf16_params.h 524 0xd3e +conv2d_bf16_params.h 520 0xd42 x +conv2d_bf16_params.h 511 0xd46 x +conv2d_bf16_params.h 522 0xd46 1 x +conv2d_bf16_params.h 524 0xd4c x +conv2d_bf16_params.h 529 0xd4c 1 x +conv2d_bf16_params.h 539 0xd4c 2 x +conv2d_bf16_params.h 534 0xd56 +conv2d_bf16_params.h 539 0xd56 1 +conv2d_bf16_params.h 527 0xd5c x +conv2d_bf16_params.h 533 0xd5c 1 x +conv2d_bf16_params.h 529 0xd6a x +conv2d_bf16_params.h 533 0xd6a 1 +conv2d_bf16_params.h 539 0xd70 x +conv2d_bf16_params.h 529 0xd76 x +conv2d_bf16_params.h 529 0xd76 1 +conv2d_bf16_params.h 529 0xd7c +conv2d_bf16_params.h 534 0xd80 x +conv2d_bf16_params.h 534 0xd84 +conv2d_bf16_params.h 539 0xd84 1 x +conv2d_bf16_params.h 555 0xd84 2 +conv2d_bf16_params.h 559 0xd84 3 +conv2d_bf16_params.h 700 0xd84 4 +conv2d_bf16_params.h 669 0xd8e +conv2d_bf16_params.h 700 0xd8e 1 +conv2d_bf16_params.h 539 0xd92 +conv2d_bf16_params.h 539 0xda2 +conv2d_bf16_params.h 539 0xdb2 +conv2d_bf16_params.h 539 0xdb2 1 +conv2d_bf16_params.h 539 0xdb2 2 +conv2d_bf16_params.h 539 0xdb2 3 +conv2d_bf16_params.h 539 0xdbc +conv2d_bf16_params.h 539 0xdc0 +conv2d_bf16_params.h 539 0xdc4 +conv2d_bf16_params.h 539 0xdc4 1 +conv2d_bf16_params.h 539 0xdca +conv2d_bf16_params.h 539 0xdce +conv2d_bf16_params.h 539 0xdd2 +conv2d_bf16_params.h 669 0xdd2 1 +conv2d_bf16_params.h 539 0xdd8 +conv2d_bf16_params.h 539 0xddc +conv2d_bf16_params.h 539 0xde0 +conv2d_bf16_params.h 539 0xde4 +conv2d_bf16_params.h 555 0xde8 x +conv2d_bf16_params.h 642 0xdf0 +conv2d_bf16_params.h 669 0xdf0 1 +conv2d_bf16_params.h 669 0xdf0 2 +conv2d_bf16_params.h 669 0xdfa x +conv2d_bf16_params.h 497 0xdfe x +conv2d_bf16_params.h 641 0xdfe 1 x +conv2d_bf16_params.h 645 0xdfe 2 +conv2d_bf16_params.h 559 0xe08 x +conv2d_bf16_params.h 640 0xe08 1 +conv2d_bf16_params.h 642 0xe08 2 +conv2d_bf16_params.h 642 0xe08 3 +conv2d_bf16_params.h 642 0xe12 x +conv2d_bf16_params.h 578 0xe16 x +conv2d_bf16_params.h 640 0xe1a x +conv2d_bf16_params.h 557 0xe1e +conv2d_bf16_params.h 645 0xe1e 1 +conv2d_bf16_params.h 641 0xe28 x +conv2d_bf16_params.h 642 0xe28 1 x +conv2d_bf16_params.h 642 0xe2e +conv2d_bf16_params.h 642 0xe2e 1 +conv2d_bf16_params.h 558 0xe32 x +conv2d_bf16_params.h 645 0xe32 1 +conv2d_bf16_params.h 540 0xe38 +conv2d_bf16_params.h 645 0xe38 1 x +conv2d_bf16_params.h 540 0xe3e x +conv2d_bf16_params.h 557 0xe3e 1 +conv2d_bf16_params.h 642 0xe44 x +conv2d_bf16_params.h 557 0xe48 x +conv2d_bf16_params.h 655 0xe48 1 +conv2d_bf16_params.h 558 0xe4e +conv2d_bf16_params.h 655 0xe4e 1 x +conv2d_bf16_params.h 558 0xe54 x +conv2d_bf16_params.h 540 0xe58 x +conv2d_bf16_params.h 655 0xe58 1 +conv2d_bf16_params.h 655 0xe58 2 +conv2d_bf16_params.h 679 0xe58 3 +conv2d_bf16_params.h 655 0xe62 x +conv2d_bf16_params.h 558 0xe66 x +conv2d_bf16_params.h 655 0xe66 1 +conv2d_bf16_params.h 655 0xe66 2 +conv2d_bf16_params.h 679 0xe66 3 +conv2d_bf16_params.h 655 0xe70 x +conv2d_bf16_params.h 126 0xe74 x +conv2d_bf16_params.h 559 0xe74 1 x +conv2d_bf16_params.h 669 0xe7a x +conv2d_bf16_params.h 700 0xe7a 1 +conv2d_bf16_params.h 558 0xe80 x +conv2d_bf16_params.h 700 0xe86 x +conv2d_bf16_params.h 578 0xe8a x +conv2d_bf16_params.h 559 0xe8e x +conv2d_bf16_params.h 578 0xe92 x +conv2d_bf16_params.h 610 0xe96 x +conv2d_bf16_params.h 611 0xe96 1 +conv2d_bf16_params.h 621 0xe96 2 +conv2d_bf16_params.h 621 0xe96 3 +conv2d_bf16_params.h 629 0xe96 4 +conv2d_bf16_params.h 621 0xea2 +conv2d_bf16_params.h 621 0xea2 1 x +conv2d_bf16_params.h 645 0xea2 2 +conv2d_bf16_params.h 649 0xea2 3 +conv2d_bf16_params.h 645 0xea8 +conv2d_bf16_params.h 554 0xeae x +conv2d_bf16_params.h 645 0xeae 1 x +conv2d_bf16_params.h 554 0xeb8 +conv2d_bf16_params.h 555 0xeb8 1 +conv2d_bf16_params.h 555 0xeb8 2 x +conv2d_bf16_params.h 645 0xeb8 3 +conv2d_bf16_params.h 555 0xec4 +conv2d_bf16_params.h 621 0xec4 1 +conv2d_bf16_params.h 621 0xec4 2 x +conv2d_bf16_params.h 645 0xec4 3 +conv2d_bf16_params.h 558 0xece x +conv2d_bf16_params.h 559 0xece 1 +conv2d_bf16_params.h 621 0xece 2 +conv2d_bf16_params.h 621 0xece 3 +conv2d_bf16_params.h 645 0xece 4 +conv2d_bf16_params.h 559 0xeda x +conv2d_bf16_params.h 621 0xeda 1 x +conv2d_bf16_params.h 645 0xeda 2 x +conv2d_bf16_params.h 610 0xee0 x +conv2d_bf16_params.h 621 0xee0 1 +conv2d_bf16_params.h 655 0xee0 2 +conv2d_bf16_params.h 679 0xee0 3 +conv2d_bf16_params.h 621 0xeec +conv2d_bf16_params.h 649 0xeec 1 +conv2d_bf16_params.h 655 0xeec 2 x +conv2d_bf16_params.h 661 0xeec 3 +conv2d_bf16_params.h 127 0xef6 x +conv2d_bf16_params.h 127 0xef6 1 x +conv2d_bf16_params.h 621 0xef6 2 +conv2d_bf16_params.h 649 0xef6 3 +conv2d_bf16_params.h 655 0xef6 4 +conv2d_bf16_params.h 679 0xef6 5 +conv2d_bf16_params.h 710 0xef6 6 +conv2d_bf16_params.h 710 0xef6 7 +conv2d_bf16_params.h 655 0xf00 x +conv2d_bf16_params.h 679 0xf00 1 x +conv2d_bf16_params.h 621 0xf06 x +conv2d_bf16_params.h 649 0xf06 1 x +conv2d_bf16_params.h 655 0xf06 2 +conv2d_bf16_params.h 655 0xf06 3 +conv2d_bf16_params.h 700 0xf06 4 +conv2d_bf16_params.h 700 0xf06 5 +conv2d_bf16_params.h 655 0xf10 x +conv2d_bf16_params.h 700 0xf10 1 x +conv2d_bf16_params.h 629 0xf14 x +conv2d_bf16_params.h 611 0xf18 x +conv2d_bf16_params.h 643 0xf26 x +conv2d_bf16_params.h 664 0xf2a +conv2d_bf16_params.h 621 0xf30 x +conv2d_bf16_params.h 629 0xf30 1 +conv2d_bf16_params.h 684 0xf30 2 x +conv2d_bf16_params.h 629 0xf3a x +conv2d_bf16_params.h 127 0xf40 x +conv2d_bf16_params.h 644 0xf40 1 +conv2d_bf16_params.h 700 0xf40 2 x +conv2d_bf16_params.h 705 0xf40 3 +conv2d_bf16_params.h 705 0xf40 4 +conv2d_bf16_params.h 645 0xf4a x +conv2d_bf16_params.h 700 0xf4a 1 +conv2d_bf16_params.h 700 0xf4a 2 +conv2d_bf16_params.h 705 0xf4a 3 +conv2d_bf16_params.h 644 0xf54 +conv2d_bf16_params.h 649 0xf54 1 x +conv2d_bf16_params.h 674 0xf54 2 +conv2d_bf16_params.h 644 0xf5e x +conv2d_bf16_params.h 662 0xf5e 1 +conv2d_bf16_params.h 664 0xf5e 2 x +conv2d_bf16_params.h 127 0xf68 x +conv2d_bf16_params.h 663 0xf68 1 x +conv2d_bf16_params.h 664 0xf68 2 +conv2d_bf16_params.h 126 0xf6e x +conv2d_bf16_params.h 664 0xf6e 1 x +conv2d_bf16_params.h 126 0xf74 +conv2d_bf16_params.h 664 0xf74 1 +conv2d_bf16_params.h 127 0xf7a x +conv2d_bf16_params.h 127 0xf7a 1 x +conv2d_bf16_params.h 664 0xf7a 2 +conv2d_bf16_params.h 664 0xf7a 3 +conv2d_bf16_params.h 675 0xf7a 4 +conv2d_bf16_params.h 696 0xf7a 5 +conv2d_bf16_params.h 644 0xf84 x +conv2d_bf16_params.h 664 0xf84 1 x +conv2d_bf16_params.h 705 0xf84 2 +conv2d_bf16_params.h 664 0xf8e +conv2d_bf16_params.h 705 0xf8e 1 x +conv2d_bf16_params.h 705 0xf8e 2 x +conv2d_bf16_params.h 127 0xf94 +conv2d_bf16_params.h 674 0xf94 1 x +conv2d_bf16_params.h 675 0xf94 2 x +conv2d_bf16_params.h 682 0xf94 3 +conv2d_bf16_params.h 718 0xf94 4 +conv2d_bf16_params.h 720 0xf94 5 +conv2d_bf16_params.h 127 0xf9e x +conv2d_bf16_params.h 642 0xf9e 1 +conv2d_bf16_params.h 675 0xf9e 2 +conv2d_bf16_params.h 675 0xfa8 x +conv2d_bf16_params.h 707 0xfa8 1 x +conv2d_bf16_params.h 642 0xfae +conv2d_bf16_params.h 674 0xfae 1 x +conv2d_bf16_params.h 675 0xfae 2 +conv2d_bf16_params.h 642 0xfb8 x +conv2d_bf16_params.h 655 0xfb8 1 +conv2d_bf16_params.h 655 0xfb8 2 +conv2d_bf16_params.h 675 0xfb8 3 x +conv2d_bf16_params.h 679 0xfb8 4 +conv2d_bf16_params.h 679 0xfb8 5 +conv2d_bf16_params.h 655 0xfc4 x +conv2d_bf16_params.h 679 0xfc4 1 x +conv2d_bf16_params.h 713 0xfc4 2 +conv2d_bf16_params.h 691 0xfca x +conv2d_bf16_params.h 675 0xfce +conv2d_bf16_params.h 675 0xfce 1 x +conv2d_bf16_params.h 709 0xfce 2 x +conv2d_bf16_params.h 675 0xfd8 +conv2d_bf16_params.h 706 0xfd8 1 x +conv2d_bf16_params.h 706 0xfd8 2 +conv2d_bf16_params.h 709 0xfd8 3 +conv2d_bf16_params.h 682 0xfe4 x +conv2d_bf16_params.h 706 0xfe4 1 +conv2d_bf16_params.h 126 0xfea x +conv2d_bf16_params.h 696 0xfea 1 x +conv2d_bf16_params.h 127 0xff0 x +conv2d_bf16_params.h 127 0xff0 1 x +conv2d_bf16_params.h 696 0xff0 2 +conv2d_bf16_params.h 696 0xff6 x +conv2d_bf16_params.h 713 0xff6 1 x +conv2d_bf16_params.h 696 0xffc +conv2d_bf16_params.h 706 0xffc 1 +conv2d_bf16_params.h 706 0xffc 2 x +conv2d_bf16_params.h 706 0x1006 +conv2d_bf16_params.h 696 0x100a x +conv2d_bf16_params.h 707 0x100a 1 x +conv2d_bf16_params.h 696 0x1010 +conv2d_bf16_params.h 709 0x1010 1 x +conv2d_bf16_params.h 696 0x1016 x +conv2d_bf16_params.h 709 0x1016 1 +conv2d_bf16_params.h 707 0x1020 x +conv2d_bf16_params.h 708 0x1020 1 +conv2d_bf16_params.h 710 0x1020 2 x +conv2d_bf16_params.h 710 0x1020 3 x +conv2d_bf16_params.h 708 0x102c x +conv2d_bf16_params.h 713 0x102c 1 x +conv2d_bf16_params.h 709 0x1036 x +conv2d_bf16_params.h 800 0x1036 1 x +conv2d_bf16_params.h 710 0x103c x +conv2d_bf16_params.h 718 0x1044 x +conv2d_bf16_params.h 718 0x1048 +conv2d_bf16_params.h 720 0x104c x +conv2d_bf16_params.h 800 0x104c 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x1060 +utils.h 531 0x1060 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 689 0x1060 2 x +conv2d_bf16.h 698 0x1060 3 +conv2d_bf16.h 704 0x1060 4 +conv2d_bf16.h 707 0x1060 5 +conv2d_bf16.h 707 0x1060 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x106c +utils.h 526 0x106c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 698 0x106c 2 x +conv2d_bf16.h 704 0x106c 3 x +conv2d_bf16.h 707 0x106c 4 +conv2d_bf16.h 707 0x106c 5 +conv2d_bf16.h 698 0x107a +conv2d_bf16.h 702 0x107a 1 +conv2d_bf16.h 698 0x1084 +conv2d_bf16.h 702 0x1084 1 x +conv2d_bf16.h 699 0x108e x +conv2d_bf16.h 702 0x108e 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1098 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 702 0x1098 1 x +conv2d_bf16.h 702 0x109e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x10a6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x10a6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x10ac x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 704 0x10b0 x +conv2d_bf16.h 702 0x10b4 x +conv2d_bf16.h 705 0x10b4 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x10ba x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 707 0x10ba 1 +conv2d_bf16.h 707 0x10ba 2 +conv2d_bf16.h 704 0x10c0 x +conv2d_bf16.h 705 0x10c6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x10d0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x10d0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x10d0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x10e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x10e0 1 x +conv2d_bf16.h 704 0x10f0 x +conv2d_bf16.h 705 0x1100 x +conv2d_bf16.h 707 0x1100 1 x +conv2d_bf16.h 707 0x1100 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1110 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x1110 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1110 2 +conv2d_bf16.h 708 0x1110 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1120 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1120 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x1120 2 x +conv2d_bf16.h 707 0x1132 x +conv2d_bf16.h 707 0x1132 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1136 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1136 1 x +conv2d_bf16.h 708 0x1136 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x113e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x113e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1142 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1146 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1146 1 x +conv2d_bf16.h 707 0x1146 2 x +conv2d_bf16.h 707 0x1146 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x114e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x114e 1 x +conv2d_bf16.h 708 0x114e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1156 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 707 0x115a x +conv2d_bf16.h 707 0x115a 1 x +conv2d_bf16.h 723 0x115a 2 x +conv2d_bf16.h 708 0x1160 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1164 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1170 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1836 0x1170 1 +conv2d_bf16.h 1836 0x1170 2 x +conv2d_bf16.h 1836 0x1170 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 240 0x1170 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1836 0x117e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 241 0x117e 1 +conv2d_bf16_params.h 242 0x117e 2 +conv2d_bf16_params.h 250 0x117e 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 866 0x118a +conv2d_bf16.h 876 0x118a 1 +conv2d_bf16.h 876 0x118a 2 +conv2d_bf16.h 881 0x118a 3 +conv2d_bf16.h 1836 0x118a 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 242 0x118a 5 +conv2d_bf16_params.h 242 0x118a 6 +conv2d_bf16_params.h 242 0x118a 7 +conv2d_bf16_params.h 242 0x118a 8 +conv2d_bf16_params.h 242 0x118a 9 +conv2d_bf16_params.h 243 0x118a 10 +conv2d_bf16_params.h 245 0x118a 11 +conv2d_bf16_params.h 250 0x118a 12 +conv2d_bf16_params.h 250 0x118a 13 +conv2d_bf16_params.h 240 0x1196 +conv2d_bf16_params.h 240 0x1196 1 x +conv2d_bf16_params.h 242 0x11a2 +conv2d_bf16_params.h 245 0x11a2 1 +conv2d_bf16_params.h 242 0x11ae +conv2d_bf16_params.h 244 0x11ae 1 +conv2d_bf16_params.h 244 0x11ae 2 +conv2d_bf16_params.h 249 0x11ae 3 +conv2d_bf16_params.h 243 0x11ba +conv2d_bf16_params.h 244 0x11ba 1 +conv2d_bf16_params.h 250 0x11ba 2 +conv2d_bf16_params.h 244 0x11c6 +conv2d_bf16_params.h 240 0x11d4 +conv2d_bf16_params.h 240 0x11d8 +conv2d_bf16_params.h 241 0x11d8 1 x +conv2d_bf16_params.h 242 0x11de x +conv2d_bf16_params.h 242 0x11de 1 x +conv2d_bf16_params.h 245 0x11e4 x +conv2d_bf16_params.h 242 0x11f2 x +conv2d_bf16_params.h 242 0x11f6 +conv2d_bf16_params.h 242 0x11fa +conv2d_bf16_params.h 241 0x11fe x +conv2d_bf16_params.h 242 0x11fe 1 +conv2d_bf16_params.h 242 0x1204 x +conv2d_bf16_params.h 242 0x1208 +conv2d_bf16_params.h 242 0x120c +conv2d_bf16_params.h 242 0x1210 +conv2d_bf16_params.h 242 0x1210 1 +conv2d_bf16_params.h 242 0x1216 +conv2d_bf16_params.h 243 0x121a x +conv2d_bf16_params.h 242 0x121e x +conv2d_bf16_params.h 243 0x121e 1 +conv2d_bf16_params.h 244 0x1224 x +conv2d_bf16_params.h 245 0x1224 1 x +conv2d_bf16_params.h 244 0x1236 +conv2d_bf16_params.h 244 0x1236 1 +conv2d_bf16_params.h 245 0x123c +conv2d_bf16_params.h 244 0x1242 +conv2d_bf16_params.h 244 0x1246 +conv2d_bf16_params.h 244 0x124a +conv2d_bf16_params.h 244 0x124e +conv2d_bf16_params.h 244 0x1252 +conv2d_bf16_params.h 245 0x1256 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 876 0x1268 +conv2d_bf16.h 876 0x1268 1 +conv2d_bf16.h 1849 0x1276 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 250 0x1280 x +conv2d_bf16_params.h 250 0x1280 1 +conv2d_bf16_params.h 250 0x128c +conv2d_bf16_params.h 250 0x1290 +conv2d_bf16_params.h 250 0x1294 +conv2d_bf16_params.h 250 0x1298 +conv2d_bf16_params.h 250 0x1298 1 +conv2d_bf16_params.h 250 0x129e +conv2d_bf16_params.h 249 0x12a2 x +conv2d_bf16_params.h 249 0x12a6 +conv2d_bf16_params.h 250 0x12aa x +conv2d_bf16_params.h 258 0x12b0 x +conv2d_bf16_params.h 259 0x12c8 +conv2d_bf16_params.h 259 0x12ce x +conv2d_bf16_params.h 259 0x12d2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1841 0x12e0 x +conv2d_bf16.h 1849 0x12e0 1 +conv2d_bf16.h 1849 0x12e0 2 x +conv2d_bf16.h 876 0x12ea +conv2d_bf16.h 881 0x12ea 1 +conv2d_bf16.h 1841 0x12ea 2 +conv2d_bf16.h 1842 0x12ea 3 +conv2d_bf16.h 1842 0x12ea 4 +conv2d_bf16.h 1842 0x12ea 5 +conv2d_bf16.h 1845 0x12ea 6 +conv2d_bf16.h 1849 0x12ea 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x12ea 8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1842 0x12f4 x +conv2d_bf16.h 1842 0x12f4 1 +conv2d_bf16.h 1849 0x12f4 2 +conv2d_bf16.h 862 0x1300 +conv2d_bf16.h 1842 0x1300 1 +conv2d_bf16.h 1845 0x1300 2 +conv2d_bf16.h 1845 0x130c x +conv2d_bf16.h 862 0x1310 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1314 x +io_buffer_main.h 125 0x1318 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1841 0x1318 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x131e x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1842 0x1322 x +conv2d_bf16.h 1845 0x1328 x +conv2d_bf16.h 866 0x132c x +conv2d_bf16.h 866 0x1330 +conv2d_bf16.h 1842 0x1336 x +conv2d_bf16.h 1842 0x1336 1 x +conv2d_bf16.h 1842 0x133c +conv2d_bf16.h 1845 0x133c 1 x +conv2d_bf16.h 1841 0x1342 x +conv2d_bf16.h 881 0x134a +conv2d_bf16.h 885 0x134a 1 +conv2d_bf16.h 1845 0x134e x +conv2d_bf16.h 867 0x1352 +conv2d_bf16.h 867 0x1358 +conv2d_bf16.h 867 0x1358 1 x +conv2d_bf16.h 867 0x1360 +conv2d_bf16.h 867 0x1366 +conv2d_bf16.h 867 0x1372 +conv2d_bf16.h 867 0x1372 1 +conv2d_bf16.h 867 0x1378 +conv2d_bf16.h 867 0x137c +conv2d_bf16.h 867 0x1382 +conv2d_bf16.h 867 0x138a +conv2d_bf16.h 881 0x13a0 +conv2d_bf16.h 883 0x13a0 1 +conv2d_bf16.h 884 0x13a0 2 +conv2d_bf16.h 876 0x13ac x +conv2d_bf16.h 876 0x13ac 1 x +conv2d_bf16.h 881 0x13ac 2 x +conv2d_bf16.h 883 0x13ac 3 +conv2d_bf16.h 884 0x13ac 4 +conv2d_bf16.h 885 0x13b8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x13b8 1 +conv2d_bf16_params.h 243 0x13b8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 883 0x13c2 x +conv2d_bf16.h 884 0x13c8 x +conv2d_bf16.h 876 0x13ce x +conv2d_bf16.h 876 0x13d2 +conv2d_bf16.h 881 0x13d6 x +conv2d_bf16.h 881 0x13da + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x13da 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 881 0x13e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x13e0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 885 0x13f0 +conv2d_bf16.h 885 0x13f4 x +conv2d_bf16.h 885 0x13fe +conv2d_bf16.h 885 0x1402 +conv2d_bf16.h 885 0x1406 +conv2d_bf16.h 896 0x1410 +conv2d_bf16.h 1115 0x1410 1 +conv2d_bf16.h 1115 0x1410 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x141a +vector.hpp 1152 0x141a 1 +vector.hpp 1152 0x141a 2 +vector.hpp 1152 0x141a 3 +vector.hpp 1152 0x141a 4 +vector.hpp 1152 0x141a 5 +vector.hpp 1152 0x141a 6 +vector.hpp 1152 0x141a 7 +vector.hpp 1152 0x141a 8 +vector.hpp 1152 0x141a 9 +vector.hpp 1152 0x141a 10 +vector.hpp 1152 0x141a 11 +vector.hpp 1152 0x141a 12 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x141a 13 +accum.hpp 149 0x141a 14 +accum.hpp 149 0x141a 15 +accum.hpp 149 0x141a 16 +accum.hpp 149 0x141a 17 +accum.hpp 149 0x141a 18 +accum.hpp 149 0x141a 19 +accum.hpp 149 0x141a 20 +accum.hpp 149 0x141a 21 +accum.hpp 149 0x141a 22 +accum.hpp 149 0x141a 23 +accum.hpp 149 0x141a 24 +accum.hpp 149 0x141a 25 +accum.hpp 149 0x141a 26 +accum.hpp 149 0x141a 27 +accum.hpp 149 0x141a 28 +accum.hpp 1110 0x141a 29 +accum.hpp 1110 0x141a 30 +accum.hpp 1110 0x141a 31 +accum.hpp 1110 0x141a 32 +accum.hpp 1110 0x141a 33 +accum.hpp 1110 0x141a 34 +accum.hpp 1110 0x141a 35 +accum.hpp 1110 0x141a 36 +accum.hpp 1110 0x141a 37 +accum.hpp 1110 0x141a 38 +accum.hpp 1110 0x141a 39 +accum.hpp 1110 0x141a 40 +accum.hpp 1110 0x141a 41 +accum.hpp 1110 0x141a 42 +accum.hpp 1110 0x141a 43 +accum.hpp 1110 0x141a 44 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 886 0x141a 45 +conv2d_bf16.h 896 0x141a 46 x +conv2d_bf16.h 1123 0x141a 47 +conv2d_bf16.h 896 0x1420 +conv2d_bf16.h 896 0x1424 +conv2d_bf16.h 896 0x1428 +conv2d_bf16.h 896 0x142c +conv2d_bf16.h 896 0x1430 +conv2d_bf16.h 896 0x1434 +conv2d_bf16.h 897 0x1438 x +conv2d_bf16.h 897 0x143c +conv2d_bf16.h 897 0x1440 +conv2d_bf16.h 897 0x1444 +conv2d_bf16.h 897 0x1448 +conv2d_bf16.h 897 0x144c +conv2d_bf16.h 897 0x1450 +conv2d_bf16.h 898 0x1454 x +conv2d_bf16.h 898 0x1458 +conv2d_bf16.h 898 0x145c +conv2d_bf16.h 898 0x1460 +conv2d_bf16.h 898 0x1464 +conv2d_bf16.h 898 0x1468 +conv2d_bf16.h 1115 0x146c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1470 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 898 0x1474 x +conv2d_bf16.h 1115 0x1480 x +conv2d_bf16.h 1115 0x1484 +conv2d_bf16.h 886 0x148a +conv2d_bf16.h 886 0x1490 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1494 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x149c +conv2d_bf16.h 1123 0x149c 1 +conv2d_bf16.h 1123 0x149c 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14a6 +aie_core.h 100 0x14a6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14a6 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x14a6 3 +accum.hpp 946 0x14a6 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x14a6 5 +conv2d_bf16.h 1125 0x14a6 6 +conv2d_bf16.h 1154 0x14a6 7 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14b0 +aie_core.h 100 0x14b0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14b0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x14b0 3 +accum.hpp 946 0x14b0 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x14b0 5 +conv2d_bf16.h 1125 0x14b0 6 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14ba +aie_core.h 100 0x14ba 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x14ba 2 +vector.hpp 1152 0x14ba 3 +vector.hpp 1152 0x14ba 4 +vector.hpp 1152 0x14ba 5 +vector.hpp 1152 0x14ba 6 +vector.hpp 1152 0x14ba 7 +vector.hpp 1152 0x14ba 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x14ba 9 +accum.hpp 149 0x14ba 10 +accum.hpp 149 0x14ba 11 +accum.hpp 149 0x14ba 12 +accum.hpp 149 0x14ba 13 +accum.hpp 149 0x14ba 14 +accum.hpp 149 0x14ba 15 +accum.hpp 149 0x14ba 16 +accum.hpp 578 0x14ba 17 +accum.hpp 946 0x14ba 18 +accum.hpp 1110 0x14ba 19 +accum.hpp 1110 0x14ba 20 +accum.hpp 1110 0x14ba 21 +accum.hpp 1110 0x14ba 22 +accum.hpp 1110 0x14ba 23 +accum.hpp 1110 0x14ba 24 +accum.hpp 1110 0x14ba 25 +accum.hpp 1110 0x14ba 26 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 746 0x14ba 27 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x14c6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x14c6 1 +conv2d_bf16.h 1187 0x14c6 2 +conv2d_bf16.h 1199 0x14c6 3 +conv2d_bf16.h 1200 0x14c6 4 +conv2d_bf16.h 1201 0x14c6 5 +conv2d_bf16.h 1202 0x14c6 6 +conv2d_bf16.h 1143 0x14d2 +conv2d_bf16.h 1218 0x14d2 1 +conv2d_bf16.h 749 0x14dc +conv2d_bf16.h 750 0x14dc 1 +conv2d_bf16.h 751 0x14dc 2 +conv2d_bf16.h 752 0x14dc 3 +conv2d_bf16.h 1123 0x14dc 4 +conv2d_bf16.h 736 0x14e6 +conv2d_bf16.h 738 0x14e6 1 +conv2d_bf16.h 1123 0x14e6 2 +conv2d_bf16.h 1873 0x14e6 3 +conv2d_bf16.h 1125 0x14f2 x +conv2d_bf16.h 1125 0x14f6 +conv2d_bf16.h 1125 0x14fa +conv2d_bf16.h 1149 0x14fe x +conv2d_bf16.h 1154 0x1502 x +conv2d_bf16.h 743 0x1506 x +conv2d_bf16.h 745 0x150a x +conv2d_bf16.h 746 0x150e x +conv2d_bf16.h 1125 0x150e 1 x +conv2d_bf16.h 1143 0x1514 x +conv2d_bf16.h 1206 0x1518 x +conv2d_bf16.h 1149 0x151c +conv2d_bf16.h 1154 0x1524 +conv2d_bf16.h 1125 0x1528 x +conv2d_bf16.h 1149 0x152c x +conv2d_bf16.h 1154 0x1530 x +conv2d_bf16.h 1287 0x1536 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1540 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1540 1 x +accum.hpp 946 0x1540 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x1540 3 +conv2d_bf16.h 738 0x1540 4 +conv2d_bf16.h 1147 0x1540 5 x +conv2d_bf16.h 1187 0x1540 6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x154c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x154c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x154c 2 +accum.hpp 946 0x154c 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x154c 4 x +conv2d_bf16.h 738 0x154c 5 x +conv2d_bf16.h 1188 0x154c 6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1558 +aie_core.h 100 0x1558 1 +aie_core.h 100 0x1558 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1558 3 +vector.hpp 1139 0x1558 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1558 5 +accum.hpp 578 0x1558 6 +accum.hpp 946 0x1558 7 +accum.hpp 946 0x1558 8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 737 0x1558 9 x +conv2d_bf16.h 742 0x1558 10 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1564 +aie_core.h 100 0x1564 1 +aie_core.h 100 0x1564 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1564 3 +vector.hpp 1139 0x1564 4 +vector.hpp 1139 0x1564 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1564 6 +accum.hpp 578 0x1564 7 +accum.hpp 578 0x1564 8 x +accum.hpp 946 0x1564 9 +accum.hpp 946 0x1564 10 +accum.hpp 946 0x1564 11 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x1564 12 x +conv2d_bf16.h 1149 0x1564 13 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1570 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1570 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1570 2 +accum.hpp 946 0x1570 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1570 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x1570 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x157a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x157a 1 x +accum.hpp 946 0x157a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x157a 3 x +conv2d_bf16.h 1152 0x157a 4 x +conv2d_bf16.h 1206 0x157a 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1586 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1586 1 +accum.hpp 946 0x1586 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 737 0x1586 3 x +conv2d_bf16.h 1154 0x1586 4 x +conv2d_bf16.h 1206 0x1586 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1592 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1592 1 x +accum.hpp 946 0x1592 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x1592 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1598 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1598 1 +accum.hpp 946 0x1598 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1598 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1157 0x1598 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x159e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x159e 1 x +accum.hpp 946 0x159e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x159e 3 x +conv2d_bf16.h 1159 0x159e 4 x +conv2d_bf16.h 737 0x15a4 x +conv2d_bf16.h 738 0x15a4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15aa x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15aa 1 x +accum.hpp 946 0x15aa 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x15aa 3 x +conv2d_bf16.h 1192 0x15aa 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15b0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15b0 1 +accum.hpp 946 0x15b0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x15b0 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x15b0 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15ba +vector.hpp 1139 0x15ba 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15ba 2 +accum.hpp 578 0x15ba 3 x +accum.hpp 946 0x15ba 4 +accum.hpp 946 0x15ba 5 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x15ba 6 x +conv2d_bf16.h 746 0x15ba 7 x +conv2d_bf16.h 1162 0x15ba 8 +conv2d_bf16.h 737 0x15c6 x +conv2d_bf16.h 742 0x15c6 1 x +conv2d_bf16.h 749 0x15c6 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15d0 x +aie_core.h 143 0x15d0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15d0 2 x +vector.hpp 1152 0x15d0 3 +vector.hpp 1152 0x15d0 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15d0 5 x +accum.hpp 946 0x15d0 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 750 0x15d0 7 x +conv2d_bf16.h 1286 0x15d0 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15de + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15de 1 +vector.hpp 1139 0x15de 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15de 3 +accum.hpp 578 0x15de 4 +accum.hpp 946 0x15de 5 +accum.hpp 946 0x15de 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x15de 7 x +conv2d_bf16.h 751 0x15de 8 x +conv2d_bf16.h 1162 0x15de 9 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15ec + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15ec 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15ec 2 +accum.hpp 946 0x15ec 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x15ec 4 x +conv2d_bf16.h 746 0x15ec 5 x +conv2d_bf16.h 1199 0x15ec 6 x +conv2d_bf16.h 738 0x15fa x +conv2d_bf16.h 1200 0x15fa 1 x +conv2d_bf16.h 742 0x1602 x +conv2d_bf16.h 1201 0x1602 1 x +conv2d_bf16.h 743 0x160a x +conv2d_bf16.h 752 0x160a 1 x +conv2d_bf16.h 738 0x1612 x +conv2d_bf16.h 740 0x1612 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1618 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x161c x +conv2d_bf16.h 742 0x161c 1 x +conv2d_bf16.h 1202 0x161c 2 x +conv2d_bf16.h 1206 0x161c 3 x +conv2d_bf16.h 737 0x1628 x +conv2d_bf16.h 743 0x1628 1 x +conv2d_bf16.h 749 0x1628 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1632 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x1632 1 x +conv2d_bf16.h 740 0x1632 2 x +conv2d_bf16.h 751 0x1632 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1640 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x1640 1 x +conv2d_bf16.h 750 0x1640 2 x +conv2d_bf16.h 736 0x1650 x +conv2d_bf16.h 742 0x1650 1 x +conv2d_bf16.h 746 0x1650 2 x +conv2d_bf16.h 752 0x1650 3 x +conv2d_bf16.h 737 0x1660 x +conv2d_bf16.h 743 0x1660 1 x +conv2d_bf16.h 749 0x1660 2 x +conv2d_bf16.h 738 0x1670 x +conv2d_bf16.h 740 0x1670 1 x +conv2d_bf16.h 751 0x1670 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1680 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x1680 1 x +conv2d_bf16.h 750 0x1680 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1690 +aie_core.h 100 0x1690 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1690 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1690 3 +accum.hpp 946 0x1690 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 742 0x1690 5 x +conv2d_bf16.h 746 0x1690 6 x +conv2d_bf16.h 752 0x1690 7 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x169e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x169e 1 +vector.hpp 1152 0x169e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x169e 3 x +conv2d_bf16.h 749 0x169e 4 x +conv2d_bf16.h 1286 0x169e 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16ac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x16ac 1 +vector.hpp 1152 0x16ac 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x16ac 3 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16b6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x16b6 1 x +conv2d_bf16.h 750 0x16b6 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16c0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 746 0x16c0 1 x +conv2d_bf16.h 752 0x16c0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x16ca +vector.hpp 1152 0x16ca 1 +vector.hpp 1152 0x16ca 2 +vector.hpp 1152 0x16ca 3 +vector.hpp 1152 0x16ca 4 +vector.hpp 1152 0x16ca 5 +vector.hpp 1152 0x16ca 6 +vector.hpp 1152 0x16ca 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 749 0x16ca 8 x +conv2d_bf16.h 1285 0x16ca 9 x +conv2d_bf16.h 1286 0x16ca 10 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x16d6 +aie_core.h 100 0x16d6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x16d6 2 +vector.hpp 1152 0x16d6 3 +vector.hpp 1152 0x16d6 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x16d6 5 +accum.hpp 946 0x16d6 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x16d6 7 x +conv2d_bf16.h 746 0x16e0 x +conv2d_bf16.h 750 0x16e0 1 x +conv2d_bf16.h 745 0x16e8 x +conv2d_bf16.h 752 0x16e8 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16f0 +aie_core.h 143 0x16f4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 750 0x16f4 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16fc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 749 0x16fc 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1704 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 752 0x1704 1 x +conv2d_bf16.h 1286 0x1704 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x170e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x170e 1 +vector.hpp 1152 0x170e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x170e 3 x +conv2d_bf16.h 1286 0x170e 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x171a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x171a 1 +vector.hpp 1152 0x171a 2 +vector.hpp 1152 0x171a 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x171a 4 +accum.hpp 946 0x171a 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1722 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1287 0x1722 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x172a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x172a 1 x +accum.hpp 1110 0x172a 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1732 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1732 1 +accum.hpp 1110 0x1732 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x173a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x173a 1 +conv2d_bf16.h 1287 0x173a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1744 x +accum.hpp 1110 0x1744 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1287 0x1744 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x174c +accum.hpp 1110 0x174c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1218 0x174c 2 x +conv2d_bf16.h 1287 0x174c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1758 x +accum.hpp 1110 0x1758 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x1758 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1760 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1760 1 +accum.hpp 1110 0x1760 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1768 +vector.hpp 1152 0x1768 1 +vector.hpp 1152 0x1768 2 +vector.hpp 1152 0x1768 3 +vector.hpp 1152 0x1768 4 +vector.hpp 1152 0x1768 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1768 6 +accum.hpp 1110 0x1768 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1772 +vector.hpp 1152 0x1772 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1772 2 x +accum.hpp 1110 0x1772 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1286 0x1772 4 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x177a +aie_core.h 143 0x177a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x177a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x177a 3 +accum.hpp 946 0x177a 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1187 0x177a 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1782 x +max_min.hpp 20 0x1786 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x178a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x178a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1792 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1792 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x179a x +vector.hpp 1152 0x17a4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x17a4 1 x +max_min.hpp 20 0x17ac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17b0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x17b0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17b8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x17b8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17c0 x +vector.hpp 1152 0x17d0 +vector.hpp 1152 0x17d4 +vector.hpp 1152 0x17d8 +vector.hpp 1152 0x17dc +vector.hpp 1152 0x17e0 +vector.hpp 1152 0x17e4 +vector.hpp 1152 0x17e8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x17f0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17f0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1143 0x17f0 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x17fc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x17fc 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x17fc 2 +accum.hpp 946 0x17fc 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1800 +aie_core.h 100 0x1804 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1804 1 +vector.hpp 1152 0x1804 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x180a +aie_core.h 143 0x1820 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x1820 1 +conv2d_bf16.h 1364 0x1820 2 +conv2d_bf16.h 1364 0x1820 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x182c +aie_core.h 143 0x182c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x182c 2 +vector.hpp 1152 0x182c 3 +vector.hpp 1152 0x182c 4 +vector.hpp 1152 0x182c 5 +vector.hpp 1152 0x182c 6 +vector.hpp 1152 0x182c 7 +vector.hpp 1152 0x182c 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x182c 9 +accum.hpp 149 0x182c 10 +accum.hpp 149 0x182c 11 +accum.hpp 149 0x182c 12 +accum.hpp 149 0x182c 13 +accum.hpp 149 0x182c 14 +accum.hpp 149 0x182c 15 +accum.hpp 149 0x182c 16 +accum.hpp 1110 0x182c 17 +accum.hpp 1110 0x182c 18 +accum.hpp 1110 0x182c 19 +accum.hpp 1110 0x182c 20 +accum.hpp 1110 0x182c 21 +accum.hpp 1110 0x182c 22 +accum.hpp 1110 0x182c 23 +accum.hpp 1110 0x182c 24 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x182c 25 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1838 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 807 0x1838 1 +conv2d_bf16.h 808 0x1838 2 +conv2d_bf16.h 809 0x1838 3 +conv2d_bf16.h 810 0x1838 4 +conv2d_bf16.h 1436 0x1838 5 +conv2d_bf16.h 1437 0x1838 6 +conv2d_bf16.h 1438 0x1838 7 +conv2d_bf16.h 1439 0x1838 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1842 +aie_core.h 143 0x1842 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 802 0x1842 2 +conv2d_bf16.h 1428 0x1842 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x184e +aie_core.h 143 0x184e 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x184e 2 +conv2d_bf16.h 794 0x184e 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x185a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 794 0x185a 1 +conv2d_bf16.h 1455 0x185a 2 +conv2d_bf16.h 1337 0x1864 +conv2d_bf16.h 1364 0x186e x +conv2d_bf16.h 1873 0x186e 1 +conv2d_bf16.h 1364 0x1874 +conv2d_bf16.h 1369 0x1878 x +conv2d_bf16.h 799 0x187c x +conv2d_bf16.h 801 0x1880 x +conv2d_bf16.h 802 0x1884 x +conv2d_bf16.h 1337 0x1888 x +conv2d_bf16.h 1443 0x188c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1892 +vector.hpp 1152 0x1892 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x1892 2 +conv2d_bf16.h 1364 0x1896 +conv2d_bf16.h 1518 0x1896 1 +conv2d_bf16.h 1364 0x189a +conv2d_bf16.h 1364 0x189e x +conv2d_bf16.h 1369 0x18a2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x18a8 +vector.hpp 1152 0x18a8 1 +vector.hpp 1139 0x18b0 +vector.hpp 1139 0x18b0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18b0 2 +accum.hpp 578 0x18b0 3 +accum.hpp 578 0x18b0 4 x +accum.hpp 946 0x18b0 5 +accum.hpp 946 0x18b0 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x18b0 7 x +conv2d_bf16.h 1362 0x18b0 8 x +conv2d_bf16.h 1429 0x18b0 9 +conv2d_bf16.h 1443 0x18b0 10 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18be + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18be 1 +accum.hpp 946 0x18be 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 793 0x18be 3 x +conv2d_bf16.h 1364 0x18be 4 x +conv2d_bf16.h 1443 0x18be 5 +conv2d_bf16.h 794 0x18ca x +conv2d_bf16.h 795 0x18ca 1 x +conv2d_bf16.h 1428 0x18ca 2 x +conv2d_bf16.h 1443 0x18ca 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18d6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18d6 1 +accum.hpp 578 0x18d6 2 +accum.hpp 946 0x18d6 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 796 0x18d6 4 x +conv2d_bf16.h 799 0x18d6 5 x +conv2d_bf16.h 1429 0x18d6 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18e0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18e0 1 x +accum.hpp 946 0x18e0 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 797 0x18e0 3 x +conv2d_bf16.h 1367 0x18e0 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18e6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18e6 1 +accum.hpp 946 0x18e6 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x18e6 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x18e6 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18ec x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18ec 1 x +accum.hpp 946 0x18ec 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x18ec 3 x +conv2d_bf16.h 1372 0x18ec 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18f2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18f2 1 +accum.hpp 946 0x18f2 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 793 0x18f2 3 x +conv2d_bf16.h 1374 0x18f2 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18f8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18f8 1 x +accum.hpp 946 0x18f8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 795 0x18f8 3 x +conv2d_bf16.h 1377 0x18f8 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x18fe + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x18fe 1 +accum.hpp 946 0x18fe 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 796 0x18fe 3 x +conv2d_bf16.h 1379 0x18fe 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1904 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1904 1 x +accum.hpp 946 0x1904 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 797 0x1904 3 x +conv2d_bf16.h 1429 0x1904 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x190a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x190a 1 +accum.hpp 946 0x190a 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x190a 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 801 0x190a 4 x +conv2d_bf16.h 1429 0x190a 5 +conv2d_bf16.h 792 0x1914 x +conv2d_bf16.h 794 0x1914 1 x +conv2d_bf16.h 802 0x1914 2 x +conv2d_bf16.h 793 0x191e x +conv2d_bf16.h 799 0x191e 1 x +conv2d_bf16.h 803 0x191e 2 x +conv2d_bf16.h 807 0x191e 3 x +conv2d_bf16.h 794 0x192a x +conv2d_bf16.h 804 0x192a 1 x +conv2d_bf16.h 808 0x192a 2 x +conv2d_bf16.h 809 0x1934 x +conv2d_bf16.h 810 0x1938 x +conv2d_bf16.h 795 0x193c x +conv2d_bf16.h 802 0x193c 1 x +conv2d_bf16.h 1437 0x193c 2 x +conv2d_bf16.h 796 0x1946 x +conv2d_bf16.h 1436 0x1946 1 x +conv2d_bf16.h 797 0x194e x +conv2d_bf16.h 1438 0x194e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1956 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 799 0x1956 1 x +conv2d_bf16.h 1439 0x1956 2 x +conv2d_bf16.h 792 0x1960 x +conv2d_bf16.h 801 0x1960 1 x +conv2d_bf16.h 793 0x1966 x +conv2d_bf16.h 804 0x1966 1 x +conv2d_bf16.h 808 0x1966 2 x +conv2d_bf16.h 795 0x1970 x +conv2d_bf16.h 803 0x1970 1 x +conv2d_bf16.h 807 0x1970 2 x +conv2d_bf16.h 796 0x197a x +conv2d_bf16.h 810 0x197a 1 x +conv2d_bf16.h 794 0x1982 x +conv2d_bf16.h 797 0x1982 1 x +conv2d_bf16.h 809 0x1982 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1990 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 799 0x1990 1 x +conv2d_bf16.h 802 0x1990 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x19a0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x19a0 1 +vector.hpp 1152 0x19a0 2 +vector.hpp 1152 0x19a0 3 +vector.hpp 1152 0x19a0 4 +vector.hpp 1152 0x19a0 5 +vector.hpp 1152 0x19a0 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 801 0x19a0 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x19ac +vector.hpp 1152 0x19ac 1 +vector.hpp 1152 0x19ac 2 +vector.hpp 1152 0x19ac 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 804 0x19ac 4 x +conv2d_bf16.h 808 0x19ac 5 x +conv2d_bf16.h 1517 0x19ac 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x19b8 +vector.hpp 1152 0x19b8 1 +vector.hpp 1152 0x19b8 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 803 0x19b8 3 x +conv2d_bf16.h 807 0x19b8 4 x +conv2d_bf16.h 1518 0x19b8 5 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x19c4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 810 0x19c4 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x19cc x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 809 0x19cc 1 x +conv2d_bf16.h 1428 0x19cc 2 +conv2d_bf16.h 801 0x19d6 x +conv2d_bf16.h 802 0x19da x +conv2d_bf16.h 803 0x19de x +conv2d_bf16.h 807 0x19de 1 x +conv2d_bf16.h 804 0x19e6 x +conv2d_bf16.h 808 0x19e6 1 x +conv2d_bf16.h 809 0x19ee x +conv2d_bf16.h 810 0x19f2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x19fa x +accum.hpp 1110 0x19fa 1 x +accum.hpp 149 0x19fe +accum.hpp 1110 0x19fe 1 +accum.hpp 149 0x1a02 +accum.hpp 1110 0x1a02 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1455 0x1a02 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1a0c x +accum.hpp 1110 0x1a0c 1 x +accum.hpp 149 0x1a10 +accum.hpp 1110 0x1a10 1 +accum.hpp 149 0x1a14 +accum.hpp 1110 0x1a14 1 +accum.hpp 149 0x1a18 +accum.hpp 1110 0x1a18 1 +accum.hpp 149 0x1a1c +accum.hpp 1110 0x1a1c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a20 x +max_min.hpp 20 0x1a24 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a28 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a28 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a30 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a30 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a38 x +vector.hpp 1152 0x1a42 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a42 1 x +max_min.hpp 20 0x1a4a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a4e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a4e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a56 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1a56 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a60 x +vector.hpp 1152 0x1a70 +vector.hpp 1152 0x1a74 +vector.hpp 1152 0x1a78 +vector.hpp 1152 0x1a7c +vector.hpp 1152 0x1a80 +vector.hpp 1152 0x1a84 +vector.hpp 1152 0x1a88 +vector.hpp 1152 0x1a90 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1337 0x1a90 1 x +conv2d_bf16.h 1873 0x1ac8 x +conv2d_bf16.h 1873 0x1acc + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 74 0x1ae0 x +superkernels.cpp 79 0x1ae0 1 +superkernels.cpp 81 0x1ae0 2 +superkernels.cpp 79 0x1aea x +superkernels.cpp 81 0x1aea 1 +superkernels.cpp 74 0x1af4 +superkernels.cpp 79 0x1b06 +superkernels.cpp 79 0x1b06 1 +superkernels.cpp 81 0x1b1c +superkernels.cpp 113 0x1b22 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1b22 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1b2c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1b2c 1 +tile.hpp 86 0x1b2c 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1b3c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1b44 +tile.hpp 74 0x1b48 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 113 0x1b4c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1b4c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1b54 +superkernels.cpp 81 0x1b60 +superkernels.cpp 87 0x1b64 +superkernels.cpp 87 0x1b64 1 x +superkernels.cpp 88 0x1b6e x +superkernels.cpp 89 0x1b6e 1 +superkernels.cpp 88 0x1b78 +superkernels.cpp 88 0x1b7e +superkernels.cpp 87 0x1b86 x +superkernels.cpp 113 0x1b86 1 +superkernels.cpp 88 0x1b8e x +superkernels.cpp 88 0x1b94 +superkernels.cpp 89 0x1b9a x +superkernels.cpp 89 0x1ba0 +superkernels.cpp 113 0x1ba0 1 +superkernels.cpp 106 0x1bb0 +superkernels.cpp 113 0x1bb0 1 +superkernels.cpp 117 0x1bb0 2 +superkernels.cpp 136 0x1bb0 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x1bb0 4 +io_buffer_main.h 324 0x1bb0 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 106 0x1bba x +superkernels.cpp 108 0x1bba 1 +superkernels.cpp 107 0x1bc4 +superkernels.cpp 108 0x1bc4 1 x +superkernels.cpp 139 0x1bc4 2 +superkernels.cpp 140 0x1bc4 3 +superkernels.cpp 107 0x1bce x +superkernels.cpp 110 0x1bda x +superkernels.cpp 110 0x1bda 1 x +superkernels.cpp 108 0x1be0 x +superkernels.cpp 107 0x1be4 x +superkernels.cpp 108 0x1be4 1 +superkernels.cpp 106 0x1bea x +superkernels.cpp 106 0x1bee +superkernels.cpp 107 0x1bf2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x1bf6 x +io_buffer_main.h 218 0x1bfa +io_buffer_main.h 218 0x1bfe +io_buffer_main.h 218 0x1c02 +io_buffer_main.h 235 0x1c08 x +io_buffer_main.h 218 0x1c14 x +io_buffer_main.h 218 0x1c14 1 x +io_buffer_main.h 218 0x1c18 +io_buffer_main.h 395 0x1c1c +io_buffer_main.h 395 0x1c26 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 113 0x1c30 x +superkernels.cpp 113 0x1c36 +superkernels.cpp 113 0x1c42 +superkernels.cpp 117 0x1c50 x +superkernels.cpp 117 0x1c50 1 +superkernels.cpp 117 0x1c5a +superkernels.cpp 117 0x1c6c +superkernels.cpp 117 0x1c70 +superkernels.cpp 136 0x1c76 +superkernels.cpp 140 0x1c76 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x1c82 x +io_buffer_main.h 327 0x1c82 1 +io_buffer_main.h 425 0x1c82 2 +io_buffer_main.h 324 0x1c88 +io_buffer_main.h 425 0x1c98 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 136 0x1c9c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x1c9c 1 x +io_buffer_main.h 327 0x1cae +io_buffer_main.h 327 0x1cb2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 139 0x1cc0 x +superkernels.cpp 139 0x1cc0 1 +superkernels.cpp 139 0x1cca +superkernels.cpp 142 0x1cd2 +superkernels.cpp 139 0x1cde +superkernels.cpp 139 0x1ce2 +superkernels.cpp 140 0x1cf4 x +superkernels.cpp 142 0x1d04 x +superkernels.cpp 142 0x1d08 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 141 0x1d20 x +elementwise_binary.h 142 0x1d20 1 +elementwise_binary.h 144 0x1d20 2 x +elementwise_binary.h 141 0x1d26 +elementwise_binary.h 141 0x1d2a +elementwise_binary.h 142 0x1d2e x +elementwise_binary.h 142 0x1d32 +elementwise_binary.h 130 0x1d40 x +elementwise_binary.h 133 0x1d40 1 x +elementwise_binary.h 130 0x1d44 +elementwise_binary.h 133 0x1d58 x +elementwise_binary.h 134 0x1d5c x +elementwise_binary.h 134 0x1d6c +elementwise_binary.h 135 0x1d70 x +elementwise_binary.h 135 0x1d80 +elementwise_binary.h 136 0x1d84 x +elementwise_binary.h 137 0x1d8c x +elementwise_binary.h 136 0x1d98 x +elementwise_binary.h 137 0x1d9c +elementwise_binary.h 137 0x1da0 +elementwise_binary.h 139 0x1da0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 146 0x1da0 2 +add_impl.h 146 0x1daa +add_impl.h 147 0x1daa 1 +add_impl.h 147 0x1daa 2 +add_impl.h 146 0x1db4 x +add_impl.h 147 0x1db4 1 +add_impl.h 147 0x1dbe x +add_impl.h 147 0x1dc6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x1dca x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x1dce + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x1dd2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x1dd8 x +add_impl.h 147 0x1ddc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 81 0x1df0 +elementwise_binary_broadcasting.h 81 0x1df0 1 x +elementwise_binary_broadcasting.h 82 0x1df0 2 +elementwise_binary_broadcasting.h 82 0x1df0 3 +elementwise_binary_broadcasting.h 83 0x1df0 4 +elementwise_binary_broadcasting.h 81 0x1dfa +elementwise_binary_broadcasting.h 82 0x1dfa 1 +elementwise_binary_broadcasting.h 82 0x1e00 x +elementwise_binary_broadcasting.h 84 0x1e0e x +elementwise_binary_broadcasting.h 82 0x1e12 x +elementwise_binary_broadcasting.h 83 0x1e16 x +elementwise_binary_broadcasting.h 82 0x1e1a x +elementwise_binary_broadcasting.h 83 0x1e1a 1 +elementwise_binary_broadcasting.h 82 0x1e20 +elementwise_binary_broadcasting.h 82 0x1e24 +elementwise_binary_broadcasting.h 76 0x1e30 +elementwise_binary_broadcasting.h 76 0x1e30 1 x +elementwise_binary_broadcasting.h 77 0x1e3a x +elementwise_binary_broadcasting.h 78 0x1e44 +elementwise_binary_broadcasting.h 78 0x1e54 +elementwise_binary_broadcasting.h 78 0x1e58 x +elementwise_binary_broadcasting.h 78 0x1e5e +elementwise_binary_broadcasting.h 79 0x1e62 x +elementwise_binary_broadcasting.h 89 0x1e70 x +elementwise_binary_broadcasting.h 96 0x1e70 1 x +elementwise_binary_broadcasting.h 102 0x1e70 2 +elementwise_binary_broadcasting.h 102 0x1e76 x +elementwise_binary_broadcasting.h 117 0x1e76 1 +elementwise_binary_broadcasting.h 102 0x1e88 +elementwise_binary_broadcasting.h 102 0x1e88 1 +elementwise_binary_broadcasting.h 96 0x1e8e +elementwise_binary_broadcasting.h 96 0x1e92 x +elementwise_binary_broadcasting.h 103 0x1e9c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1eb0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1eb6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 106 0x1ec0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1ed0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1ed6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1ee0 +add_accum.hpp 19 0x1ee0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 117 0x1ee0 2 x +elementwise_binary_broadcasting.h 117 0x1ee0 3 x +elementwise_binary_broadcasting.h 117 0x1eea +elementwise_binary_broadcasting.h 117 0x1eea 1 +elementwise_binary_broadcasting.h 117 0x1ef4 +elementwise_binary_broadcasting.h 117 0x1efa +elementwise_binary_broadcasting.h 117 0x1f00 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f08 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f08 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f08 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f0c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f0c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f0c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f10 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f10 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f10 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f14 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f14 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f14 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f18 x +vector.hpp 1159 0x1f18 1 +vector.hpp 1159 0x1f18 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f18 3 x +accum.hpp 1110 0x1f18 4 +accum.hpp 1110 0x1f18 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f18 6 x +elementwise_binary.h 195 0x1f18 7 +elementwise_binary.h 218 0x1f18 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f1e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f1e 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f1e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f1e 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f26 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f26 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f26 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f2a + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f2a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f2a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f2a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f32 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f32 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f32 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f36 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f36 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f36 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f36 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f3e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f3e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f3e 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f42 +vector.hpp 1159 0x1f42 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f42 2 +accum.hpp 1110 0x1f42 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f42 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f42 5 x +elementwise_binary.h 218 0x1f42 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f50 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f50 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1f50 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f54 +vector.hpp 1159 0x1f54 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f54 2 +accum.hpp 1110 0x1f54 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f54 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1f54 5 x +elementwise_binary.h 195 0x1f54 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f60 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f60 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1f60 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1f70 +vector.hpp 1159 0x1f70 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1f70 2 +accum.hpp 1110 0x1f70 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f70 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1f70 5 x +elementwise_binary.h 218 0x1f70 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f82 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f82 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f82 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x1f82 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f8c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f8c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f8c 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x1f8c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f96 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f96 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x1f96 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 121 0x1f96 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f9e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1f9e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x1f9e 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1fa4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x1fa4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x1fa4 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 82 0x1fb0 +ise_binary_attribute_broadcasting.h 82 0x1fb0 1 x +ise_binary_attribute_broadcasting.h 90 0x1fb6 +ise_binary_attribute_broadcasting.h 90 0x1fbe x +ise_binary_attribute_broadcasting.h 117 0x1fbe 1 +ise_binary_attribute_broadcasting.h 92 0x1fc6 x +ise_binary_attribute_broadcasting.h 92 0x1fc6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x1fd6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0x1fd6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 117 0x1fe2 x +ise_binary_attribute_broadcasting.h 92 0x1fe8 +ise_binary_attribute_broadcasting.h 92 0x1fee x +ise_binary_attribute_broadcasting.h 92 0x1ff2 +ise_binary_attribute_broadcasting.h 117 0x1ff2 1 +ise_binary_attribute_broadcasting.h 117 0x1ff8 +ise_binary_attribute_broadcasting.h 118 0x2000 +ise_binary_attribute_broadcasting.h 118 0x2010 x +ise_binary_attribute_broadcasting.h 118 0x2014 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 147 0x2030 x +superkernels.cpp 152 0x2030 1 +superkernels.cpp 152 0x2036 x +superkernels.cpp 147 0x203c +superkernels.cpp 149 0x204a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2054 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 152 0x205c +superkernels.cpp 152 0x205c 1 +superkernels.cpp 149 0x2062 x +superkernels.cpp 149 0x2066 +superkernels.cpp 149 0x206e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x206e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 155 0x2076 +superkernels.cpp 166 0x2076 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x207c +tile.hpp 74 0x2082 +tile.hpp 86 0x2082 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 155 0x208e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2098 +tile.hpp 74 0x209c +tile.hpp 74 0x20a0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 159 0x20b0 +superkernels.cpp 159 0x20b6 x +superkernels.cpp 159 0x20b6 1 +superkernels.cpp 157 0x20c0 +superkernels.cpp 159 0x20c0 1 +superkernels.cpp 166 0x20c0 2 +superkernels.cpp 157 0x20ca x +superkernels.cpp 159 0x20ca 1 +superkernels.cpp 164 0x20ca 2 +superkernels.cpp 157 0x20de +superkernels.cpp 159 0x20e6 x +superkernels.cpp 157 0x20ea x +superkernels.cpp 159 0x20f0 x +superkernels.cpp 164 0x2100 +superkernels.cpp 166 0x2100 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2110 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 163 0x2118 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2118 1 +io_buffer_main.h 218 0x2122 +io_buffer_main.h 218 0x2126 +io_buffer_main.h 235 0x212a x +io_buffer_main.h 218 0x2138 x +io_buffer_main.h 218 0x2138 1 x +io_buffer_main.h 218 0x213c +io_buffer_main.h 395 0x2140 +io_buffer_main.h 395 0x214a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0x214e +superkernels.cpp 163 0x2158 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x215c x +io_buffer_main.h 324 0x215c 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0x2162 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2166 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 166 0x216c x +superkernels.cpp 163 0x2174 x +superkernels.cpp 163 0x2178 +superkernels.cpp 164 0x217c x +superkernels.cpp 164 0x2180 +superkernels.cpp 168 0x2190 +superkernels.cpp 169 0x2190 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2190 2 x +io_buffer_main.h 327 0x219a +io_buffer_main.h 425 0x219a 1 +io_buffer_main.h 425 0x21a8 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x21ac + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x21ac 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x21b6 x +superkernels.cpp 168 0x21ba + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x21c6 x +io_buffer_main.h 327 0x21ca + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x21ce x +superkernels.cpp 168 0x21d2 +superkernels.cpp 169 0x21e2 +superkernels.cpp 169 0x21e6 x +superkernels.cpp 171 0x21f0 +superkernels.cpp 171 0x2204 x +superkernels.cpp 171 0x220c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 124 0x2220 x +elementwise_unary.h 126 0x2220 1 x +elementwise_unary.h 126 0x2230 x +elementwise_unary.h 127 0x2234 x +elementwise_unary.h 127 0x2244 +elementwise_unary.h 128 0x2248 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 113 0x224c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 128 0x225a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 113 0x225e x +clip_impl.h 114 0x226e x +clip_impl.h 114 0x2272 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 130 0x2276 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2290 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 136 0x2290 1 x +elementwise_unary.h 142 0x2290 2 +elementwise_unary.h 154 0x2290 3 x +elementwise_unary.h 171 0x2290 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x229c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x229c 1 +elementwise_unary.h 154 0x229c 2 x +elementwise_unary.h 190 0x229c 3 x +elementwise_unary.h 136 0x22a8 +elementwise_unary.h 136 0x22ac x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 103 0x22b0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x22b4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22b8 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 171 0x22b8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 104 0x22b8 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22c4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x22c4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22cc x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x22cc 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x22cc 2 x +elementwise_unary.h 171 0x22cc 3 x +elementwise_unary.h 154 0x22d6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x22de x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x22e2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x22e2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x22e2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x22f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x22f0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x22f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2300 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2310 x +vector.hpp 1159 0x2310 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2310 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 171 0x2310 3 x +elementwise_unary.h 176 0x2310 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2320 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2320 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x2320 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2330 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2330 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x2330 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2340 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2350 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2350 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x2350 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2358 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x235c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x235c 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x235c 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2364 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 158 0x2364 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x236a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x236a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x236a 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2372 x +max_min.hpp 21 0x2376 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x237a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x237a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x237e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x237e 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 176 0x2390 x +superkernels.cpp 181 0x2390 1 +superkernels.cpp 181 0x2396 x +superkernels.cpp 176 0x239c +superkernels.cpp 178 0x23aa + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x23b4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 181 0x23bc +superkernels.cpp 181 0x23bc 1 +superkernels.cpp 178 0x23c2 x +superkernels.cpp 178 0x23c6 +superkernels.cpp 178 0x23ce + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x23ce 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 184 0x23d6 +superkernels.cpp 195 0x23d6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x23dc +tile.hpp 74 0x23e2 +tile.hpp 86 0x23e2 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 184 0x23ee x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x23f8 +tile.hpp 74 0x23fc +tile.hpp 74 0x2400 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 188 0x2410 +superkernels.cpp 188 0x2416 x +superkernels.cpp 188 0x2416 1 +superkernels.cpp 186 0x2420 +superkernels.cpp 188 0x2420 1 +superkernels.cpp 195 0x2420 2 +superkernels.cpp 186 0x242a x +superkernels.cpp 188 0x242a 1 +superkernels.cpp 193 0x242a 2 +superkernels.cpp 186 0x243e +superkernels.cpp 188 0x2446 x +superkernels.cpp 186 0x244a x +superkernels.cpp 188 0x2450 x +superkernels.cpp 193 0x2460 +superkernels.cpp 195 0x2460 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2470 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 192 0x2478 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2478 1 +io_buffer_main.h 218 0x2482 +io_buffer_main.h 218 0x2486 +io_buffer_main.h 235 0x248a x +io_buffer_main.h 218 0x2498 x +io_buffer_main.h 218 0x2498 1 x +io_buffer_main.h 218 0x249c +io_buffer_main.h 395 0x24a0 +io_buffer_main.h 395 0x24aa x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 193 0x24ae +superkernels.cpp 192 0x24b8 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x24bc x +io_buffer_main.h 324 0x24bc 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 193 0x24c2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x24c6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 195 0x24cc x +superkernels.cpp 192 0x24d4 x +superkernels.cpp 192 0x24d8 +superkernels.cpp 193 0x24dc x +superkernels.cpp 193 0x24e0 +superkernels.cpp 197 0x24f0 +superkernels.cpp 198 0x24f0 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x24f0 2 x +io_buffer_main.h 327 0x24fa +io_buffer_main.h 425 0x24fa 1 +io_buffer_main.h 425 0x2508 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x250c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x250c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x2516 x +superkernels.cpp 197 0x251a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2526 x +io_buffer_main.h 327 0x252a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x252e x +superkernels.cpp 197 0x2532 +superkernels.cpp 198 0x2542 +superkernels.cpp 198 0x2546 x +superkernels.cpp 200 0x2550 +superkernels.cpp 200 0x2564 x +superkernels.cpp 200 0x256c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 219 0x2600 +elementwise_binary_shared.h 219 0x2600 1 x +elementwise_binary_shared.h 220 0x260a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x2614 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2620 +elementwise_binary_shared.h 222 0x2632 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x263c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2640 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x2640 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x2870 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 237 0x2870 1 x +elementwise_binary_shared.h 244 0x2870 2 +elementwise_binary_shared.h 245 0x2870 3 +elementwise_binary_shared.h 247 0x2870 4 +elementwise_binary_shared.h 250 0x2870 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x287a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 244 0x287a 1 x +elementwise_binary_shared.h 245 0x287a 2 +elementwise_binary_shared.h 247 0x287a 3 +elementwise_binary_shared.h 244 0x288c +elementwise_binary_shared.h 244 0x288c 1 +elementwise_binary_shared.h 237 0x2892 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x28a0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0x28a0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 247 0x28a6 x +elementwise_binary_shared.h 245 0x28d0 x +elementwise_binary_shared.h 245 0x28d6 +elementwise_binary_shared.h 245 0x28d6 1 +elementwise_binary_shared.h 250 0x28f0 +elementwise_binary_shared.h 250 0x28f4 x +elementwise_binary_shared.h 250 0x28f8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 205 0x2910 x +superkernels.cpp 210 0x2910 1 +superkernels.cpp 210 0x2916 x +superkernels.cpp 205 0x291c +superkernels.cpp 207 0x292a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2934 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 210 0x293c +superkernels.cpp 210 0x293c 1 +superkernels.cpp 207 0x2942 x +superkernels.cpp 207 0x2946 +superkernels.cpp 207 0x294e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x294e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 213 0x2956 +superkernels.cpp 224 0x2956 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x295c +tile.hpp 74 0x2962 +tile.hpp 86 0x2962 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 213 0x296e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2978 +tile.hpp 74 0x297c +tile.hpp 74 0x2980 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 217 0x2990 +superkernels.cpp 217 0x2996 x +superkernels.cpp 217 0x2996 1 +superkernels.cpp 215 0x29a0 +superkernels.cpp 217 0x29a0 1 +superkernels.cpp 224 0x29a0 2 +superkernels.cpp 215 0x29aa x +superkernels.cpp 217 0x29aa 1 +superkernels.cpp 222 0x29aa 2 +superkernels.cpp 215 0x29be +superkernels.cpp 217 0x29c6 x +superkernels.cpp 215 0x29ca x +superkernels.cpp 217 0x29d0 x +superkernels.cpp 222 0x29e0 +superkernels.cpp 224 0x29e0 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x29f0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 221 0x29f8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x29f8 1 +io_buffer_main.h 218 0x2a02 +io_buffer_main.h 218 0x2a06 +io_buffer_main.h 235 0x2a0a x +io_buffer_main.h 218 0x2a18 x +io_buffer_main.h 218 0x2a18 1 x +io_buffer_main.h 218 0x2a1c +io_buffer_main.h 395 0x2a20 +io_buffer_main.h 395 0x2a2a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x2a2e +superkernels.cpp 221 0x2a38 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2a3c x +io_buffer_main.h 324 0x2a3c 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x2a42 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2a46 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 224 0x2a4c x +superkernels.cpp 221 0x2a54 x +superkernels.cpp 221 0x2a58 +superkernels.cpp 222 0x2a5c x +superkernels.cpp 222 0x2a60 +superkernels.cpp 226 0x2a70 +superkernels.cpp 227 0x2a70 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2a70 2 x +io_buffer_main.h 327 0x2a7a +io_buffer_main.h 425 0x2a7a 1 +io_buffer_main.h 425 0x2a88 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2a8c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2a8c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2a96 x +superkernels.cpp 226 0x2a9a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2aa6 x +io_buffer_main.h 327 0x2aaa + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2aae x +superkernels.cpp 226 0x2ab2 +superkernels.cpp 227 0x2ac2 +superkernels.cpp 227 0x2ac6 x +superkernels.cpp 229 0x2ad0 +superkernels.cpp 229 0x2ae4 x +superkernels.cpp 229 0x2aec + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 141 0x2b00 x +elementwise_binary.h 142 0x2b00 1 +elementwise_binary.h 144 0x2b00 2 x +elementwise_binary.h 141 0x2b06 +elementwise_binary.h 141 0x2b0a +elementwise_binary.h 142 0x2b0e x +elementwise_binary.h 142 0x2b12 +elementwise_binary.h 130 0x2b20 x +elementwise_binary.h 133 0x2b20 1 x +elementwise_binary.h 130 0x2b24 +elementwise_binary.h 133 0x2b36 x +elementwise_binary.h 134 0x2b3a x +elementwise_binary.h 134 0x2b4a +elementwise_binary.h 135 0x2b4e x +elementwise_binary.h 135 0x2b5e +elementwise_binary.h 136 0x2b62 x +elementwise_binary.h 137 0x2b6a x +elementwise_binary.h 136 0x2b78 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2b7c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x2b80 +elementwise_binary.h 139 0x2b92 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2b9c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x2ba0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2ba0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 149 0x2bb0 x +elementwise_binary.h 156 0x2bb0 1 +elementwise_binary.h 168 0x2bb0 2 x +elementwise_binary.h 156 0x2bba x +elementwise_binary.h 168 0x2bba 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2bc4 +mul_acc32_fp.hpp 36 0x2bc4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 156 0x2bc4 2 +elementwise_binary.h 156 0x2bc4 3 +elementwise_binary.h 156 0x2bce +elementwise_binary.h 156 0x2bce 1 +elementwise_binary.h 156 0x2bd8 +elementwise_binary.h 156 0x2be2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2be6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 168 0x2be6 1 +elementwise_binary.h 187 0x2be6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2bec +vector.hpp 1139 0x2bec 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2bec 2 x +elementwise_binary.h 211 0x2bec 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2bf2 x +vector.hpp 1139 0x2bf2 1 x +vector.hpp 1159 0x2bf2 2 +vector.hpp 1159 0x2bf2 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2bf2 4 +accum.hpp 1110 0x2bf2 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2bf2 6 x +elementwise_binary.h 195 0x2bf2 7 +elementwise_binary.h 213 0x2bf2 8 x +elementwise_binary.h 218 0x2bf2 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2bfa +vector.hpp 1139 0x2bfa 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2bfa 2 x +elementwise_binary.h 211 0x2bfa 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c00 x +vector.hpp 1139 0x2c00 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2c00 2 x +elementwise_binary.h 213 0x2c00 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c06 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2c06 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c0a x +vector.hpp 1139 0x2c0a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2c0a 2 x +elementwise_binary.h 213 0x2c0a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c10 +vector.hpp 1139 0x2c10 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2c10 2 x +elementwise_binary.h 189 0x2c10 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c16 x +vector.hpp 1139 0x2c16 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c16 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2c16 3 x +elementwise_binary.h 213 0x2c16 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c20 +vector.hpp 1139 0x2c20 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c20 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2c20 3 x +elementwise_binary.h 189 0x2c20 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c2a x +vector.hpp 1139 0x2c2a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c2a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2c2a 3 x +elementwise_binary.h 213 0x2c2a 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c34 +vector.hpp 1139 0x2c34 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c34 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2c34 3 x +elementwise_binary.h 189 0x2c34 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c3e x +vector.hpp 1139 0x2c3e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c3e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2c3e 3 x +elementwise_binary.h 213 0x2c3e 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c48 +vector.hpp 1139 0x2c48 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c48 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2c48 3 x +elementwise_binary.h 189 0x2c48 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c52 x +vector.hpp 1139 0x2c52 1 x +vector.hpp 1159 0x2c52 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2c52 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c52 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2c52 5 x +elementwise_binary.h 213 0x2c52 6 x +elementwise_binary.h 218 0x2c52 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c60 +vector.hpp 1139 0x2c60 1 +vector.hpp 1159 0x2c60 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2c60 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c60 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2c60 5 x +elementwise_binary.h 189 0x2c60 6 x +elementwise_binary.h 195 0x2c60 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2c70 x +vector.hpp 1139 0x2c70 1 x +vector.hpp 1159 0x2c70 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2c70 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c70 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2c70 5 x +elementwise_binary.h 213 0x2c70 6 x +elementwise_binary.h 218 0x2c70 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2c80 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2c80 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c80 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2c80 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2c88 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2c88 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c88 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2c88 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2c90 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2c90 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c90 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2c90 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2c98 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2c98 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2c98 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2c98 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2ca0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2ca0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2ca0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2ca0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2ca8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2ca8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2ca8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2ca8 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2cb0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2cb0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2cb0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2cb0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2cb8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2cb8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2cb8 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2cbc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2cbc 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0x2cbc 2 x +elementwise_binary.h 195 0x2cbc 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2cc2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2cc2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2cc2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2cc6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2cc6 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2cc6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2cca x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2cca 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x2cca 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2cce + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2cce 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2cce 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 369 0x2ce0 x +superkernels.cpp 374 0x2ce0 1 +superkernels.cpp 374 0x2ce6 x +superkernels.cpp 369 0x2cec +superkernels.cpp 371 0x2cf2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2cf2 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 374 0x2d0e x +superkernels.cpp 374 0x2d0e 1 x +superkernels.cpp 371 0x2d14 x +superkernels.cpp 371 0x2d18 +superkernels.cpp 371 0x2d1e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2d26 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 377 0x2d2a +superkernels.cpp 379 0x2d2a 1 +superkernels.cpp 381 0x2d2a 2 +superkernels.cpp 393 0x2d2a 3 +superkernels.cpp 377 0x2d34 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2d34 1 +tile.hpp 74 0x2d3e +tile.hpp 86 0x2d3e 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 377 0x2d4a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2d54 +tile.hpp 74 0x2d58 +tile.hpp 74 0x2d5c x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 381 0x2d60 +superkernels.cpp 381 0x2d60 1 x +superkernels.cpp 381 0x2d6a +superkernels.cpp 381 0x2d6a 1 +superkernels.cpp 390 0x2d6a 2 +superkernels.cpp 379 0x2d74 x +superkernels.cpp 382 0x2d74 1 +superkernels.cpp 391 0x2d74 2 +superkernels.cpp 379 0x2d8a +superkernels.cpp 381 0x2d90 x +superkernels.cpp 379 0x2d94 x +superkernels.cpp 381 0x2d98 x +superkernels.cpp 382 0x2d9c x +superkernels.cpp 390 0x2da0 +superkernels.cpp 391 0x2da6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2db0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2db4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2db4 1 +io_buffer_main.h 218 0x2dbe +io_buffer_main.h 218 0x2dc2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2dc6 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 235 0x2dca x +io_buffer_main.h 218 0x2dd6 x +io_buffer_main.h 218 0x2dd6 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2dda x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2dda 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x2de0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 395 0x2de4 +io_buffer_main.h 395 0x2de4 1 +io_buffer_main.h 395 0x2dee x +io_buffer_main.h 218 0x2df2 x +io_buffer_main.h 218 0x2dfa +io_buffer_main.h 218 0x2dfe +io_buffer_main.h 218 0x2e02 +io_buffer_main.h 235 0x2e06 x +io_buffer_main.h 218 0x2e14 x +io_buffer_main.h 218 0x2e14 1 x +io_buffer_main.h 218 0x2e18 +io_buffer_main.h 395 0x2e24 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x2e28 +superkernels.cpp 391 0x2e28 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2e28 2 +io_buffer_main.h 125 0x2e36 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x2e3a x +superkernels.cpp 391 0x2e40 x +superkernels.cpp 393 0x2e40 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2e46 x +io_buffer_main.h 125 0x2e4a +io_buffer_main.h 327 0x2e4e +io_buffer_main.h 327 0x2e4e 1 +io_buffer_main.h 125 0x2e54 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x2e5a x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2e60 +io_buffer_main.h 327 0x2e60 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x2e64 x +superkernels.cpp 391 0x2e68 x +superkernels.cpp 391 0x2e6c +superkernels.cpp 390 0x2e70 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2e80 x +io_buffer_main.h 327 0x2e80 1 +io_buffer_main.h 327 0x2e80 2 +io_buffer_main.h 327 0x2e80 3 +io_buffer_main.h 327 0x2e80 4 +io_buffer_main.h 425 0x2e80 5 +io_buffer_main.h 425 0x2e80 6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2e8a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 425 0x2e9a x +io_buffer_main.h 327 0x2e9e x +io_buffer_main.h 324 0x2ea2 +io_buffer_main.h 327 0x2eb0 +io_buffer_main.h 324 0x2eb4 x +io_buffer_main.h 327 0x2eb4 1 +io_buffer_main.h 425 0x2ec6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2eca +superkernels.cpp 398 0x2eca 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2eca 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2ed4 x +superkernels.cpp 397 0x2ed8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2ee4 x +io_buffer_main.h 327 0x2ee8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x2eec x +superkernels.cpp 397 0x2ef0 +superkernels.cpp 398 0x2f00 +superkernels.cpp 398 0x2f04 x +superkernels.cpp 400 0x2f10 +superkernels.cpp 400 0x2f26 x +superkernels.cpp 400 0x2f2e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_dw_bf16_params.h: +conv2d_dw_bf16_params.h 211 0x2f40 x +conv2d_dw_bf16_params.h 215 0x2f40 1 +conv2d_dw_bf16_params.h 215 0x2f40 2 x +conv2d_dw_bf16_params.h 215 0x2f4a x +conv2d_dw_bf16_params.h 218 0x2f4a 1 +conv2d_dw_bf16_params.h 218 0x2f4a 2 +conv2d_dw_bf16_params.h 211 0x2f54 +conv2d_dw_bf16_params.h 218 0x2f5a +conv2d_dw_bf16_params.h 215 0x2f6e +conv2d_dw_bf16_params.h 215 0x2f72 +conv2d_dw_bf16_params.h 215 0x2f76 +conv2d_dw_bf16_params.h 215 0x2f7a +conv2d_dw_bf16_params.h 215 0x2f88 +conv2d_dw_bf16_params.h 215 0x2f8c +conv2d_dw_bf16_params.h 218 0x2f90 x +conv2d_dw_bf16_params.h 218 0x2f94 +conv2d_dw_bf16_params.h 218 0x2f98 +conv2d_dw_bf16_params.h 218 0x2fa4 +conv2d_dw_bf16_params.h 218 0x2faa +conv2d_dw_bf16_params.h 218 0x2fb0 +conv2d_dw_bf16_params.h 218 0x2fb6 +conv2d_dw_bf16_params.h 218 0x2fbc +conv2d_dw_bf16_params.h 218 0x2fc0 +conv2d_dw_bf16_params.h 218 0x2fd0 +conv2d_dw_bf16_params.h 218 0x2fd0 1 +conv2d_dw_bf16_params.h 219 0x2fd0 2 +conv2d_dw_bf16_params.h 218 0x2fd6 +conv2d_dw_bf16_params.h 219 0x2fd6 1 x +conv2d_dw_bf16_params.h 219 0x2fdc +conv2d_dw_bf16_params.h 219 0x2fe0 +conv2d_dw_bf16_params.h 218 0x2fea x +conv2d_dw_bf16_params.h 218 0x2fee +conv2d_dw_bf16_params.h 219 0x2ff2 x +conv2d_dw_bf16_params.h 219 0x2ff8 +conv2d_dw_bf16_params.h 218 0x3002 x +conv2d_dw_bf16_params.h 219 0x3006 x +conv2d_dw_bf16_params.h 219 0x300a +conv2d_dw_bf16_params.h 218 0x300e x +conv2d_dw_bf16_params.h 218 0x3012 +conv2d_dw_bf16_params.h 219 0x3012 1 x +conv2d_dw_bf16_params.h 219 0x3020 +conv2d_dw_bf16_params.h 226 0x3020 1 +conv2d_dw_bf16_params.h 231 0x3020 2 +conv2d_dw_bf16_params.h 219 0x302a +conv2d_dw_bf16_params.h 219 0x302a 1 +conv2d_dw_bf16_params.h 220 0x302a 2 +conv2d_dw_bf16_params.h 220 0x302a 3 +conv2d_dw_bf16_params.h 232 0x302a 4 +conv2d_dw_bf16_params.h 234 0x302a 5 +conv2d_dw_bf16_params.h 234 0x302a 6 +conv2d_dw_bf16_params.h 243 0x302a 7 +conv2d_dw_bf16_params.h 250 0x302a 8 +conv2d_dw_bf16_params.h 253 0x302a 9 +conv2d_dw_bf16_params.h 260 0x302a 10 +conv2d_dw_bf16_params.h 264 0x302a 11 +conv2d_dw_bf16_params.h 220 0x3034 +conv2d_dw_bf16_params.h 234 0x3034 1 +conv2d_dw_bf16_params.h 246 0x3034 2 +conv2d_dw_bf16_params.h 253 0x3034 3 +conv2d_dw_bf16_params.h 226 0x303e x +conv2d_dw_bf16_params.h 234 0x303e 1 +conv2d_dw_bf16_params.h 234 0x303e 2 +conv2d_dw_bf16_params.h 231 0x3048 +conv2d_dw_bf16_params.h 232 0x3048 1 +conv2d_dw_bf16_params.h 232 0x3048 2 +conv2d_dw_bf16_params.h 235 0x3052 +conv2d_dw_bf16_params.h 235 0x3052 1 +conv2d_dw_bf16_params.h 242 0x3052 2 +conv2d_dw_bf16_params.h 242 0x3052 3 +conv2d_dw_bf16_params.h 243 0x3052 4 +conv2d_dw_bf16_params.h 250 0x3052 5 +conv2d_dw_bf16_params.h 255 0x3052 6 +conv2d_dw_bf16_params.h 260 0x3052 7 +conv2d_dw_bf16_params.h 264 0x3052 8 +conv2d_dw_bf16_params.h 234 0x305c +conv2d_dw_bf16_params.h 239 0x305c 1 +conv2d_dw_bf16_params.h 242 0x305c 2 +conv2d_dw_bf16_params.h 248 0x305c 3 +conv2d_dw_bf16_params.h 253 0x305c 4 +conv2d_dw_bf16_params.h 264 0x305c 5 +conv2d_dw_bf16_params.h 219 0x3066 x +conv2d_dw_bf16_params.h 219 0x306a +conv2d_dw_bf16_params.h 219 0x306e +conv2d_dw_bf16_params.h 220 0x306e 1 +conv2d_dw_bf16_params.h 219 0x3074 +conv2d_dw_bf16_params.h 243 0x3074 1 +conv2d_dw_bf16_params.h 247 0x3074 2 +conv2d_dw_bf16_params.h 220 0x307a x +conv2d_dw_bf16_params.h 250 0x307a 1 +conv2d_dw_bf16_params.h 219 0x3080 x +conv2d_dw_bf16_params.h 220 0x3084 x +conv2d_dw_bf16_params.h 231 0x3084 1 +conv2d_dw_bf16_params.h 219 0x308a x +conv2d_dw_bf16_params.h 231 0x308a 1 x +conv2d_dw_bf16_params.h 220 0x3090 x +conv2d_dw_bf16_params.h 253 0x3090 1 x +conv2d_dw_bf16_params.h 240 0x3096 +conv2d_dw_bf16_params.h 246 0x3096 1 x +conv2d_dw_bf16_params.h 232 0x309c x +conv2d_dw_bf16_params.h 226 0x30a0 x +conv2d_dw_bf16_params.h 231 0x30a4 x +conv2d_dw_bf16_params.h 238 0x30a4 1 +conv2d_dw_bf16_params.h 234 0x30aa x +conv2d_dw_bf16_params.h 231 0x30ae x +conv2d_dw_bf16_params.h 232 0x30ae 1 x +conv2d_dw_bf16_params.h 234 0x30b4 x +conv2d_dw_bf16_params.h 232 0x30b8 x +conv2d_dw_bf16_params.h 227 0x30bc x +conv2d_dw_bf16_params.h 232 0x30bc 1 +conv2d_dw_bf16_params.h 234 0x30c2 x +conv2d_dw_bf16_params.h 235 0x30c2 1 x +conv2d_dw_bf16_params.h 235 0x30c8 +conv2d_dw_bf16_params.h 243 0x30c8 1 x +conv2d_dw_bf16_params.h 238 0x30ce x +conv2d_dw_bf16_params.h 242 0x30ce 1 x +conv2d_dw_bf16_params.h 242 0x30d4 +conv2d_dw_bf16_params.h 243 0x30d4 1 x +conv2d_dw_bf16_params.h 239 0x30da x +conv2d_dw_bf16_params.h 242 0x30da 1 x +conv2d_dw_bf16_params.h 243 0x30e0 x +conv2d_dw_bf16_params.h 250 0x30e0 1 x +conv2d_dw_bf16_params.h 234 0x30e6 x +conv2d_dw_bf16_params.h 240 0x30e6 1 x +conv2d_dw_bf16_params.h 253 0x30e6 2 x +conv2d_dw_bf16_params.h 247 0x30ec x +conv2d_dw_bf16_params.h 242 0x30f0 x +conv2d_dw_bf16_params.h 247 0x30f0 1 +conv2d_dw_bf16_params.h 241 0x30f6 x +conv2d_dw_bf16_params.h 243 0x30f6 1 x +conv2d_dw_bf16_params.h 243 0x30fc +conv2d_dw_bf16_params.h 245 0x30fc 1 x +conv2d_dw_bf16_params.h 243 0x3102 x +conv2d_dw_bf16_params.h 248 0x3102 1 x +conv2d_dw_bf16_params.h 245 0x3108 x +conv2d_dw_bf16_params.h 250 0x3108 1 x +conv2d_dw_bf16_params.h 246 0x310e x +conv2d_dw_bf16_params.h 250 0x310e 1 +conv2d_dw_bf16_params.h 247 0x3114 x +conv2d_dw_bf16_params.h 248 0x3114 1 x +conv2d_dw_bf16_params.h 250 0x311a x +conv2d_dw_bf16_params.h 250 0x311a 1 x +conv2d_dw_bf16_params.h 248 0x3120 x +conv2d_dw_bf16_params.h 250 0x3120 1 +conv2d_dw_bf16_params.h 249 0x3126 x +conv2d_dw_bf16_params.h 255 0x3126 1 x +conv2d_dw_bf16_params.h 258 0x3126 2 +conv2d_dw_bf16_params.h 258 0x3126 3 +conv2d_dw_bf16_params.h 252 0x3130 x +conv2d_dw_bf16_params.h 253 0x3130 1 x +conv2d_dw_bf16_params.h 253 0x3136 +conv2d_dw_bf16_params.h 255 0x3136 1 x +conv2d_dw_bf16_params.h 254 0x313c x +conv2d_dw_bf16_params.h 255 0x313c 1 +conv2d_dw_bf16_params.h 256 0x313c 2 +conv2d_dw_bf16_params.h 258 0x313c 3 x +conv2d_dw_bf16_params.h 258 0x313c 4 x +conv2d_dw_bf16_params.h 259 0x313c 5 +conv2d_dw_bf16_params.h 263 0x313c 6 +conv2d_dw_bf16_params.h 255 0x3148 x +conv2d_dw_bf16_params.h 256 0x314c x +conv2d_dw_bf16_params.h 260 0x314c 1 x +conv2d_dw_bf16_params.h 258 0x3152 x +conv2d_dw_bf16_params.h 260 0x3152 1 +conv2d_dw_bf16_params.h 259 0x3158 x +conv2d_dw_bf16_params.h 264 0x3158 1 x +conv2d_dw_bf16_params.h 260 0x315e x +conv2d_dw_bf16_params.h 264 0x315e 1 +conv2d_dw_bf16_params.h 262 0x3164 x +conv2d_dw_bf16_params.h 263 0x3168 x +conv2d_dw_bf16_params.h 264 0x316c x +conv2d_dw_bf16_params.h 266 0x3170 x +conv2d_dw_bf16_params.h 266 0x3180 +conv2d_dw_bf16_params.h 266 0x3180 1 +conv2d_dw_bf16_params.h 266 0x3186 +conv2d_dw_bf16_params.h 266 0x318a +conv2d_dw_bf16_params.h 266 0x3196 +conv2d_dw_bf16_params.h 266 0x31a0 +conv2d_dw_bf16_params.h 267 0x31a0 1 +conv2d_dw_bf16_params.h 266 0x31aa +conv2d_dw_bf16_params.h 266 0x31aa 1 +conv2d_dw_bf16_params.h 266 0x31b0 +conv2d_dw_bf16_params.h 266 0x31b6 +conv2d_dw_bf16_params.h 267 0x31bc x +conv2d_dw_bf16_params.h 266 0x31c6 x +conv2d_dw_bf16_params.h 266 0x31ca +conv2d_dw_bf16_params.h 267 0x31ca 1 x +conv2d_dw_bf16_params.h 266 0x31d0 x +conv2d_dw_bf16_params.h 266 0x31d8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 444 0x34c0 x +superkernels.cpp 449 0x34c0 1 +superkernels.cpp 449 0x34c6 x +superkernels.cpp 444 0x34cc +superkernels.cpp 467 0x34da +superkernels.cpp 452 0x34ea +superkernels.cpp 449 0x34f2 +superkernels.cpp 449 0x34f2 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x34f8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 446 0x34fc x +superkernels.cpp 446 0x3500 +superkernels.cpp 446 0x3504 +superkernels.cpp 446 0x350a +superkernels.cpp 461 0x350e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x350e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 451 0x3518 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x3518 1 +tile.hpp 86 0x3518 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 451 0x3526 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x3530 +tile.hpp 74 0x3534 +tile.hpp 74 0x3538 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 452 0x3540 +superkernels.cpp 461 0x3540 1 +superkernels.cpp 452 0x3548 x +superkernels.cpp 453 0x354c +superkernels.cpp 453 0x354c 1 x +superkernels.cpp 452 0x355e +superkernels.cpp 457 0x355e 1 +superkernels.cpp 452 0x3568 x +superkernels.cpp 453 0x356c x +superkernels.cpp 457 0x3570 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3580 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 457 0x3584 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3584 1 +io_buffer_main.h 218 0x358e +io_buffer_main.h 218 0x3592 +io_buffer_main.h 235 0x3596 x +io_buffer_main.h 218 0x35a4 x +io_buffer_main.h 218 0x35a4 1 x +io_buffer_main.h 218 0x35a8 +io_buffer_main.h 395 0x35ac +io_buffer_main.h 395 0x35b6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 456 0x35ba +superkernels.cpp 459 0x35ba 1 +superkernels.cpp 464 0x35ba 2 +superkernels.cpp 465 0x35ba 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x35ba 4 +io_buffer_main.h 425 0x35ba 5 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 52 0x35c4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x35ce +io_buffer_main.h 324 0x35ce 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 457 0x35d4 x +superkernels.cpp 457 0x35d8 +superkernels.cpp 461 0x35d8 1 +superkernels.cpp 456 0x35e2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x35ec x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 457 0x35f2 x +superkernels.cpp 456 0x35f6 x +superkernels.cpp 459 0x35fa x +superkernels.cpp 461 0x35fe x +superkernels.cpp 456 0x3604 x +superkernels.cpp 459 0x3608 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/detail/io_buffer_impl.h: +io_buffer_impl.h 201 0x360c x +io_buffer_impl.h 52 0x3610 x +io_buffer_impl.h 52 0x3614 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x3620 +io_buffer_main.h 324 0x3624 x +io_buffer_main.h 425 0x3634 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 464 0x3638 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3638 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 464 0x3642 x +superkernels.cpp 464 0x3646 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3656 x +io_buffer_main.h 327 0x365a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 464 0x365e x +superkernels.cpp 464 0x3662 +superkernels.cpp 465 0x3668 +superkernels.cpp 465 0x3674 x +superkernels.cpp 467 0x3680 +superkernels.cpp 467 0x368a x +superkernels.cpp 467 0x368e +superkernels.cpp - 0x368f + + +superkernels.cpp: +File name Line number Starting address View Stmt + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc: +0_0_reloadable3.cc 20 0x36a0 x +0_0_reloadable3.cc 22 0x36a0 1 +0_0_reloadable3.cc 22 0x36a4 x +0_0_reloadable3.cc 23 0x36a8 x +0_0_reloadable3.cc 25 0x36ac x +0_0_reloadable3.cc 24 0x36b0 x +0_0_reloadable3.cc 21 0x36b4 x +0_0_reloadable3.cc 29 0x36d0 x +0_0_reloadable3.cc 31 0x36d0 1 +0_0_reloadable3.cc 31 0x36d4 x +0_0_reloadable3.cc 33 0x36d8 x +0_0_reloadable3.cc 32 0x36dc x +0_0_reloadable3.cc 30 0x36e0 x +0_0_reloadable3.cc 37 0x36f0 x +0_0_reloadable3.cc 39 0x36f0 1 +0_0_reloadable3.cc 39 0x36f4 x +0_0_reloadable3.cc 41 0x36f8 x +0_0_reloadable3.cc 40 0x36fc x +0_0_reloadable3.cc 38 0x3700 x +0_0_reloadable3.cc 45 0x3710 x +0_0_reloadable3.cc 47 0x3710 1 +0_0_reloadable3.cc 47 0x3714 x +0_0_reloadable3.cc 49 0x3718 x +0_0_reloadable3.cc 48 0x371c x +0_0_reloadable3.cc 46 0x3720 x +0_0_reloadable3.cc 53 0x3730 x +0_0_reloadable3.cc 55 0x3730 1 +0_0_reloadable3.cc 55 0x3734 x +0_0_reloadable3.cc 56 0x3738 x +0_0_reloadable3.cc 58 0x373c x +0_0_reloadable3.cc 57 0x3740 x +0_0_reloadable3.cc 54 0x3744 x +0_0_reloadable3.cc 62 0x3760 x +0_0_reloadable3.cc 64 0x3760 1 +0_0_reloadable3.cc 64 0x3764 x +0_0_reloadable3.cc 65 0x3768 x +0_0_reloadable3.cc 67 0x376c x +0_0_reloadable3.cc 66 0x3770 x +0_0_reloadable3.cc 63 0x3774 x +0_0_reloadable3.cc 82 0x930 x +0_0_reloadable3.cc 84 0x930 1 x +0_0_reloadable3.cc 84 0x930 2 +0_0_reloadable3.cc 86 0x930 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x930 4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc: +0_0_reloadable3.cc 82 0x936 +0_0_reloadable3.cc 84 0x944 +0_0_reloadable3.cc 86 0x944 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x944 2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc: +0_0_reloadable3.cc 84 0x94c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x952 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0x958 x +io_buffer_compiler.h 590 0x95c +io_buffer_compiler.h 590 0x960 +io_buffer_compiler.h 590 0x964 +io_buffer_compiler.h 590 0x968 +io_buffer_compiler.h 195 0x978 x +io_buffer_compiler.h 195 0x978 1 x +io_buffer_compiler.h 194 0x97c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x980 +io_buffer_main.h 410 0x980 1 +io_buffer_main.h 410 0x98a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc: +0_0_reloadable3.cc 86 0x98e +0_0_reloadable3.cc 90 0x98e 1 +0_0_reloadable3.cc 86 0x992 x +0_0_reloadable3.cc 86 0x996 +0_0_reloadable3.cc 86 0x99a +0_0_reloadable3.cc 86 0x9a8 +0_0_reloadable3.cc 86 0x9ac + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0x9b0 x +io_buffer_compiler.h 590 0x9b8 +io_buffer_compiler.h 590 0x9bc +io_buffer_compiler.h 590 0x9c0 +io_buffer_compiler.h 590 0x9c4 +io_buffer_compiler.h 195 0x9d4 x +io_buffer_compiler.h 195 0x9d4 1 x +io_buffer_compiler.h 194 0x9d8 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x9e4 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc: +0_0_reloadable3.cc 90 0x9e8 x +0_0_reloadable3.cc 90 0x9ec +0_0_reloadable3.cc 90 0x9f0 +0_0_reloadable3.cc 90 0x9f6 +0_0_reloadable3.cc 90 0xa08 +0_0_reloadable3.cc 93 0xa0c +0_0_reloadable3.cc 95 0xa0c 1 +0_0_reloadable3.cc 93 0xa20 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa20 1 +io_buffer_compiler.h 606 0xa20 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa20 3 +io_buffer_main.h 440 0xa20 4 +io_buffer_main.h 440 0xa26 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc: +0_0_reloadable3.cc 95 0xa2a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa2e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa2e 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 605 0xa38 x +io_buffer_compiler.h 605 0xa3c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa4a +io_buffer_main.h 440 0xa4e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa52 +io_buffer_compiler.h 606 0xa52 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc: +0_0_reloadable3.cc 95 0xa58 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa66 x +io_buffer_compiler.h 605 0xa6a x +io_buffer_compiler.h 606 0xa6a 1 +io_buffer_compiler.h 605 0xa70 +io_buffer_compiler.h 606 0xa70 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa82 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc: +0_0_reloadable3.cc 98 0xa86 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa8a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable3/src/0_0_reloadable3.cc: +0_0_reloadable3.cc 98 0xa96 x +0_0_reloadable3.cc 98 0xaa0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xaa4 +io_buffer_compiler.h 606 0xaa8 x +io_buffer_compiler.h 606 0xaac +io_buffer_compiler.h 606 0xab0 +io_buffer_compiler.h - 0xab1 + + +CU: me_div.c: +File name Line number Starting address View Stmt + +./me_div.c:[++] +me_div.c 108 0x3790 +me_div.c 108 0x3790 1 +me_div.c 115 0x3790 2 x +me_div.c 108 0x3796 +me_div.c 108 0x379a +me_div.c 108 0x379e +me_div.c 108 0x37a2 +me_div.c 108 0x37a6 +me_div.c 108 0x37aa +me_div.c 108 0x37ae +me_div.c 108 0x37b2 +me_div.c 108 0x37b6 +me_div.c 108 0x37ba +me_div.c 108 0x37be +me_div.c 108 0x37c2 +me_div.c 108 0x37c6 +me_div.c 108 0x37ca +me_div.c 108 0x37ce +me_div.c 108 0x37d2 +me_div.c 108 0x37d6 +me_div.c 108 0x37da +me_div.c 108 0x37de +me_div.c 108 0x37e2 +me_div.c 108 0x37e6 +me_div.c 108 0x37ea +me_div.c 108 0x37ee +me_div.c 108 0x37f2 +me_div.c 108 0x37f6 +me_div.c 108 0x37fa +me_div.c 108 0x37fe +me_div.c 108 0x3802 +me_div.c 119 0x3806 x +me_div.c 108 0x380a x +me_div.c 108 0x380e +me_div.c 108 0x3812 +me_div.c 108 0x3816 +me_div.c - 0x3817 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/scripts/0_2_reloadable9.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/scripts/0_2_reloadable9.bcf new file mode 100644 index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/scripts/0_2_reloadable9.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x930 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x930 + +_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7ba80 0x40 //reserved for sync buffer +_stack DM_stack 0x7bac0 0x940 //stack for core +_reserved DMb 0x7c400 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c440 0x880//heap +_reserved DMb 0x40000 0x3b280 + +_reserved DMb 0x7ccc0 0x3340 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/scripts/0_2_reloadable9.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/scripts/0_2_reloadable9.prx new file mode 100644 index 0000000000000000000000000000000000000000..8293b710c6225a9a7df8779b59cf814cff80af23 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/scripts/0_2_reloadable9.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/src/0_2_reloadable9.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/src/0_2_reloadable9.cc new file mode 100644 index 0000000000000000000000000000000000000000..49ee750344fb3fb512b745064140a08205682319 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_2_reloadable9/src/0_2_reloadable9.cc @@ -0,0 +1,98 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void conv2d_maxpool(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_add1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_clip1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_conv2d_dwc(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); + +// Declare Kernel objects and external arrays + + +void _b896_wrapper(void* args[]) +{ + conv2d_maxpool( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +void _b901_wrapper(void* args[]) +{ + superkernel_add1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b906_wrapper(void* args[]) +{ + superkernel_clip1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b881_wrapper(void* args[]) +{ + superkernel_mul1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b891_wrapper(void* args[]) +{ + superkernel_mul1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b919_wrapper(void* args[]) +{ + superkernel_conv2d_dwc( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[6] = { + _b896_wrapper, + _b901_wrapper, + _b906_wrapper, + _b881_wrapper, + _b891_wrapper, + _b919_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->release(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; +} diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.map new file mode 100644 index 0000000000000000000000000000000000000000..ef46d1312460834f68715f0d9820c058bd977f34 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.map @@ -0,0 +1,143 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:42:51 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0 ../Release/0_0.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork848 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_bankA': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 36 + + 0x0007c400..0x0007c41f ( 32 items) : atexit.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release/libc.a)::_ZL7atexits (Data, Local, .data.DM_bankA.4) + + Called functions : _fini + + 0x0007c420..0x0007c423 ( 4 items) : atexit.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release/libc.a)::_ZL10atexit_cnt (Data, Local, .data.DM_bankA.4) + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 2368 + + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 4452 + + 0x00000000..0x0007b27f ( 504448 items) : Reserved + 0x0007b280..0x0007b67f ( 1024 items) : ../Release/0_0.o::lcpPing (Data, Global, .bss.DMb.4) + 0x0007b680..0x0007ba7f ( 1024 items) : ../Release/0_0.o::lcpPong (Data, Global, .bss.DMb.4) + 0x0007ba80..0x0007babf ( 64 items) : Reserved + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + 0x0007c400..0x0007c41f : Occupied in alias or record memory 'DM_bankA' by symbol '_ZL7atexits' + 0x0007c420..0x0007c423 : Occupied in alias or record memory 'DM_bankA' by symbol '_ZL10atexit_cnt' + 0x0007c440..0x000fffff ( 539584 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 1998 + + 0x00000000..0x000000df ( 224 items) : me_basic.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_main_init (Function, Global, .text) (stack frame size = 0) + + Called functions : _main + __cxa_finalize + + Referenced symbols: _sp_start_value_DM_stack + _ctors_start + _ctors_end + + 0x000000e0..0x00000533 ( 1108 items) : ../Release/0_0.o::_main (Function, Global, .text) (stack frame size = 192) + + Called functions : _ZN3adf11block_writeEPKNS_7reg_valEj + _Z13kernelWrapperPPvjjjj + + Referenced symbols: lcpPing + lcpPong + + 0x00000540..0x000005ed ( 174 items) : ../Release/0_0.o::_ZN3adf11block_writeEPKNS_7reg_valEj (Function, Weak, .text) (stack frame size = 0) + 0x000005f0..0x00000697 ( 168 items) : me_basic.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_fini (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _dtors_start + _dtors_end + + 0x000006a0..0x000007e3 ( 324 items) : atexit.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release/libc.a)::__cxa_finalize (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL10atexit_cnt + _ZL7atexits + + 0x00000930..0x00003fff ( 14032 items) : Reserved + +External symbols: + + _Z13kernelWrapperPPvjjjj = 0x930 + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x7e4 + _pc_start = 0x0 + _sp_end_DM_stack = 0x7c400 + _sp_start_DM_stack = 0x7bac0 + +Section summary for memory 'DM_bankA': + + .data File + ---------- ---------- + 36 atexit.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + ---------- ---------- + 36 Total + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 2368 + ---------- ---------- + 2368 Total + +Section summary for memory 'DMb': + + .bss .data File + ---------- ---------- ---------- + 2048 0 ../Release/0_0.o + 0 36 atexit.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release/libc.a) (in DM_bankA) + ---------- ---------- ---------- + 2048 36 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 1282 ../Release/0_0.o + 392 me_basic.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + 324 atexit.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + ---------- ---------- + 1998 Total + +File summary: + +atexit.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release/libc.a) + DM_bankA 36 + PM 324 + +../Release/0_0.o + DMb 2048 + PM 1282 + +me_basic.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + PM 392 + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.sdr new file mode 100644 index 0000000000000000000000000000000000000000..d2b4c2317a9a0aeb9f20fc4389bef9abee6493bf --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.sdr @@ -0,0 +1,90 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:42:51 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0 ../Release/0_0.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork848 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol lcpPing 0x0007b280 +_symbol lcpPong 0x0007b680 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _main_init 0x00000000 +_symbol _main 0x000000e0 +_symbol _ZN3adf11block_writeEPKNS_7reg_valEj 0x00000540 +_symbol _fini 0x000005f0 +_symbol __cxa_finalize 0x000006a0 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.srv new file mode 100644 index 0000000000000000000000000000000000000000..f5651c6a640c9df2e9a1eaae268b6cc9f28cc921 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.srv @@ -0,0 +1,2640 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:42:52 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0 me + +// Release: ipp V-2024.06-TGT-241219 +.label __AIE_ARCH_MODEL_VERSION__21011100__inlined__1__me_basic___main_init_ +.label _main_init +.function _main_init _main_init +.src_ref 0 "me_basic.c" 91 4 first +.src_ref 0 "me_basic.c" 87 first +.function_start + 0 "01000100" // MOVXM sp, #506560 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1 "10000000" // /* MW 5 */ + 2 "11110101" // /* MW 4 */ + 3 "10111001" // /* MW 3 */ + 4 "00000111" // /* MW 2 */ + 5 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 69 8 +.src_ref 0 "me_basic.c" 69 41 + 6 "01000100" // MOVXM r8, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7 "00000000" // /* MW 5 */ + 8 "00100000" // /* MW 4 */ + 9 "00000100" // /* MW 3 */ + 10 "00000000" // /* MW 2 */ + 11 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 69 8 + 12 "01000100" // MOVXM r16, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13 "00000000" // /* MW 5 */ + 14 "00100000" // /* MW 4 */ + 15 "00001000" // /* MW 3 */ + 16 "00000000" // /* MW 2 */ + 17 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 69 8 first + 18 "10011000" // EQ r16, r8, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 19 "00000111" // /* MW 3 */ + 20 "00100001" // /* MW 2 */ + 21 "00010010" // /* MW 1 */ +.src_ref 0 "me_basic.c" 69 8 + 22 "10000100" // JNZ r16, #128 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=128 delay_slots=5 */ + 23 "00000001" // /* MW 5 */ + 24 "01000000" // /* MW 4 */ + 25 "01000000" // /* MW 3 */ + 26 "00000000" // /* MW 2 */ + 27 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 28 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 29 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 30 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 31 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 32 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 33 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 34 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 35 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 36 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 37 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 98 11 +.src_ref 0 "me_basic.c" 98 11 + 38 "10111010" // NOPA; MOVS p7, p0; MOV r9, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 39 "01110010" // /* MW 9 */ + 40 "01010000" // /* MW 8 */ + 41 "00101000" // /* MW 7 */ + 42 "00000001" // /* MW 6 */ + 43 "10001011" // /* MW 5 */ + 44 "10000000" // /* MW 4 */ + 45 "11110111" // /* MW 3 */ + 46 "00101100" // /* MW 2 */ + 47 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 69 41 +.src_ref 0 "me_basic.c" 70 13 + 48 "11100001" // NOPA; NOPB; NOPS; MOVXM p6, #-4; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 49 "00000000" // /* MW 15 */ + 50 "00000000" // /* MW 14 */ + 51 "00010000" // /* MW 13 */ + 52 "11111110" // /* MW 12 */ + 53 "00110111" // /* MW 11 */ + 54 "11111111" // /* MW 10 */ + 55 "11111111" // /* MW 9 */ + 56 "00111111" // /* MW 8 */ + 57 "01011011" // /* MW 7 */ + 58 "00000001" // /* MW 6 */ + 59 "00100000" // /* MW 5 */ + 60 "00000000" // /* MW 4 */ + 61 "11110000" // /* MW 3 */ + 62 "00101100" // /* MW 2 */ + 63 "00000000" // /* MW 1 */ +.label TGT_F_main_init_64 +.src_ref 0 "me_basic.c" 69 41 +.src_ref 0 "me_basic.c" 70 13 first +.loop_nesting 1 + 64 "11010100" // LDA p0, [p6], #-4; MOV r10, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 65 "10000001" // /* MW 5 */ + 66 "00111001" // /* MW 4 */ + 67 "11010101" // /* MW 3 */ + 68 "10000011" // /* MW 2 */ + 69 "11011111" // /* MW 1 */ + 70 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 71 "00000000" // /* MW 1 */ + 72 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 73 "00000000" // /* MW 1 */ + 74 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 75 "00000000" // /* MW 1 */ + 76 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 77 "00000000" // /* MW 1 */ + 78 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 79 "00000000" // /* MW 1 */ + 80 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 81 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 70 16 +.no_stack_arguments + 82 "00011000" // JL p0 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 83 "00000000" // /* MW 3 */ + 84 "00110000" // /* MW 2 */ + 85 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 86 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 87 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 88 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 89 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 90 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 91 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 92 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 93 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 94 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 95 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 69 41 first +.return_address + 96 "10011000" // NE r16, r10, r8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 97 "10001000" // /* MW 3 */ + 98 "10100000" // /* MW 2 */ + 99 "00010010" // /* MW 1 */ +.src_ref 0 "me_basic.c" 69 8 + 100 "10000100" // JNZ r16, #64 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=64 delay_slots=5 */ + 101 "00000001" // /* MW 5 */ + 102 "01000000" // /* MW 4 */ + 103 "00100000" // /* MW 3 */ + 104 "00000000" // /* MW 2 */ + 105 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 106 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 107 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 111 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 113 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 115 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 98 11 +.src_ref 0 "me_basic.c" 98 11 +.loop_nesting 0 + 116 "11110110" // NOPA; NOPB; MOVS p0, p7; MOV r1, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 117 "01110000" // /* MW 11 */ + 118 "01010000" // /* MW 10 */ + 119 "00101010" // /* MW 9 */ + 120 "00000000" // /* MW 8 */ + 121 "10001011" // /* MW 7 */ + 122 "10011100" // /* MW 6 */ + 123 "00100000" // /* MW 5 */ + 124 "00000000" // /* MW 4 */ + 125 "11110000" // /* MW 3 */ + 126 "00101100" // /* MW 2 */ + 127 "00000000" // /* MW 1 */ +.label TGT_F_main_init_128 +.src_ref 0 "me_basic.c" 98 11 first +.no_stack_arguments + 128 "00000100" // JL #224 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=224 delay_slots=5 */ + 129 "00000001" // /* MW 5 */ + 130 "00000000" // /* MW 4 */ + 131 "01110000" // /* MW 3 */ + 132 "00000000" // /* MW 2 */ + 133 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 135 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 139 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 143 "00000000" // /* MW 1 */ +.src_ref 1 "stdlib.h" 77 4 first +.return_address +.no_stack_arguments + 144 "00000100" // JL #1696 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=1696 delay_slots=5 */ + 145 "00000001" // /* MW 5 */ + 146 "00000000" // /* MW 4 */ + 147 "01010000" // /* MW 3 */ + 148 "00000011" // /* MW 2 */ + 149 "00000000" // /* MW 1 */ +.src_ref 1 "stdlib.h" 77 4 +.delay_slot + 150 "10111000" // MOV p0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 151 "00000000" // /* MW 3 */ + 152 "01100000" // /* MW 2 */ + 153 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 155 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 159 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 160 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 161 "00000000" // /* MW 15 */ + 162 "00000000" // /* MW 14 */ + 163 "01111000" // /* MW 13 */ + 164 "10100101" // /* MW 12 */ + 165 "00000001" // /* MW 11 */ + 166 "00000000" // /* MW 10 */ + 167 "00000000" // /* MW 9 */ + 168 "00000000" // /* MW 8 */ + 169 "01011011" // /* MW 7 */ + 170 "00000001" // /* MW 6 */ + 171 "00100000" // /* MW 5 */ + 172 "00000000" // /* MW 4 */ + 173 "11110000" // /* MW 3 */ + 174 "00101100" // /* MW 2 */ + 175 "00000000" // /* MW 1 */ +.return_address +.swstall chess_separator_scheduler + 176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 177 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 178 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 179 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 181 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 183 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 185 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 187 "00000000" // /* MW 1 */ +.src_ref 1 "stdlib.h" 61 4 first + 188 "00011000" // DONE /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 189 "00000000" // /* MW 3 */ + 190 "00001000" // /* MW 2 */ + 191 "00010000" // /* MW 1 */ +.swstall chess_separator_scheduler + 192 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 193 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 194 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 195 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 197 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 198 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 199 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 201 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 203 "00000000" // /* MW 1 */ +.src_ref 1 "stdlib.h" 62 4 first +.swstall for_chess_exit +.exit + 204 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 205 "01100111" // /* MW 3 */ + 206 "00000001" // /* MW 2 */ + 207 "00000000" // /* MW 1 */ +.label TGT_F_main_init_208 +.src_ref 1 "stdlib.h" 64 4 first +.loop_nesting 1 + 208 "10000100" // J #208 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=208 delay_slots=5 */ + 209 "00000000" // /* MW 5 */ + 210 "00000000" // /* MW 4 */ + 211 "01101000" // /* MW 3 */ + 212 "00000000" // /* MW 2 */ + 213 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 217 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 219 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 221 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _main_init__end + 223 "00000000" // /* MW 1 */ +.label _main___func_begin0 +.label _main +.function main _main +.src_ref 2 "0_0.cc" 12 +.src_ref 2 "0_0.cc" 12 first +.function_start + 224 "10111010" // MOVA m0, #-160; PADDXM [sp], #192 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 225 "01110000" // /* MW 9 */ + 226 "00000000" // /* MW 8 */ + 227 "00000000" // /* MW 7 */ + 228 "00000000" // /* MW 6 */ + 229 "00000110" // /* MW 5 */ + 230 "00000000" // /* MW 4 */ + 231 "10000000" // /* MW 3 */ + 232 "00000000" // /* MW 2 */ + 233 "11101100" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 12 +.src_ref 3 "tile_control.h" 147 14 +.src_ref 2 "0_0.cc" 49 38 +.src_ref 2 "0_0.cc" 50 33 +.src_ref 2 "0_0.cc" 59 50 +.src_ref 2 "0_0.cc" 59 63 +.src_ref 2 "0_0.cc" 63 43 +.src_ref 2 "0_0.cc" 64 63 +.src_ref 2 "0_0.cc" 67 46 + 234 "10111010" // MOVA m1, #-184; MOVX r12, #2; MOV p1, sp /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 235 "01111000" // /* MW 9 */ + 236 "11110000" // /* MW 8 */ + 237 "10110010" // /* MW 7 */ + 238 "01001000" // /* MW 6 */ + 239 "11000000" // /* MW 5 */ + 240 "00000000" // /* MW 4 */ + 241 "10000000" // /* MW 3 */ + 242 "00000100" // /* MW 2 */ + 243 "11101001" // /* MW 1 */ +.src_ref 3 "tile_control.h" 278 68 +.src_ref 3 "tile_control.h" 278 68 +.src_ref 3 "tile_control.h" 278 68 +.src_ref 4 "io_buffer_compiler.h" 566 27 +.src_ref 4 "io_buffer_compiler.h" 567 18 + 244 "01111110" // MOVA r25, #0; PADDB [p1], m0; MOVS p6, p1; MOVXM p0, #651488 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 245 "01100000" // /* MW 13 */ + 246 "10010001" // /* MW 12 */ + 247 "11010000" // /* MW 11 */ + 248 "00000010" // /* MW 10 */ + 249 "00001110" // /* MW 9 */ + 250 "10000110" // /* MW 8 */ + 251 "01001111" // /* MW 7 */ + 252 "00000000" // /* MW 6 */ + 253 "00100000" // /* MW 5 */ + 254 "00010111" // /* MW 4 */ + 255 "00000010" // /* MW 3 */ + 256 "00011001" // /* MW 2 */ + 257 "00000000" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 564 18 +.src_ref 4 "io_buffer_compiler.h" 565 24 +.src_ref 4 "io_buffer_compiler.h" 572 18 +.src_ref 2 "0_0.cc" 19 8 + 258 "01111110" // NOPA; PADDB [p6], m1; ST p1, [sp, #-4]; MOVX r16, #1; MOV r24, #0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 259 "10110000" // /* MW 13 */ + 260 "10010011" // /* MW 12 */ + 261 "11111111" // /* MW 11 */ + 262 "00001011" // /* MW 10 */ + 263 "00000000" // /* MW 9 */ + 264 "01100001" // /* MW 8 */ + 265 "00000101" // /* MW 7 */ + 266 "00100000" // /* MW 6 */ + 267 "00100000" // /* MW 5 */ + 268 "01010111" // /* MW 4 */ + 269 "11111100" // /* MW 3 */ + 270 "00101100" // /* MW 2 */ + 271 "00000000" // /* MW 1 */ +.label TGT_F_main_48 +.src_ref 3 "tile_control.h" 278 68 first +.loop_nesting 1 + 272 "10011000" // ST.TM r25, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 273 "00111110" // /* MW 3 */ + 274 "01001111" // /* MW 2 */ + 275 "00001000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 278 68 + 276 "10011000" // ST.TM r25, [p0], #-16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 277 "00111110" // /* MW 3 */ + 278 "11001111" // /* MW 2 */ + 279 "00001000" // /* MW 1 */ + 280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 281 "00000000" // /* MW 1 */ + 282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 283 "00000000" // /* MW 1 */ + 284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 285 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 19 8 first + 286 "00011000" // ACQ #62, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 287 "00001000" // /* MW 3 */ + 288 "11000011" // /* MW 2 */ + 289 "00010111" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 566 27 +.src_ref 2 "0_0.cc" 29 31 + 290 "10111010" // MOVA m7, #-92; MOVXM p7, #504448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 291 "00010000" // /* MW 9 */ + 292 "01000000" // /* MW 8 */ + 293 "10110001" // /* MW 7 */ + 294 "11101111" // /* MW 6 */ + 295 "00000001" // /* MW 5 */ + 296 "00000000" // /* MW 4 */ + 297 "10000000" // /* MW 3 */ + 298 "10011100" // /* MW 2 */ + 299 "11110100" // /* MW 1 */ + 300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 301 "00000000" // /* MW 1 */ + 302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 303 "00000000" // /* MW 1 */ + 304 "10011000" // ST p0, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 305 "00011101" // /* MW 3 */ + 306 "11111000" // /* MW 2 */ + 307 "00001111" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 564 18 first + 308 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 309 "00010001" // /* MW 3 */ + 310 "00011111" // /* MW 2 */ + 311 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 565 24 first + 312 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 313 "00010001" // /* MW 3 */ + 314 "00011111" // /* MW 2 */ + 315 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 565 24 + 316 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 317 "00010001" // /* MW 3 */ + 318 "00011111" // /* MW 2 */ + 319 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 567 18 first + 320 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 321 "00110001" // /* MW 3 */ + 322 "00011111" // /* MW 2 */ + 323 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 567 18 + 324 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 325 "00110001" // /* MW 3 */ + 326 "00011111" // /* MW 2 */ + 327 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 566 27 first + 328 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 329 "00110001" // /* MW 3 */ + 330 "00011111" // /* MW 2 */ + 331 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 564 18 first + 332 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 333 "00010001" // /* MW 3 */ + 334 "00011111" // /* MW 2 */ + 335 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 565 24 first + 336 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 337 "00010001" // /* MW 3 */ + 338 "00011111" // /* MW 2 */ + 339 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 565 24 + 340 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 341 "00010001" // /* MW 3 */ + 342 "00011111" // /* MW 2 */ + 343 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 567 18 first + 344 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 345 "00110001" // /* MW 3 */ + 346 "00011111" // /* MW 2 */ + 347 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 567 18 + 348 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 349 "00110001" // /* MW 3 */ + 350 "00011111" // /* MW 2 */ + 351 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 566 27 first + 352 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 353 "00110001" // /* MW 3 */ + 354 "00011111" // /* MW 2 */ + 355 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 564 18 first + 356 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 357 "00010001" // /* MW 3 */ + 358 "00011111" // /* MW 2 */ + 359 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 565 24 first + 360 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 361 "00010001" // /* MW 3 */ + 362 "00011111" // /* MW 2 */ + 363 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 565 24 + 364 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 365 "00010001" // /* MW 3 */ + 366 "00011111" // /* MW 2 */ + 367 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 567 18 first + 368 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 369 "00110001" // /* MW 3 */ + 370 "00011111" // /* MW 2 */ + 371 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 567 18 + 372 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 373 "00110001" // /* MW 3 */ + 374 "00011111" // /* MW 2 */ + 375 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 566 27 first + 376 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 377 "00110001" // /* MW 3 */ + 378 "00011111" // /* MW 2 */ + 379 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 564 18 first + 380 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 381 "00010001" // /* MW 3 */ + 382 "00011111" // /* MW 2 */ + 383 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 565 24 first + 384 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 385 "00010001" // /* MW 3 */ + 386 "00011111" // /* MW 2 */ + 387 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 565 24 + 388 "10011000" // ST r24, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 389 "00010001" // /* MW 3 */ + 390 "00011111" // /* MW 2 */ + 391 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 567 18 first + 392 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 393 "00110001" // /* MW 3 */ + 394 "00011111" // /* MW 2 */ + 395 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 567 18 + 396 "10011000" // ST r25, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 397 "00110001" // /* MW 3 */ + 398 "00011111" // /* MW 2 */ + 399 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 566 27 first + 400 "10011000" // ST r25, [p1], m7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 401 "00110001" // /* MW 3 */ + 402 "11101011" // /* MW 2 */ + 403 "00001001" // /* MW 1 */ + 404 "00110110" // NOPA; NOPB; ST p1, [sp, #-12]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 405 "11000001" // /* MW 11 */ + 406 "01001110" // /* MW 10 */ + 407 "11111010" // /* MW 9 */ + 408 "00000011" // /* MW 8 */ + 409 "00000000" // /* MW 7 */ + 410 "00000000" // /* MW 6 */ + 411 "00100000" // /* MW 5 */ + 412 "00000000" // /* MW 4 */ + 413 "11110000" // /* MW 3 */ + 414 "00101100" // /* MW 2 */ + 415 "00000000" // /* MW 1 */ +.label TGT_F_main_192 +.src_ref 2 "0_0.cc" 29 31 first +.src_ref 2 "0_0.cc" 37 12 first +.loop_nesting 2 + 416 "10111010" // LDA r17, [p7], #4; MOVXM ls, #496 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 417 "00010000" // /* MW 9 */ + 418 "11111000" // /* MW 8 */ + 419 "01111000" // /* MW 7 */ + 420 "00000000" // /* MW 6 */ + 421 "00000000" // /* MW 5 */ + 422 "00000000" // /* MW 4 */ + 423 "11010000" // /* MW 3 */ + 424 "11000110" // /* MW 2 */ + 425 "11100011" // /* MW 1 */ +.src_ref 2 "0_0.cc" 30 36 first +.src_ref 2 "0_0.cc" 37 12 + 426 "10111010" // LDA r10, [p7], #4; MOVXM le, #592 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 427 "00010000" // /* MW 9 */ + 428 "00101000" // /* MW 8 */ + 429 "10111001" // /* MW 7 */ + 430 "00000001" // /* MW 6 */ + 431 "00000000" // /* MW 5 */ + 432 "00000000" // /* MW 4 */ + 433 "11010000" // /* MW 3 */ + 434 "10101010" // /* MW 2 */ + 435 "11100011" // /* MW 1 */ +.src_ref 2 "0_0.cc" 31 37 first + 436 "10011000" // LDA r9, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 437 "00110110" // /* MW 3 */ + 438 "00011101" // /* MW 2 */ + 439 "00000111" // /* MW 1 */ +.src_ref 2 "0_0.cc" 32 37 first + 440 "10011000" // LDA r8, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 441 "00010110" // /* MW 3 */ + 442 "00011101" // /* MW 2 */ + 443 "00000111" // /* MW 1 */ +.src_ref 2 "0_0.cc" 33 38 first + 444 "10011000" // LDA r19, [p7], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 445 "01110110" // /* MW 3 */ + 446 "00101110" // /* MW 2 */ + 447 "00000111" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 572 18 +.src_ref 2 "0_0.cc" 40 79 +.src_ref 2 "0_0.cc" 40 86 + 448 "11010100" // LDA p1, [sp, #-4]; MOV p2, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 449 "10000001" // /* MW 5 */ + 450 "11011101" // /* MW 4 */ + 451 "00100100" // /* MW 3 */ + 452 "10010011" // /* MW 2 */ + 453 "11111111" // /* MW 1 */ + 454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 455 "00000000" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 572 18 +.src_ref 4 "io_buffer_compiler.h" 575 18 +.src_ref 2 "0_0.cc" 58 20 + 456 "11100100" // MOVX r16, #0; MOV el7, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 457 "00111001" // /* MW 5 */ + 458 "00110001" // /* MW 4 */ + 459 "00100111" // /* MW 3 */ + 460 "00000000" // /* MW 2 */ + 461 "00000100" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 575 18 + 462 "11111000" // MOV el9, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 463 "10011100" // /* MW 3 */ + 464 "10010000" // /* MW 2 */ + 465 "00011100" // /* MW 1 */ + 466 "01011000" // ADD.NC r20, r9, r10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 467 "10101001" // /* MW 3 */ + 468 "00010100" // /* MW 2 */ + 469 "00011101" // /* MW 1 */ +.src_ref 2 "0_0.cc" 41 30 + 470 "10111010" // NOPA; MOVS p0, p6; ADD.NC r20, r20, r8 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 471 "10100010" // /* MW 9 */ + 472 "00010000" // /* MW 8 */ + 473 "10001101" // /* MW 7 */ + 474 "00000010" // /* MW 6 */ + 475 "10001011" // /* MW 5 */ + 476 "10011000" // /* MW 4 */ + 477 "11110000" // /* MW 3 */ + 478 "00101100" // /* MW 2 */ + 479 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 37 12 first +.src_ref 2 "0_0.cc" 42 24 +.src_ref 2 "0_0.cc" 43 20 + 480 "11100001" // MOVA r18, #6; NOPB; NOPS; MOVX r19, #0; ADD.NC lc, r19, r20; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 481 "00000000" // /* MW 15 */ + 482 "00000000" // /* MW 14 */ + 483 "10101000" // /* MW 13 */ + 484 "11101000" // /* MW 12 */ + 485 "10111100" // /* MW 11 */ + 486 "00001010" // /* MW 10 */ + 487 "00110000" // /* MW 9 */ + 488 "00000001" // /* MW 8 */ + 489 "01011011" // /* MW 7 */ + 490 "00000001" // /* MW 6 */ + 491 "00100000" // /* MW 5 */ + 492 "00000000" // /* MW 4 */ + 493 "00000000" // /* MW 3 */ + 494 "11010010" // /* MW 2 */ + 495 "00000000" // /* MW 1 */ +.label ZLS_F_main_272 +.src_ref 4 "io_buffer_compiler.h" 572 18 first +.src_ref 2 "0_0.cc" 40 79 first +.src_ref 2 "0_0.cc" 40 86 first +.src_ref 2 "0_0.cc" 42 24 first +.begin_of_loop +.loop_nesting 3 + 496 "10111010" // LDA dn6, [p2], #4; ST el7, [p1], #4; ADD.NC r19, r19, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 497 "01000010" // /* MW 9 */ + 498 "11000000" // /* MW 8 */ + 499 "01101100" // /* MW 7 */ + 500 "10000010" // /* MW 6 */ + 501 "11101001" // /* MW 5 */ + 502 "00011101" // /* MW 4 */ + 503 "11010001" // /* MW 3 */ + 504 "11100100" // /* MW 2 */ + 505 "01000011" // /* MW 1 */ +.src_ref 2 "0_0.cc" 40 98 +.src_ref 2 "0_0.cc" 43 20 first + 506 "00010100" // LDA r20, [p2], #4; ADD.NC r18, r18, #4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 507 "00000100" // /* MW 5 */ + 508 "00110010" // /* MW 4 */ + 509 "11011001" // /* MW 3 */ + 510 "11010010" // /* MW 2 */ + 511 "01000011" // /* MW 1 */ +.src_ref 2 "0_0.cc" 40 110 first +.src_ref 2 "0_0.cc" 40 117 first + 512 "10011000" // LDA dc7, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 513 "11100110" // /* MW 3 */ + 514 "00011111" // /* MW 2 */ + 515 "00000010" // /* MW 1 */ +.src_ref 2 "0_0.cc" 40 129 + 516 "10011000" // LDA el11, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 517 "11101110" // /* MW 3 */ + 518 "00011110" // /* MW 2 */ + 519 "00000010" // /* MW 1 */ + 520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 521 "00000000" // /* MW 1 */ + 522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 523 "00000000" // /* MW 1 */ + 524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 525 "00000000" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 573 24 first + 526 "10011000" // ST dn6, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 527 "00100001" // /* MW 3 */ + 528 "00011111" // /* MW 2 */ + 529 "00001001" // /* MW 1 */ + 530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 531 "00000000" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 573 24 + 532 "10011000" // ST dc7, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 533 "11100001" // /* MW 3 */ + 534 "00011111" // /* MW 2 */ + 535 "00001001" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 575 18 first + 536 "00000010" // ST el9, [p1], #4; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 537 "01110000" // /* MW 7 */ + 538 "10100101" // /* MW 6 */ + 539 "00000001" // /* MW 5 */ + 540 "00000000" // /* MW 4 */ + 541 "00110000" // /* MW 3 */ + 542 "11001101" // /* MW 2 */ + 543 "00100011" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 574 27 first + 544 "11100001" // NOPA; NOPB; ST r20, [p1], #4; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 545 "00000000" // /* MW 15 */ + 546 "00000000" // /* MW 14 */ + 547 "01111000" // /* MW 13 */ + 548 "10100101" // /* MW 12 */ + 549 "00000001" // /* MW 11 */ + 550 "00000000" // /* MW 10 */ + 551 "00000000" // /* MW 9 */ + 552 "10000000" // /* MW 8 */ + 553 "10010001" // /* MW 7 */ + 554 "00011110" // /* MW 6 */ + 555 "00100001" // /* MW 5 */ + 556 "00000000" // /* MW 4 */ + 557 "11110000" // /* MW 3 */ + 558 "00101100" // /* MW 2 */ + 559 "00000000" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 574 27 + 560 "11100001" // NOPA; NOPB; ST el11, [p1], #-20; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 561 "00000000" // /* MW 15 */ + 562 "00000000" // /* MW 14 */ + 563 "01111000" // /* MW 13 */ + 564 "10100101" // /* MW 12 */ + 565 "00000001" // /* MW 11 */ + 566 "00000000" // /* MW 10 */ + 567 "00000000" // /* MW 9 */ + 568 "10000000" // /* MW 8 */ + 569 "11101001" // /* MW 7 */ + 570 "10111110" // /* MW 6 */ + 571 "00100001" // /* MW 5 */ + 572 "00000000" // /* MW 4 */ + 573 "11110000" // /* MW 3 */ + 574 "00101100" // /* MW 2 */ + 575 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 41 30 + 576 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV r21, p1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 577 "00000000" // /* MW 15 */ + 578 "00000000" // /* MW 14 */ + 579 "01111000" // /* MW 13 */ + 580 "01100000" // /* MW 12 */ + 581 "10101001" // /* MW 11 */ + 582 "00000010" // /* MW 10 */ + 583 "00000000" // /* MW 9 */ + 584 "00000000" // /* MW 8 */ + 585 "01011011" // /* MW 7 */ + 586 "00000001" // /* MW 6 */ + 587 "00100000" // /* MW 5 */ + 588 "00000000" // /* MW 4 */ + 589 "11110000" // /* MW 3 */ + 590 "00101100" // /* MW 2 */ + 591 "00000000" // /* MW 1 */ +.label ZLE_F_main_368 +.src_ref 2 "0_0.cc" 41 30 first +.end_of_loop + 592 "11100001" // NOPA; NOPB; ST r21, [p0], #4; NOPX; ADD.NC p1, r21, #24; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 593 "00000000" // /* MW 15 */ + 594 "00000000" // /* MW 14 */ + 595 "00001000" // /* MW 13 */ + 596 "01000110" // /* MW 12 */ + 597 "10110101" // /* MW 11 */ + 598 "00000000" // /* MW 10 */ + 599 "00000000" // /* MW 9 */ + 600 "10000000" // /* MW 8 */ + 601 "10110001" // /* MW 7 */ + 602 "00011110" // /* MW 6 */ + 603 "00100000" // /* MW 5 */ + 604 "00000000" // /* MW 4 */ + 605 "11110000" // /* MW 3 */ + 606 "00101100" // /* MW 2 */ + 607 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 70 32 +.loop_nesting 2 + 608 "10111010" // MOVA r11, #0; LSHL r20, r19, r12; MOV r19, p7 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 609 "01111000" // /* MW 9 */ + 610 "01100000" // /* MW 8 */ + 611 "01101111" // /* MW 7 */ + 612 "01101110" // /* MW 6 */ + 613 "01000110" // /* MW 5 */ + 614 "00100111" // /* MW 4 */ + 615 "00000000" // /* MW 3 */ + 616 "00001011" // /* MW 2 */ + 617 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 50 40 + 618 "00100100" // ADD r13, r19, #-24; ADD.NC r19, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 619 "00000001" // /* MW 5 */ + 620 "10110010" // /* MW 4 */ + 621 "01101001" // /* MW 3 */ + 622 "01110100" // /* MW 2 */ + 623 "10011011" // /* MW 1 */ +.src_ref 2 "0_0.cc" 49 38 +.src_ref 2 "0_0.cc" 49 38 first +.src_ref 2 "0_0.cc" 50 30 + 624 "00111010" // MOVS p7, r13; LSHL r18, r18, r12; MOV dj6, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 625 "01111001" // /* MW 9 */ + 626 "00010000" // /* MW 8 */ + 627 "01000101" // /* MW 7 */ + 628 "01101111" // /* MW 6 */ + 629 "00100110" // /* MW 5 */ + 630 "00100101" // /* MW 4 */ + 631 "01100000" // /* MW 3 */ + 632 "10100001" // /* MW 2 */ + 633 "11110001" // /* MW 1 */ +.src_ref 2 "0_0.cc" 49 38 +.src_ref 2 "0_0.cc" 50 33 first + 634 "11100100" // LSHL r18, r19, r12; MOV dj5, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 635 "01000001" // /* MW 5 */ + 636 "00010010" // /* MW 4 */ + 637 "10111011" // /* MW 3 */ + 638 "10011001" // /* MW 2 */ + 639 "10011100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 49 38 first +.src_ref 2 "0_0.cc" 50 33 + 640 "10010100" // LDA r18, [p7, dj5]; ADD.NC dn7, r13, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 641 "10010010" // /* MW 5 */ + 642 "10001101" // /* MW 4 */ + 643 "11011110" // /* MW 3 */ + 644 "01001010" // /* MW 2 */ + 645 "11110100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 50 30 first + 646 "00000010" // ST dn7, [p6, dj6]; ADD.NC r17, r17, #-1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 647 "11000000" // /* MW 7 */ + 648 "01111111" // /* MW 6 */ + 649 "00101100" // /* MW 5 */ + 650 "00000010" // /* MW 4 */ + 651 "00110000" // /* MW 3 */ + 652 "01110100" // /* MW 2 */ + 653 "11011000" // /* MW 1 */ + 654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 655 "00000000" // /* MW 1 */ + 656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 657 "00000000" // /* MW 1 */ + 658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 659 "00000000" // /* MW 1 */ + 660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 661 "00000000" // /* MW 1 */ + 662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 663 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 52 20 first + 664 "00000010" // NOPS; ADD.NC r18, r19, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 665 "10100000" // /* MW 7 */ + 666 "11100100" // /* MW 6 */ + 667 "01001100" // /* MW 5 */ + 668 "00000010" // /* MW 4 */ + 669 "01100000" // /* MW 3 */ + 670 "00101011" // /* MW 2 */ + 671 "00000000" // /* MW 1 */ +.label TGT_F_main_448 +.src_ref 2 "0_0.cc" 58 20 first +.loop_nesting 3 + 672 "10011000" // SUB r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 673 "00000001" // /* MW 3 */ + 674 "10100001" // /* MW 2 */ + 675 "00010100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 59 63 first + 676 "10011000" // LSHL r15, r16, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 677 "11001101" // /* MW 3 */ + 678 "00011110" // /* MW 2 */ + 679 "00010100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 59 63 +.src_ref 2 "0_0.cc" 59 63 +.src_ref 2 "0_0.cc" 60 40 +.src_ref 2 "0_0.cc" 63 43 +.src_ref 2 "0_0.cc" 64 63 +.src_ref 2 "0_0.cc" 65 40 +.src_ref 2 "0_0.cc" 67 46 + 680 "00000010" // MOVS p7, r13; MOV dj7, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 681 "01110000" // /* MW 7 */ + 682 "11010000" // /* MW 6 */ + 683 "11000011" // /* MW 5 */ + 684 "00000011" // /* MW 4 */ + 685 "01100000" // /* MW 3 */ + 686 "10100001" // /* MW 2 */ + 687 "11110001" // /* MW 1 */ +.src_ref 2 "0_0.cc" 59 63 + 688 "10011000" // LDA r0, [p7, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 689 "00010110" // /* MW 3 */ + 690 "11100000" // /* MW 2 */ + 691 "00000111" // /* MW 1 */ +.src_ref 2 "0_0.cc" 59 16 +.no_stack_arguments + 692 "00000100" // JL #1344 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=1344 delay_slots=5 */ + 693 "00000001" // /* MW 5 */ + 694 "00000000" // /* MW 4 */ + 695 "10100000" // /* MW 3 */ + 696 "00000010" // /* MW 2 */ + 697 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 59 57 +.delay_slot + 698 "10011000" // ADD.NC r14, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 699 "00000000" // /* MW 3 */ + 700 "10011000" // /* MW 2 */ + 701 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 703 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 59 50 +.delay_slot + 704 "10011000" // LSHL r16, r14, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 705 "11001101" // /* MW 3 */ + 706 "10100000" // /* MW 2 */ + 707 "00010011" // /* MW 1 */ +.src_ref 2 "0_0.cc" 59 50 +.delay_slot + 708 "01011000" // ADD.NC p0, r13, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 709 "11000001" // /* MW 3 */ + 710 "01100110" // /* MW 2 */ + 711 "00011000" // /* MW 1 */ +.delay_slot + 712 "00000010" // ST r17, [sp, #-16]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 713 "01110000" // /* MW 7 */ + 714 "10100101" // /* MW 6 */ + 715 "00000001" // /* MW 5 */ + 716 "00000000" // /* MW 4 */ + 717 "10110000" // /* MW 3 */ + 718 "01000110" // /* MW 2 */ + 719 "11111110" // /* MW 1 */ +.src_ref 2 "0_0.cc" 60 40 +.src_ref 2 "0_0.cc" 60 49 +.return_address + 720 "11100100" // MOVX r17, #1; MOV dj0, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 721 "01000001" // /* MW 5 */ + 722 "00001111" // /* MW 4 */ + 723 "10100001" // /* MW 3 */ + 724 "01000000" // /* MW 2 */ + 725 "00000100" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 12 +.src_ref 2 "0_0.cc" 60 40 first + 726 "10111010" // LDA r18, [p7, dj0]; MOVXM r20, #30656 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 727 "00010000" // /* MW 9 */ + 728 "11100000" // /* MW 8 */ + 729 "10001011" // /* MW 7 */ + 730 "00011110" // /* MW 6 */ + 731 "00000000" // /* MW 5 */ + 732 "00000000" // /* MW 4 */ + 733 "11010000" // /* MW 3 */ + 734 "01001010" // /* MW 2 */ + 735 "11100000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 12 + 736 "01000100" // MOVXM r21, #30658 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 737 "10000100" // /* MW 5 */ + 738 "10101111" // /* MW 4 */ + 739 "01111010" // /* MW 3 */ + 740 "00000000" // /* MW 2 */ + 741 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 12 +.src_ref 3 "tile_control.h" 260 15 + 742 "01000100" // MOVXM p0, #524288 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 743 "00000000" // /* MW 5 */ + 744 "11000000" // /* MW 4 */ + 745 "00000000" // /* MW 3 */ + 746 "00001000" // /* MW 2 */ + 747 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 440 26 + 748 "01000100" // MOVXM r16, #7340035 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 749 "00000110" // /* MW 5 */ + 750 "00100000" // /* MW 4 */ + 751 "00001000" // /* MW 3 */ + 752 "01110000" // /* MW 2 */ + 753 "00000000" // /* MW 1 */ + 754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 755 "00000000" // /* MW 1 */ + 756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 757 "00000000" // /* MW 1 */ + 758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 759 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 60 49 + 760 "10011000" // LSHL r17, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 761 "00011101" // /* MW 3 */ + 762 "10100011" // /* MW 2 */ + 763 "00010100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 61 20 first + 764 "01011000" // ADD.NC r18, r17, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 765 "10111001" // /* MW 3 */ + 766 "10011000" // /* MW 2 */ + 767 "00011100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 63 43 first + 768 "10011000" // LSHL r19, r18, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 769 "11001101" // /* MW 3 */ + 770 "10100110" // /* MW 2 */ + 771 "00010100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 63 43 + 772 "11111000" // MOV dj0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 773 "10100000" // /* MW 3 */ + 774 "10001001" // /* MW 2 */ + 775 "00011000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 63 43 + 776 "10011000" // LDA r22, [p7, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 777 "11010110" // /* MW 3 */ + 778 "00000010" // /* MW 2 */ + 779 "00000111" // /* MW 1 */ + 780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 781 "00000000" // /* MW 1 */ + 782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 783 "00000000" // /* MW 1 */ + 784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 785 "00000000" // /* MW 1 */ + 786 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 787 "00000000" // /* MW 1 */ + 788 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 789 "00000000" // /* MW 1 */ + 790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 791 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 14 first + 792 "10011000" // LTU r27, r22, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 793 "11001100" // /* MW 3 */ + 794 "10110110" // /* MW 2 */ + 795 "00010101" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 12 + 796 "00011000" // SEL.EQZ r20, r21, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 797 "01000010" // /* MW 3 */ + 798 "01101001" // /* MW 2 */ + 799 "00010101" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 12 + 800 "01011000" // ADD.NC r20, r22, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 801 "01010001" // /* MW 3 */ + 802 "00011011" // /* MW 2 */ + 803 "00011101" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 12 + 804 "10011000" // LSHL r20, r20, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 805 "11001101" // /* MW 3 */ + 806 "00101000" // /* MW 2 */ + 807 "00010101" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 12 +.src_ref 3 "tile_control.h" 260 15 + 808 "00000010" // NOPS; MOV dj0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 809 "01110000" // /* MW 7 */ + 810 "00010000" // /* MW 6 */ + 811 "01000101" // /* MW 5 */ + 812 "00000000" // /* MW 4 */ + 813 "01100000" // /* MW 3 */ + 814 "00101011" // /* MW 2 */ + 815 "00000000" // /* MW 1 */ +.label TGT_F_main_592 +.src_ref 3 "tile_control.h" 147 12 +.src_ref 3 "tile_control.h" 260 15 first +.loop_nesting 4 + 816 "10011000" // LDA.TM r20, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 817 "10010011" // /* MW 3 */ + 818 "00000010" // /* MW 2 */ + 819 "00000000" // /* MW 1 */ + 820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 821 "00000000" // /* MW 1 */ + 822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 823 "00000000" // /* MW 1 */ + 824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 825 "00000000" // /* MW 1 */ + 826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 827 "00000000" // /* MW 1 */ + 828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 829 "00000000" // /* MW 1 */ + 830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 831 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 440 26 first + 832 "10011000" // AND r21, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 833 "00000100" // /* MW 3 */ + 834 "00101011" // /* MW 2 */ + 835 "00010101" // /* MW 1 */ +.src_ref 3 "tile_control.h" 440 8 +.src_ref 3 "tile_control.h" 440 61 + 836 "10000100" // JNZ r21, #816 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=816 delay_slots=5 */ + 837 "00000001" // /* MW 5 */ + 838 "01000000" // /* MW 4 */ + 839 "10011000" // /* MW 3 */ + 840 "00000001" // /* MW 2 */ + 841 "10101000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 843 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 847 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 851 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 63 50 +.loop_nesting 3 + 852 "10011000" // ADD.NC r15, r18, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 853 "00000000" // /* MW 3 */ + 854 "11011001" // /* MW 2 */ + 855 "00011011" // /* MW 1 */ +.src_ref 2 "0_0.cc" 64 63 first + 856 "10011000" // LSHL r14, r15, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 857 "11001101" // /* MW 3 */ + 858 "11011100" // /* MW 2 */ + 859 "00010011" // /* MW 1 */ +.src_ref 2 "0_0.cc" 64 63 + 860 "11111000" // MOV dj1, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 861 "00100000" // /* MW 3 */ + 862 "10000111" // /* MW 2 */ + 863 "00011001" // /* MW 1 */ +.src_ref 2 "0_0.cc" 64 63 + 864 "10011000" // LDA r0, [p7, dj1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 865 "00010110" // /* MW 3 */ + 866 "00100000" // /* MW 2 */ + 867 "00000111" // /* MW 1 */ +.src_ref 2 "0_0.cc" 64 16 +.no_stack_arguments + 868 "00000100" // JL #1344 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=1344 delay_slots=5 */ + 869 "00000001" // /* MW 5 */ + 870 "00000000" // /* MW 4 */ + 871 "10100000" // /* MW 3 */ + 872 "00000010" // /* MW 2 */ + 873 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 875 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 876 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 877 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 40 117 +.delay_slot + 878 "00011000" // ADD r13, r13, #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 879 "00100011" // /* MW 3 */ + 880 "01011010" // /* MW 2 */ + 881 "00010011" // /* MW 1 */ +.src_ref 2 "0_0.cc" 64 50 +.delay_slot + 882 "01011000" // ADD.NC p0, r19, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 883 "10110101" // /* MW 3 */ + 884 "01101001" // /* MW 2 */ + 885 "00011000" // /* MW 1 */ +.delay_slot + 886 "01111010" // NOPA; ST r17, [sp, #-24]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 887 "00000000" // /* MW 9 */ + 888 "00000000" // /* MW 8 */ + 889 "00000000" // /* MW 7 */ + 890 "10000000" // /* MW 6 */ + 891 "00110101" // /* MW 5 */ + 892 "11101010" // /* MW 4 */ + 893 "11110111" // /* MW 3 */ + 894 "00101100" // /* MW 2 */ + 895 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 65 40 +.src_ref 2 "0_0.cc" 65 49 +.src_ref 2 "0_0.cc" 65 53 +.src_ref 2 "0_0.cc" 70 32 +.return_address + 896 "10111010" // MOVA r14, #0; MOVX r16, #1; MOV dj0, r14 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 897 "01111000" // /* MW 9 */ + 898 "10010000" // /* MW 8 */ + 899 "01000011" // /* MW 7 */ + 900 "00101000" // /* MW 6 */ + 901 "00000000" // /* MW 5 */ + 902 "00000001" // /* MW 4 */ + 903 "00000000" // /* MW 3 */ + 904 "00001110" // /* MW 2 */ + 905 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 40 98 +.src_ref 2 "0_0.cc" 65 40 first + 906 "00010100" // LDA r18, [p7, dj0]; ADD.NC r17, r13, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 907 "11111100" // /* MW 5 */ + 908 "10101101" // /* MW 4 */ + 909 "11011000" // /* MW 3 */ + 910 "01001010" // /* MW 2 */ + 911 "11100000" // /* MW 1 */ + 912 "10011000" // ST r17, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 913 "00110101" // /* MW 3 */ + 914 "11101110" // /* MW 2 */ + 915 "00001111" // /* MW 1 */ + 916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 917 "00000000" // /* MW 1 */ + 918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 919 "00000000" // /* MW 1 */ + 920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 921 "00000000" // /* MW 1 */ + 922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 923 "00000000" // /* MW 1 */ + 924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 925 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 65 49 + 926 "10011000" // LSHL r18, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 927 "00001101" // /* MW 3 */ + 928 "10100101" // /* MW 2 */ + 929 "00010100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 65 53 + 930 "10011000" // OR r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 931 "00000101" // /* MW 3 */ + 932 "10100001" // /* MW 2 */ + 933 "00010100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 66 20 first + 934 "01011000" // ADD.NC r18, r15, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 935 "11000001" // /* MW 3 */ + 936 "10010111" // /* MW 2 */ + 937 "00011100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 67 46 +.src_ref 2 "0_0.cc" 67 46 first + 938 "00111010" // ST r16, [sp, #-32]; LSHL r12, r18, r12; MOV p0, p7 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 939 "01111001" // /* MW 9 */ + 940 "01100000" // /* MW 8 */ + 941 "00110111" // /* MW 7 */ + 942 "01101100" // /* MW 6 */ + 943 "11000110" // /* MW 5 */ + 944 "00100100" // /* MW 4 */ + 945 "10110000" // /* MW 3 */ + 946 "01000010" // /* MW 2 */ + 947 "11111100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 67 46 +.src_ref 2 "0_0.cc" 67 46 + 948 "00111010" // ST r18, [sp, #-28]; ADD r16, r17, r12; MOV dj0, r12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 949 "01111001" // /* MW 9 */ + 950 "00010000" // /* MW 8 */ + 951 "01000011" // /* MW 7 */ + 952 "00000100" // /* MW 6 */ + 953 "00000110" // /* MW 5 */ + 954 "00100011" // /* MW 4 */ + 955 "10110000" // /* MW 3 */ + 956 "11001010" // /* MW 2 */ + 957 "11111100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 67 16 +.src_ref 2 "0_0.cc" 67 46 + 958 "01110110" // LDA r16, [p0, dj0]; ST r16, [sp, #-36]; MOVXM p7, #992 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 959 "00010000" // /* MW 11 */ + 960 "11110000" // /* MW 10 */ + 961 "10110001" // /* MW 9 */ + 962 "00000011" // /* MW 8 */ + 963 "00000000" // /* MW 7 */ + 964 "10000000" // /* MW 6 */ + 965 "00010101" // /* MW 5 */ + 966 "11011110" // /* MW 4 */ + 967 "11010111" // /* MW 3 */ + 968 "01000010" // /* MW 2 */ + 969 "00000000" // /* MW 1 */ + 970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 971 "00000000" // /* MW 1 */ + 972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 973 "00000000" // /* MW 1 */ + 974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 975 "00000000" // /* MW 1 */ + 976 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 977 "00000000" // /* MW 1 */ + 978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 979 "00000000" // /* MW 1 */ + 980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 981 "00000000" // /* MW 1 */ + 982 "10111010" // NOPA; NOPB; ADD.NC r15, r16, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 983 "11001110" // /* MW 9 */ + 984 "00111111" // /* MW 8 */ + 985 "11101100" // /* MW 7 */ + 986 "00000001" // /* MW 6 */ + 987 "00010000" // /* MW 5 */ + 988 "00000000" // /* MW 4 */ + 989 "11110000" // /* MW 3 */ + 990 "00101100" // /* MW 2 */ + 991 "00000000" // /* MW 1 */ +.label TGT_F_main_768 +.src_ref 2 "0_0.cc" 70 32 first +.loop_nesting 4 + 992 "10011000" // OR r16, r11, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 993 "11100101" // /* MW 3 */ + 994 "11100000" // /* MW 2 */ + 995 "00010010" // /* MW 1 */ +.src_ref 2 "0_0.cc" 70 32 + 996 "10000100" // JNZ r16, #1104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=1104 delay_slots=5 */ + 997 "00000001" // /* MW 5 */ + 998 "01000000" // /* MW 4 */ + 999 "00101000" // /* MW 3 */ + 1000 "00000010" // /* MW 2 */ + 1001 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1003 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1005 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1006 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1007 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1008 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1009 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 67 46 +.src_ref 2 "0_0.cc" 67 46 +.src_ref 2 "0_0.cc" 72 40 +.src_ref 2 "0_0.cc" 72 40 +.delay_slot + 1010 "00000010" // MOVS p0, r13; MOV dj1, r12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1011 "01110000" // /* MW 7 */ + 1012 "00010000" // /* MW 6 */ + 1013 "11000011" // /* MW 5 */ + 1014 "00000000" // /* MW 4 */ + 1015 "01100000" // /* MW 3 */ + 1016 "10100001" // /* MW 2 */ + 1017 "00010001" // /* MW 1 */ +.src_ref 2 "0_0.cc" 70 47 + 1018 "00011000" // LDA p1, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1019 "10011001" // /* MW 3 */ + 1020 "11011100" // /* MW 2 */ + 1021 "00000111" // /* MW 1 */ + 1022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1023 "00000000" // /* MW 1 */ + 1024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1025 "00000000" // /* MW 1 */ + 1026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1027 "00000000" // /* MW 1 */ + 1028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1029 "00000000" // /* MW 1 */ + 1030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1031 "00000000" // /* MW 1 */ + 1032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1033 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 70 47 + 1034 "10011000" // LDA r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1035 "00010110" // /* MW 3 */ + 1036 "00000110" // /* MW 2 */ + 1037 "00000001" // /* MW 1 */ + 1038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1039 "00000000" // /* MW 1 */ + 1040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1041 "00000000" // /* MW 1 */ + 1042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1043 "00000000" // /* MW 1 */ + 1044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1045 "00000000" // /* MW 1 */ + 1046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1047 "00000000" // /* MW 1 */ + 1048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1049 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 70 23 +.src_ref 2 "0_0.cc" 70 47 + 1050 "10000100" // JZ r16, #1104 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=1104 delay_slots=5 */ + 1051 "00000001" // /* MW 5 */ + 1052 "00000000" // /* MW 4 */ + 1053 "00101000" // /* MW 3 */ + 1054 "00000010" // /* MW 2 */ + 1055 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1057 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1059 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1063 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1065 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1066 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1067 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1068 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1069 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1071 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1073 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1075 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1077 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 71 24 first + 1078 "00011000" // DONE /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1079 "00000000" // /* MW 3 */ + 1080 "00001000" // /* MW 2 */ + 1081 "00010000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1083 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1085 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1087 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1088 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1089 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1091 "00000000" // /* MW 1 */ +.swstall chess_separator_scheduler + 1092 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 1093 "10000001" // /* MW 11 */ + 1094 "10101101" // /* MW 10 */ + 1095 "00000000" // /* MW 9 */ + 1096 "00000000" // /* MW 8 */ + 1097 "00000000" // /* MW 7 */ + 1098 "00000000" // /* MW 6 */ + 1099 "00100000" // /* MW 5 */ + 1100 "00000000" // /* MW 4 */ + 1101 "11110000" // /* MW 3 */ + 1102 "00101100" // /* MW 2 */ + 1103 "00000000" // /* MW 1 */ +.label TGT_F_main_880 +.src_ref 2 "0_0.cc" 67 46 first +.src_ref 2 "0_0.cc" 72 20 +.src_ref 2 "0_0.cc" 72 40 first + 1104 "11010100" // LDA r0, [p0, dj1]; MOV p0, p6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1105 "10000001" // /* MW 5 */ + 1106 "11011001" // /* MW 4 */ + 1107 "11010000" // /* MW 3 */ + 1108 "00000010" // /* MW 2 */ + 1109 "00000100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 72 20 +.no_stack_arguments + 1110 "00000100" // JL #2352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2352 delay_slots=5 */ + 1111 "00000001" // /* MW 5 */ + 1112 "00000000" // /* MW 4 */ + 1113 "10011000" // /* MW 3 */ + 1114 "00000100" // /* MW 2 */ + 1115 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1117 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1119 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 72 20 +.delay_slot + 1120 "11111000" // MOV r1, r10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1121 "00100000" // /* MW 3 */ + 1122 "01010101" // /* MW 2 */ + 1123 "00011000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 72 20 +.delay_slot + 1124 "11111000" // MOV r2, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1125 "10100000" // /* MW 3 */ + 1126 "10010100" // /* MW 2 */ + 1127 "00011000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 72 20 +.delay_slot + 1128 "00000010" // NOPS; MOV r3, r8 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1129 "01110000" // /* MW 7 */ + 1130 "00010000" // /* MW 6 */ + 1131 "01101010" // /* MW 5 */ + 1132 "00000000" // /* MW 4 */ + 1133 "01100000" // /* MW 3 */ + 1134 "00101011" // /* MW 2 */ + 1135 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 67 16 +.return_address + 1136 "00011000" // JNZD r15, r15, p7 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 1137 "11100000" // /* MW 3 */ + 1138 "11011111" // /* MW 2 */ + 1139 "00010011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1140 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1143 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1147 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 67 76 +.delay_slot + 1148 "10011000" // ADD.NC r14, r14, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1149 "00000000" // /* MW 3 */ + 1150 "10010111" // /* MW 2 */ + 1151 "00011011" // /* MW 1 */ +.src_ref 3 "tile_control.h" 147 12 +.src_ref 3 "tile_control.h" 147 14 +.src_ref 2 "0_0.cc" 49 38 +.src_ref 2 "0_0.cc" 50 33 +.src_ref 2 "0_0.cc" 55 12 +.src_ref 2 "0_0.cc" 55 64 +.src_ref 2 "0_0.cc" 59 50 +.src_ref 2 "0_0.cc" 59 63 +.src_ref 2 "0_0.cc" 63 43 +.src_ref 2 "0_0.cc" 64 63 +.src_ref 2 "0_0.cc" 67 46 +.src_ref 2 "0_0.cc" 78 31 +.loop_nesting 3 + 1152 "10111010" // LDA r17, [sp, #-16]; MOVX r12, #2; ADD.NC r11, r11, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1153 "01001000" // /* MW 9 */ + 1154 "11000000" // /* MW 8 */ + 1155 "01101010" // /* MW 7 */ + 1156 "01001001" // /* MW 6 */ + 1157 "11000000" // /* MW 5 */ + 1158 "00000000" // /* MW 4 */ + 1159 "00100000" // /* MW 3 */ + 1160 "01000110" // /* MW 2 */ + 1161 "11111110" // /* MW 1 */ +.src_ref 2 "0_0.cc" 55 12 + 1162 "10111010" // LDA r18, [sp, #-20]; MOVXM p7, #672 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1163 "00010000" // /* MW 9 */ + 1164 "01010000" // /* MW 8 */ + 1165 "10110001" // /* MW 7 */ + 1166 "00000011" // /* MW 6 */ + 1167 "00000000" // /* MW 5 */ + 1168 "00000000" // /* MW 4 */ + 1169 "00100000" // /* MW 3 */ + 1170 "11001010" // /* MW 2 */ + 1171 "11111101" // /* MW 1 */ +.src_ref 2 "0_0.cc" 74 20 first + 1172 "00011000" // LDA r19, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1173 "01110001" // /* MW 3 */ + 1174 "11100110" // /* MW 2 */ + 1175 "00000111" // /* MW 1 */ +.src_ref 2 "0_0.cc" 75 43 + 1176 "00011000" // LDA r16, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1177 "00010001" // /* MW 3 */ + 1178 "11100010" // /* MW 2 */ + 1179 "00000111" // /* MW 1 */ +.src_ref 2 "0_0.cc" 75 43 + 1180 "00011000" // LDA r20, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1181 "10010001" // /* MW 3 */ + 1182 "11101010" // /* MW 2 */ + 1183 "00000111" // /* MW 1 */ + 1184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1185 "00000000" // /* MW 1 */ + 1186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1187 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 55 12 first + 1188 "00011000" // JNZD r17, r17, p7 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 1189 "11100000" // /* MW 3 */ + 1190 "01100011" // /* MW 2 */ + 1191 "00010100" // /* MW 1 */ +.delay_slot + 1192 "00011000" // ADD r13, r18, #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1193 "11110011" // /* MW 3 */ + 1194 "10011011" // /* MW 2 */ + 1195 "00010100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 74 20 +.delay_slot + 1196 "10011000" // ADD.NC r18, r19, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1197 "10000001" // /* MW 3 */ + 1198 "10011001" // /* MW 2 */ + 1199 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1201 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 75 43 first +.delay_slot + 1202 "01011000" // ADD.NC r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1203 "01000001" // /* MW 3 */ + 1204 "00011010" // /* MW 2 */ + 1205 "00011100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 75 60 +.delay_slot + 1206 "10011000" // ADD.NC r16, r16, #5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1207 "00000010" // /* MW 3 */ + 1208 "00011000" // /* MW 2 */ + 1209 "00011100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 78 31 +.src_ref 2 "0_0.cc" 78 31 first +.src_ref 2 "0_0.cc" 79 21 +.src_ref 2 "0_0.cc" 79 21 +.src_ref 2 "0_0.cc" 80 27 +.src_ref 2 "0_0.cc" 80 27 +.loop_nesting 2 + 1210 "01110110" // MOVA r18, #62; MOVS p7, r13; LSHL r16, r18, r12; MOV r20, #63 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 1211 "01011000" // /* MW 11 */ + 1212 "00111111" // /* MW 10 */ + 1213 "10001000" // /* MW 9 */ + 1214 "01101110" // /* MW 8 */ + 1215 "00000110" // /* MW 7 */ + 1216 "00100101" // /* MW 6 */ + 1217 "00001011" // /* MW 5 */ + 1218 "10001101" // /* MW 4 */ + 1219 "00000111" // /* MW 3 */ + 1220 "11010010" // /* MW 2 */ + 1221 "00000111" // /* MW 1 */ +.src_ref 2 "0_0.cc" 78 31 +.src_ref 2 "0_0.cc" 79 12 +.src_ref 2 "0_0.cc" 80 12 + 1222 "10111010" // MOVA r16, #1; MOVX r22, #-1; MOV dj0, r16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1223 "01111000" // /* MW 9 */ + 1224 "00010000" // /* MW 8 */ + 1225 "01000100" // /* MW 7 */ + 1226 "11101000" // /* MW 6 */ + 1227 "01100111" // /* MW 5 */ + 1228 "00111111" // /* MW 4 */ + 1229 "00000000" // /* MW 3 */ + 1230 "00110000" // /* MW 2 */ + 1231 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 78 31 +.src_ref 2 "0_0.cc" 79 25 +.src_ref 2 "0_0.cc" 81 19 + 1232 "10111010" // LDA r17, [p7, dj0]; MOVXM r19, #504448 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1233 "00010000" // /* MW 9 */ + 1234 "01000000" // /* MW 8 */ + 1235 "01101001" // /* MW 7 */ + 1236 "11101110" // /* MW 6 */ + 1237 "00000001" // /* MW 5 */ + 1238 "00000000" // /* MW 4 */ + 1239 "11010000" // /* MW 3 */ + 1240 "01000110" // /* MW 2 */ + 1241 "11100000" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 564 18 +.src_ref 4 "io_buffer_compiler.h" 565 24 +.src_ref 4 "io_buffer_compiler.h" 572 18 +.src_ref 2 "0_0.cc" 79 25 first + 1242 "01100100" // EQ r27, r19, r13; MOV r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1243 "00000001" // /* MW 5 */ + 1244 "00100000" // /* MW 4 */ + 1245 "11111100" // /* MW 3 */ + 1246 "11011010" // /* MW 2 */ + 1247 "10011110" // /* MW 1 */ +.src_ref 2 "0_0.cc" 79 21 + 1248 "00011000" // SEL.EQZ r23, r20, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1249 "00100010" // /* MW 3 */ + 1250 "00101111" // /* MW 2 */ + 1251 "00010101" // /* MW 1 */ +.src_ref 2 "0_0.cc" 81 19 + 1252 "01000100" // MOVXM r21, #505472 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1253 "00000000" // /* MW 5 */ + 1254 "10101101" // /* MW 4 */ + 1255 "10111010" // /* MW 3 */ + 1256 "00000111" // /* MW 2 */ + 1257 "00000000" // /* MW 1 */ + 1258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1259 "00000000" // /* MW 1 */ + 1260 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1261 "00000000" // /* MW 1 */ + 1262 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1263 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 78 31 first + 1264 "00011000" // EQZ r26, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1265 "11010000" // /* MW 3 */ + 1266 "01110100" // /* MW 2 */ + 1267 "00010100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 79 12 first + 1268 "00011000" // REL r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1269 "01101000" // /* MW 3 */ + 1270 "11010001" // /* MW 2 */ + 1271 "00010101" // /* MW 1 */ +.src_ref 2 "0_0.cc" 80 27 first + 1272 "00011000" // SEL.EQZ r18, r18, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1273 "01000010" // /* MW 3 */ + 1274 "10100101" // /* MW 2 */ + 1275 "00010100" // /* MW 1 */ + 1276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1277 "00000000" // /* MW 1 */ + 1278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1279 "00000000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 80 12 + 1280 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1281 "00001000" // /* MW 3 */ + 1282 "10010111" // /* MW 2 */ + 1283 "00010100" // /* MW 1 */ + 1284 "10000100" // JZ r17, #416 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=416 delay_slots=5 */ + 1285 "00000001" // /* MW 5 */ + 1286 "00000000" // /* MW 4 */ + 1287 "11010000" // /* MW 3 */ + 1288 "00000000" // /* MW 2 */ + 1289 "10001000" // /* MW 1 */ +.src_ref 2 "0_0.cc" 81 19 first +.delay_slot + 1290 "00011000" // SEL.EQZ r16, r19, r21, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1291 "01010010" // /* MW 3 */ + 1292 "11100001" // /* MW 2 */ + 1293 "00010100" // /* MW 1 */ +.src_ref 2 "0_0.cc" 29 31 +.delay_slot + 1294 "11111000" // MOV p7, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1295 "00100000" // /* MW 3 */ + 1296 "01101000" // /* MW 2 */ + 1297 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1299 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1303 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 278 68 +.src_ref 3 "tile_control.h" 278 68 +.src_ref 3 "tile_control.h" 278 68 +.src_ref 4 "io_buffer_compiler.h" 566 27 +.src_ref 4 "io_buffer_compiler.h" 567 18 +.src_ref 2 "0_0.cc" 19 8 +.loop_nesting 1 + 1304 "10111010" // LDA p0, [sp, #-8]; MOVX r25, #0; MOV r16, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1305 "01011000" // /* MW 9 */ + 1306 "00000001" // /* MW 8 */ + 1307 "00001000" // /* MW 7 */ + 1308 "00001010" // /* MW 6 */ + 1309 "10010000" // /* MW 5 */ + 1310 "00000001" // /* MW 4 */ + 1311 "00100000" // /* MW 3 */ + 1312 "00000011" // /* MW 2 */ + 1313 "11111111" // /* MW 1 */ + 1314 "10000100" // J #272 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=272 delay_slots=5 */ + 1315 "00000000" // /* MW 5 */ + 1316 "00000000" // /* MW 4 */ + 1317 "10001000" // /* MW 3 */ + 1318 "00000000" // /* MW 2 */ + 1319 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1321 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1322 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1323 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1327 "00000000" // /* MW 1 */ +.src_ref 4 "io_buffer_compiler.h" 564 18 +.delay_slot + 1328 "00011000" // LDA p1, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1329 "10011001" // /* MW 3 */ + 1330 "11110100" // /* MW 2 */ +.label _main__end +.label _main___func_end0 + 1331 "00000111" // /* MW 1 */ +.label __ZN3adf11block_writeEPKNS_7reg_valEj___func_begin0 +.label _ZN3adf11block_writeEPKNS_7reg_valEj +.function block_write _ZN3adf11block_writeEPKNS_7reg_valEj +.src_ref 3 "tile_control.h" 288 first +.src_ref 3 "tile_control.h" 292 8 +.src_ref 3 "tile_control.h" 292 25 +.function_start + 1344 "10000100" // JZ r0, #1504 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=1504 delay_slots=5 */ + 1345 "00000001" // /* MW 5 */ + 1346 "00000000" // /* MW 4 */ + 1347 "11110000" // /* MW 3 */ + 1348 "00000010" // /* MW 2 */ + 1349 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1350 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1351 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1352 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1353 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1354 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1355 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1357 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1359 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 278 34 +.src_ref 3 "tile_control.h" 292 8 first + 1360 "11100100" // MOVX r0, #-4; MOV lc, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1361 "01000001" // /* MW 5 */ + 1362 "11100000" // /* MW 4 */ + 1363 "00101010" // /* MW 3 */ + 1364 "00011110" // /* MW 2 */ + 1365 "11111000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 292 8 + 1366 "01000100" // MOVXM ls, #1392 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1367 "11100000" // /* MW 5 */ + 1368 "11101010" // /* MW 4 */ + 1369 "00000001" // /* MW 3 */ + 1370 "00000000" // /* MW 2 */ + 1371 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 292 8 + 1372 "01000100" // MOVXM le, #1488 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1373 "10100000" // /* MW 5 */ + 1374 "11101011" // /* MW 4 */ + 1375 "00000110" // /* MW 3 */ + 1376 "00000000" // /* MW 2 */ + 1377 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 278 34 +.src_ref 3 "tile_control.h" 278 68 + 1378 "01111110" // NOPA; NOPB; NOPS; MOVXM p1, #524288 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 1379 "01100000" // /* MW 13 */ + 1380 "00101011" // /* MW 12 */ + 1381 "00000000" // /* MW 11 */ + 1382 "00000010" // /* MW 10 */ + 1383 "00000000" // /* MW 9 */ + 1384 "00010110" // /* MW 8 */ + 1385 "01000000" // /* MW 7 */ + 1386 "00000000" // /* MW 6 */ + 1387 "00100000" // /* MW 5 */ + 1388 "00000000" // /* MW 4 */ + 1389 "11110000" // /* MW 3 */ + 1390 "00101100" // /* MW 2 */ + 1391 "00000000" // /* MW 1 */ +.label ZLS_F_ZN3adf11block_writeEPKNS_7reg_valEj_48 +.src_ref 3 "tile_control.h" 292 44 +.begin_of_loop +.loop_nesting 1 + 1392 "10011000" // LDA r2, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1393 "01010110" // /* MW 3 */ + 1394 "00011100" // /* MW 2 */ + 1395 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 292 44 + 1396 "10011000" // LDA r1, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1397 "00110110" // /* MW 3 */ + 1398 "00011100" // /* MW 2 */ + 1399 "00000000" // /* MW 1 */ + 1400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1401 "00000000" // /* MW 1 */ + 1402 "00111100" // NOPA; NOPB /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1403 "00100000" // /* MW 5 */ + 1404 "00000000" // /* MW 4 */ + 1405 "11110000" // /* MW 3 */ + 1406 "00101100" // /* MW 2 */ + 1407 "00000000" // /* MW 1 */ + 1408 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 1409 "00000000" // /* MW 15 */ + 1410 "00000000" // /* MW 14 */ + 1411 "01111000" // /* MW 13 */ + 1412 "10100101" // /* MW 12 */ + 1413 "00000001" // /* MW 11 */ + 1414 "00000000" // /* MW 10 */ + 1415 "00000000" // /* MW 9 */ + 1416 "00000000" // /* MW 8 */ + 1417 "01011011" // /* MW 7 */ + 1418 "00000001" // /* MW 6 */ + 1419 "00100000" // /* MW 5 */ + 1420 "00000000" // /* MW 4 */ + 1421 "11110000" // /* MW 3 */ + 1422 "00101100" // /* MW 2 */ + 1423 "00000000" // /* MW 1 */ + 1424 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 1425 "00000000" // /* MW 15 */ + 1426 "00000000" // /* MW 14 */ + 1427 "01111000" // /* MW 13 */ + 1428 "10100101" // /* MW 12 */ + 1429 "00000001" // /* MW 11 */ + 1430 "00000000" // /* MW 10 */ + 1431 "00000000" // /* MW 9 */ + 1432 "00000000" // /* MW 8 */ + 1433 "01011011" // /* MW 7 */ + 1434 "00000001" // /* MW 6 */ + 1435 "00100000" // /* MW 5 */ + 1436 "00000000" // /* MW 4 */ + 1437 "11110000" // /* MW 3 */ + 1438 "00101100" // /* MW 2 */ + 1439 "00000000" // /* MW 1 */ + 1440 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 1441 "00000000" // /* MW 15 */ + 1442 "00000000" // /* MW 14 */ + 1443 "01111000" // /* MW 13 */ + 1444 "10100101" // /* MW 12 */ + 1445 "00000001" // /* MW 11 */ + 1446 "00000000" // /* MW 10 */ + 1447 "00000000" // /* MW 9 */ + 1448 "00000000" // /* MW 8 */ + 1449 "01011011" // /* MW 7 */ + 1450 "00000001" // /* MW 6 */ + 1451 "00100000" // /* MW 5 */ + 1452 "00000000" // /* MW 4 */ + 1453 "11110000" // /* MW 3 */ + 1454 "00101100" // /* MW 2 */ + 1455 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 278 34 first + 1456 "11100001" // NOPA; NOPB; NOPS; AND r3, r2, r0; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 1457 "00000000" // /* MW 15 */ + 1458 "00000000" // /* MW 14 */ + 1459 "01111000" // /* MW 13 */ + 1460 "10100101" // /* MW 12 */ + 1461 "00000001" // /* MW 11 */ + 1462 "00100100" // /* MW 10 */ + 1463 "00110000" // /* MW 9 */ + 1464 "00000100" // /* MW 8 */ + 1465 "01011011" // /* MW 7 */ + 1466 "00000001" // /* MW 6 */ + 1467 "00100000" // /* MW 5 */ + 1468 "00000000" // /* MW 4 */ + 1469 "11110000" // /* MW 3 */ + 1470 "00101100" // /* MW 2 */ + 1471 "00000000" // /* MW 1 */ +.src_ref 3 "tile_control.h" 278 34 +.src_ref 3 "tile_control.h" 278 68 + 1472 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV dj0, r3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 1473 "00000000" // /* MW 15 */ + 1474 "00000000" // /* MW 14 */ + 1475 "01111000" // /* MW 13 */ + 1476 "11010000" // /* MW 12 */ + 1477 "01000000" // /* MW 11 */ + 1478 "00000000" // /* MW 10 */ + 1479 "00000000" // /* MW 9 */ + 1480 "00000000" // /* MW 8 */ + 1481 "01011011" // /* MW 7 */ + 1482 "00000001" // /* MW 6 */ + 1483 "00100000" // /* MW 5 */ + 1484 "00000000" // /* MW 4 */ + 1485 "11110000" // /* MW 3 */ + 1486 "00101100" // /* MW 2 */ + 1487 "00000000" // /* MW 1 */ +.label ZLE_F_ZN3adf11block_writeEPKNS_7reg_valEj_144 +.src_ref 3 "tile_control.h" 278 34 +.src_ref 3 "tile_control.h" 278 68 +.end_of_loop + 1488 "11100001" // NOPA; NOPB; ST.TM r1, [p1, dj0]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 1489 "00000000" // /* MW 15 */ + 1490 "00000000" // /* MW 14 */ + 1491 "01111000" // /* MW 13 */ + 1492 "10100101" // /* MW 12 */ + 1493 "00000001" // /* MW 11 */ + 1494 "00000000" // /* MW 10 */ + 1495 "00000000" // /* MW 9 */ + 1496 "10000000" // /* MW 8 */ + 1497 "00111110" // /* MW 7 */ + 1498 "00000000" // /* MW 6 */ + 1499 "00100001" // /* MW 5 */ + 1500 "00000000" // /* MW 4 */ + 1501 "11110000" // /* MW 3 */ + 1502 "00101100" // /* MW 2 */ + 1503 "00000000" // /* MW 1 */ +.label TGT_F_ZN3adf11block_writeEPKNS_7reg_valEj_160 +.src_ref 3 "tile_control.h" 293 4 first +.loop_nesting 0 + 1504 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 1505 "00000000" // /* MW 3 */ + 1506 "00101000" // /* MW 2 */ + 1507 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1508 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1509 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1510 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1511 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1512 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1513 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1515 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN3adf11block_writeEPKNS_7reg_valEj__end +.label __ZN3adf11block_writeEPKNS_7reg_valEj___func_end0 + 1517 "00000000" // /* MW 1 */ +.label _fini +.function _fini _fini +.src_ref 0 "me_basic.c" 73 4 first +.src_ref 0 "me_basic.c" 73 9 +.function_start + 1520 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1521 "00000001" // /* MW 5 */ + 1522 "00000000" // /* MW 4 */ + 1523 "00000000" // /* MW 3 */ + 1524 "00001000" // /* MW 2 */ + 1525 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 75 8 +.src_ref 0 "me_basic.c" 76 13 + 1526 "00111010" // ST r14, [sp, #-12]; MOVXM r16, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1527 "00010001" // /* MW 9 */ + 1528 "00000000" // /* MW 8 */ + 1529 "00001000" // /* MW 7 */ + 1530 "00000010" // /* MW 6 */ + 1531 "00000000" // /* MW 5 */ + 1532 "00000000" // /* MW 4 */ + 1533 "10110000" // /* MW 3 */ + 1534 "10111010" // /* MW 2 */ + 1535 "11111110" // /* MW 1 */ +.src_ref 0 "me_basic.c" 75 8 +.src_ref 0 "me_basic.c" 75 41 + 1536 "00111010" // ST p7, [sp, #-8]; MOVXM r14, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1537 "00010001" // /* MW 9 */ + 1538 "00000000" // /* MW 8 */ + 1539 "11001000" // /* MW 7 */ + 1540 "00000001" // /* MW 6 */ + 1541 "00000000" // /* MW 5 */ + 1542 "00000000" // /* MW 4 */ + 1543 "10110000" // /* MW 3 */ + 1544 "01110011" // /* MW 2 */ + 1545 "11111111" // /* MW 1 */ +.src_ref 0 "me_basic.c" 75 8 first +.src_ref 0 "me_basic.c" 76 13 + 1546 "11100100" // EQ r16, r14, r16; MOV p7, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1547 "01000001" // /* MW 5 */ + 1548 "11010000" // /* MW 4 */ + 1549 "11111110" // /* MW 3 */ + 1550 "00100000" // /* MW 2 */ + 1551 "01110100" // /* MW 1 */ +.src_ref 0 "me_basic.c" 75 8 + 1552 "10000100" // JNZ r16, #1648 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=1648 delay_slots=5 */ + 1553 "00000001" // /* MW 5 */ + 1554 "01000000" // /* MW 4 */ + 1555 "00111000" // /* MW 3 */ + 1556 "00000011" // /* MW 2 */ + 1557 "10000000" // /* MW 1 */ +.delay_slot + 1558 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1559 "11110101" // /* MW 3 */ + 1560 "11111101" // /* MW 2 */ + 1561 "00001111" // /* MW 1 */ +.delay_slot + 1562 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1563 "00111101" // /* MW 3 */ + 1564 "11110000" // /* MW 2 */ + 1565 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1567 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1569 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1570 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 1571 "00011100" // /* MW 13 */ + 1572 "00000000" // /* MW 12 */ + 1573 "00000000" // /* MW 11 */ + 1574 "01010111" // /* MW 10 */ + 1575 "00011010" // /* MW 9 */ + 1576 "01000000" // /* MW 8 */ + 1577 "00000000" // /* MW 7 */ + 1578 "00000000" // /* MW 6 */ + 1579 "10110110" // /* MW 5 */ + 1580 "00000010" // /* MW 4 */ + 1581 "11110000" // /* MW 3 */ + 1582 "00101100" // /* MW 2 */ + 1583 "00000000" // /* MW 1 */ +.label TGT_F_fini_64 +.src_ref 0 "me_basic.c" 76 13 first +.loop_nesting 1 + 1584 "10011000" // LDA p0, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1585 "00011110" // /* MW 3 */ + 1586 "00011100" // /* MW 2 */ + 1587 "00000111" // /* MW 1 */ + 1588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1589 "00000000" // /* MW 1 */ + 1590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1591 "00000000" // /* MW 1 */ + 1592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1593 "00000000" // /* MW 1 */ + 1594 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1595 "00000000" // /* MW 1 */ + 1596 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1597 "00000000" // /* MW 1 */ + 1598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1599 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 76 16 +.no_stack_arguments + 1600 "00011000" // JL p0 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 1601 "00000000" // /* MW 3 */ + 1602 "00110000" // /* MW 2 */ + 1603 "00010000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 75 41 +.delay_slot + 1604 "11111000" // MOV r15, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1605 "11000000" // /* MW 3 */ + 1606 "11011110" // /* MW 2 */ + 1607 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1609 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1611 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1612 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1613 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1614 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1615 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 75 41 first +.return_address + 1616 "10011000" // NE r16, r15, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1617 "11101000" // /* MW 3 */ + 1618 "11100000" // /* MW 2 */ + 1619 "00010011" // /* MW 1 */ +.src_ref 0 "me_basic.c" 75 8 + 1620 "10000100" // JNZ r16, #1584 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=1584 delay_slots=5 */ + 1621 "00000001" // /* MW 5 */ + 1622 "01000000" // /* MW 4 */ + 1623 "00011000" // /* MW 3 */ + 1624 "00000011" // /* MW 2 */ + 1625 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1627 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1628 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1629 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1631 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1633 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1634 "00101110" // NOPA; NOPS; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 1635 "00011100" // /* MW 13 */ + 1636 "00000000" // /* MW 12 */ + 1637 "00000000" // /* MW 11 */ + 1638 "01010111" // /* MW 10 */ + 1639 "00011010" // /* MW 9 */ + 1640 "01000000" // /* MW 8 */ + 1641 "00000000" // /* MW 7 */ + 1642 "00000000" // /* MW 6 */ + 1643 "10110110" // /* MW 5 */ + 1644 "00000010" // /* MW 4 */ + 1645 "11110000" // /* MW 3 */ + 1646 "00101100" // /* MW 2 */ + 1647 "00000000" // /* MW 1 */ +.label TGT_F_fini_128 +.src_ref 0 "me_basic.c" 77 4 +.loop_nesting 0 + 1648 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1649 "00111001" // /* MW 3 */ + 1650 "11110000" // /* MW 2 */ + 1651 "00000111" // /* MW 1 */ + 1652 "00011000" // LDA r14, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1653 "11010001" // /* MW 3 */ + 1654 "11110101" // /* MW 2 */ + 1655 "00000111" // /* MW 1 */ + 1656 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1657 "11110001" // /* MW 3 */ + 1658 "11111101" // /* MW 2 */ + 1659 "00000111" // /* MW 1 */ + 1660 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1661 "10011001" // /* MW 3 */ + 1662 "11111011" // /* MW 2 */ + 1663 "00000111" // /* MW 1 */ + 1664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1665 "00000000" // /* MW 1 */ + 1666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1667 "00000000" // /* MW 1 */ + 1668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1669 "00000000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 77 4 first + 1670 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 1671 "00000000" // /* MW 3 */ + 1672 "00101000" // /* MW 2 */ + 1673 "00010000" // /* MW 1 */ +.src_ref 0 "me_basic.c" 77 4 +.delay_slot + 1674 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1675 "00000001" // /* MW 5 */ + 1676 "00000000" // /* MW 4 */ + 1677 "00000000" // /* MW 3 */ + 1678 "11111000" // /* MW 2 */ + 1679 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1681 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1683 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _fini__end + 1687 "00000000" // /* MW 1 */ +.label __cxa_finalize +.function __cxa_finalize __cxa_finalize +.src_ref 5 "atexit.c" 47 first +.src_ref 5 "atexit.c" 47 5 +.src_ref 5 "atexit.c" 55 15 +.function_start + 1696 "10111010" // MOVA r1, #-3; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1697 "01110000" // /* MW 9 */ + 1698 "00000000" // /* MW 8 */ + 1699 "00000000" // /* MW 7 */ + 1700 "00000000" // /* MW 6 */ + 1701 "00000010" // /* MW 5 */ + 1702 "00000000" // /* MW 4 */ + 1703 "00000000" // /* MW 3 */ + 1704 "10100001" // /* MW 2 */ + 1705 "11111111" // /* MW 1 */ +.src_ref 5 "atexit.c" 53 4 + 1706 "00111010" // ST lr, [sp, #-40]; MOVX r4, #8; MOV r3, packSign1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1707 "01111001" // /* MW 9 */ + 1708 "11100000" // /* MW 8 */ + 1709 "01101101" // /* MW 7 */ + 1710 "00001000" // /* MW 6 */ + 1711 "01000001" // /* MW 5 */ + 1712 "00000000" // /* MW 4 */ + 1713 "10110000" // /* MW 3 */ + 1714 "00000111" // /* MW 2 */ + 1715 "11111011" // /* MW 1 */ + 1716 "00000010" // ST r3, [sp, #-44]; MOV r3, packSign0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1717 "01110000" // /* MW 7 */ + 1718 "11100000" // /* MW 6 */ + 1719 "01101001" // /* MW 5 */ + 1720 "00000000" // /* MW 4 */ + 1721 "10110000" // /* MW 3 */ + 1722 "10001110" // /* MW 2 */ + 1723 "11111010" // /* MW 1 */ + 1724 "00000010" // ST r3, [sp, #-32]; MOV r3, unpackSign1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1725 "01110000" // /* MW 7 */ + 1726 "00110000" // /* MW 6 */ + 1727 "01101110" // /* MW 5 */ + 1728 "00000000" // /* MW 4 */ + 1729 "10110000" // /* MW 3 */ + 1730 "00001110" // /* MW 2 */ + 1731 "11111100" // /* MW 1 */ + 1732 "00000010" // ST r3, [sp, #-28]; MOV r3, unpackSign0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1733 "01110000" // /* MW 7 */ + 1734 "00110000" // /* MW 6 */ + 1735 "01101010" // /* MW 5 */ + 1736 "00000000" // /* MW 4 */ + 1737 "10110000" // /* MW 3 */ + 1738 "10001110" // /* MW 2 */ + 1739 "11111100" // /* MW 1 */ +.src_ref 5 "atexit.c" 52 14 +.src_ref 5 "atexit.c" 53 4 + 1740 "00111010" // ST r3, [sp, #-20]; MOVXM p0, #508960 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1741 "00010001" // /* MW 9 */ + 1742 "00010000" // /* MW 8 */ + 1743 "00110010" // /* MW 7 */ + 1744 "11110000" // /* MW 6 */ + 1745 "00000001" // /* MW 5 */ + 1746 "00000000" // /* MW 4 */ + 1747 "10110000" // /* MW 3 */ + 1748 "10001110" // /* MW 2 */ + 1749 "11111101" // /* MW 1 */ +.src_ref 5 "atexit.c" 52 14 first + 1750 "11010100" // LDA r2, [p0]; MOV r3, crSRSMode /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1751 "11000001" // /* MW 5 */ + 1752 "10110001" // /* MW 4 */ + 1753 "11010001" // /* MW 3 */ + 1754 "10001010" // /* MW 2 */ + 1755 "00000000" // /* MW 1 */ + 1756 "00000010" // ST r3, [sp, #-16]; MOV r3, crPackSize /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1757 "01110000" // /* MW 7 */ + 1758 "10110000" // /* MW 6 */ + 1759 "01101011" // /* MW 5 */ + 1760 "00000000" // /* MW 4 */ + 1761 "10110000" // /* MW 3 */ + 1762 "00001110" // /* MW 2 */ + 1763 "11111110" // /* MW 1 */ + 1764 "00000010" // ST r3, [sp, #-12]; MOV r3, crSat /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1765 "01110000" // /* MW 7 */ + 1766 "01110000" // /* MW 6 */ + 1767 "01101010" // /* MW 5 */ + 1768 "00000000" // /* MW 4 */ + 1769 "10110000" // /* MW 3 */ + 1770 "10001110" // /* MW 2 */ + 1771 "11111110" // /* MW 1 */ +.src_ref 5 "atexit.c" 53 4 first + 1772 "00000010" // ST r4, [p0]; MOV r9, upsSign1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1773 "01110000" // /* MW 7 */ + 1774 "00110000" // /* MW 6 */ + 1775 "00101100" // /* MW 5 */ + 1776 "00000001" // /* MW 4 */ + 1777 "00110000" // /* MW 3 */ + 1778 "10010010" // /* MW 2 */ + 1779 "00000000" // /* MW 1 */ + 1780 "00000010" // ST r3, [sp, #-8]; MOV r8, upsSign0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1781 "01110000" // /* MW 7 */ + 1782 "00110000" // /* MW 6 */ + 1783 "00001000" // /* MW 5 */ + 1784 "00000001" // /* MW 4 */ + 1785 "10110000" // /* MW 3 */ + 1786 "00001110" // /* MW 2 */ + 1787 "11111111" // /* MW 1 */ + 1788 "11111000" // MOV r11, vaddSign1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1789 "01100000" // /* MW 3 */ + 1790 "11011010" // /* MW 2 */ + 1791 "00011010" // /* MW 1 */ + 1792 "11111000" // MOV r10, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1793 "01100000" // /* MW 3 */ + 1794 "10010010" // /* MW 2 */ + 1795 "00011010" // /* MW 1 */ +.src_ref 5 "atexit.c" 54 8 first + 1796 "11100100" // ADD r2, r2, #-8; MOV r12, srsSign1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1797 "10000001" // /* MW 5 */ + 1798 "00111111" // /* MW 4 */ + 1799 "01100110" // /* MW 3 */ + 1800 "10111100" // /* MW 2 */ + 1801 "00010000" // /* MW 1 */ +.src_ref 5 "atexit.c" 55 15 first + 1802 "11100100" // ASHL r13, r2, r1; MOV r1, crUnpackSize /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1803 "11000001" // /* MW 5 */ + 1804 "10100101" // /* MW 4 */ + 1805 "11010000" // /* MW 3 */ + 1806 "01000011" // /* MW 2 */ + 1807 "00010011" // /* MW 1 */ + 1808 "00000010" // ST r1, [sp, #-4]; MOV r1, crRnd /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1809 "01110000" // /* MW 7 */ + 1810 "10110000" // /* MW 6 */ + 1811 "00101111" // /* MW 5 */ + 1812 "00000000" // /* MW 4 */ + 1813 "10110000" // /* MW 3 */ + 1814 "10000110" // /* MW 2 */ + 1815 "11111111" // /* MW 1 */ + 1816 "00000010" // ST r1, [sp, #-24]; MOV r1, crUPSMode /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1817 "01110000" // /* MW 7 */ + 1818 "01110000" // /* MW 6 */ + 1819 "00101110" // /* MW 5 */ + 1820 "00000000" // /* MW 4 */ + 1821 "10110000" // /* MW 3 */ + 1822 "00000110" // /* MW 2 */ + 1823 "11111101" // /* MW 1 */ + 1824 "00000010" // ST r1, [sp, #-36]; MOV r14, srsSign0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 1825 "01110000" // /* MW 7 */ + 1826 "11100000" // /* MW 6 */ + 1827 "11001011" // /* MW 5 */ + 1828 "00000001" // /* MW 4 */ + 1829 "10110000" // /* MW 3 */ + 1830 "10000110" // /* MW 2 */ + 1831 "11111011" // /* MW 1 */ +.src_ref 5 "atexit.c" 56 37 + 1832 "01000100" // MOVXM r1, #508928 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1833 "00000000" // /* MW 5 */ + 1834 "10101000" // /* MW 4 */ + 1835 "11000000" // /* MW 3 */ + 1836 "00000111" // /* MW 2 */ + 1837 "00000000" // /* MW 1 */ +.src_ref 5 "atexit.c" 56 37 first + 1838 "01011000" // ADD.NC p6, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1839 "10001001" // /* MW 3 */ + 1840 "01100000" // /* MW 2 */ + 1841 "00011110" // /* MW 1 */ +.src_ref 5 "atexit.c" 60 4 + 1842 "01111110" // NOPA; NOPB; NOPS; MOVXM p7, #1856 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 1843 "01100000" // /* MW 13 */ + 1844 "00101011" // /* MW 12 */ + 1845 "00000000" // /* MW 11 */ + 1846 "00000010" // /* MW 10 */ + 1847 "01110100" // /* MW 9 */ + 1848 "01110110" // /* MW 8 */ + 1849 "00000000" // /* MW 7 */ + 1850 "00000000" // /* MW 6 */ + 1851 "00100000" // /* MW 5 */ + 1852 "00000000" // /* MW 4 */ + 1853 "11110000" // /* MW 3 */ + 1854 "00101100" // /* MW 2 */ + 1855 "00000000" // /* MW 1 */ +.label TGT_F__cxa_finalize_160 +.src_ref 5 "atexit.c" 63 18 first +.loop_nesting 1 + 1856 "10011000" // LDA p1, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1857 "10011110" // /* MW 3 */ + 1858 "00011100" // /* MW 2 */ + 1859 "00000110" // /* MW 1 */ +.src_ref 5 "atexit.c" 63 35 + 1860 "10011000" // LDA p0, [p6], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1861 "00011110" // /* MW 3 */ + 1862 "11011100" // /* MW 2 */ + 1863 "00000110" // /* MW 1 */ + 1864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1865 "00000000" // /* MW 1 */ + 1866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1867 "00000000" // /* MW 1 */ + 1868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1869 "00000000" // /* MW 1 */ + 1870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1871 "00000000" // /* MW 1 */ + 1872 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1873 "00000000" // /* MW 1 */ +.src_ref 5 "atexit.c" 63 24 +.no_stack_arguments + 1874 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 1875 "01000000" // /* MW 3 */ + 1876 "00110000" // /* MW 2 */ + 1877 "00010000" // /* MW 1 */ +.delay_slot + 1878 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1879 "00100000" // /* MW 3 */ + 1880 "11010000" // /* MW 2 */ + 1881 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1888 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 1889 "00000000" // /* MW 15 */ + 1890 "00000000" // /* MW 14 */ + 1891 "01111000" // /* MW 13 */ + 1892 "10100101" // /* MW 12 */ + 1893 "00000001" // /* MW 11 */ + 1894 "00000000" // /* MW 10 */ + 1895 "00000000" // /* MW 9 */ + 1896 "00000000" // /* MW 8 */ + 1897 "01011011" // /* MW 7 */ + 1898 "00000001" // /* MW 6 */ + 1899 "00100000" // /* MW 5 */ + 1900 "00000000" // /* MW 4 */ + 1901 "11110000" // /* MW 3 */ + 1902 "00101100" // /* MW 2 */ + 1903 "00000000" // /* MW 1 */ +.src_ref 5 "atexit.c" 60 4 first +.return_address + 1904 "00011000" // JNZD r13, r13, p7 /* MW 4 */ /* control_operation: words=4 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 1905 "11100000" // /* MW 3 */ + 1906 "01011011" // /* MW 2 */ + 1907 "00010011" // /* MW 1 */ +.delay_slot + 1908 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1909 "10100000" // /* MW 3 */ + 1910 "00010111" // /* MW 2 */ + 1911 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1913 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1916 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1917 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 1918 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 1919 "00000000" // /* MW 1 */ +.src_ref 5 "atexit.c" 66 +.loop_nesting 0 + 1920 "10111010" // LDA lr, [sp, #-40]; MOVX upsSign1, r9; MOV vaddSign1, r11 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1921 "01111000" // /* MW 9 */ + 1922 "11010000" // /* MW 8 */ + 1923 "10011010" // /* MW 7 */ + 1924 "00000010" // /* MW 6 */ + 1925 "11101010" // /* MW 5 */ + 1926 "00010011" // /* MW 4 */ + 1927 "00100000" // /* MW 3 */ + 1928 "00000111" // /* MW 2 */ + 1929 "11111011" // /* MW 1 */ + 1930 "10111010" // LDA r1, [sp, #-44]; MOVX upsSign0, r8; MOV vaddSign0, r10 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 1931 "01111000" // /* MW 9 */ + 1932 "10010000" // /* MW 8 */ + 1933 "10011010" // /* MW 7 */ + 1934 "00000000" // /* MW 6 */ + 1935 "11001010" // /* MW 5 */ + 1936 "00010001" // /* MW 4 */ + 1937 "00100000" // /* MW 3 */ + 1938 "10000110" // /* MW 2 */ + 1939 "11111010" // /* MW 1 */ + 1940 "00101100" // LDA r2, [sp, #-32]; MOVX srsSign1, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1941 "10000000" // /* MW 5 */ + 1942 "01111001" // /* MW 4 */ + 1943 "00100110" // /* MW 3 */ + 1944 "00001010" // /* MW 2 */ + 1945 "11111100" // /* MW 1 */ + 1946 "00101100" // LDA r3, [sp, #-28]; MOVX srsSign0, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1947 "10000000" // /* MW 5 */ + 1948 "01110001" // /* MW 4 */ + 1949 "00100111" // /* MW 3 */ + 1950 "10001110" // /* MW 2 */ + 1951 "11111100" // /* MW 1 */ + 1952 "00011000" // LDA r7, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1953 "11110001" // /* MW 3 */ + 1954 "11101100" // /* MW 2 */ + 1955 "00000111" // /* MW 1 */ + 1956 "00011000" // LDA r4, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1957 "10010001" // /* MW 3 */ + 1958 "11110000" // /* MW 2 */ + 1959 "00000111" // /* MW 1 */ + 1960 "00011000" // LDA r5, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1961 "10110001" // /* MW 3 */ + 1962 "11110100" // /* MW 2 */ + 1963 "00000111" // /* MW 1 */ + 1964 "00011000" // LDA r6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 1965 "11010001" // /* MW 3 */ + 1966 "11111000" // /* MW 2 */ + 1967 "00000111" // /* MW 1 */ + 1968 "00101100" // LDA r1, [sp, #-4]; MOVX packSign1, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1969 "10000000" // /* MW 5 */ + 1970 "11111000" // /* MW 4 */ + 1971 "00100000" // /* MW 3 */ + 1972 "10000110" // /* MW 2 */ + 1973 "11111111" // /* MW 1 */ + 1974 "00101100" // LDA r2, [sp, #-24]; MOVX packSign0, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1975 "10000000" // /* MW 5 */ + 1976 "01110000" // /* MW 4 */ + 1977 "00100001" // /* MW 3 */ + 1978 "00001010" // /* MW 2 */ + 1979 "11111101" // /* MW 1 */ + 1980 "00101100" // LDA r3, [sp, #-36]; MOVX unpackSign1, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1981 "10000000" // /* MW 5 */ + 1982 "11111011" // /* MW 4 */ + 1983 "00100001" // /* MW 3 */ + 1984 "10001110" // /* MW 2 */ + 1985 "11111011" // /* MW 1 */ +.src_ref 5 "atexit.c" 66 first + 1986 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1987 "00000001" // /* MW 5 */ + 1988 "00000000" // /* MW 4 */ + 1989 "00000000" // /* MW 3 */ + 1990 "11111000" // /* MW 2 */ + 1991 "11111111" // /* MW 1 */ +.src_ref 5 "atexit.c" 66 + 1992 "11100100" // RET lr; MOV unpackSign0, r7 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 1993 "01000001" // /* MW 5 */ + 1994 "01100111" // /* MW 4 */ + 1995 "00000100" // /* MW 3 */ + 1996 "00000000" // /* MW 2 */ + 1997 "00000101" // /* MW 1 */ +.delay_slot + 1998 "11100100" // MOVX crSRSMode, r4; MOV crPackSize, r5 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 1999 "01000001" // /* MW 5 */ + 2000 "01100101" // /* MW 4 */ + 2001 "00000111" // /* MW 3 */ + 2002 "01100000" // /* MW 2 */ + 2003 "00100111" // /* MW 1 */ +.delay_slot + 2004 "00011000" // MOVX crSat, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2005 "10000000" // /* MW 3 */ + 2006 "10111011" // /* MW 2 */ + 2007 "00010001" // /* MW 1 */ +.delay_slot + 2008 "00011000" // MOVX crUnpackSize, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2009 "10000000" // /* MW 3 */ + 2010 "01111100" // /* MW 2 */ + 2011 "00010000" // /* MW 1 */ +.delay_slot + 2012 "00011000" // MOVX crRnd, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2013 "10000000" // /* MW 3 */ + 2014 "10111010" // /* MW 2 */ + 2015 "00010000" // /* MW 1 */ +.delay_slot + 2016 "00011000" // MOVX crUPSMode, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2017 "00000000" // /* MW 3 */ + 2018 "11111100" // /* MW 2 */ +.label __cxa_finalize__end + 2019 "00010000" // /* MW 1 */ +.dir 0 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib" +.dir 1 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/runtime/include" +.dir 2 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0/src" +.dir 3 "/usr/local/lib/python3.10/dist-packages/include/adf/aie" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 5 "/scratch/sw_component_pipelines/continuous/gradle_simmodels_workspaces/21708/HEAD/build/Aie2p_core_model/stage-src/core_model/sipp/lib/runtime/src" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.txt new file mode 100644 index 0000000000000000000000000000000000000000..516851d5027146f4cb7c3ba9741c6dcb9d7898b1 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/Release/0_3.txt @@ -0,0 +1,405 @@ +Contents of the .debug_line section: + +CU: 0_0/src/0_0.cc: +File name Line number Starting address View Stmt + +/usr/local/lib/python3.10/dist-packages/include/adf/aie/tile_control.h: +tile_control.h 288 0x540 x +tile_control.h 292 0x540 1 +tile_control.h 292 0x540 2 +tile_control.h 278 0x550 +tile_control.h 292 0x550 1 x +tile_control.h 292 0x556 +tile_control.h 292 0x55c +tile_control.h 278 0x562 +tile_control.h 278 0x562 1 +tile_control.h 292 0x570 +tile_control.h 292 0x574 +tile_control.h 278 0x5b0 x +tile_control.h 278 0x5c0 +tile_control.h 278 0x5c0 1 +tile_control.h 278 0x5d0 +tile_control.h 278 0x5d0 1 +tile_control.h 293 0x5e0 x + +0_0/src/0_0.cc: +0_0.cc 12 0xe0 +0_0.cc 12 0xe0 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/aie/tile_control.h: +tile_control.h 147 0xea +tile_control.h 147 0xea 1 + +0_0/src/0_0.cc: +0_0.cc 49 0xea 2 +0_0.cc 50 0xea 3 +0_0.cc 59 0xea 4 +0_0.cc 59 0xea 5 +0_0.cc 63 0xea 6 +0_0.cc 64 0xea 7 +0_0.cc 67 0xea 8 + +/usr/local/lib/python3.10/dist-packages/include/adf/aie/tile_control.h: +tile_control.h 278 0xf4 +tile_control.h 278 0xf4 1 +tile_control.h 278 0xf4 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 566 0xf4 3 +io_buffer_compiler.h 567 0xf4 4 +io_buffer_compiler.h 564 0x102 +io_buffer_compiler.h 565 0x102 1 +io_buffer_compiler.h 572 0x102 2 + +0_0/src/0_0.cc: +0_0.cc 19 0x102 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/aie/tile_control.h: +tile_control.h 278 0x110 x +tile_control.h 278 0x114 + +0_0/src/0_0.cc: +0_0.cc 19 0x11e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 566 0x122 + +0_0/src/0_0.cc: +0_0.cc 29 0x122 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 564 0x134 x +io_buffer_compiler.h 565 0x138 x +io_buffer_compiler.h 565 0x13c +io_buffer_compiler.h 567 0x140 x +io_buffer_compiler.h 567 0x144 +io_buffer_compiler.h 566 0x148 x +io_buffer_compiler.h 564 0x14c x +io_buffer_compiler.h 565 0x150 x +io_buffer_compiler.h 565 0x154 +io_buffer_compiler.h 567 0x158 x +io_buffer_compiler.h 567 0x15c +io_buffer_compiler.h 566 0x160 x +io_buffer_compiler.h 564 0x164 x +io_buffer_compiler.h 565 0x168 x +io_buffer_compiler.h 565 0x16c +io_buffer_compiler.h 567 0x170 x +io_buffer_compiler.h 567 0x174 +io_buffer_compiler.h 566 0x178 x +io_buffer_compiler.h 564 0x17c x +io_buffer_compiler.h 565 0x180 x +io_buffer_compiler.h 565 0x184 +io_buffer_compiler.h 567 0x188 x +io_buffer_compiler.h 567 0x18c +io_buffer_compiler.h 566 0x190 x + +0_0/src/0_0.cc: +0_0.cc 29 0x1a0 x +0_0.cc 37 0x1a0 1 x +0_0.cc 30 0x1aa x +0_0.cc 37 0x1aa 1 +0_0.cc 31 0x1b4 x +0_0.cc 32 0x1b8 x +0_0.cc 33 0x1bc x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 572 0x1c0 + +0_0/src/0_0.cc: +0_0.cc 40 0x1c0 1 +0_0.cc 40 0x1c0 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 572 0x1c8 +io_buffer_compiler.h 575 0x1c8 1 + +0_0/src/0_0.cc: +0_0.cc 58 0x1c8 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 575 0x1ce + +0_0/src/0_0.cc: +0_0.cc 41 0x1d6 +0_0.cc 37 0x1e0 x +0_0.cc 42 0x1e0 1 +0_0.cc 43 0x1e0 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 572 0x1f0 x + +0_0/src/0_0.cc: +0_0.cc 40 0x1f0 1 x +0_0.cc 40 0x1f0 2 x +0_0.cc 42 0x1f0 3 x +0_0.cc 40 0x1fa +0_0.cc 43 0x1fa 1 x +0_0.cc 40 0x200 x +0_0.cc 40 0x200 1 x +0_0.cc 40 0x204 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 573 0x20e x +io_buffer_compiler.h 573 0x214 +io_buffer_compiler.h 575 0x218 x +io_buffer_compiler.h 574 0x220 x +io_buffer_compiler.h 574 0x230 + +0_0/src/0_0.cc: +0_0.cc 41 0x240 +0_0.cc 41 0x250 x +0_0.cc 70 0x260 +0_0.cc 50 0x26a +0_0.cc 49 0x270 +0_0.cc 49 0x270 1 x +0_0.cc 50 0x270 2 +0_0.cc 49 0x27a +0_0.cc 50 0x27a 1 x +0_0.cc 49 0x280 x +0_0.cc 50 0x280 1 +0_0.cc 50 0x286 x +0_0.cc 52 0x298 x +0_0.cc 58 0x2a0 x +0_0.cc 59 0x2a4 x +0_0.cc 59 0x2a8 +0_0.cc 59 0x2a8 1 +0_0.cc 60 0x2a8 2 +0_0.cc 63 0x2a8 3 +0_0.cc 64 0x2a8 4 +0_0.cc 65 0x2a8 5 +0_0.cc 67 0x2a8 6 +0_0.cc 59 0x2b0 +0_0.cc 59 0x2b4 +0_0.cc 59 0x2ba +0_0.cc 59 0x2c0 +0_0.cc 59 0x2c4 +0_0.cc 60 0x2d0 +0_0.cc 60 0x2d0 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/aie/tile_control.h: +tile_control.h 147 0x2d6 + +0_0/src/0_0.cc: +0_0.cc 60 0x2d6 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/aie/tile_control.h: +tile_control.h 147 0x2e0 +tile_control.h 147 0x2e6 +tile_control.h 260 0x2e6 1 +tile_control.h 440 0x2ec + +0_0/src/0_0.cc: +0_0.cc 60 0x2f8 +0_0.cc 61 0x2fc x +0_0.cc 63 0x300 x +0_0.cc 63 0x304 +0_0.cc 63 0x308 + +/usr/local/lib/python3.10/dist-packages/include/adf/aie/tile_control.h: +tile_control.h 147 0x318 x +tile_control.h 147 0x31c +tile_control.h 147 0x320 +tile_control.h 147 0x324 +tile_control.h 147 0x328 +tile_control.h 260 0x328 1 +tile_control.h 147 0x330 +tile_control.h 260 0x330 1 x +tile_control.h 440 0x340 x +tile_control.h 440 0x344 +tile_control.h 440 0x344 1 + +0_0/src/0_0.cc: +0_0.cc 63 0x354 +0_0.cc 64 0x358 x +0_0.cc 64 0x35c +0_0.cc 64 0x360 +0_0.cc 64 0x364 +0_0.cc 40 0x36e +0_0.cc 64 0x372 +0_0.cc 65 0x380 +0_0.cc 65 0x380 1 +0_0.cc 65 0x380 2 +0_0.cc 70 0x380 3 +0_0.cc 40 0x38a +0_0.cc 65 0x38a 1 x +0_0.cc 65 0x39e +0_0.cc 65 0x3a2 +0_0.cc 66 0x3a6 x +0_0.cc 67 0x3aa +0_0.cc 67 0x3aa 1 x +0_0.cc 67 0x3b4 +0_0.cc 67 0x3b4 1 +0_0.cc 67 0x3be +0_0.cc 67 0x3be 1 +0_0.cc 70 0x3e0 x +0_0.cc 70 0x3e4 +0_0.cc 67 0x3f2 +0_0.cc 67 0x3f2 1 +0_0.cc 72 0x3f2 2 +0_0.cc 72 0x3f2 3 +0_0.cc 70 0x3fa +0_0.cc 70 0x40a +0_0.cc 70 0x41a +0_0.cc 70 0x41a 1 +0_0.cc 71 0x436 x +0_0.cc 67 0x450 x +0_0.cc 72 0x450 1 +0_0.cc 72 0x450 2 x +0_0.cc 72 0x456 +0_0.cc 72 0x460 +0_0.cc 72 0x464 +0_0.cc 72 0x468 +0_0.cc 67 0x470 +0_0.cc 67 0x47c + +/usr/local/lib/python3.10/dist-packages/include/adf/aie/tile_control.h: +tile_control.h 147 0x480 +tile_control.h 147 0x480 1 + +0_0/src/0_0.cc: +0_0.cc 49 0x480 2 +0_0.cc 50 0x480 3 +0_0.cc 55 0x480 4 +0_0.cc 55 0x480 5 +0_0.cc 59 0x480 6 +0_0.cc 59 0x480 7 +0_0.cc 63 0x480 8 +0_0.cc 64 0x480 9 +0_0.cc 67 0x480 10 +0_0.cc 78 0x480 11 +0_0.cc 55 0x48a +0_0.cc 74 0x494 x +0_0.cc 75 0x498 +0_0.cc 75 0x49c +0_0.cc 55 0x4a4 x +0_0.cc 74 0x4ac +0_0.cc 75 0x4b2 x +0_0.cc 75 0x4b6 +0_0.cc 78 0x4ba +0_0.cc 78 0x4ba 1 x +0_0.cc 79 0x4ba 2 +0_0.cc 79 0x4ba 3 +0_0.cc 80 0x4ba 4 +0_0.cc 80 0x4ba 5 +0_0.cc 78 0x4c6 +0_0.cc 79 0x4c6 1 +0_0.cc 80 0x4c6 2 +0_0.cc 78 0x4d0 +0_0.cc 79 0x4d0 1 +0_0.cc 81 0x4d0 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 564 0x4da +io_buffer_compiler.h 565 0x4da 1 +io_buffer_compiler.h 572 0x4da 2 + +0_0/src/0_0.cc: +0_0.cc 79 0x4da 3 x +0_0.cc 79 0x4e0 +0_0.cc 81 0x4e4 +0_0.cc 78 0x4f0 x +0_0.cc 79 0x4f4 x +0_0.cc 80 0x4f8 x +0_0.cc 80 0x500 +0_0.cc 81 0x50a x +0_0.cc 29 0x50e + +/usr/local/lib/python3.10/dist-packages/include/adf/aie/tile_control.h: +tile_control.h 278 0x518 +tile_control.h 278 0x518 1 +tile_control.h 278 0x518 2 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 566 0x518 3 +io_buffer_compiler.h 567 0x518 4 + +0_0/src/0_0.cc: +0_0.cc 19 0x518 5 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 564 0x530 +io_buffer_compiler.h - 0x531 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: ./me_basic.c: +File name Line number Starting address View Stmt + +./me_basic.c:[++] +me_basic.c 73 0x5f0 x +me_basic.c 73 0x5f0 1 +me_basic.c 75 0x5f6 +me_basic.c 76 0x5f6 1 +me_basic.c 75 0x600 +me_basic.c 75 0x600 1 +me_basic.c 75 0x60a x +me_basic.c 76 0x60a 1 +me_basic.c 75 0x610 +me_basic.c 76 0x630 x +me_basic.c 76 0x640 +me_basic.c 75 0x644 +me_basic.c 75 0x650 x +me_basic.c 75 0x654 +me_basic.c 77 0x670 +me_basic.c 77 0x686 x +me_basic.c 77 0x68a +me_basic.c 91 0 x +me_basic.c 87 0 1 x +me_basic.c 69 0x6 +me_basic.c 69 0x6 1 +me_basic.c 69 0xc +me_basic.c 69 0x12 x +me_basic.c 69 0x16 +me_basic.c 98 0x26 +me_basic.c 98 0x26 1 +me_basic.c 69 0x30 +me_basic.c 70 0x30 1 +me_basic.c 69 0x40 +me_basic.c 70 0x40 1 x +me_basic.c 70 0x52 +me_basic.c 69 0x60 x +me_basic.c 69 0x64 +me_basic.c 98 0x74 +me_basic.c 98 0x74 1 +me_basic.c 98 0x80 x + +runtime/include/stdlib.h: +stdlib.h 77 0x90 x +stdlib.h 77 0x96 +stdlib.h 61 0xbc x +stdlib.h 62 0xcc x +stdlib.h 64 0xd0 x +stdlib.h - 0xd1 + + +CU: src/atexit.c: +File name Line number Starting address View Stmt + +src/atexit.c: +atexit.c 47 0x6a0 x +atexit.c 47 0x6a0 1 +atexit.c 55 0x6a0 2 +atexit.c 53 0x6aa +atexit.c 52 0x6cc +atexit.c 53 0x6cc 1 +atexit.c 52 0x6d6 x +atexit.c 53 0x6ec x +atexit.c 54 0x704 x +atexit.c 55 0x70a x +atexit.c 56 0x728 +atexit.c 56 0x72e x +atexit.c 60 0x732 +atexit.c 63 0x740 x +atexit.c 63 0x744 +atexit.c 63 0x752 +atexit.c 60 0x770 x +atexit.c 66 0x780 +atexit.c 66 0x7c2 x +atexit.c 66 0x7c8 +atexit.c - 0x7c9 + + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/scripts/0_3.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/scripts/0_3.bcf new file mode 100644 index 0000000000000000000000000000000000000000..c449cca17b320139b2659f7e603429e85d473c6b --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/scripts/0_3.bcf @@ -0,0 +1,20 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x930 0x36d0 //reserved for reloadable elfs + +_entry_point _main_init +_symbol _main _after _main_init +_symbol _main_init 0x0 +_symbol _Z13kernelWrapperPPvjjjj 0x930 +_extern _Z13kernelWrapperPPvjjjj + +_symbol lcpPing 0x7b280 +_symbol lcpPong 0x7b680 +_reserved DMb 0x7ba80 0x40 //reserved for sync buffer +_stack DM_stack 0x7bac0 0x940 //stack for core +//space for synopsys compiler at 0x7c400 0x40//heap +_reserved DMb 0x7c440 0x880 //reserved for reloadable elf heap +_reserved DMb 0x40000 0x3b280 + +_reserved DMb 0x7ccc0 0x3340 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/scripts/0_3.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/scripts/0_3.prx new file mode 100644 index 0000000000000000000000000000000000000000..ad07a911f0bbe1b64a557a167ed906e71f7306f3 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/scripts/0_3.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/src/0_3.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/src/0_3.cc new file mode 100644 index 0000000000000000000000000000000000000000..2b4079d2709f7543da54035187e94a5617be5e71 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3/src/0_3.cc @@ -0,0 +1,90 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + +extern void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut); +uint32 lcpPing[256]; +uint32 lcpPong[256]; + +int main(void) +{ + + while (true) + { + //initialize locks for layer control parameters + adf::initialize_lock(14, 0); + adf::initialize_lock(15, 0); + acquire_equal(62, 1); + uint32* lcp = lcpPing; + uint32 idx = 0; + constexpr int const maxKernelPorts = 5; + adf::_io_buffer_light_weight_generic ioBufPorts[4] = {}; + + while (true) + { + void* args[1 + maxKernelPorts]; + uint32 numArgs = 0; + uint32 loop_cnt0 = lcp[idx++]; + uint32 numSyncInPorts = lcp[idx++]; + uint32 numAsyncInPorts = lcp[idx++]; + uint32 numSyncOutPorts = lcp[idx++]; + uint32 numAsyncOutPorts = lcp[idx++]; + uint32 numIoBufPorts = numSyncInPorts + numAsyncInPorts + numSyncOutPorts + numAsyncOutPorts; + uint32 numRtpPorts = lcp[idx++]; + + for (int i = 0; i < numIoBufPorts; ++i) + chess_loop_range(2,4) + { + new(&ioBufPorts[numArgs]) adf::_io_buffer_light_weight_generic{(void*)lcp[idx+0], lcp[idx+1], (void*)lcp[idx+2], lcp[idx+3]}; + args[numArgs] = &ioBufPorts[numArgs]; + numArgs += 1; + idx += 4; + } + + for (int i = 0; i < numRtpPorts; i++) + chess_loop_range(1,1) + { + uint32 numRtpValues = lcp[idx]; + args[numArgs] = &lcp[idx+1]; + numArgs++; + idx += (numRtpValues+1); + } + + for(int lc0 = 0, lcp_size0 = 0; lc0 < loop_cnt0; lc0++) + chess_loop_range(1,) + { + idx -= lcp_size0; + adf::block_write((adf::reg_val*)(&lcp[idx+1]), lcp[idx]); + uint32 numBlockWrite1 = lcp[idx] * 2 + 1; + idx += numBlockWrite1; + // Wait previous layer mm2s channel done + adf::wait_dma_channel_done(lcp[idx++]); + adf::block_write((adf::reg_val*)(&lcp[idx+1]), lcp[idx]); + uint32 numBlockWrite2 = lcp[idx] * 2 + 1; + idx += numBlockWrite2; + for (int lc1 = 0, loop_cnt1 = lcp[idx]; lc1 < loop_cnt1; lc1++) + chess_loop_range(1,) + { + if(lc0 == 0 && lc1 == 0 && lcp[idx+1]) + done(); + kernelWrapper(args, lcp[idx+2], numSyncInPorts, numAsyncInPorts, numSyncOutPorts); + } + idx += 3; + lcp_size0 = numBlockWrite1 + numBlockWrite2 + 4; + } + + bool isLastLayer = lcp[idx]; + release((lcp == lcpPing ? 62 : 63), -1); + acquire_equal((lcp == lcpPing ? 63 : 62), 1, !isLastLayer); + lcp = (lcp == lcpPing ? lcpPong : lcpPing); + idx = 0; + + if (isLastLayer) + break; + } + } + + return 0; +} diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.calltree new file mode 100644 index 0000000000000000000000000000000000000000..a7245143acd21f5aadabb02d3d289374aea0f40e --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.calltree @@ -0,0 +1,98 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:44:05 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable0 ../Release/0_0_reloadable0.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable0.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1033 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z13_b881_wrapperPPv (referenced text) + _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + _Z13_b719_wrapperPPv (referenced text) + _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj + _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params + _Z13_b886_wrapperPPv (referenced text) + _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (*) + _Z13_b891_wrapperPPv (referenced text) + _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + _Z13_b896_wrapperPPv (referenced text) + _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + _Z13_b901_wrapperPPv (referenced text) + _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _Z13_b906_wrapperPPv (referenced text) + _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 320 0 0 546 11410 _Z13kernelWrapperPPvjjjj + 0 256 1 1 32 1394 _Z13_b881_wrapperPPv + 64 256 1 2 488 1362 _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 74 190 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + 64 192 2 3 150 684 _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + 128 128 3 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 64 1 1 32 682 _Z13_b719_wrapperPPv + 64 64 1 2 114 650 _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj + 0 0 2 3 74 74 _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj + 0 0 1 3 462 462 _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params + 0 192 1 1 36 1402 _Z13_b886_wrapperPPv + 64 192 1 2 602 1366 _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 98 214 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + 0 128 2 3 16 550 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 128 128 2 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 128 1 1 36 1092 _Z13_b891_wrapperPPv + 64 128 1 2 602 1056 _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 192 1 1 36 4714 _Z13_b896_wrapperPPv + 64 192 1 2 568 4678 _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 0 0 3 4 270 270 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + 0 192 1 1 32 1252 _Z13_b901_wrapperPPv + 64 192 1 2 488 1220 _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 128 2 3 62 304 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + 64 64 3 4 162 186 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 4 5 24 24 _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + 0 0 2 4 56 56 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + 128 128 2 3 114 428 _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + 0 0 3 4 314 314 _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 0 64 1 1 32 862 _Z13_b906_wrapperPPv + 64 64 1 2 488 830 _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 0 0 2 3 100 100 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + 0 0 2 3 242 242 _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + +Maximum call level : 5 +Maximum stack level: 4 +Maximum stack size : 320 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.cmic2 b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.cmic2 new file mode 100644 index 0000000000000000000000000000000000000000..5adeecb8c28386f72eb4d0cc18df3c83a0980d91 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.cmic2 @@ -0,0 +1,16705 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:44:07 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable0 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable0.cc" 92 first +.src_ref 0 "0_0_reloadable0.cc" 94 60 +.src_ref 0 "0_0_reloadable0.cc" 94 110 first +.function_start + 2352 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "11100000" // /* MW 5 */ + 2354 "11101001" // /* MW 4 */ + 2355 "11010000" // /* MW 3 */ + 2356 "11000010" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 92 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ + 2364 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2365 "00011101" // /* MW 3 */ + 2366 "11101111" // /* MW 2 */ + 2367 "00001111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 112 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2368 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2369 "01110000" // /* MW 7 */ + 2370 "11010000" // /* MW 6 */ + 2371 "11001000" // /* MW 5 */ + 2372 "00000001" // /* MW 4 */ + 2373 "10110000" // /* MW 3 */ + 2374 "00111010" // /* MW 2 */ + 2375 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2376 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2377 "01110000" // /* MW 7 */ + 2378 "01010000" // /* MW 6 */ + 2379 "11101000" // /* MW 5 */ + 2380 "00000001" // /* MW 4 */ + 2381 "10110000" // /* MW 3 */ + 2382 "00111110" // /* MW 2 */ + 2383 "11111111" // /* MW 1 */ + 2384 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2385 "10011101" // /* MW 3 */ + 2386 "11110111" // /* MW 2 */ + 2387 "00001111" // /* MW 1 */ + 2388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2389 "00111101" // /* MW 3 */ + 2390 "11111100" // /* MW 2 */ + 2391 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2392 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2393 "00000010" // /* MW 3 */ + 2394 "01101000" // /* MW 2 */ + 2395 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2396 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2397 "00010110" // /* MW 3 */ + 2398 "00011110" // /* MW 2 */ + 2399 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2400 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2401 "01010110" // /* MW 3 */ + 2402 "00111110" // /* MW 2 */ + 2403 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2404 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2405 "00110110" // /* MW 3 */ + 2406 "11101110" // /* MW 2 */ + 2407 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2408 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2409 "01110110" // /* MW 3 */ + 2410 "00000111" // /* MW 2 */ + 2411 "00000110" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ + 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2423 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2424 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2425 "00100010" // /* MW 3 */ + 2426 "00100001" // /* MW 2 */ + 2427 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2428 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2429 "00010001" // /* MW 3 */ + 2430 "11010110" // /* MW 2 */ + 2431 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 + 2432 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2433 "00111001" // /* MW 5 */ + 2434 "00110101" // /* MW 4 */ + 2435 "10100000" // /* MW 3 */ + 2436 "00011111" // /* MW 2 */ + 2437 "11111100" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ + 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2441 "00000000" // /* MW 1 */ + 2442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2443 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2444 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2445 "00001000" // /* MW 3 */ + 2446 "01010111" // /* MW 2 */ + 2447 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 60 +.src_ref 0 "0_0_reloadable0.cc" 96 110 +.src_ref 0 "0_0_reloadable0.cc" 99 60 +.src_ref 0 "0_0_reloadable0.cc" 102 7 + 2448 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2449 "00000101" // /* MW 5 */ + 2450 "10100000" // /* MW 4 */ + 2451 "00101001" // /* MW 3 */ + 2452 "01000001" // /* MW 2 */ + 2453 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 60 +.src_ref 0 "0_0_reloadable0.cc" 96 60 first + 2454 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2455 "10000001" // /* MW 5 */ + 2456 "00100001" // /* MW 4 */ + 2457 "10111001" // /* MW 3 */ + 2458 "00100011" // /* MW 2 */ + 2459 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 60 +.src_ref 0 "0_0_reloadable0.cc" 96 110 + 2460 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2461 "10100010" // /* MW 5 */ + 2462 "11010010" // /* MW 4 */ + 2463 "10011100" // /* MW 3 */ + 2464 "10011111" // /* MW 2 */ + 2465 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 60 +.src_ref 0 "0_0_reloadable0.cc" 99 60 + 2466 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2467 "01110010" // /* MW 9 */ + 2468 "01100000" // /* MW 8 */ + 2469 "01101110" // /* MW 7 */ + 2470 "10000010" // /* MW 6 */ + 2471 "10010101" // /* MW 5 */ + 2472 "11100110" // /* MW 4 */ + 2473 "11010111" // /* MW 3 */ + 2474 "11010010" // /* MW 2 */ + 2475 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2476 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2477 "01110000" // /* MW 7 */ + 2478 "10010000" // /* MW 6 */ + 2479 "01001100" // /* MW 5 */ + 2480 "00000011" // /* MW 4 */ + 2481 "10110000" // /* MW 3 */ + 2482 "01001010" // /* MW 2 */ + 2483 "11111101" // /* MW 1 */ + 2484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2485 "00000000" // /* MW 1 */ + 2486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2487 "00000000" // /* MW 1 */ + 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2489 "00000000" // /* MW 1 */ + 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2491 "00000000" // /* MW 1 */ + 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2493 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2494 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2495 "00000010" // /* MW 3 */ + 2496 "01101010" // /* MW 2 */ + 2497 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2498 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2499 "10010110" // /* MW 3 */ + 2500 "00011110" // /* MW 2 */ + 2501 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2502 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2503 "11010110" // /* MW 3 */ + 2504 "00111110" // /* MW 2 */ + 2505 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2506 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2507 "10110110" // /* MW 3 */ + 2508 "11101110" // /* MW 2 */ + 2509 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2510 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2511 "01110110" // /* MW 3 */ + 2512 "00000111" // /* MW 2 */ + 2513 "00000110" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ + 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2517 "00000000" // /* MW 1 */ + 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2519 "00000000" // /* MW 1 */ + 2520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2521 "00000000" // /* MW 1 */ + 2522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2523 "00000000" // /* MW 1 */ + 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2525 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2526 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2527 "01100010" // /* MW 3 */ + 2528 "00101001" // /* MW 2 */ + 2529 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2530 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2531 "10010001" // /* MW 3 */ + 2532 "11010110" // /* MW 2 */ + 2533 "00001110" // /* MW 1 */ + 2534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2535 "00000000" // /* MW 1 */ + 2536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2537 "00000000" // /* MW 1 */ + 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2539 "00000000" // /* MW 1 */ + 2540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2541 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2542 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2543 "00001000" // /* MW 3 */ + 2544 "01010111" // /* MW 2 */ + 2545 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 60 first + 2546 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2547 "00011101" // /* MW 3 */ + 2548 "10100101" // /* MW 2 */ + 2549 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 60 +.src_ref 0 "0_0_reloadable0.cc" 99 60 + 2550 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2551 "10010010" // /* MW 5 */ + 2552 "10110011" // /* MW 4 */ + 2553 "10111001" // /* MW 3 */ + 2554 "10100011" // /* MW 2 */ + 2555 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 60 +.src_ref 0 "0_0_reloadable0.cc" 99 112 + 2556 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2557 "10010010" // /* MW 5 */ + 2558 "11010011" // /* MW 4 */ + 2559 "00001100" // /* MW 3 */ + 2560 "10011110" // /* MW 2 */ + 2561 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 60 + 2562 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2563 "10101011" // /* MW 5 */ + 2564 "11000110" // /* MW 4 */ + 2565 "11011111" // /* MW 3 */ + 2566 "11001010" // /* MW 2 */ + 2567 "11000000" // /* MW 1 */ + 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2569 "00000000" // /* MW 1 */ + 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2571 "00000000" // /* MW 1 */ + 2572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2573 "00000000" // /* MW 1 */ + 2574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2575 "00000000" // /* MW 1 */ + 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2577 "00000000" // /* MW 1 */ + 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2579 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2580 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00000010" // /* MW 3 */ + 2582 "01101001" // /* MW 2 */ + 2583 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2584 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00111110" // /* MW 2 */ + 2587 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 23 + 2588 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2589 "01010110" // /* MW 3 */ + 2590 "11101110" // /* MW 2 */ + 2591 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2592 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2593 "10010110" // /* MW 3 */ + 2594 "00011110" // /* MW 2 */ + 2595 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2596 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2597 "01110110" // /* MW 3 */ + 2598 "00000111" // /* MW 2 */ + 2599 "00000111" // /* MW 1 */ + 2600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2601 "00000000" // /* MW 1 */ + 2602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2603 "00000000" // /* MW 1 */ + 2604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2605 "00000000" // /* MW 1 */ + 2606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2607 "00000000" // /* MW 1 */ + 2608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2609 "00000000" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2612 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2613 "01000010" // /* MW 3 */ + 2614 "11100111" // /* MW 2 */ + 2615 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2616 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "01110001" // /* MW 3 */ + 2618 "11010110" // /* MW 2 */ + 2619 "00001111" // /* MW 1 */ + 2620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2621 "00000000" // /* MW 1 */ + 2622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2623 "00000000" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2628 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2629 "00001000" // /* MW 3 */ + 2630 "10010111" // /* MW 2 */ + 2631 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 7 first + 2632 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2633 "00011101" // /* MW 3 */ + 2634 "00100001" // /* MW 2 */ + 2635 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 7 + 2636 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2637 "00100000" // /* MW 3 */ + 2638 "10001000" // /* MW 2 */ + 2639 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 7 + 2640 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2641 "00000000" // /* MW 5 */ + 2642 "11001001" // /* MW 4 */ + 2643 "11001110" // /* MW 3 */ + 2644 "00000111" // /* MW 2 */ + 2645 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 7 + 2646 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2647 "01011011" // /* MW 5 */ + 2648 "10111000" // /* MW 4 */ + 2649 "11011111" // /* MW 3 */ + 2650 "00010011" // /* MW 2 */ + 2651 "11100000" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ + 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2655 "00000000" // /* MW 1 */ + 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2657 "00000000" // /* MW 1 */ + 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2659 "00000000" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 4 +.no_stack_arguments + 2664 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2665 "01000000" // /* MW 3 */ + 2666 "00110000" // /* MW 2 */ + 2667 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 105 60 +.src_ref 0 "0_0_reloadable0.cc" 107 60 +.delay_slot + 2668 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2669 "11000000" // /* MW 3 */ + 2670 "01100000" // /* MW 2 */ + 2671 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2673 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2675 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2677 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2678 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2679 "01111110" // /* MW 9 */ + 2680 "10100101" // /* MW 8 */ + 2681 "00000001" // /* MW 7 */ + 2682 "00000000" // /* MW 6 */ + 2683 "00010000" // /* MW 5 */ + 2684 "00000000" // /* MW 4 */ + 2685 "11110000" // /* MW 3 */ + 2686 "00101100" // /* MW 2 */ + 2687 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 105 60 first +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.return_address + 2688 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2689 "00001010" // /* MW 5 */ + 2690 "01000000" // /* MW 4 */ + 2691 "11010000" // /* MW 3 */ + 2692 "11000110" // /* MW 2 */ + 2693 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2694 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2695 "01010001" // /* MW 3 */ + 2696 "11011111" // /* MW 2 */ + 2697 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 107 60 + 2698 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2699 "01000001" // /* MW 3 */ + 2700 "11100100" // /* MW 2 */ + 2701 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2702 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "00101001" // /* MW 3 */ + 2704 "11101000" // /* MW 2 */ + 2705 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2706 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "00001001" // /* MW 3 */ + 2708 "11100000" // /* MW 2 */ + 2709 "00000111" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2714 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2715 "10001000" // /* MW 3 */ + 2716 "01101000" // /* MW 2 */ + 2717 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2718 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00110110" // /* MW 3 */ + 2720 "00000110" // /* MW 2 */ + 2721 "00000000" // /* MW 1 */ + 2722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2723 "00000000" // /* MW 1 */ + 2724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2725 "00000000" // /* MW 1 */ + 2726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2727 "00000000" // /* MW 1 */ + 2728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2729 "00000000" // /* MW 1 */ + 2730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2731 "00000000" // /* MW 1 */ + 2732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2733 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2734 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2735 "00001000" // /* MW 3 */ + 2736 "01010101" // /* MW 2 */ + 2737 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2738 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "01000001" // /* MW 5 */ + 2740 "10101111" // /* MW 4 */ + 2741 "11011101" // /* MW 3 */ + 2742 "11000110" // /* MW 2 */ + 2743 "00011110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 107 60 first +.src_ref 1 "io_buffer_main.h" 440 8 + 2744 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2745 "00111001" // /* MW 5 */ + 2746 "01000000" // /* MW 4 */ + 2747 "11011101" // /* MW 3 */ + 2748 "01001010" // /* MW 2 */ + 2749 "11100000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ + 2756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2757 "00000000" // /* MW 1 */ + 2758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2759 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2760 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "00010001" // /* MW 3 */ + 2762 "00100111" // /* MW 2 */ + 2763 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2764 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2765 "00010000" // /* MW 5 */ + 2766 "11010010" // /* MW 4 */ + 2767 "01001110" // /* MW 3 */ + 2768 "01100110" // /* MW 2 */ + 2769 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2770 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2771 "01100011" // /* MW 5 */ + 2772 "11101100" // /* MW 4 */ + 2773 "11010001" // /* MW 3 */ + 2774 "11000110" // /* MW 2 */ + 2775 "11100000" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2786 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2787 "00011100" // /* MW 3 */ + 2788 "10100001" // /* MW 2 */ + 2789 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2790 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2791 "00001000" // /* MW 3 */ + 2792 "01010101" // /* MW 2 */ + 2793 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2794 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2795 "00111001" // /* MW 5 */ + 2796 "11000000" // /* MW 4 */ + 2797 "11011101" // /* MW 3 */ + 2798 "11000110" // /* MW 2 */ + 2799 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 110 60 first + 2800 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2801 "01010110" // /* MW 3 */ + 2802 "00000110" // /* MW 2 */ + 2803 "00000110" // /* MW 1 */ + 2804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2805 "00000000" // /* MW 1 */ + 2806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2807 "00000000" // /* MW 1 */ + 2808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2809 "00000000" // /* MW 1 */ + 2810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2811 "00000000" // /* MW 1 */ + 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2813 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2814 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2815 "00010001" // /* MW 3 */ + 2816 "00100111" // /* MW 2 */ + 2817 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 25 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2818 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2819 "00010100" // /* MW 5 */ + 2820 "11010010" // /* MW 4 */ + 2821 "01000000" // /* MW 3 */ + 2822 "01100110" // /* MW 2 */ + 2823 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 25 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2824 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2825 "01100011" // /* MW 5 */ + 2826 "11101100" // /* MW 4 */ + 2827 "11011111" // /* MW 3 */ + 2828 "11000110" // /* MW 2 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ + 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2841 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2842 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2843 "00001000" // /* MW 3 */ + 2844 "01010101" // /* MW 2 */ + 2845 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 112 + 2846 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2847 "00111001" // /* MW 3 */ + 2848 "11111100" // /* MW 2 */ + 2849 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2850 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2851 "00110110" // /* MW 3 */ + 2852 "11100110" // /* MW 2 */ + 2853 "00000000" // /* MW 1 */ + 2854 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2855 "00011001" // /* MW 3 */ + 2856 "11101111" // /* MW 2 */ + 2857 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2858 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2859 "10011001" // /* MW 3 */ + 2860 "11110111" // /* MW 2 */ + 2861 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2862 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2863 "11010001" // /* MW 3 */ + 2864 "11110001" // /* MW 2 */ + 2865 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2866 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2867 "11110001" // /* MW 3 */ + 2868 "11111001" // /* MW 2 */ + 2869 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 112 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2870 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2871 "00000001" // /* MW 5 */ + 2872 "00000000" // /* MW 4 */ + 2873 "00000000" // /* MW 3 */ + 2874 "11111000" // /* MW 2 */ + 2875 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 112 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2876 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2877 "00000000" // /* MW 3 */ + 2878 "00101000" // /* MW 2 */ + 2879 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2880 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2881 "00010001" // /* MW 3 */ + 2882 "00100001" // /* MW 2 */ + 2883 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2886 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2887 "00100000" // /* MW 3 */ + 2888 "11010111" // /* MW 2 */ + 2889 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2890 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "00000010" // /* MW 3 */ + 2892 "01100001" // /* MW 2 */ + 2893 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2894 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00010001" // /* MW 3 */ + 2896 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2897 "00001000" // /* MW 1 */ +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 432 first +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.function_start + 2912 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2913 "01111000" // /* MW 9 */ + 2914 "01100000" // /* MW 8 */ + 2915 "01001001" // /* MW 7 */ + 2916 "10001000" // /* MW 6 */ + 2917 "01000000" // /* MW 5 */ + 2918 "00000000" // /* MW 4 */ + 2919 "11010000" // /* MW 3 */ + 2920 "10000101" // /* MW 2 */ + 2921 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2922 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2923 "01001000" // /* MW 9 */ + 2924 "10000010" // /* MW 8 */ + 2925 "00110000" // /* MW 7 */ + 2926 "11101001" // /* MW 6 */ + 2927 "01010111" // /* MW 5 */ + 2928 "00111110" // /* MW 4 */ + 2929 "11010000" // /* MW 3 */ + 2930 "10000001" // /* MW 2 */ + 2931 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 432 +.src_ref 2 "conv2d_bf16_params.h" 444 52 + 2932 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2933 "01110000" // /* MW 9 */ + 2934 "00000000" // /* MW 8 */ + 2935 "00000000" // /* MW 7 */ + 2936 "00000000" // /* MW 6 */ + 2937 "00000010" // /* MW 5 */ + 2938 "00000000" // /* MW 4 */ + 2939 "00000000" // /* MW 3 */ + 2940 "10000001" // /* MW 2 */ + 2941 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 30 +.src_ref 2 "conv2d_bf16_params.h" 458 30 + 2942 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2943 "01011000" // /* MW 11 */ + 2944 "00010000" // /* MW 10 */ + 2945 "00000000" // /* MW 9 */ + 2946 "00101000" // /* MW 8 */ + 2947 "00000000" // /* MW 7 */ + 2948 "10000001" // /* MW 6 */ + 2949 "10110101" // /* MW 5 */ + 2950 "11111101" // /* MW 4 */ + 2951 "00000111" // /* MW 3 */ + 2952 "10000110" // /* MW 2 */ + 2953 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2954 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2955 "01011000" // /* MW 11 */ + 2956 "00001111" // /* MW 10 */ + 2957 "10001000" // /* MW 9 */ + 2958 "10101010" // /* MW 8 */ + 2959 "01010111" // /* MW 7 */ + 2960 "10111111" // /* MW 6 */ + 2961 "11010101" // /* MW 5 */ + 2962 "11111001" // /* MW 4 */ + 2963 "00000111" // /* MW 3 */ + 2964 "01100011" // /* MW 2 */ + 2965 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2966 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2967 "00000010" // /* MW 5 */ + 2968 "01100000" // /* MW 4 */ + 2969 "10110000" // /* MW 3 */ + 2970 "10111110" // /* MW 2 */ + 2971 "11111110" // /* MW 1 */ + 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2973 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2974 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2975 "00101001" // /* MW 3 */ + 2976 "00011100" // /* MW 2 */ + 2977 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2978 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2979 "00001001" // /* MW 3 */ + 2980 "00011100" // /* MW 2 */ + 2981 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2982 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2983 "00101110" // /* MW 3 */ + 2984 "00011100" // /* MW 2 */ + 2985 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2986 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2987 "00001110" // /* MW 3 */ + 2988 "00011100" // /* MW 2 */ + 2989 "00000000" // /* MW 1 */ + 2990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2991 "00000000" // /* MW 1 */ + 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2993 "00000000" // /* MW 1 */ + 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2995 "00000000" // /* MW 1 */ + 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2997 "00000000" // /* MW 1 */ + 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2999 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3000 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3001 "00101001" // /* MW 3 */ + 3002 "00011100" // /* MW 2 */ + 3003 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3004 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3005 "00001001" // /* MW 3 */ + 3006 "00011100" // /* MW 2 */ + 3007 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 3008 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3009 "00101110" // /* MW 3 */ + 3010 "00011100" // /* MW 2 */ + 3011 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 3012 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3013 "00001110" // /* MW 3 */ + 3014 "00011100" // /* MW 2 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ + 3018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3019 "00000000" // /* MW 1 */ + 3020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3021 "00000000" // /* MW 1 */ + 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3023 "00000000" // /* MW 1 */ + 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3025 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3026 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00101001" // /* MW 3 */ + 3028 "00011100" // /* MW 2 */ + 3029 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3030 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00001001" // /* MW 3 */ + 3032 "00011100" // /* MW 2 */ + 3033 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 3034 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "00001110" // /* MW 3 */ + 3036 "00000100" // /* MW 2 */ + 3037 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 3038 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "00101110" // /* MW 3 */ + 3040 "00010100" // /* MW 2 */ + 3041 "00000000" // /* MW 1 */ + 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3043 "00000000" // /* MW 1 */ + 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3045 "00000000" // /* MW 1 */ + 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3047 "00000000" // /* MW 1 */ + 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3049 "00000000" // /* MW 1 */ + 3050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3051 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3052 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3053 "00001001" // /* MW 3 */ + 3054 "00000100" // /* MW 2 */ + 3055 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3056 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00101001" // /* MW 3 */ + 3058 "00010100" // /* MW 2 */ + 3059 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 40 first + 3060 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3061 "10101010" // /* MW 3 */ + 3062 "11011101" // /* MW 2 */ + 3063 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 447 34 first + 3064 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3065 "00101010" // /* MW 3 */ + 3066 "00011110" // /* MW 2 */ + 3067 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 448 34 first + 3068 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3069 "11001010" // /* MW 3 */ + 3070 "10111101" // /* MW 2 */ + 3071 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 3072 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3073 "11111010" // /* MW 3 */ + 3074 "11111101" // /* MW 2 */ + 3075 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first + 3076 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3077 "01101010" // /* MW 3 */ + 3078 "00001010" // /* MW 2 */ + 3079 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 20 first + 3080 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3081 "11101010" // /* MW 3 */ + 3082 "10101100" // /* MW 2 */ + 3083 "00000010" // /* MW 1 */ + 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3085 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first + 3086 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3087 "00011101" // /* MW 3 */ + 3088 "01000010" // /* MW 2 */ + 3089 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first +.src_ref 2 "conv2d_bf16_params.h" 462 7 first + 3090 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3091 "00000001" // /* MW 5 */ + 3092 "00110001" // /* MW 4 */ + 3093 "11111001" // /* MW 3 */ + 3094 "00100000" // /* MW 2 */ + 3095 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 3096 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3097 "01011101" // /* MW 3 */ + 3098 "10100100" // /* MW 2 */ + 3099 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 3100 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3101 "01000111" // /* MW 3 */ + 3102 "11110110" // /* MW 2 */ + 3103 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3104 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3105 "00111001" // /* MW 5 */ + 3106 "10110111" // /* MW 4 */ + 3107 "01000000" // /* MW 3 */ + 3108 "01001010" // /* MW 2 */ + 3109 "11000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 3110 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3111 "00100010" // /* MW 3 */ + 3112 "01111011" // /* MW 2 */ + 3113 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first + 3114 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3115 "01100111" // /* MW 3 */ + 3116 "11001100" // /* MW 2 */ + 3117 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 + 3118 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3119 "00000100" // /* MW 3 */ + 3120 "10110111" // /* MW 2 */ + 3121 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 first + 3122 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3123 "01000001" // /* MW 5 */ + 3124 "10111011" // /* MW 4 */ + 3125 "10111100" // /* MW 3 */ + 3126 "11101011" // /* MW 2 */ + 3127 "01111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first + 3128 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3129 "00000100" // /* MW 5 */ + 3130 "10011011" // /* MW 4 */ + 3131 "10110011" // /* MW 3 */ + 3132 "10111110" // /* MW 2 */ + 3133 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first + 3134 "10000100" // JNZ r25, #3216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3216 delay_slots=5 */ + 3135 "00000001" // /* MW 5 */ + 3136 "01000000" // /* MW 4 */ + 3137 "01001000" // /* MW 3 */ + 3138 "00000110" // /* MW 2 */ + 3139 "11001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first +.delay_slot + 3140 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3141 "01000111" // /* MW 3 */ + 3142 "10110110" // /* MW 2 */ + 3143 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first +.delay_slot + 3144 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3145 "01000100" // /* MW 3 */ + 3146 "01110001" // /* MW 2 */ + 3147 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.delay_slot + 3148 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3149 "01011101" // /* MW 3 */ + 3150 "11111100" // /* MW 2 */ + 3151 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 first +.delay_slot + 3152 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3153 "01001101" // /* MW 3 */ + 3154 "11101000" // /* MW 2 */ + 3155 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.delay_slot + 3156 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3157 "00110010" // /* MW 3 */ + 3158 "10001100" // /* MW 2 */ + 3159 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 + 3160 "10000100" // JNZ r27, #3216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3216 delay_slots=5 */ + 3161 "00000001" // /* MW 5 */ + 3162 "01000000" // /* MW 4 */ + 3163 "01001000" // /* MW 3 */ + 3164 "00000110" // /* MW 2 */ + 3165 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3173 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3175 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3176 "10111010" // MOVA r15, #1; J #3264 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3264 delay_slots=5 */ + 3177 "00100000" // /* MW 9 */ + 3178 "00000000" // /* MW 8 */ + 3179 "00000000" // /* MW 7 */ + 3180 "10011000" // /* MW 6 */ + 3181 "00000001" // /* MW 5 */ + 3182 "00000000" // /* MW 4 */ + 3183 "00000000" // /* MW 3 */ + 3184 "00101111" // /* MW 2 */ + 3185 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3186 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3187 "01011000" // /* MW 9 */ + 3188 "00001100" // /* MW 8 */ + 3189 "10001000" // /* MW 7 */ + 3190 "10101011" // /* MW 6 */ + 3191 "01010111" // /* MW 5 */ + 3192 "00111110" // /* MW 4 */ + 3193 "00000000" // /* MW 3 */ + 3194 "00011010" // /* MW 2 */ + 3195 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3196 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3197 "01000001" // /* MW 5 */ + 3198 "00100000" // /* MW 4 */ + 3199 "00100001" // /* MW 3 */ + 3200 "01000010" // /* MW 2 */ + 3201 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.delay_slot + 3202 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3203 "00001101" // /* MW 3 */ + 3204 "00011010" // /* MW 2 */ + 3205 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.delay_slot + 3206 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3207 "00111101" // /* MW 3 */ + 3208 "00001110" // /* MW 2 */ + 3209 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3210 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3211 "11100010" // /* MW 5 */ + 3212 "10010001" // /* MW 4 */ + 3213 "11111111" // /* MW 3 */ + 3214 "00101100" // /* MW 2 */ + 3215 "00000000" // /* MW 1 */ +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3216 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3217 "01011000" // /* MW 11 */ + 3218 "11111100" // /* MW 10 */ + 3219 "10001111" // /* MW 9 */ + 3220 "10001000" // /* MW 8 */ + 3221 "01010000" // /* MW 7 */ + 3222 "00000001" // /* MW 6 */ + 3223 "00001011" // /* MW 5 */ + 3224 "10000010" // /* MW 4 */ + 3225 "10000001" // /* MW 3 */ + 3226 "00000010" // /* MW 2 */ + 3227 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3228 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3229 "01011000" // /* MW 9 */ + 3230 "00001100" // /* MW 8 */ + 3231 "10001000" // /* MW 7 */ + 3232 "00001011" // /* MW 6 */ + 3233 "10100000" // /* MW 5 */ + 3234 "00000001" // /* MW 4 */ + 3235 "11100000" // /* MW 3 */ + 3236 "00011000" // /* MW 2 */ + 3237 "00100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3238 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3239 "01011000" // /* MW 9 */ + 3240 "00000001" // /* MW 8 */ + 3241 "11101000" // /* MW 7 */ + 3242 "10101001" // /* MW 6 */ + 3243 "01010111" // /* MW 5 */ + 3244 "00111110" // /* MW 4 */ + 3245 "00000000" // /* MW 3 */ + 3246 "00000010" // /* MW 2 */ + 3247 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 87 + 3248 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3249 "00000000" // /* MW 15 */ + 3250 "00000000" // /* MW 14 */ + 3251 "01011000" // /* MW 13 */ + 3252 "00000011" // /* MW 12 */ + 3253 "10101000" // /* MW 11 */ + 3254 "11101001" // /* MW 10 */ + 3255 "01110001" // /* MW 9 */ + 3256 "00000000" // /* MW 8 */ + 3257 "01011011" // /* MW 7 */ + 3258 "00000001" // /* MW 6 */ + 3259 "00100000" // /* MW 5 */ + 3260 "00000000" // /* MW 4 */ + 3261 "11110000" // /* MW 3 */ + 3262 "00101100" // /* MW 2 */ + 3263 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.src_ref 2 "conv2d_bf16_params.h" 495 68 first +.src_ref 2 "conv2d_bf16_params.h" 495 112 +.src_ref 2 "conv2d_bf16_params.h" 682 38 + 3264 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3265 "01011000" // /* MW 9 */ + 3266 "00111100" // /* MW 8 */ + 3267 "00000000" // /* MW 7 */ + 3268 "00111100" // /* MW 6 */ + 3269 "10110011" // /* MW 5 */ + 3270 "00011011" // /* MW 4 */ + 3271 "01010000" // /* MW 3 */ + 3272 "11000101" // /* MW 2 */ + 3273 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 first +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 495 112 + 3274 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3275 "01011000" // /* MW 9 */ + 3276 "11001101" // /* MW 8 */ + 3277 "10000111" // /* MW 7 */ + 3278 "00010010" // /* MW 6 */ + 3279 "00101101" // /* MW 5 */ + 3280 "00000011" // /* MW 4 */ + 3281 "01010000" // /* MW 3 */ + 3282 "00000101" // /* MW 2 */ + 3283 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 first +.src_ref 2 "conv2d_bf16_params.h" 496 68 +.src_ref 2 "conv2d_bf16_params.h" 504 35 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 578 47 + 3284 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3285 "01011000" // /* MW 9 */ + 3286 "00110111" // /* MW 8 */ + 3287 "10000000" // /* MW 7 */ + 3288 "10010001" // /* MW 6 */ + 3289 "11011010" // /* MW 5 */ + 3290 "00111011" // /* MW 4 */ + 3291 "00000000" // /* MW 3 */ + 3292 "01010111" // /* MW 2 */ + 3293 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.src_ref 2 "conv2d_bf16_params.h" 504 45 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 +.src_ref 2 "conv2d_bf16_params.h" 519 42 +.src_ref 2 "conv2d_bf16_params.h" 700 34 + 3294 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3295 "01011000" // /* MW 9 */ + 3296 "10111100" // /* MW 8 */ + 3297 "00000111" // /* MW 7 */ + 3298 "00111101" // /* MW 6 */ + 3299 "10110000" // /* MW 5 */ + 3300 "00101011" // /* MW 4 */ + 3301 "00000000" // /* MW 3 */ + 3302 "00000011" // /* MW 2 */ + 3303 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 492 25 first +.src_ref 2 "conv2d_bf16_params.h" 497 46 +.src_ref 2 "conv2d_bf16_params.h" 509 50 + 3304 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3305 "01011000" // /* MW 9 */ + 3306 "01110000" // /* MW 8 */ + 3307 "10000000" // /* MW 7 */ + 3308 "01101100" // /* MW 6 */ + 3309 "01101100" // /* MW 5 */ + 3310 "00011111" // /* MW 4 */ + 3311 "00000000" // /* MW 3 */ + 3312 "00010000" // /* MW 2 */ + 3313 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 520 34 first + 3314 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3315 "01011101" // /* MW 5 */ + 3316 "00011110" // /* MW 4 */ + 3317 "00001000" // /* MW 3 */ + 3318 "10010010" // /* MW 2 */ + 3319 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 +.src_ref 2 "conv2d_bf16_params.h" 520 48 + 3320 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3321 "01011001" // /* MW 9 */ + 3322 "00110001" // /* MW 8 */ + 3323 "10000000" // /* MW 7 */ + 3324 "01101111" // /* MW 6 */ + 3325 "01100001" // /* MW 5 */ + 3326 "00101101" // /* MW 4 */ + 3327 "10110000" // /* MW 3 */ + 3328 "01011010" // /* MW 2 */ + 3329 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 +.src_ref 2 "conv2d_bf16_params.h" 507 42 first + 3330 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3331 "00000101" // /* MW 5 */ + 3332 "00011111" // /* MW 4 */ + 3333 "00111100" // /* MW 3 */ + 3334 "10111010" // /* MW 2 */ + 3335 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 99 first + 3336 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3337 "00010001" // /* MW 3 */ + 3338 "11000010" // /* MW 2 */ + 3339 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 610 64 +.src_ref 2 "conv2d_bf16_params.h" 709 96 + 3340 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3341 "00011101" // /* MW 5 */ + 3342 "10100000" // /* MW 4 */ + 3343 "11110000" // /* MW 3 */ + 3344 "11000011" // /* MW 2 */ + 3345 "10001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first + 3346 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "00100001" // /* MW 3 */ + 3348 "10100011" // /* MW 2 */ + 3349 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 first + 3350 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "00011101" // /* MW 3 */ + 3352 "11111110" // /* MW 2 */ + 3353 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 506 48 +.src_ref 2 "conv2d_bf16_params.h" 519 42 first + 3354 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3355 "01011001" // /* MW 9 */ + 3356 "01010111" // /* MW 8 */ + 3357 "10000000" // /* MW 7 */ + 3358 "11101110" // /* MW 6 */ + 3359 "11110001" // /* MW 5 */ + 3360 "00111011" // /* MW 4 */ + 3361 "00110000" // /* MW 3 */ + 3362 "01111110" // /* MW 2 */ + 3363 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 68 first +.src_ref 2 "conv2d_bf16_params.h" 504 35 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 + 3364 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3365 "01011000" // /* MW 9 */ + 3366 "10110010" // /* MW 8 */ + 3367 "10000111" // /* MW 7 */ + 3368 "00111101" // /* MW 6 */ + 3369 "00110000" // /* MW 5 */ + 3370 "00101111" // /* MW 4 */ + 3371 "01010000" // /* MW 3 */ + 3372 "01010101" // /* MW 2 */ + 3373 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3374 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "01111011" // /* MW 5 */ + 3376 "11001100" // /* MW 4 */ + 3377 "10111001" // /* MW 3 */ + 3378 "01001110" // /* MW 2 */ + 3379 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 first +.src_ref 2 "conv2d_bf16_params.h" 520 19 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3380 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3381 "01011000" // /* MW 9 */ + 3382 "11110110" // /* MW 8 */ + 3383 "00000000" // /* MW 7 */ + 3384 "00101101" // /* MW 6 */ + 3385 "01101011" // /* MW 5 */ + 3386 "00111111" // /* MW 4 */ + 3387 "11100000" // /* MW 3 */ + 3388 "01010100" // /* MW 2 */ + 3389 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 46 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3390 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3391 "01011000" // /* MW 9 */ + 3392 "01010000" // /* MW 8 */ + 3393 "10000111" // /* MW 7 */ + 3394 "00010000" // /* MW 6 */ + 3395 "00111000" // /* MW 5 */ + 3396 "00100111" // /* MW 4 */ + 3397 "01010000" // /* MW 3 */ + 3398 "01000011" // /* MW 2 */ + 3399 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3400 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3401 "01100111" // /* MW 3 */ + 3402 "11111110" // /* MW 2 */ + 3403 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3404 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3405 "01100111" // /* MW 3 */ + 3406 "11100000" // /* MW 2 */ + 3407 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3408 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3409 "00000101" // /* MW 3 */ + 3410 "11110111" // /* MW 2 */ + 3411 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3412 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3413 "01010100" // /* MW 3 */ + 3414 "11101011" // /* MW 2 */ + 3415 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3416 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3417 "01100001" // /* MW 5 */ + 3418 "10100000" // /* MW 4 */ + 3419 "11011000" // /* MW 3 */ + 3420 "10100011" // /* MW 2 */ + 3421 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 first +.src_ref 2 "conv2d_bf16_params.h" 507 34 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3422 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3423 "01001001" // /* MW 9 */ + 3424 "10000000" // /* MW 8 */ + 3425 "11001111" // /* MW 7 */ + 3426 "01101111" // /* MW 6 */ + 3427 "00101001" // /* MW 5 */ + 3428 "00011111" // /* MW 4 */ + 3429 "10110000" // /* MW 3 */ + 3430 "01000010" // /* MW 2 */ + 3431 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 first + 3432 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3433 "00111011" // /* MW 5 */ + 3434 "01000110" // /* MW 4 */ + 3435 "00111111" // /* MW 3 */ + 3436 "11101010" // /* MW 2 */ + 3437 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3438 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3439 "01010000" // /* MW 7 */ + 3440 "10101000" // /* MW 6 */ + 3441 "00000000" // /* MW 5 */ + 3442 "00000010" // /* MW 4 */ + 3443 "00110000" // /* MW 3 */ + 3444 "01101010" // /* MW 2 */ + 3445 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 first +.src_ref 2 "conv2d_bf16_params.h" 509 19 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3446 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3447 "01111000" // /* MW 11 */ + 3448 "11001110" // /* MW 10 */ + 3449 "00001101" // /* MW 9 */ + 3450 "00101100" // /* MW 8 */ + 3451 "10110000" // /* MW 7 */ + 3452 "10100111" // /* MW 6 */ + 3453 "11110101" // /* MW 5 */ + 3454 "11100111" // /* MW 4 */ + 3455 "01010111" // /* MW 3 */ + 3456 "01001001" // /* MW 2 */ + 3457 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 19 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3458 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3459 "00010101" // /* MW 3 */ + 3460 "11100011" // /* MW 2 */ + 3461 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3462 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3463 "10000001" // /* MW 3 */ + 3464 "10110111" // /* MW 2 */ + 3465 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 47 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3466 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3467 "10010000" // /* MW 3 */ + 3468 "10110000" // /* MW 2 */ + 3469 "00010100" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ + 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3473 "00000000" // /* MW 1 */ + 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3475 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 57 first + 3476 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "00100001" // /* MW 3 */ + 3478 "11100101" // /* MW 2 */ + 3479 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 + 3480 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3481 "01010001" // /* MW 3 */ + 3482 "11001010" // /* MW 2 */ + 3483 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 48 first + 3484 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3485 "01001010" // /* MW 3 */ + 3486 "10101010" // /* MW 2 */ + 3487 "00000010" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ + 3492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3493 "00000000" // /* MW 1 */ + 3494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3495 "00000000" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 62 + 3500 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3501 "11100001" // /* MW 3 */ + 3502 "10100100" // /* MW 2 */ + 3503 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 + 3504 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3505 "10111110" // /* MW 3 */ + 3506 "10100101" // /* MW 2 */ + 3507 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 first + 3508 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3509 "00101101" // /* MW 3 */ + 3510 "10100100" // /* MW 2 */ + 3511 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3512 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3513 "00000000" // /* MW 5 */ + 3514 "10100000" // /* MW 4 */ + 3515 "00001101" // /* MW 3 */ + 3516 "00000001" // /* MW 2 */ + 3517 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3518 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3519 "00100000" // /* MW 3 */ + 3520 "11100101" // /* MW 2 */ + 3521 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3522 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3523 "00000000" // /* MW 5 */ + 3524 "10100000" // /* MW 4 */ + 3525 "00001101" // /* MW 3 */ + 3526 "11111111" // /* MW 2 */ + 3527 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 642 99 + 3528 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3529 "11000001" // /* MW 5 */ + 3530 "00111111" // /* MW 4 */ + 3531 "10011001" // /* MW 3 */ + 3532 "11100100" // /* MW 2 */ + 3533 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 19 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3534 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3535 "11100001" // /* MW 5 */ + 3536 "10111111" // /* MW 4 */ + 3537 "10111000" // /* MW 3 */ + 3538 "11100010" // /* MW 2 */ + 3539 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 512 64 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 first + 3540 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3541 "00111011" // /* MW 5 */ + 3542 "11001110" // /* MW 4 */ + 3543 "00111001" // /* MW 3 */ + 3544 "11101110" // /* MW 2 */ + 3545 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3546 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3547 "00110001" // /* MW 3 */ + 3548 "10110101" // /* MW 2 */ + 3549 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3550 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3551 "10101101" // /* MW 3 */ + 3552 "00101001" // /* MW 2 */ + 3553 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 520 19 first + 3554 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3555 "01100101" // /* MW 3 */ + 3556 "10110101" // /* MW 2 */ + 3557 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 36 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 first + 3558 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3559 "00100000" // /* MW 5 */ + 3560 "01101001" // /* MW 4 */ + 3561 "00111111" // /* MW 3 */ + 3562 "01101010" // /* MW 2 */ + 3563 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 65 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 first +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3564 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3565 "10101000" // /* MW 9 */ + 3566 "10101000" // /* MW 8 */ + 3567 "11001110" // /* MW 7 */ + 3568 "01101111" // /* MW 6 */ + 3569 "01001001" // /* MW 5 */ + 3570 "00110111" // /* MW 4 */ + 3571 "01010000" // /* MW 3 */ + 3572 "01100101" // /* MW 2 */ + 3573 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3574 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3575 "11111001" // /* MW 5 */ + 3576 "10100011" // /* MW 4 */ + 3577 "10111000" // /* MW 3 */ + 3578 "10100011" // /* MW 2 */ + 3579 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 45 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3580 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3581 "00011111" // /* MW 5 */ + 3582 "01101011" // /* MW 4 */ + 3583 "11101101" // /* MW 3 */ + 3584 "01100100" // /* MW 2 */ + 3585 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3587 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3589 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3591 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3593 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 48 first +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3594 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3595 "11001010" // /* MW 5 */ + 3596 "10110101" // /* MW 4 */ + 3597 "10111101" // /* MW 3 */ + 3598 "01011111" // /* MW 2 */ + 3599 "10000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3600 "10000100" // JNZ r31, #3728 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3728 delay_slots=5 */ + 3601 "00000001" // /* MW 5 */ + 3602 "01000000" // /* MW 4 */ + 3603 "01001000" // /* MW 3 */ + 3604 "00000111" // /* MW 2 */ + 3605 "11111000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 76 first +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3606 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3607 "11110010" // /* MW 5 */ + 3608 "10111011" // /* MW 4 */ + 3609 "11101101" // /* MW 3 */ + 3610 "01000001" // /* MW 2 */ + 3611 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3612 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3613 "01011101" // /* MW 3 */ + 3614 "11101011" // /* MW 2 */ + 3615 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 first +.delay_slot + 3616 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00010100" // /* MW 3 */ + 3618 "01100011" // /* MW 2 */ + 3619 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.src_ref 2 "conv2d_bf16_params.h" 539 139 first +.src_ref 2 "conv2d_bf16_params.h" 555 59 +.src_ref 2 "conv2d_bf16_params.h" 559 59 +.src_ref 2 "conv2d_bf16_params.h" 700 17 +.delay_slot + 3620 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3621 "01011001" // /* MW 9 */ + 3622 "00000001" // /* MW 8 */ + 3623 "00101000" // /* MW 7 */ + 3624 "00111110" // /* MW 6 */ + 3625 "10111110" // /* MW 5 */ + 3626 "00001101" // /* MW 4 */ + 3627 "00110000" // /* MW 3 */ + 3628 "01000110" // /* MW 2 */ + 3629 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3630 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "10011100" // /* MW 3 */ + 3632 "10011011" // /* MW 2 */ + 3633 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3634 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3635 "10010001" // /* MW 3 */ + 3636 "11100011" // /* MW 2 */ + 3637 "00000111" // /* MW 1 */ + 3638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3639 "00000000" // /* MW 1 */ + 3640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3641 "00000000" // /* MW 1 */ + 3642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3643 "00000000" // /* MW 1 */ + 3644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3645 "00000000" // /* MW 1 */ + 3646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3647 "00000000" // /* MW 1 */ + 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3649 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3650 "10000100" // JNZ r28, #3728 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3728 delay_slots=5 */ + 3651 "00000001" // /* MW 5 */ + 3652 "01000000" // /* MW 4 */ + 3653 "01001000" // /* MW 3 */ + 3654 "00000111" // /* MW 2 */ + 3655 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3665 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3666 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3667 "01011000" // /* MW 9 */ + 3668 "01000000" // /* MW 8 */ + 3669 "00101000" // /* MW 7 */ + 3670 "10001011" // /* MW 6 */ + 3671 "00010000" // /* MW 5 */ + 3672 "00000001" // /* MW 4 */ + 3673 "00000000" // /* MW 3 */ + 3674 "10111100" // /* MW 2 */ + 3675 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3676 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3677 "11010010" // /* MW 3 */ + 3678 "01111110" // /* MW 2 */ + 3679 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3680 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3681 "01100111" // /* MW 3 */ + 3682 "01110110" // /* MW 2 */ + 3683 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3684 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3685 "00000001" // /* MW 5 */ + 3686 "10100000" // /* MW 4 */ + 3687 "01001111" // /* MW 3 */ + 3688 "00111000" // /* MW 2 */ + 3689 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 46 + 3690 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3691 "01010000" // /* MW 3 */ + 3692 "00110010" // /* MW 2 */ + 3693 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 44 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3694 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3695 "11101111" // /* MW 3 */ + 3696 "01111101" // /* MW 2 */ + 3697 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3698 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3699 "00111001" // /* MW 5 */ + 3700 "11000100" // /* MW 4 */ + 3701 "01011101" // /* MW 3 */ + 3702 "11100011" // /* MW 2 */ + 3703 "11001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3704 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3705 "10000010" // /* MW 3 */ + 3706 "11100011" // /* MW 2 */ + 3707 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 79 + 3708 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3709 "11101111" // /* MW 3 */ + 3710 "01100011" // /* MW 2 */ + 3711 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3712 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3713 "11000001" // /* MW 3 */ + 3714 "11111001" // /* MW 2 */ + 3715 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3716 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3717 "11001110" // /* MW 3 */ + 3718 "01100011" // /* MW 2 */ + 3719 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 55 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3720 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3721 "00011100" // /* MW 7 */ + 3722 "00000000" // /* MW 6 */ + 3723 "00000000" // /* MW 5 */ + 3724 "10000001" // /* MW 4 */ + 3725 "00010100" // /* MW 3 */ + 3726 "00100011" // /* MW 2 */ + 3727 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3728 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3729 "01111000" // /* MW 9 */ + 3730 "00001110" // /* MW 8 */ + 3731 "01110000" // /* MW 7 */ + 3732 "11101011" // /* MW 6 */ + 3733 "11000111" // /* MW 5 */ + 3734 "00111111" // /* MW 4 */ + 3735 "00000000" // /* MW 3 */ + 3736 "00011001" // /* MW 2 */ + 3737 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3738 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3739 "11000010" // /* MW 3 */ + 3740 "01111111" // /* MW 2 */ + 3741 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 34 first +.src_ref 2 "conv2d_bf16_params.h" 641 32 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3742 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3743 "10101000" // /* MW 9 */ + 3744 "01110100" // /* MW 8 */ + 3745 "01001111" // /* MW 7 */ + 3746 "10000011" // /* MW 6 */ + 3747 "00000100" // /* MW 5 */ + 3748 "00100001" // /* MW 4 */ + 3749 "00100000" // /* MW 3 */ + 3750 "01101110" // /* MW 2 */ + 3751 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 61 first +.src_ref 2 "conv2d_bf16_params.h" 640 16 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3752 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3753 "01011000" // /* MW 9 */ + 3754 "00001001" // /* MW 8 */ + 3755 "10101000" // /* MW 7 */ + 3756 "10000011" // /* MW 6 */ + 3757 "01000100" // /* MW 5 */ + 3758 "00101001" // /* MW 4 */ + 3759 "00000000" // /* MW 3 */ + 3760 "00011110" // /* MW 2 */ + 3761 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3762 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3763 "11100010" // /* MW 3 */ + 3764 "01110011" // /* MW 2 */ + 3765 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 47 first + 3766 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3767 "10001000" // /* MW 3 */ + 3768 "11111001" // /* MW 2 */ + 3769 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 640 16 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3770 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3771 "00111101" // /* MW 3 */ + 3772 "01111011" // /* MW 2 */ + 3773 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3774 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3775 "00010000" // /* MW 9 */ + 3776 "00000100" // /* MW 8 */ + 3777 "00001010" // /* MW 7 */ + 3778 "00000011" // /* MW 6 */ + 3779 "00000000" // /* MW 5 */ + 3780 "00000000" // /* MW 4 */ + 3781 "00100000" // /* MW 3 */ + 3782 "11011110" // /* MW 2 */ + 3783 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 641 44 first +.src_ref 2 "conv2d_bf16_params.h" 642 45 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3784 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3785 "11111111" // /* MW 5 */ + 3786 "00111010" // /* MW 4 */ + 3787 "10111111" // /* MW 3 */ + 3788 "11100111" // /* MW 2 */ + 3789 "11001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3790 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3791 "11100110" // /* MW 3 */ + 3792 "11001111" // /* MW 2 */ + 3793 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 55 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3794 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3795 "00101001" // /* MW 5 */ + 3796 "10101000" // /* MW 4 */ + 3797 "00001011" // /* MW 3 */ + 3798 "11010010" // /* MW 2 */ + 3799 "10110100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3800 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3801 "00000001" // /* MW 5 */ + 3802 "00100001" // /* MW 4 */ + 3803 "01001101" // /* MW 3 */ + 3804 "10110000" // /* MW 2 */ + 3805 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3806 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3807 "00111001" // /* MW 5 */ + 3808 "11000010" // /* MW 4 */ + 3809 "00011101" // /* MW 3 */ + 3810 "10110101" // /* MW 2 */ + 3811 "00110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 99 first + 3812 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3813 "00100100" // /* MW 3 */ + 3814 "11001111" // /* MW 2 */ + 3815 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3816 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3817 "01000001" // /* MW 5 */ + 3818 "10100110" // /* MW 4 */ + 3819 "01001101" // /* MW 3 */ + 3820 "11011110" // /* MW 2 */ + 3821 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3822 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3823 "01111101" // /* MW 5 */ + 3824 "00100000" // /* MW 4 */ + 3825 "01001001" // /* MW 3 */ + 3826 "00001000" // /* MW 2 */ + 3827 "00101001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 first + 3828 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3829 "00100100" // /* MW 3 */ + 3830 "11101111" // /* MW 2 */ + 3831 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 15 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3832 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3833 "01011000" // /* MW 9 */ + 3834 "01110000" // /* MW 8 */ + 3835 "01001111" // /* MW 7 */ + 3836 "01101110" // /* MW 6 */ + 3837 "01000010" // /* MW 5 */ + 3838 "00100000" // /* MW 4 */ + 3839 "00000000" // /* MW 3 */ + 3840 "00011110" // /* MW 2 */ + 3841 "11011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3842 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3843 "00100010" // /* MW 3 */ + 3844 "10111101" // /* MW 2 */ + 3845 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 85 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3846 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01011000" // /* MW 9 */ + 3848 "00100000" // /* MW 8 */ + 3849 "00001001" // /* MW 7 */ + 3850 "11111110" // /* MW 6 */ + 3851 "10101001" // /* MW 5 */ + 3852 "00101111" // /* MW 4 */ + 3853 "00000000" // /* MW 3 */ + 3854 "00000101" // /* MW 2 */ + 3855 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3856 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "01010010" // /* MW 3 */ + 3858 "00100000" // /* MW 2 */ + 3859 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 559 59 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3860 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3861 "11110010" // /* MW 5 */ + 3862 "10111101" // /* MW 4 */ + 3863 "11111101" // /* MW 3 */ + 3864 "00001001" // /* MW 2 */ + 3865 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 41 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3866 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3867 "00111001" // /* MW 5 */ + 3868 "11000100" // /* MW 4 */ + 3869 "10111101" // /* MW 3 */ + 3870 "00111111" // /* MW 2 */ + 3871 "10000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 117 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3872 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3873 "01011111" // /* MW 5 */ + 3874 "01101011" // /* MW 4 */ + 3875 "10110111" // /* MW 3 */ + 3876 "11101110" // /* MW 2 */ + 3877 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 700 34 first + 3878 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3879 "00110010" // /* MW 3 */ + 3880 "10000100" // /* MW 2 */ + 3881 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 52 first + 3882 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3883 "00001100" // /* MW 3 */ + 3884 "01111110" // /* MW 2 */ + 3885 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 92 first + 3886 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3887 "10001111" // /* MW 3 */ + 3888 "00110001" // /* MW 2 */ + 3889 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 36 first + 3890 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3891 "11000101" // /* MW 3 */ + 3892 "11110111" // /* MW 2 */ + 3893 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 64 first +.src_ref 2 "conv2d_bf16_params.h" 611 47 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 629 82 + 3894 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3895 "01011000" // /* MW 11 */ + 3896 "00000000" // /* MW 10 */ + 3897 "10001001" // /* MW 9 */ + 3898 "11101110" // /* MW 8 */ + 3899 "11000000" // /* MW 7 */ + 3900 "10110111" // /* MW 6 */ + 3901 "10010101" // /* MW 5 */ + 3902 "11101110" // /* MW 4 */ + 3903 "00000111" // /* MW 3 */ + 3904 "00000011" // /* MW 2 */ + 3905 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 649 41 + 3906 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3907 "00111001" // /* MW 5 */ + 3908 "10110111" // /* MW 4 */ + 3909 "01000000" // /* MW 3 */ + 3910 "00101000" // /* MW 2 */ + 3911 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3912 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3913 "00001100" // /* MW 5 */ + 3914 "10101100" // /* MW 4 */ + 3915 "00001111" // /* MW 3 */ + 3916 "00000000" // /* MW 2 */ + 3917 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 60 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first + 3918 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3919 "11001001" // /* MW 9 */ + 3920 "00111111" // /* MW 8 */ + 3921 "10001001" // /* MW 7 */ + 3922 "00111100" // /* MW 6 */ + 3923 "10110000" // /* MW 5 */ + 3924 "00011111" // /* MW 4 */ + 3925 "10110000" // /* MW 3 */ + 3926 "00010010" // /* MW 2 */ + 3927 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 53 +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 555 59 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3928 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3929 "11001000" // /* MW 11 */ + 3930 "01111111" // /* MW 10 */ + 3931 "11001100" // /* MW 9 */ + 3932 "10010010" // /* MW 8 */ + 3933 "11111111" // /* MW 7 */ + 3934 "10101101" // /* MW 6 */ + 3935 "10010001" // /* MW 5 */ + 3936 "00011100" // /* MW 4 */ + 3937 "10000010" // /* MW 3 */ + 3938 "10001100" // /* MW 2 */ + 3939 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 240 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3940 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3941 "01111001" // /* MW 9 */ + 3942 "10001110" // /* MW 8 */ + 3943 "01110000" // /* MW 7 */ + 3944 "11101111" // /* MW 6 */ + 3945 "01010111" // /* MW 5 */ + 3946 "00101011" // /* MW 4 */ + 3947 "00110000" // /* MW 3 */ + 3948 "01011010" // /* MW 2 */ + 3949 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 53 first +.src_ref 2 "conv2d_bf16_params.h" 559 53 +.src_ref 2 "conv2d_bf16_params.h" 621 140 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3950 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3951 "01011000" // /* MW 11 */ + 3952 "01011000" // /* MW 10 */ + 3953 "00000000" // /* MW 9 */ + 3954 "00001110" // /* MW 8 */ + 3955 "01001110" // /* MW 7 */ + 3956 "10101001" // /* MW 6 */ + 3957 "01010001" // /* MW 5 */ + 3958 "00011111" // /* MW 4 */ + 3959 "00000010" // /* MW 3 */ + 3960 "11011001" // /* MW 2 */ + 3961 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 53 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3962 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3963 "00100100" // /* MW 5 */ + 3964 "11100011" // /* MW 4 */ + 3965 "00111111" // /* MW 3 */ + 3966 "01100010" // /* MW 2 */ + 3967 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 47 first +.src_ref 2 "conv2d_bf16_params.h" 621 222 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3968 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3969 "01111000" // /* MW 11 */ + 3970 "10010000" // /* MW 10 */ + 3971 "01101001" // /* MW 9 */ + 3972 "00001111" // /* MW 8 */ + 3973 "11001110" // /* MW 7 */ + 3974 "10101011" // /* MW 6 */ + 3975 "10010001" // /* MW 5 */ + 3976 "11101111" // /* MW 4 */ + 3977 "00100010" // /* MW 3 */ + 3978 "01101110" // /* MW 2 */ + 3979 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 661 61 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3980 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3981 "11001000" // /* MW 9 */ + 3982 "11111111" // /* MW 8 */ + 3983 "10001100" // /* MW 7 */ + 3984 "00010010" // /* MW 6 */ + 3985 "11001110" // /* MW 5 */ + 3986 "00101001" // /* MW 4 */ + 3987 "00000000" // /* MW 3 */ + 3988 "11110011" // /* MW 2 */ + 3989 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 710 60 +.src_ref 2 "conv2d_bf16_params.h" 710 65 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3990 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3991 "01111000" // /* MW 9 */ + 3992 "10001110" // /* MW 8 */ + 3993 "01110000" // /* MW 7 */ + 3994 "01110011" // /* MW 6 */ + 3995 "11101010" // /* MW 5 */ + 3996 "00111011" // /* MW 4 */ + 3997 "00000000" // /* MW 3 */ + 3998 "00011101" // /* MW 2 */ + 3999 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4000 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4001 "01000100" // /* MW 5 */ + 4002 "11001010" // /* MW 4 */ + 4003 "00101110" // /* MW 3 */ + 4004 "11101110" // /* MW 2 */ + 4005 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 649 41 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4006 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4007 "01111000" // /* MW 9 */ + 4008 "10010000" // /* MW 8 */ + 4009 "01101001" // /* MW 7 */ + 4010 "10010011" // /* MW 6 */ + 4011 "00111001" // /* MW 5 */ + 4012 "00111111" // /* MW 4 */ + 4013 "00000000" // /* MW 3 */ + 4014 "00011111" // /* MW 2 */ + 4015 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4016 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4017 "00100010" // /* MW 3 */ + 4018 "11000100" // /* MW 2 */ + 4019 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 82 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4020 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4021 "01010001" // /* MW 3 */ + 4022 "11101011" // /* MW 2 */ + 4023 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 611 47 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4024 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4025 "01011001" // /* MW 9 */ + 4026 "11000000" // /* MW 8 */ + 4027 "01101111" // /* MW 7 */ + 4028 "10010000" // /* MW 6 */ + 4029 "00100111" // /* MW 5 */ + 4030 "00000100" // /* MW 4 */ + 4031 "00110000" // /* MW 3 */ + 4032 "10001110" // /* MW 2 */ + 4033 "01000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4034 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4035 "00110010" // /* MW 3 */ + 4036 "00111000" // /* MW 2 */ + 4037 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 643 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 4038 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4039 "01111111" // /* MW 3 */ + 4040 "11111110" // /* MW 2 */ + 4041 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 4042 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4043 "01100100" // /* MW 5 */ + 4044 "00001100" // /* MW 4 */ + 4045 "00101110" // /* MW 3 */ + 4046 "11000110" // /* MW 2 */ + 4047 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 47 first +.src_ref 2 "conv2d_bf16_params.h" 629 45 +.src_ref 2 "conv2d_bf16_params.h" 684 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4048 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4049 "01011001" // /* MW 9 */ + 4050 "00101000" // /* MW 8 */ + 4051 "10000000" // /* MW 7 */ + 4052 "01111100" // /* MW 6 */ + 4053 "00101001" // /* MW 5 */ + 4054 "00110101" // /* MW 4 */ + 4055 "00110000" // /* MW 3 */ + 4056 "10001110" // /* MW 2 */ + 4057 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 45 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4058 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4059 "11100100" // /* MW 5 */ + 4060 "00001101" // /* MW 4 */ + 4061 "00110001" // /* MW 3 */ + 4062 "01010110" // /* MW 2 */ + 4063 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 644 22 +.src_ref 2 "conv2d_bf16_params.h" 700 17 first +.src_ref 2 "conv2d_bf16_params.h" 705 50 +.src_ref 2 "conv2d_bf16_params.h" 705 61 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4064 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4065 "10101000" // /* MW 9 */ + 4066 "11111100" // /* MW 8 */ + 4067 "10101001" // /* MW 7 */ + 4068 "11111110" // /* MW 6 */ + 4069 "00111000" // /* MW 5 */ + 4070 "00000110" // /* MW 4 */ + 4071 "00100000" // /* MW 3 */ + 4072 "00000010" // /* MW 2 */ + 4073 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 38 first +.src_ref 2 "conv2d_bf16_params.h" 700 111 +.src_ref 2 "conv2d_bf16_params.h" 700 149 +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4074 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4075 "00000110" // /* MW 9 */ + 4076 "00000110" // /* MW 8 */ + 4077 "00000101" // /* MW 7 */ + 4078 "10000000" // /* MW 6 */ + 4079 "00010001" // /* MW 5 */ + 4080 "00011111" // /* MW 4 */ + 4081 "00100010" // /* MW 3 */ + 4082 "11000110" // /* MW 2 */ + 4083 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 +.src_ref 2 "conv2d_bf16_params.h" 649 38 first +.src_ref 2 "conv2d_bf16_params.h" 674 24 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4084 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4085 "00010001" // /* MW 9 */ + 4086 "11111000" // /* MW 8 */ + 4087 "01101111" // /* MW 7 */ + 4088 "00111110" // /* MW 6 */ + 4089 "00000000" // /* MW 5 */ + 4090 "00000000" // /* MW 4 */ + 4091 "00110000" // /* MW 3 */ + 4092 "11001110" // /* MW 2 */ + 4093 "01001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 first +.src_ref 2 "conv2d_bf16_params.h" 662 61 +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4094 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4095 "11001001" // /* MW 9 */ + 4096 "10111111" // /* MW 8 */ + 4097 "01001011" // /* MW 7 */ + 4098 "10100100" // /* MW 6 */ + 4099 "01001001" // /* MW 5 */ + 4100 "00111111" // /* MW 4 */ + 4101 "00110000" // /* MW 3 */ + 4102 "11010010" // /* MW 2 */ + 4103 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 663 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4104 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4105 "10011100" // /* MW 5 */ + 4106 "01010110" // /* MW 4 */ + 4107 "00110001" // /* MW 3 */ + 4108 "11000110" // /* MW 2 */ + 4109 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first + 4110 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4111 "10000001" // /* MW 5 */ + 4112 "01111010" // /* MW 4 */ + 4113 "00111111" // /* MW 3 */ + 4114 "10001010" // /* MW 2 */ + 4115 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 4116 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4117 "11100011" // /* MW 5 */ + 4118 "01110011" // /* MW 4 */ + 4119 "00111000" // /* MW 3 */ + 4120 "11111010" // /* MW 2 */ + 4121 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 4122 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4123 "01011001" // /* MW 9 */ + 4124 "00000000" // /* MW 8 */ + 4125 "01100000" // /* MW 7 */ + 4126 "00110000" // /* MW 6 */ + 4127 "11111000" // /* MW 5 */ + 4128 "00101101" // /* MW 4 */ + 4129 "00110000" // /* MW 3 */ + 4130 "11010110" // /* MW 2 */ + 4131 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4132 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4133 "11001001" // /* MW 9 */ + 4134 "01111111" // /* MW 8 */ + 4135 "00101100" // /* MW 7 */ + 4136 "01111110" // /* MW 6 */ + 4137 "00100000" // /* MW 5 */ + 4138 "00111110" // /* MW 4 */ + 4139 "00110000" // /* MW 3 */ + 4140 "10001100" // /* MW 2 */ + 4141 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 705 50 first +.src_ref 2 "conv2d_bf16_params.h" 705 61 first + 4142 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4143 "00001100" // /* MW 5 */ + 4144 "10111000" // /* MW 4 */ + 4145 "00111000" // /* MW 3 */ + 4146 "10001100" // /* MW 2 */ + 4147 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 +.src_ref 2 "conv2d_bf16_params.h" 674 24 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 682 38 +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.src_ref 2 "conv2d_bf16_params.h" 720 50 + 4148 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4149 "01011001" // /* MW 9 */ + 4150 "00000000" // /* MW 8 */ + 4151 "01001000" // /* MW 7 */ + 4152 "00100100" // /* MW 6 */ + 4153 "00000001" // /* MW 5 */ + 4154 "00100111" // /* MW 4 */ + 4155 "00110000" // /* MW 3 */ + 4156 "11011010" // /* MW 2 */ + 4157 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4158 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4159 "01111001" // /* MW 9 */ + 4160 "00001110" // /* MW 8 */ + 4161 "01110000" // /* MW 7 */ + 4162 "10001111" // /* MW 6 */ + 4163 "00011111" // /* MW 5 */ + 4164 "00000101" // /* MW 4 */ + 4165 "00110000" // /* MW 3 */ + 4166 "11110010" // /* MW 2 */ + 4167 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 707 61 first + 4168 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4169 "11011111" // /* MW 5 */ + 4170 "10111001" // /* MW 4 */ + 4171 "00111011" // /* MW 3 */ + 4172 "10010010" // /* MW 2 */ + 4173 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 674 22 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4174 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4175 "01011001" // /* MW 9 */ + 4176 "00000110" // /* MW 8 */ + 4177 "00001000" // /* MW 7 */ + 4178 "10001100" // /* MW 6 */ + 4179 "00001111" // /* MW 5 */ + 4180 "00100001" // /* MW 4 */ + 4181 "00110000" // /* MW 3 */ + 4182 "11000110" // /* MW 2 */ + 4183 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4184 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4185 "01111000" // /* MW 11 */ + 4186 "10010000" // /* MW 10 */ + 4187 "01101001" // /* MW 9 */ + 4188 "00010011" // /* MW 8 */ + 4189 "00000000" // /* MW 7 */ + 4190 "10011011" // /* MW 6 */ + 4191 "00010001" // /* MW 5 */ + 4192 "00011110" // /* MW 4 */ + 4193 "00000010" // /* MW 3 */ + 4194 "00000000" // /* MW 2 */ + 4195 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4196 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4197 "10100100" // /* MW 5 */ + 4198 "00010100" // /* MW 4 */ + 4199 "00100000" // /* MW 3 */ + 4200 "00010110" // /* MW 2 */ + 4201 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 691 56 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4202 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4203 "10101111" // /* MW 3 */ + 4204 "01100011" // /* MW 2 */ + 4205 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 709 71 first + 4206 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4207 "01011001" // /* MW 9 */ + 4208 "11001000" // /* MW 8 */ + 4209 "00000111" // /* MW 7 */ + 4210 "01101101" // /* MW 6 */ + 4211 "00001000" // /* MW 5 */ + 4212 "00000111" // /* MW 4 */ + 4213 "00110000" // /* MW 3 */ + 4214 "10001100" // /* MW 2 */ + 4215 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 706 23 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 4216 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4217 "11001000" // /* MW 11 */ + 4218 "11000001" // /* MW 10 */ + 4219 "10101000" // /* MW 9 */ + 4220 "11101101" // /* MW 8 */ + 4221 "11110111" // /* MW 7 */ + 4222 "10100000" // /* MW 6 */ + 4223 "01100001" // /* MW 5 */ + 4224 "01001000" // /* MW 4 */ + 4225 "00000010" // /* MW 3 */ + 4226 "01100011" // /* MW 2 */ + 4227 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 682 38 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4228 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4229 "01111011" // /* MW 5 */ + 4230 "11000000" // /* MW 4 */ + 4231 "00110110" // /* MW 3 */ + 4232 "00001010" // /* MW 2 */ + 4233 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 first + 4234 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4235 "01000001" // /* MW 5 */ + 4236 "10001110" // /* MW 4 */ + 4237 "00111000" // /* MW 3 */ + 4238 "11011010" // /* MW 2 */ + 4239 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 + 4240 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4241 "10011100" // /* MW 5 */ + 4242 "11001000" // /* MW 4 */ + 4243 "00111000" // /* MW 3 */ + 4244 "11001010" // /* MW 2 */ + 4245 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4246 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4247 "11011011" // /* MW 5 */ + 4248 "10010100" // /* MW 4 */ + 4249 "00110010" // /* MW 3 */ + 4250 "10010010" // /* MW 2 */ + 4251 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 706 28 first + 4252 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4253 "01011001" // /* MW 9 */ + 4254 "11111101" // /* MW 8 */ + 4255 "00001111" // /* MW 7 */ + 4256 "00000100" // /* MW 6 */ + 4257 "00111000" // /* MW 5 */ + 4258 "00011010" // /* MW 4 */ + 4259 "00110000" // /* MW 3 */ + 4260 "10001110" // /* MW 2 */ + 4261 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4262 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4263 "00001110" // /* MW 3 */ + 4264 "11000000" // /* MW 2 */ + 4265 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 707 66 first + 4266 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4267 "00011111" // /* MW 5 */ + 4268 "00010000" // /* MW 4 */ + 4269 "00110111" // /* MW 3 */ + 4270 "11001010" // /* MW 2 */ + 4271 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 709 96 first + 4272 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4273 "00111011" // /* MW 5 */ + 4274 "00001100" // /* MW 4 */ + 4275 "00110000" // /* MW 3 */ + 4276 "10001100" // /* MW 2 */ + 4277 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 709 90 + 4278 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4279 "00110001" // /* MW 9 */ + 4280 "11000110" // /* MW 8 */ + 4281 "00000011" // /* MW 7 */ + 4282 "10000000" // /* MW 6 */ + 4283 "01100001" // /* MW 5 */ + 4284 "00011100" // /* MW 4 */ + 4285 "00100010" // /* MW 3 */ + 4286 "10110110" // /* MW 2 */ + 4287 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 707 50 first +.src_ref 2 "conv2d_bf16_params.h" 708 59 +.src_ref 2 "conv2d_bf16_params.h" 710 60 first +.src_ref 2 "conv2d_bf16_params.h" 710 65 first + 4288 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4289 "11001000" // /* MW 11 */ + 4290 "00111111" // /* MW 10 */ + 4291 "00101000" // /* MW 9 */ + 4292 "00110000" // /* MW 8 */ + 4293 "01110000" // /* MW 7 */ + 4294 "10111010" // /* MW 6 */ + 4295 "10010001" // /* MW 5 */ + 4296 "00011100" // /* MW 4 */ + 4297 "00100010" // /* MW 3 */ + 4298 "00111010" // /* MW 2 */ + 4299 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 708 48 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4300 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4301 "10101111" // /* MW 9 */ + 4302 "01000001" // /* MW 8 */ + 4303 "00000001" // /* MW 7 */ + 4304 "10000000" // /* MW 6 */ + 4305 "00110001" // /* MW 5 */ + 4306 "00011100" // /* MW 4 */ + 4307 "00100010" // /* MW 3 */ + 4308 "10111110" // /* MW 2 */ + 4309 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 709 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first + 4310 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4311 "00000000" // /* MW 5 */ + 4312 "01010000" // /* MW 4 */ + 4313 "00110000" // /* MW 3 */ + 4314 "10001110" // /* MW 2 */ + 4315 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 710 50 first +.delay_slot + 4316 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4317 "11110001" // /* MW 3 */ + 4318 "01011100" // /* MW 2 */ + 4319 "00001010" // /* MW 1 */ +.delay_slot + 4320 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4321 "00010001" // /* MW 3 */ + 4322 "00011100" // /* MW 2 */ + 4323 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 first +.delay_slot + 4324 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4325 "01010001" // /* MW 3 */ + 4326 "00011100" // /* MW 2 */ + 4327 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.delay_slot + 4328 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4329 "01010001" // /* MW 3 */ + 4330 "00000100" // /* MW 2 */ + 4331 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 720 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first +.delay_slot + 4332 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4333 "01110001" // /* MW 9 */ + 4334 "00000000" // /* MW 8 */ + 4335 "00000000" // /* MW 7 */ + 4336 "00000000" // /* MW 6 */ + 4337 "11111110" // /* MW 5 */ + 4338 "00111111" // /* MW 4 */ + 4339 "00110000" // /* MW 3 */ + 4340 "10001010" // /* MW 2 */ +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + 4341 "01000010" // /* MW 1 */ +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 689 first +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 704 12 +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.function_start + 4352 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4353 "01111000" // /* MW 11 */ + 4354 "01100000" // /* MW 10 */ + 4355 "00001010" // /* MW 9 */ + 4356 "00001000" // /* MW 8 */ + 4357 "10000000" // /* MW 7 */ + 4358 "00000001" // /* MW 6 */ + 4359 "10001011" // /* MW 5 */ + 4360 "10000100" // /* MW 4 */ + 4361 "10000010" // /* MW 3 */ + 4362 "00000011" // /* MW 2 */ + 4363 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 526 11 +.src_ref 2 "conv2d_bf16.h" 698 28 first +.src_ref 2 "conv2d_bf16.h" 704 12 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 + 4364 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4365 "01100000" // /* MW 13 */ + 4366 "00001001" // /* MW 12 */ + 4367 "00100000" // /* MW 11 */ + 4368 "00100001" // /* MW 10 */ + 4369 "00000000" // /* MW 9 */ + 4370 "00110110" // /* MW 8 */ + 4371 "00000001" // /* MW 7 */ + 4372 "00110100" // /* MW 6 */ + 4373 "00101000" // /* MW 5 */ + 4374 "00101000" // /* MW 4 */ + 4375 "10001000" // /* MW 3 */ + 4376 "00000110" // /* MW 2 */ + 4377 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 702 37 + 4378 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4379 "00010000" // /* MW 9 */ + 4380 "00110010" // /* MW 8 */ + 4381 "00110010" // /* MW 7 */ + 4382 "11110010" // /* MW 6 */ + 4383 "00000001" // /* MW 5 */ + 4384 "00000000" // /* MW 4 */ + 4385 "11010000" // /* MW 3 */ + 4386 "10010100" // /* MW 2 */ + 4387 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 43 +.src_ref 2 "conv2d_bf16.h" 702 4 first + 4388 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4496 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4389 "00010000" // /* MW 9 */ + 4390 "11001000" // /* MW 8 */ + 4391 "01111000" // /* MW 7 */ + 4392 "00000100" // /* MW 6 */ + 4393 "00000000" // /* MW 5 */ + 4394 "00000000" // /* MW 4 */ + 4395 "11010000" // /* MW 3 */ + 4396 "10010000" // /* MW 2 */ + 4397 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 699 43 first +.src_ref 2 "conv2d_bf16.h" 702 4 + 4398 "10111010" // LDA m0, [p3]; MOVXM le, #4544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4399 "00010000" // /* MW 9 */ + 4400 "11100000" // /* MW 8 */ + 4401 "10111000" // /* MW 7 */ + 4402 "00000101" // /* MW 6 */ + 4403 "00000000" // /* MW 5 */ + 4404 "00000000" // /* MW 4 */ + 4405 "11010000" // /* MW 3 */ + 4406 "10000000" // /* MW 2 */ + 4407 "01100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 702 37 first + 4408 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4409 "00000001" // /* MW 5 */ + 4410 "00000000" // /* MW 4 */ + 4411 "11010001" // /* MW 3 */ + 4412 "10000010" // /* MW 2 */ + 4413 "01111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 37 + 4414 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4415 "00100010" // /* MW 3 */ + 4416 "00000100" // /* MW 2 */ + 4417 "00000100" // /* MW 1 */ + 4418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4419 "00000000" // /* MW 1 */ + 4420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4421 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 705 66 first + 4422 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4423 "00000001" // /* MW 5 */ + 4424 "10000101" // /* MW 4 */ + 4425 "10000000" // /* MW 3 */ + 4426 "00001010" // /* MW 2 */ + 4427 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first + 4428 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4429 "00010100" // /* MW 3 */ + 4430 "00110000" // /* MW 2 */ + 4431 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4432 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4433 "00010100" // /* MW 3 */ + 4434 "00010100" // /* MW 2 */ + 4435 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 4 first +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4436 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4437 "11111101" // /* MW 5 */ + 4438 "11100000" // /* MW 4 */ + 4439 "10001010" // /* MW 3 */ + 4440 "00001010" // /* MW 2 */ + 4441 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4442 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4443 "00000000" // /* MW 5 */ + 4444 "11110101" // /* MW 4 */ + 4445 "10000000" // /* MW 3 */ + 4446 "00000010" // /* MW 2 */ + 4447 "11000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4448 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "00010100" // /* MW 3 */ + 4450 "00010100" // /* MW 2 */ + 4451 "00111100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4453 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4454 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4455 "01111110" // /* MW 9 */ + 4456 "10100101" // /* MW 8 */ + 4457 "00000001" // /* MW 7 */ + 4458 "00000000" // /* MW 6 */ + 4459 "01010100" // /* MW 5 */ + 4460 "00000000" // /* MW 4 */ + 4461 "11110000" // /* MW 3 */ + 4462 "00101100" // /* MW 2 */ + 4463 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4464 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4465 "00000000" // /* MW 15 */ + 4466 "00000000" // /* MW 14 */ + 4467 "01111000" // /* MW 13 */ + 4468 "11000101" // /* MW 12 */ + 4469 "00000001" // /* MW 11 */ + 4470 "00000000" // /* MW 10 */ + 4471 "00000000" // /* MW 9 */ + 4472 "00000000" // /* MW 8 */ + 4473 "01011011" // /* MW 7 */ + 4474 "00000001" // /* MW 6 */ + 4475 "00101000" // /* MW 5 */ + 4476 "01100000" // /* MW 4 */ + 4477 "11111100" // /* MW 3 */ + 4478 "00101100" // /* MW 2 */ + 4479 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4480 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4481 "00000000" // /* MW 15 */ + 4482 "00000000" // /* MW 14 */ + 4483 "01111000" // /* MW 13 */ + 4484 "11000101" // /* MW 12 */ + 4485 "01000000" // /* MW 11 */ + 4486 "00000000" // /* MW 10 */ + 4487 "00000000" // /* MW 9 */ + 4488 "00000000" // /* MW 8 */ + 4489 "01011011" // /* MW 7 */ + 4490 "00000001" // /* MW 6 */ + 4491 "00100000" // /* MW 5 */ + 4492 "00000000" // /* MW 4 */ + 4493 "11110000" // /* MW 3 */ + 4494 "00101100" // /* MW 2 */ + 4495 "00000000" // /* MW 1 */ +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.src_ref 2 "conv2d_bf16.h" 704 12 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4496 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4497 "00000000" // /* MW 15 */ + 4498 "00000000" // /* MW 14 */ + 4499 "01111000" // /* MW 13 */ + 4500 "10100101" // /* MW 12 */ + 4501 "00000001" // /* MW 11 */ + 4502 "00000000" // /* MW 10 */ + 4503 "00000000" // /* MW 9 */ + 4504 "00000000" // /* MW 8 */ + 4505 "01011011" // /* MW 7 */ + 4506 "00000001" // /* MW 6 */ + 4507 "00101000" // /* MW 5 */ + 4508 "00101000" // /* MW 4 */ + 4509 "11111000" // /* MW 3 */ + 4510 "00101100" // /* MW 2 */ + 4511 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4512 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4513 "00000000" // /* MW 15 */ + 4514 "00000000" // /* MW 14 */ + 4515 "01111000" // /* MW 13 */ + 4516 "10100101" // /* MW 12 */ + 4517 "00000001" // /* MW 11 */ + 4518 "00000000" // /* MW 10 */ + 4519 "00000000" // /* MW 9 */ + 4520 "00000000" // /* MW 8 */ + 4521 "00000011" // /* MW 7 */ + 4522 "10000000" // /* MW 6 */ + 4523 "10101101" // /* MW 5 */ + 4524 "00000000" // /* MW 4 */ + 4525 "11110000" // /* MW 3 */ + 4526 "00101100" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4529 "00000000" // /* MW 15 */ + 4530 "00000000" // /* MW 14 */ + 4531 "01111000" // /* MW 13 */ + 4532 "11000101" // /* MW 12 */ + 4533 "00000001" // /* MW 11 */ + 4534 "00000000" // /* MW 10 */ + 4535 "00000000" // /* MW 9 */ + 4536 "00000000" // /* MW 8 */ + 4537 "00000011" // /* MW 7 */ + 4538 "00000000" // /* MW 6 */ + 4539 "00101001" // /* MW 5 */ + 4540 "01100000" // /* MW 4 */ + 4541 "11111100" // /* MW 3 */ + 4542 "00101100" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 3 "utils.h" 531 4 first +.src_ref 2 "conv2d_bf16.h" 706 18 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4544 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4545 "00000000" // /* MW 15 */ + 4546 "00000000" // /* MW 14 */ + 4547 "01111000" // /* MW 13 */ + 4548 "11000101" // /* MW 12 */ + 4549 "01000000" // /* MW 11 */ + 4550 "00000000" // /* MW 10 */ + 4551 "00000000" // /* MW 9 */ + 4552 "00000000" // /* MW 8 */ + 4553 "00000011" // /* MW 7 */ + 4554 "00000000" // /* MW 6 */ + 4555 "00100011" // /* MW 5 */ + 4556 "00000000" // /* MW 4 */ + 4557 "11110000" // /* MW 3 */ + 4558 "00101100" // /* MW 2 */ + 4559 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4561 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4562 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4563 "00000011" // /* MW 3 */ + 4564 "10000000" // /* MW 2 */ + 4565 "00001101" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4566 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4567 "01110000" // /* MW 7 */ + 4568 "11000101" // /* MW 6 */ + 4569 "00000001" // /* MW 5 */ + 4570 "00000000" // /* MW 4 */ + 4571 "01100000" // /* MW 3 */ + 4572 "00000000" // /* MW 2 */ + 4573 "00100000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4574 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4575 "10001010" // /* MW 3 */ + 4576 "10000001" // /* MW 2 */ + 4577 "00011000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4578 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4579 "00000011" // /* MW 3 */ + 4580 "00000000" // /* MW 2 */ + 4581 "00001011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first + 4582 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4583 "01110000" // /* MW 7 */ + 4584 "11000101" // /* MW 6 */ + 4585 "00000001" // /* MW 5 */ + 4586 "00000000" // /* MW 4 */ + 4587 "01100000" // /* MW 3 */ + 4588 "00000000" // /* MW 2 */ + 4589 "10110000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.src_ref 2 "conv2d_bf16.h" 708 12 first + 4590 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4591 "01110000" // /* MW 7 */ + 4592 "11000101" // /* MW 6 */ + 4593 "01000000" // /* MW 5 */ + 4594 "00000000" // /* MW 4 */ + 4595 "01100000" // /* MW 3 */ + 4596 "00000000" // /* MW 2 */ + 4597 "00100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4598 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4599 "00000011" // /* MW 3 */ + 4600 "00000000" // /* MW 2 */ + 4601 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.src_ref 2 "conv2d_bf16.h" 723 first + 4602 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4603 "00000000" // /* MW 5 */ + 4604 "01010000" // /* MW 4 */ + 4605 "01100000" // /* MW 3 */ + 4606 "00000000" // /* MW 2 */ + 4607 "10110000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 708 12 first +.delay_slot + 4608 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4609 "00000011" // /* MW 3 */ + 4610 "00000000" // /* MW 2 */ + 4611 "00001001" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first +.delay_slot + 4612 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4613 "00000011" // /* MW 3 */ + 4614 "00000000" // /* MW 2 */ + 4615 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4617 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + 4621 "00000000" // /* MW 1 */ +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.function_start + 4624 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4625 "01100000" // /* MW 13 */ + 4626 "00010001" // /* MW 12 */ + 4627 "10010001" // /* MW 11 */ + 4628 "00001110" // /* MW 10 */ + 4629 "00000000" // /* MW 9 */ + 4630 "00000000" // /* MW 8 */ + 4631 "10000000" // /* MW 7 */ + 4632 "00000000" // /* MW 6 */ + 4633 "00100000" // /* MW 5 */ + 4634 "00111111" // /* MW 4 */ + 4635 "10000110" // /* MW 3 */ + 4636 "11100000" // /* MW 2 */ + 4637 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 241 95 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4638 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4639 "01111000" // /* MW 11 */ + 4640 "01100000" // /* MW 10 */ + 4641 "00101011" // /* MW 9 */ + 4642 "00001010" // /* MW 8 */ + 4643 "11000101" // /* MW 7 */ + 4644 "10111111" // /* MW 6 */ + 4645 "10010101" // /* MW 5 */ + 4646 "11110001" // /* MW 4 */ + 4647 "00000111" // /* MW 3 */ + 4648 "01110011" // /* MW 2 */ + 4649 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 94 +.src_ref 2 "conv2d_bf16_params.h" 242 100 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 245 28 +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4650 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4651 "00001000" // /* MW 11 */ + 4652 "01000111" // /* MW 10 */ + 4653 "00110100" // /* MW 9 */ + 4654 "00101001" // /* MW 8 */ + 4655 "00010000" // /* MW 7 */ + 4656 "10000001" // /* MW 6 */ + 4657 "00110101" // /* MW 5 */ + 4658 "11011010" // /* MW 4 */ + 4659 "00000111" // /* MW 3 */ + 4660 "00011001" // /* MW 2 */ + 4661 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 240 68 first + 4662 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4663 "00010000" // /* MW 11 */ + 4664 "00000000" // /* MW 10 */ + 4665 "10101000" // /* MW 9 */ + 4666 "00000011" // /* MW 8 */ + 4667 "01000000" // /* MW 7 */ + 4668 "10000000" // /* MW 6 */ + 4669 "00110101" // /* MW 5 */ + 4670 "11110101" // /* MW 4 */ + 4671 "11010111" // /* MW 3 */ + 4672 "11001010" // /* MW 2 */ + 4673 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.src_ref 2 "conv2d_bf16_params.h" 245 20 + 4674 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4675 "10010000" // /* MW 11 */ + 4676 "11111111" // /* MW 10 */ + 4677 "11101111" // /* MW 9 */ + 4678 "11111111" // /* MW 8 */ + 4679 "01111111" // /* MW 7 */ + 4680 "10000000" // /* MW 6 */ + 4681 "11010101" // /* MW 5 */ + 4682 "11111101" // /* MW 4 */ + 4683 "10000111" // /* MW 3 */ + 4684 "00011000" // /* MW 2 */ + 4685 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4686 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4687 "01011000" // /* MW 11 */ + 4688 "11101100" // /* MW 10 */ + 4689 "00000111" // /* MW 9 */ + 4690 "00001010" // /* MW 8 */ + 4691 "01100001" // /* MW 7 */ + 4692 "10000001" // /* MW 6 */ + 4693 "10110101" // /* MW 5 */ + 4694 "11100001" // /* MW 4 */ + 4695 "00000111" // /* MW 3 */ + 4696 "10110100" // /* MW 2 */ + 4697 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 39 +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.src_ref 2 "conv2d_bf16_params.h" 250 71 + 4698 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4699 "01011000" // /* MW 11 */ + 4700 "11000100" // /* MW 10 */ + 4701 "10000111" // /* MW 9 */ + 4702 "11001010" // /* MW 8 */ + 4703 "01110111" // /* MW 7 */ + 4704 "10000111" // /* MW 6 */ + 4705 "11110101" // /* MW 5 */ + 4706 "11101101" // /* MW 4 */ + 4707 "00000111" // /* MW 3 */ + 4708 "10010101" // /* MW 2 */ + 4709 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 + 4710 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4711 "01010000" // /* MW 7 */ + 4712 "01000000" // /* MW 6 */ + 4713 "10000000" // /* MW 5 */ + 4714 "00000011" // /* MW 4 */ + 4715 "10110000" // /* MW 3 */ + 4716 "01110011" // /* MW 2 */ + 4717 "11111111" // /* MW 1 */ + 4718 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4719 "00111101" // /* MW 3 */ + 4720 "11100100" // /* MW 2 */ + 4721 "00001111" // /* MW 1 */ + 4722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4723 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 + 4724 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4725 "00100000" // /* MW 3 */ + 4726 "01011001" // /* MW 2 */ + 4727 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 241 95 first + 4728 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4729 "10011011" // /* MW 5 */ + 4730 "01110111" // /* MW 4 */ + 4731 "00110110" // /* MW 3 */ + 4732 "00110010" // /* MW 2 */ + 4733 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 first +.src_ref 2 "conv2d_bf16_params.h" 242 94 first + 4734 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4735 "00101111" // /* MW 5 */ + 4736 "11110010" // /* MW 4 */ + 4737 "01011110" // /* MW 3 */ + 4738 "11111001" // /* MW 2 */ + 4739 "01011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 20 first + 4740 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4741 "00101010" // /* MW 3 */ + 4742 "11001001" // /* MW 2 */ + 4743 "00000010" // /* MW 1 */ + 4744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4745 "00000000" // /* MW 1 */ + 4746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4747 "00000000" // /* MW 1 */ + 4748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4749 "00000000" // /* MW 1 */ + 4750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4751 "00000000" // /* MW 1 */ + 4752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4753 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 174 first + 4754 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4755 "11101100" // /* MW 3 */ + 4756 "01110111" // /* MW 2 */ + 4757 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 + 4758 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4759 "00110010" // /* MW 3 */ + 4760 "01011101" // /* MW 2 */ + 4761 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4762 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4763 "11001100" // /* MW 3 */ + 4764 "11110110" // /* MW 2 */ + 4765 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 241 95 first +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4766 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4767 "11001111" // /* MW 5 */ + 4768 "10110111" // /* MW 4 */ + 4769 "11101110" // /* MW 3 */ + 4770 "01110000" // /* MW 2 */ + 4771 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 100 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4772 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4773 "00011101" // /* MW 3 */ + 4774 "01111111" // /* MW 2 */ + 4775 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4776 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4777 "11100010" // /* MW 3 */ + 4778 "01011000" // /* MW 2 */ + 4779 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 98 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4780 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4781 "11000101" // /* MW 3 */ + 4782 "11111001" // /* MW 2 */ + 4783 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 151 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4784 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4785 "01100010" // /* MW 5 */ + 4786 "00111100" // /* MW 4 */ + 4787 "10011110" // /* MW 3 */ + 4788 "11111101" // /* MW 2 */ + 4789 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4790 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4791 "11000010" // /* MW 3 */ + 4792 "01111001" // /* MW 2 */ + 4793 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 80 first + 4794 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4795 "11001100" // /* MW 3 */ + 4796 "01111111" // /* MW 2 */ + 4797 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 117 first +.src_ref 2 "conv2d_bf16_params.h" 243 39 + 4798 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4799 "11010001" // /* MW 5 */ + 4800 "11110111" // /* MW 4 */ + 4801 "00111110" // /* MW 3 */ + 4802 "01111110" // /* MW 2 */ + 4803 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 first +.src_ref 2 "conv2d_bf16_params.h" 245 28 first + 4804 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4805 "00110001" // /* MW 5 */ + 4806 "10110010" // /* MW 4 */ + 4807 "01010100" // /* MW 3 */ + 4808 "01111001" // /* MW 2 */ + 4809 "01011101" // /* MW 1 */ + 4810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4811 "00000000" // /* MW 1 */ + 4812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4813 "00000000" // /* MW 1 */ + 4814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4815 "00000000" // /* MW 1 */ + 4816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4817 "00000000" // /* MW 1 */ + 4818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4819 "00000000" // /* MW 1 */ + 4820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4821 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 87 + 4822 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4823 "11111100" // /* MW 5 */ + 4824 "10111110" // /* MW 4 */ + 4825 "00011111" // /* MW 3 */ + 4826 "10101101" // /* MW 2 */ + 4827 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4828 "10000100" // JNZ r12, #4896 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4896 delay_slots=5 */ + 4829 "00000001" // /* MW 5 */ + 4830 "01000000" // /* MW 4 */ + 4831 "10010000" // /* MW 3 */ + 4832 "00001001" // /* MW 2 */ + 4833 "01100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.delay_slot + 4834 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4835 "01001000" // /* MW 3 */ + 4836 "10010011" // /* MW 2 */ + 4837 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4838 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4839 "10010000" // /* MW 3 */ + 4840 "11111110" // /* MW 2 */ + 4841 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4842 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4843 "01100100" // /* MW 3 */ + 4844 "01101101" // /* MW 2 */ + 4845 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4846 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4847 "01111100" // /* MW 3 */ + 4848 "11101111" // /* MW 2 */ + 4849 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 132 +.delay_slot + 4850 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4851 "01100100" // /* MW 3 */ + 4852 "11100001" // /* MW 2 */ + 4853 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4854 "10000100" // JNZ r29, #4896 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4896 delay_slots=5 */ + 4855 "00000001" // /* MW 5 */ + 4856 "01000000" // /* MW 4 */ + 4857 "10010000" // /* MW 3 */ + 4858 "00001001" // /* MW 2 */ + 4859 "11101000" // /* MW 1 */ +.delay_slot + 4860 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4861 "00011101" // /* MW 3 */ + 4862 "11101011" // /* MW 2 */ + 4863 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4865 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4871 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 + 4872 "10111010" // MOVA r27, #1; J #4944 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4944 delay_slots=5 */ + 4873 "00100000" // /* MW 9 */ + 4874 "00000000" // /* MW 8 */ + 4875 "00000000" // /* MW 7 */ + 4876 "01101010" // /* MW 6 */ + 4877 "00000010" // /* MW 5 */ + 4878 "00000000" // /* MW 4 */ + 4879 "00000000" // /* MW 3 */ + 4880 "00111011" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ +.delay_slot + 4882 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4883 "10011100" // /* MW 3 */ + 4884 "00011001" // /* MW 2 */ + 4885 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1849 12 +.delay_slot + 4886 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4887 "00000101" // /* MW 3 */ + 4888 "00100110" // /* MW 2 */ + 4889 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4893 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4895 "00000000" // /* MW 1 */ +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 2 "conv2d_bf16_params.h" 250 71 first +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4896 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4897 "01111000" // /* MW 11 */ + 4898 "11001110" // /* MW 10 */ + 4899 "00001100" // /* MW 9 */ + 4900 "00111100" // /* MW 8 */ + 4901 "10111111" // /* MW 7 */ + 4902 "10101011" // /* MW 6 */ + 4903 "00011101" // /* MW 5 */ + 4904 "11101011" // /* MW 4 */ + 4905 "00000111" // /* MW 3 */ + 4906 "10010101" // /* MW 2 */ + 4907 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4908 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4909 "01011101" // /* MW 3 */ + 4910 "10101011" // /* MW 2 */ + 4911 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 + 4912 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4913 "10010010" // /* MW 3 */ + 4914 "01101011" // /* MW 2 */ + 4915 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4916 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4917 "11100111" // /* MW 3 */ + 4918 "11110111" // /* MW 2 */ + 4919 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4920 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4921 "01000001" // /* MW 5 */ + 4922 "10110000" // /* MW 4 */ + 4923 "01001101" // /* MW 3 */ + 4924 "11110010" // /* MW 2 */ + 4925 "10101100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4926 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4927 "00110010" // /* MW 3 */ + 4928 "01100111" // /* MW 2 */ + 4929 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 first + 4930 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4931 "01000100" // /* MW 3 */ + 4932 "00101001" // /* MW 2 */ + 4933 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4934 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4935 "11110000" // /* MW 3 */ + 4936 "00110110" // /* MW 2 */ + 4937 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 152 first + 4938 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4939 "10001011" // /* MW 5 */ + 4940 "11001111" // /* MW 4 */ + 4941 "11111001" // /* MW 3 */ + 4942 "00101100" // /* MW 2 */ + 4943 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 +.src_ref 2 "conv2d_bf16_params.h" 258 8 first + 4944 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4992 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4992 delay_slots=5 */ + 4945 "01100000" // /* MW 11 */ + 4946 "00000000" // /* MW 10 */ + 4947 "00010000" // /* MW 9 */ + 4948 "01110000" // /* MW 8 */ + 4949 "00000010" // /* MW 7 */ + 4950 "10111010" // /* MW 6 */ + 4951 "01110001" // /* MW 5 */ + 4952 "01101111" // /* MW 4 */ + 4953 "10000010" // /* MW 3 */ + 4954 "10010000" // /* MW 2 */ + 4955 "00000001" // /* MW 1 */ +.delay_slot + 4956 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4957 "01100111" // /* MW 3 */ + 4958 "10001010" // /* MW 2 */ + 4959 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4961 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4963 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4965 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4967 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 + 4968 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4969 "11111110" // /* MW 5 */ + 4970 "00111111" // /* MW 4 */ + 4971 "11111010" // /* MW 3 */ + 4972 "11111111" // /* MW 2 */ + 4973 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 first + 4974 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4975 "01000100" // /* MW 3 */ + 4976 "10100101" // /* MW 2 */ + 4977 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4978 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4979 "00011100" // /* MW 13 */ + 4980 "00000000" // /* MW 12 */ + 4981 "00000000" // /* MW 11 */ + 4982 "01010111" // /* MW 10 */ + 4983 "00011010" // /* MW 9 */ + 4984 "01000000" // /* MW 8 */ + 4985 "00000000" // /* MW 7 */ + 4986 "00000000" // /* MW 6 */ + 4987 "10100011" // /* MW 5 */ + 4988 "11101100" // /* MW 4 */ + 4989 "11110110" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.src_ref 2 "conv2d_bf16.h" 1841 65 first +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16.h" 1849 12 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4992 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4993 "01011000" // /* MW 9 */ + 4994 "11111101" // /* MW 8 */ + 4995 "11001111" // /* MW 7 */ + 4996 "10000010" // /* MW 6 */ + 4997 "01000100" // /* MW 5 */ + 4998 "00100111" // /* MW 4 */ + 4999 "11010000" // /* MW 3 */ + 5000 "11010010" // /* MW 2 */ + 5001 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1841 34 +.src_ref 2 "conv2d_bf16.h" 1842 36 +.src_ref 2 "conv2d_bf16.h" 1842 67 +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5002 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5003 "01011000" // /* MW 9 */ + 5004 "00100100" // /* MW 8 */ + 5005 "00000000" // /* MW 7 */ + 5006 "11111010" // /* MW 6 */ + 5007 "01011111" // /* MW 5 */ + 5008 "00101001" // /* MW 4 */ + 5009 "00000000" // /* MW 3 */ + 5010 "01010010" // /* MW 2 */ + 5011 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 67 first +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5012 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5013 "01011000" // /* MW 11 */ + 5014 "11001100" // /* MW 10 */ + 5015 "00000111" // /* MW 9 */ + 5016 "00100110" // /* MW 8 */ + 5017 "01101011" // /* MW 7 */ + 5018 "10101011" // /* MW 6 */ + 5019 "00101101" // /* MW 5 */ + 5020 "11010000" // /* MW 4 */ + 5021 "11010111" // /* MW 3 */ + 5022 "01011010" // /* MW 2 */ + 5023 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1845 80 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5024 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5025 "01011000" // /* MW 11 */ + 5026 "11000100" // /* MW 10 */ + 5027 "00000000" // /* MW 9 */ + 5028 "11101010" // /* MW 8 */ + 5029 "00110111" // /* MW 7 */ + 5030 "10111111" // /* MW 6 */ + 5031 "11010101" // /* MW 5 */ + 5032 "11011110" // /* MW 4 */ + 5033 "11010111" // /* MW 3 */ + 5034 "01011110" // /* MW 2 */ + 5035 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 63 first + 5036 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5037 "10110110" // /* MW 3 */ + 5038 "11111111" // /* MW 2 */ + 5039 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 first + 5040 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5041 "11110110" // /* MW 3 */ + 5042 "10001011" // /* MW 2 */ + 5043 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 5044 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5045 "10110110" // /* MW 3 */ + 5046 "00000110" // /* MW 2 */ + 5047 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1841 34 first + 5048 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5049 "01011011" // /* MW 5 */ + 5050 "00100110" // /* MW 4 */ + 5051 "11011010" // /* MW 3 */ + 5052 "11010010" // /* MW 2 */ + 5053 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 5054 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5055 "11010110" // /* MW 3 */ + 5056 "00000111" // /* MW 2 */ + 5057 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first + 5058 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5059 "00101101" // /* MW 3 */ + 5060 "10101101" // /* MW 2 */ + 5061 "00010101" // /* MW 1 */ + 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5063 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 80 first + 5064 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5065 "00111110" // /* MW 3 */ + 5066 "01100111" // /* MW 2 */ + 5067 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 first + 5068 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5069 "00011000" // /* MW 3 */ + 5070 "11100011" // /* MW 2 */ + 5071 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 12 + 5072 "10000100" // JNZ r17, #5184 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5184 delay_slots=5 */ + 5073 "00000001" // /* MW 5 */ + 5074 "01000000" // /* MW 4 */ + 5075 "00100000" // /* MW 3 */ + 5076 "00001010" // /* MW 2 */ + 5077 "10001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first +.src_ref 2 "conv2d_bf16.h" 1842 75 first +.delay_slot + 5078 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5079 "10110010" // /* MW 5 */ + 5080 "10110101" // /* MW 4 */ + 5081 "10111010" // /* MW 3 */ + 5082 "10100101" // /* MW 2 */ + 5083 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 5084 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5085 "10110010" // /* MW 5 */ + 5086 "10010101" // /* MW 4 */ + 5087 "10110000" // /* MW 3 */ + 5088 "01100101" // /* MW 2 */ + 5089 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1841 34 first +.delay_slot + 5090 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5091 "10100000" // /* MW 7 */ + 5092 "01101000" // /* MW 6 */ + 5093 "11001010" // /* MW 5 */ + 5094 "00000001" // /* MW 4 */ + 5095 "10110000" // /* MW 3 */ + 5096 "10000100" // /* MW 2 */ + 5097 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5098 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5099 "10000000" // /* MW 3 */ + 5100 "11010000" // /* MW 2 */ + 5101 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 5102 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5103 "11111001" // /* MW 3 */ + 5104 "01101010" // /* MW 2 */ + 5105 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5106 "01000100" // MOVXM p7, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5107 "11001000" // /* MW 5 */ + 5108 "11001000" // /* MW 4 */ + 5109 "11001110" // /* MW 3 */ + 5110 "00000111" // /* MW 2 */ + 5111 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 first + 5112 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5113 "10000000" // /* MW 5 */ + 5114 "10110100" // /* MW 4 */ + 5115 "01010000" // /* MW 3 */ + 5116 "11000100" // /* MW 2 */ + 5117 "11100000" // /* MW 1 */ + 5118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5119 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5120 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5121 "00000000" // /* MW 5 */ + 5122 "00100000" // /* MW 4 */ + 5123 "00001010" // /* MW 3 */ + 5124 "01111111" // /* MW 2 */ + 5125 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5126 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5127 "10010001" // /* MW 3 */ + 5128 "00000010" // /* MW 2 */ + 5129 "00011000" // /* MW 1 */ + 5130 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5131 "11100000" // /* MW 3 */ + 5132 "00010101" // /* MW 2 */ + 5133 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5134 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5135 "01011111" // /* MW 3 */ + 5136 "01101010" // /* MW 2 */ + 5137 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5138 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5139 "00100101" // /* MW 5 */ + 5140 "00000001" // /* MW 4 */ + 5141 "11100000" // /* MW 3 */ + 5142 "11000110" // /* MW 2 */ + 5143 "11100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5144 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5145 "10000000" // /* MW 3 */ + 5146 "01111010" // /* MW 2 */ + 5147 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5148 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5149 "00010110" // /* MW 3 */ + 5150 "01000000" // /* MW 2 */ + 5151 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5153 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5154 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5155 "00000001" // /* MW 3 */ + 5156 "01000001" // /* MW 2 */ + 5157 "00011100" // /* MW 1 */ + 5158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5159 "00000000" // /* MW 1 */ + 5160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5161 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5162 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5163 "00110010" // /* MW 3 */ + 5164 "00000110" // /* MW 2 */ + 5165 "00000111" // /* MW 1 */ + 5166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5167 "00000000" // /* MW 1 */ + 5168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5169 "00000000" // /* MW 1 */ + 5170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5171 "00000000" // /* MW 1 */ + 5172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5173 "00000000" // /* MW 1 */ + 5174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5175 "00000000" // /* MW 1 */ + 5176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5177 "00000000" // /* MW 1 */ + 5178 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5179 "01101011" // /* MW 5 */ + 5180 "10100100" // /* MW 4 */ + 5181 "11111111" // /* MW 3 */ + 5182 "00101100" // /* MW 2 */ + 5183 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.src_ref 2 "conv2d_bf16.h" 881 76 +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5184 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5185 "00010000" // /* MW 11 */ + 5186 "00110010" // /* MW 10 */ + 5187 "10110010" // /* MW 9 */ + 5188 "11110001" // /* MW 8 */ + 5189 "00000001" // /* MW 7 */ + 5190 "00000000" // /* MW 6 */ + 5191 "00001011" // /* MW 5 */ + 5192 "10001110" // /* MW 4 */ + 5193 "10000001" // /* MW 3 */ + 5194 "10010000" // /* MW 2 */ + 5195 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.src_ref 2 "conv2d_bf16.h" 876 51 first +.src_ref 2 "conv2d_bf16.h" 881 76 first +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5196 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5197 "01011000" // /* MW 11 */ + 5198 "00001011" // /* MW 10 */ + 5199 "01101000" // /* MW 9 */ + 5200 "10010010" // /* MW 8 */ + 5201 "00011001" // /* MW 7 */ + 5202 "00110011" // /* MW 6 */ + 5203 "10001011" // /* MW 5 */ + 5204 "10000100" // /* MW 4 */ + 5205 "01010000" // /* MW 3 */ + 5206 "01000101" // /* MW 2 */ + 5207 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5208 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5209 "01111000" // /* MW 9 */ + 5210 "01100000" // /* MW 8 */ + 5211 "10101010" // /* MW 7 */ + 5212 "01100101" // /* MW 6 */ + 5213 "10111001" // /* MW 5 */ + 5214 "00111001" // /* MW 4 */ + 5215 "00000000" // /* MW 3 */ + 5216 "10010110" // /* MW 2 */ + 5217 "01100001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 883 4 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5218 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5219 "01100111" // /* MW 3 */ + 5220 "00000110" // /* MW 2 */ + 5221 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5223 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 884 4 first +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5224 "00000100" // JL #4352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4352 delay_slots=5 */ + 5225 "00000001" // /* MW 5 */ + 5226 "00000000" // /* MW 4 */ + 5227 "10000000" // /* MW 3 */ + 5228 "00001000" // /* MW 2 */ + 5229 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5230 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5231 "00101101" // /* MW 3 */ + 5232 "01101011" // /* MW 2 */ + 5233 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.delay_slot + 5234 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5235 "11111001" // /* MW 3 */ + 5236 "01101010" // /* MW 2 */ + 5237 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 first +.delay_slot + 5238 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5239 "00010001" // /* MW 3 */ + 5240 "01100011" // /* MW 2 */ + 5241 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.delay_slot + 5242 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5243 "00110101" // /* MW 5 */ + 5244 "00101100" // /* MW 4 */ + 5245 "10111010" // /* MW 3 */ + 5246 "01100101" // /* MW 2 */ + 5247 "10001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.delay_slot + 5248 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5249 "00000000" // /* MW 15 */ + 5250 "00000000" // /* MW 14 */ + 5251 "10101000" // /* MW 13 */ + 5252 "11100010" // /* MW 12 */ + 5253 "10001011" // /* MW 11 */ + 5254 "00010001" // /* MW 10 */ + 5255 "10011010" // /* MW 9 */ + 5256 "00101100" // /* MW 8 */ + 5257 "01011011" // /* MW 7 */ + 5258 "00000001" // /* MW 6 */ + 5259 "00100000" // /* MW 5 */ + 5260 "00000000" // /* MW 4 */ + 5261 "11110000" // /* MW 3 */ + 5262 "00101100" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.return_address + 5264 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "10011001" // /* MW 3 */ + 5266 "11010100" // /* MW 2 */ + 5267 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 first +.no_stack_arguments + 5268 "00000100" // JL #4352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4352 delay_slots=5 */ + 5269 "00000001" // /* MW 5 */ + 5270 "00000000" // /* MW 4 */ + 5271 "10000000" // /* MW 3 */ + 5272 "00001000" // /* MW 2 */ + 5273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5277 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.delay_slot + 5278 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5279 "10010000" // /* MW 3 */ + 5280 "01010110" // /* MW 2 */ + 5281 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5282 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5283 "10100000" // /* MW 3 */ + 5284 "01100110" // /* MW 2 */ + 5285 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5286 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5287 "00000000" // /* MW 9 */ + 5288 "00000000" // /* MW 8 */ + 5289 "00000000" // /* MW 7 */ + 5290 "00000000" // /* MW 6 */ + 5291 "00001011" // /* MW 5 */ + 5292 "10001111" // /* MW 4 */ + 5293 "11110000" // /* MW 3 */ + 5294 "00101100" // /* MW 2 */ + 5295 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.return_address + 5296 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5297 "00001000" // /* MW 9 */ + 5298 "01100011" // /* MW 8 */ + 5299 "00110011" // /* MW 7 */ + 5300 "11101010" // /* MW 6 */ + 5301 "00110111" // /* MW 5 */ + 5302 "00000001" // /* MW 4 */ + 5303 "10000000" // /* MW 3 */ + 5304 "10011010" // /* MW 2 */ + 5305 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 886 4 +.src_ref 2 "conv2d_bf16.h" 896 23 first +.src_ref 2 "conv2d_bf16.h" 1123 71 + 5306 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5307 "01100010" // /* MW 5 */ + 5308 "00110100" // /* MW 4 */ + 5309 "11010000" // /* MW 3 */ + 5310 "10000100" // /* MW 2 */ + 5311 "10000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5312 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "01000110" // /* MW 3 */ + 5314 "00011100" // /* MW 2 */ + 5315 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5316 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5317 "00100110" // /* MW 3 */ + 5318 "00011110" // /* MW 2 */ + 5319 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5320 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5321 "01000110" // /* MW 3 */ + 5322 "00011110" // /* MW 2 */ + 5323 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5324 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5325 "00000110" // /* MW 3 */ + 5326 "00011100" // /* MW 2 */ + 5327 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5328 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5329 "01100110" // /* MW 3 */ + 5330 "00011100" // /* MW 2 */ + 5331 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5332 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5333 "01100110" // /* MW 3 */ + 5334 "00011110" // /* MW 2 */ + 5335 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 first + 5336 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5337 "11010110" // /* MW 3 */ + 5338 "00011110" // /* MW 2 */ + 5339 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5340 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5341 "00110110" // /* MW 3 */ + 5342 "00011110" // /* MW 2 */ + 5343 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5344 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5345 "10010110" // /* MW 3 */ + 5346 "00011111" // /* MW 2 */ + 5347 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5348 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5349 "10110110" // /* MW 3 */ + 5350 "00011110" // /* MW 2 */ + 5351 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5352 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5353 "11110110" // /* MW 3 */ + 5354 "00011110" // /* MW 2 */ + 5355 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5356 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5357 "10011110" // /* MW 3 */ + 5358 "00011101" // /* MW 2 */ + 5359 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5360 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5361 "00100110" // /* MW 3 */ + 5362 "00011101" // /* MW 2 */ + 5363 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5364 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5365 "10100110" // /* MW 3 */ + 5366 "00011100" // /* MW 2 */ + 5367 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5368 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5369 "11000110" // /* MW 3 */ + 5370 "00011100" // /* MW 2 */ + 5371 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5372 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5373 "10100110" // /* MW 3 */ + 5374 "00011110" // /* MW 2 */ + 5375 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5376 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5377 "11010110" // /* MW 3 */ + 5378 "00011111" // /* MW 2 */ + 5379 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5380 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5381 "10110110" // /* MW 3 */ + 5382 "00011111" // /* MW 2 */ + 5383 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5384 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5385 "11100110" // /* MW 3 */ + 5386 "00011100" // /* MW 2 */ + 5387 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5388 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5389 "01001010" // /* MW 3 */ + 5390 "11000010" // /* MW 2 */ + 5391 "00000100" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 + 5392 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5393 "10010001" // /* MW 3 */ + 5394 "11010010" // /* MW 2 */ + 5395 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5396 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "01010110" // /* MW 3 */ + 5398 "00000100" // /* MW 2 */ + 5399 "00000100" // /* MW 1 */ + 5400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5401 "00000000" // /* MW 1 */ + 5402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5403 "00000000" // /* MW 1 */ + 5404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5405 "00000000" // /* MW 1 */ + 5406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5407 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5408 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "00101100" // /* MW 3 */ + 5410 "11100111" // /* MW 2 */ + 5411 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 12 + 5412 "10000100" // JNZ r19, #6336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6336 delay_slots=5 */ + 5413 "00000001" // /* MW 5 */ + 5414 "01000000" // /* MW 4 */ + 5415 "01100000" // /* MW 3 */ + 5416 "00001100" // /* MW 2 */ + 5417 "10011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 +.delay_slot + 5418 "01000100" // MOVXM p2, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5419 "11001000" // /* MW 5 */ + 5420 "11001000" // /* MW 4 */ + 5421 "11000100" // /* MW 3 */ + 5422 "00000111" // /* MW 2 */ + 5423 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 first +.delay_slot + 5424 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5425 "10100111" // /* MW 3 */ + 5426 "00000101" // /* MW 2 */ + 5427 "00000010" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 5428 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5429 "01110010" // /* MW 3 */ + 5430 "11010001" // /* MW 2 */ + 5431 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5435 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 71 first + 5436 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5437 "01011000" // /* MW 9 */ + 5438 "10000100" // /* MW 8 */ + 5439 "10000000" // /* MW 7 */ + 5440 "00111111" // /* MW 6 */ + 5441 "10111001" // /* MW 5 */ + 5442 "00011011" // /* MW 4 */ + 5443 "00100000" // /* MW 3 */ + 5444 "01000011" // /* MW 2 */ + 5445 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 +.src_ref 2 "conv2d_bf16.h" 1154 80 + 5446 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5447 "01011000" // /* MW 9 */ + 5448 "00111100" // /* MW 8 */ + 5449 "00000000" // /* MW 7 */ + 5450 "00001010" // /* MW 6 */ + 5451 "00100000" // /* MW 5 */ + 5452 "00111101" // /* MW 4 */ + 5453 "00000000" // /* MW 3 */ + 5454 "00010011" // /* MW 2 */ + 5455 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5456 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5457 "01111000" // /* MW 9 */ + 5458 "11010000" // /* MW 8 */ + 5459 "11100100" // /* MW 7 */ + 5460 "00001011" // /* MW 6 */ + 5461 "10100000" // /* MW 5 */ + 5462 "00000001" // /* MW 4 */ + 5463 "10000000" // /* MW 3 */ + 5464 "00010100" // /* MW 2 */ + 5465 "11111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 746 83 + 5466 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5467 "01111000" // /* MW 11 */ + 5468 "11000000" // /* MW 10 */ + 5469 "10100111" // /* MW 9 */ + 5470 "00000001" // /* MW 8 */ + 5471 "11010100" // /* MW 7 */ + 5472 "00011011" // /* MW 6 */ + 5473 "01001011" // /* MW 5 */ + 5474 "00011100" // /* MW 4 */ + 5475 "10000010" // /* MW 3 */ + 5476 "10011000" // /* MW 2 */ + 5477 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.src_ref 2 "conv2d_bf16.h" 1199 26 +.src_ref 2 "conv2d_bf16.h" 1200 26 +.src_ref 2 "conv2d_bf16.h" 1201 26 +.src_ref 2 "conv2d_bf16.h" 1202 26 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5478 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5479 "01011000" // /* MW 11 */ + 5480 "00000111" // /* MW 10 */ + 5481 "11101000" // /* MW 9 */ + 5482 "10001001" // /* MW 8 */ + 5483 "11110111" // /* MW 7 */ + 5484 "00000001" // /* MW 6 */ + 5485 "01001011" // /* MW 5 */ + 5486 "00011100" // /* MW 4 */ + 5487 "00100110" // /* MW 3 */ + 5488 "10010110" // /* MW 2 */ + 5489 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 12 +.src_ref 2 "conv2d_bf16.h" 1218 20 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5490 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5600 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5491 "00010000" // /* MW 9 */ + 5492 "11110000" // /* MW 8 */ + 5493 "00110010" // /* MW 7 */ + 5494 "00000101" // /* MW 6 */ + 5495 "00000000" // /* MW 5 */ + 5496 "00000000" // /* MW 4 */ + 5497 "00100000" // /* MW 3 */ + 5498 "11001010" // /* MW 2 */ + 5499 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 749 26 +.src_ref 2 "conv2d_bf16.h" 750 26 +.src_ref 2 "conv2d_bf16.h" 751 26 +.src_ref 2 "conv2d_bf16.h" 752 26 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5500 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5501 "01011000" // /* MW 9 */ + 5502 "00001100" // /* MW 8 */ + 5503 "10001011" // /* MW 7 */ + 5504 "00010010" // /* MW 6 */ + 5505 "01101001" // /* MW 5 */ + 5506 "00110100" // /* MW 4 */ + 5507 "00100000" // /* MW 3 */ + 5508 "00110110" // /* MW 2 */ + 5509 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1873 + 5510 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5511 "01011000" // /* MW 11 */ + 5512 "00000000" // /* MW 10 */ + 5513 "00001000" // /* MW 9 */ + 5514 "00001011" // /* MW 8 */ + 5515 "10010000" // /* MW 7 */ + 5516 "00000001" // /* MW 6 */ + 5517 "00100000" // /* MW 5 */ + 5518 "11010111" // /* MW 4 */ + 5519 "00101001" // /* MW 3 */ + 5520 "10000111" // /* MW 2 */ + 5521 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5522 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5523 "00010110" // /* MW 3 */ + 5524 "10001000" // /* MW 2 */ + 5525 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5526 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5527 "00100110" // /* MW 3 */ + 5528 "10101011" // /* MW 2 */ + 5529 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5530 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5531 "01110110" // /* MW 3 */ + 5532 "00101111" // /* MW 2 */ + 5533 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 80 first + 5534 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5535 "10000110" // /* MW 3 */ + 5536 "00011110" // /* MW 2 */ + 5537 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 80 first + 5538 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5539 "11000110" // /* MW 3 */ + 5540 "10001010" // /* MW 2 */ + 5541 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 87 first + 5542 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5543 "00000110" // /* MW 3 */ + 5544 "10011110" // /* MW 2 */ + 5545 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 83 first + 5546 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5547 "00110110" // /* MW 3 */ + 5548 "00011100" // /* MW 2 */ + 5549 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 83 first +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5550 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5551 "00000010" // /* MW 5 */ + 5552 "00000110" // /* MW 4 */ + 5553 "11011101" // /* MW 3 */ + 5554 "00000010" // /* MW 2 */ + 5555 "10011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 66 first + 5556 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5557 "01110110" // /* MW 3 */ + 5558 "00010100" // /* MW 2 */ + 5559 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1206 63 first + 5560 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5561 "10010110" // /* MW 3 */ + 5562 "00000100" // /* MW 2 */ + 5563 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 + 5564 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5565 "00000000" // /* MW 3 */ + 5566 "11011010" // /* MW 2 */ + 5567 "00011001" // /* MW 1 */ + 5568 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5569 "10011001" // /* MW 3 */ + 5570 "10000011" // /* MW 2 */ + 5571 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 + 5572 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5573 "00000000" // /* MW 3 */ + 5574 "00011011" // /* MW 2 */ + 5575 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5576 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5577 "10011001" // /* MW 3 */ + 5578 "00001101" // /* MW 2 */ + 5579 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 first + 5580 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5581 "11100000" // /* MW 3 */ + 5582 "00000011" // /* MW 2 */ + 5583 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 first + 5584 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5585 "11000000" // /* MW 5 */ + 5586 "00010000" // /* MW 4 */ + 5587 "11101110" // /* MW 3 */ + 5588 "11111111" // /* MW 2 */ + 5589 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5590 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5591 "01111110" // /* MW 9 */ + 5592 "10000000" // /* MW 8 */ + 5593 "10000010" // /* MW 7 */ + 5594 "00000000" // /* MW 6 */ + 5595 "00010000" // /* MW 5 */ + 5596 "00000000" // /* MW 4 */ + 5597 "11110000" // /* MW 3 */ + 5598 "00101100" // /* MW 2 */ + 5599 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1147 31 first +.src_ref 2 "conv2d_bf16.h" 1187 40 first +.loop_nesting 1 + 5600 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5601 "01111000" // /* MW 11 */ + 5602 "10010000" // /* MW 10 */ + 5603 "00110011" // /* MW 9 */ + 5604 "11101100" // /* MW 8 */ + 5605 "11100111" // /* MW 7 */ + 5606 "00000100" // /* MW 6 */ + 5607 "00001011" // /* MW 5 */ + 5608 "10000101" // /* MW 4 */ + 5609 "01110001" // /* MW 3 */ + 5610 "10000101" // /* MW 2 */ + 5611 "11000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1188 50 first + 5612 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5613 "10100000" // /* MW 11 */ + 5614 "10011000" // /* MW 10 */ + 5615 "00110011" // /* MW 9 */ + 5616 "00000010" // /* MW 8 */ + 5617 "01001011" // /* MW 7 */ + 5618 "00001110" // /* MW 6 */ + 5619 "00101011" // /* MW 5 */ + 5620 "00101000" // /* MW 4 */ + 5621 "01111000" // /* MW 3 */ + 5622 "10000001" // /* MW 2 */ + 5623 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first + 5624 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5625 "01110000" // /* MW 11 */ + 5626 "10000000" // /* MW 10 */ + 5627 "11000110" // /* MW 9 */ + 5628 "00000011" // /* MW 8 */ + 5629 "01001011" // /* MW 7 */ + 5630 "01011010" // /* MW 6 */ + 5631 "00101111" // /* MW 5 */ + 5632 "00101000" // /* MW 4 */ + 5633 "01111000" // /* MW 3 */ + 5634 "00111001" // /* MW 2 */ + 5635 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1149 31 first + 5636 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5637 "01110000" // /* MW 11 */ + 5638 "00000000" // /* MW 10 */ + 5639 "10000010" // /* MW 9 */ + 5640 "00000001" // /* MW 8 */ + 5641 "00001011" // /* MW 7 */ + 5642 "01010011" // /* MW 6 */ + 5643 "00101011" // /* MW 5 */ + 5644 "00000011" // /* MW 4 */ + 5645 "01110100" // /* MW 3 */ + 5646 "00001101" // /* MW 2 */ + 5647 "11011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 743 30 first + 5648 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5649 "01011110" // /* MW 9 */ + 5650 "00000000" // /* MW 8 */ + 5651 "11000000" // /* MW 7 */ + 5652 "00000001" // /* MW 6 */ + 5653 "11010100" // /* MW 5 */ + 5654 "00010010" // /* MW 4 */ + 5655 "01110100" // /* MW 3 */ + 5656 "01000001" // /* MW 2 */ + 5657 "01110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1152 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first + 5658 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5920 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5659 "00010000" // /* MW 11 */ + 5660 "10010000" // /* MW 10 */ + 5661 "10111011" // /* MW 9 */ + 5662 "00000101" // /* MW 8 */ + 5663 "00000000" // /* MW 7 */ + 5664 "00000000" // /* MW 6 */ + 5665 "00101000" // /* MW 5 */ + 5666 "00101000" // /* MW 4 */ + 5667 "01111000" // /* MW 3 */ + 5668 "10010101" // /* MW 2 */ + 5669 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 1154 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 + 5670 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5872 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5671 "00010000" // /* MW 11 */ + 5672 "01111000" // /* MW 10 */ + 5673 "01111011" // /* MW 9 */ + 5674 "00000100" // /* MW 8 */ + 5675 "00000000" // /* MW 7 */ + 5676 "00000000" // /* MW 6 */ + 5677 "00101000" // /* MW 5 */ + 5678 "00101000" // /* MW 4 */ + 5679 "01111000" // /* MW 3 */ + 5680 "00011101" // /* MW 2 */ + 5681 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first + 5682 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5683 "00101000" // /* MW 5 */ + 5684 "00000001" // /* MW 4 */ + 5685 "01110100" // /* MW 3 */ + 5686 "10110101" // /* MW 2 */ + 5687 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1157 31 first + 5688 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5689 "00101000" // /* MW 5 */ + 5690 "00100010" // /* MW 4 */ + 5691 "01111000" // /* MW 3 */ + 5692 "10100101" // /* MW 2 */ + 5693 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1159 31 first + 5694 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5695 "00101000" // /* MW 5 */ + 5696 "00101000" // /* MW 4 */ + 5697 "01111000" // /* MW 3 */ + 5698 "00101101" // /* MW 2 */ + 5699 "11011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5700 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5701 "00101000" // /* MW 5 */ + 5702 "00101000" // /* MW 4 */ + 5703 "01111000" // /* MW 3 */ + 5704 "10000001" // /* MW 2 */ + 5705 "00100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1192 29 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5706 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5707 "00101000" // /* MW 5 */ + 5708 "00000001" // /* MW 4 */ + 5709 "01110100" // /* MW 3 */ + 5710 "10111101" // /* MW 2 */ + 5711 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5712 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5713 "11101110" // /* MW 9 */ + 5714 "11000011" // /* MW 8 */ + 5715 "10011010" // /* MW 7 */ + 5716 "00000010" // /* MW 6 */ + 5717 "00010100" // /* MW 5 */ + 5718 "00010001" // /* MW 4 */ + 5719 "01110100" // /* MW 3 */ + 5720 "11001101" // /* MW 2 */ + 5721 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1162 81 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5722 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5723 "11100000" // /* MW 11 */ + 5724 "11000001" // /* MW 10 */ + 5725 "10011010" // /* MW 9 */ + 5726 "00000001" // /* MW 8 */ + 5727 "10001011" // /* MW 7 */ + 5728 "10011000" // /* MW 6 */ + 5729 "00101100" // /* MW 5 */ + 5730 "00101000" // /* MW 4 */ + 5731 "01111000" // /* MW 3 */ + 5732 "11000101" // /* MW 2 */ + 5733 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5734 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5735 "11101001" // /* MW 9 */ + 5736 "00010100" // /* MW 8 */ + 5737 "01001000" // /* MW 7 */ + 5738 "00011101" // /* MW 6 */ + 5739 "01010100" // /* MW 5 */ + 5740 "00000000" // /* MW 4 */ + 5741 "01110011" // /* MW 3 */ + 5742 "10000001" // /* MW 2 */ + 5743 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5745 "11101001" // /* MW 13 */ + 5746 "00101100" // /* MW 12 */ + 5747 "01001001" // /* MW 11 */ + 5748 "00000111" // /* MW 10 */ + 5749 "01011000" // /* MW 9 */ + 5750 "01011100" // /* MW 8 */ + 5751 "00000000" // /* MW 7 */ + 5752 "00000000" // /* MW 6 */ + 5753 "10010110" // /* MW 5 */ + 5754 "10010100" // /* MW 4 */ + 5755 "01110110" // /* MW 3 */ + 5756 "00110101" // /* MW 2 */ + 5757 "11001111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1162 81 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5758 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5759 "00001001" // /* MW 13 */ + 5760 "01010101" // /* MW 12 */ + 5761 "01001010" // /* MW 11 */ + 5762 "00111110" // /* MW 10 */ + 5763 "10010000" // /* MW 9 */ + 5764 "01001100" // /* MW 8 */ + 5765 "00000000" // /* MW 7 */ + 5766 "00000000" // /* MW 6 */ + 5767 "10010110" // /* MW 5 */ + 5768 "00111000" // /* MW 4 */ + 5769 "01111010" // /* MW 3 */ + 5770 "10111101" // /* MW 2 */ + 5771 "10000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1199 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5772 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5773 "00111101" // /* MW 13 */ + 5774 "01100000" // /* MW 12 */ + 5775 "11111000" // /* MW 11 */ + 5776 "00011110" // /* MW 10 */ + 5777 "10010000" // /* MW 9 */ + 5778 "01010100" // /* MW 8 */ + 5779 "00000000" // /* MW 7 */ + 5780 "00000000" // /* MW 6 */ + 5781 "10010110" // /* MW 5 */ + 5782 "10011000" // /* MW 4 */ + 5783 "01110100" // /* MW 3 */ + 5784 "00000001" // /* MW 2 */ + 5785 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1200 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5786 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5787 "00111101" // /* MW 7 */ + 5788 "01100100" // /* MW 6 */ + 5789 "11111001" // /* MW 5 */ + 5790 "00000100" // /* MW 4 */ + 5791 "01110000" // /* MW 3 */ + 5792 "10000001" // /* MW 2 */ + 5793 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1201 26 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5794 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5795 "00111101" // /* MW 7 */ + 5796 "10001000" // /* MW 6 */ + 5797 "11111010" // /* MW 5 */ + 5798 "00000100" // /* MW 4 */ + 5799 "01110000" // /* MW 3 */ + 5800 "00001001" // /* MW 2 */ + 5801 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5802 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5803 "00001001" // /* MW 7 */ + 5804 "01101101" // /* MW 6 */ + 5805 "01001011" // /* MW 5 */ + 5806 "00000100" // /* MW 4 */ + 5807 "01110000" // /* MW 3 */ + 5808 "00000001" // /* MW 2 */ + 5809 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5810 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5811 "00101000" // /* MW 5 */ + 5812 "00000001" // /* MW 4 */ + 5813 "01110100" // /* MW 3 */ + 5814 "10000001" // /* MW 2 */ + 5815 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5816 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5817 "00010100" // /* MW 3 */ + 5818 "00010001" // /* MW 2 */ + 5819 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1202 26 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5820 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5821 "00111101" // /* MW 11 */ + 5822 "10001100" // /* MW 10 */ + 5823 "11111011" // /* MW 9 */ + 5824 "10000010" // /* MW 8 */ + 5825 "01111101" // /* MW 7 */ + 5826 "01110010" // /* MW 6 */ + 5827 "00101101" // /* MW 5 */ + 5828 "00101000" // /* MW 4 */ + 5829 "01111000" // /* MW 3 */ + 5830 "00001001" // /* MW 2 */ + 5831 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5832 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5833 "00101001" // /* MW 9 */ + 5834 "00000110" // /* MW 8 */ + 5835 "10100000" // /* MW 7 */ + 5836 "00011101" // /* MW 6 */ + 5837 "00010100" // /* MW 5 */ + 5838 "00010100" // /* MW 4 */ + 5839 "01110100" // /* MW 3 */ + 5840 "00000001" // /* MW 2 */ + 5841 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5842 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5843 "00001001" // /* MW 13 */ + 5844 "01000110" // /* MW 12 */ + 5845 "10100010" // /* MW 11 */ + 5846 "00001111" // /* MW 10 */ + 5847 "10101010" // /* MW 9 */ + 5848 "01011000" // /* MW 8 */ + 5849 "00000000" // /* MW 7 */ + 5850 "00000000" // /* MW 6 */ + 5851 "00101000" // /* MW 5 */ + 5852 "00000001" // /* MW 4 */ + 5853 "01110100" // /* MW 3 */ + 5854 "10000001" // /* MW 2 */ + 5855 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5857 "01010001" // /* MW 15 */ + 5858 "00001001" // /* MW 14 */ + 5859 "11101101" // /* MW 13 */ + 5860 "00000011" // /* MW 12 */ + 5861 "11001001" // /* MW 11 */ + 5862 "00000000" // /* MW 10 */ + 5863 "00000000" // /* MW 9 */ + 5864 "00000000" // /* MW 8 */ + 5865 "01011011" // /* MW 7 */ + 5866 "00000001" // /* MW 6 */ + 5867 "00101000" // /* MW 5 */ + 5868 "00100010" // /* MW 4 */ + 5869 "11111000" // /* MW 3 */ + 5870 "00101100" // /* MW 2 */ + 5871 "00000000" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5872 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5873 "01010000" // /* MW 15 */ + 5874 "00011011" // /* MW 14 */ + 5875 "11101101" // /* MW 13 */ + 5876 "00000001" // /* MW 12 */ + 5877 "01001001" // /* MW 11 */ + 5878 "00000001" // /* MW 10 */ + 5879 "00000000" // /* MW 9 */ + 5880 "00000000" // /* MW 8 */ + 5881 "01011011" // /* MW 7 */ + 5882 "00000001" // /* MW 6 */ + 5883 "00101000" // /* MW 5 */ + 5884 "00101000" // /* MW 4 */ + 5885 "01111000" // /* MW 3 */ + 5886 "00001001" // /* MW 2 */ + 5887 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5888 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5889 "00110001" // /* MW 15 */ + 5890 "00000000" // /* MW 14 */ + 5891 "01111101" // /* MW 13 */ + 5892 "10100101" // /* MW 12 */ + 5893 "00000001" // /* MW 11 */ + 5894 "00000000" // /* MW 10 */ + 5895 "00000000" // /* MW 9 */ + 5896 "00000000" // /* MW 8 */ + 5897 "01011011" // /* MW 7 */ + 5898 "00000001" // /* MW 6 */ + 5899 "00101000" // /* MW 5 */ + 5900 "00101000" // /* MW 4 */ + 5901 "01111000" // /* MW 3 */ + 5902 "00000001" // /* MW 2 */ + 5903 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5904 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5905 "00110000" // /* MW 15 */ + 5906 "00010010" // /* MW 14 */ + 5907 "01111101" // /* MW 13 */ + 5908 "10100101" // /* MW 12 */ + 5909 "00000001" // /* MW 11 */ + 5910 "00000000" // /* MW 10 */ + 5911 "00000000" // /* MW 9 */ + 5912 "00000000" // /* MW 8 */ + 5913 "01011011" // /* MW 7 */ + 5914 "00000001" // /* MW 6 */ + 5915 "00101000" // /* MW 5 */ + 5916 "00000001" // /* MW 4 */ + 5917 "01110100" // /* MW 3 */ + 5918 "10000001" // /* MW 2 */ + 5919 "00100010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5920 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5921 "01010001" // /* MW 15 */ + 5922 "00001001" // /* MW 14 */ + 5923 "11101101" // /* MW 13 */ + 5924 "00000011" // /* MW 12 */ + 5925 "11001001" // /* MW 11 */ + 5926 "00000000" // /* MW 10 */ + 5927 "00000000" // /* MW 9 */ + 5928 "00000000" // /* MW 8 */ + 5929 "01011011" // /* MW 7 */ + 5930 "00000001" // /* MW 6 */ + 5931 "00101000" // /* MW 5 */ + 5932 "00100010" // /* MW 4 */ + 5933 "11111000" // /* MW 3 */ + 5934 "00101100" // /* MW 2 */ + 5935 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5936 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5937 "00001001" // /* MW 13 */ + 5938 "01101010" // /* MW 12 */ + 5939 "10100011" // /* MW 11 */ + 5940 "00011110" // /* MW 10 */ + 5941 "10010000" // /* MW 9 */ + 5942 "01010100" // /* MW 8 */ + 5943 "00000000" // /* MW 7 */ + 5944 "00000000" // /* MW 6 */ + 5945 "10010110" // /* MW 5 */ + 5946 "10111100" // /* MW 4 */ + 5947 "01111100" // /* MW 3 */ + 5948 "00001001" // /* MW 2 */ + 5949 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5950 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5951 "00101001" // /* MW 13 */ + 5952 "00000110" // /* MW 12 */ + 5953 "10100000" // /* MW 11 */ + 5954 "00000111" // /* MW 10 */ + 5955 "00111000" // /* MW 9 */ + 5956 "01111100" // /* MW 8 */ + 5957 "00000000" // /* MW 7 */ + 5958 "00000000" // /* MW 6 */ + 5959 "10010110" // /* MW 5 */ + 5960 "00011100" // /* MW 4 */ + 5961 "01111110" // /* MW 3 */ + 5962 "00000001" // /* MW 2 */ + 5963 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5964 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5965 "00001001" // /* MW 9 */ + 5966 "01000110" // /* MW 8 */ + 5967 "10100010" // /* MW 7 */ + 5968 "11100100" // /* MW 6 */ + 5969 "00000000" // /* MW 5 */ + 5970 "01010101" // /* MW 4 */ + 5971 "01100001" // /* MW 3 */ + 5972 "10010001" // /* MW 2 */ + 5973 "01100001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5974 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5975 "00101001" // /* MW 9 */ + 5976 "00101010" // /* MW 8 */ + 5977 "10100001" // /* MW 7 */ + 5978 "11000100" // /* MW 6 */ + 5979 "00000111" // /* MW 5 */ + 5980 "10010010" // /* MW 4 */ + 5981 "01100001" // /* MW 3 */ + 5982 "11000001" // /* MW 2 */ + 5983 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5984 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5985 "00001001" // /* MW 9 */ + 5986 "01101010" // /* MW 8 */ + 5987 "10100011" // /* MW 7 */ + 5988 "11000100" // /* MW 6 */ + 5989 "00000011" // /* MW 5 */ + 5990 "10010010" // /* MW 4 */ + 5991 "01100010" // /* MW 3 */ + 5992 "10000001" // /* MW 2 */ + 5993 "11101011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1285 32 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5994 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5995 "00101001" // /* MW 11 */ + 5996 "00000110" // /* MW 10 */ + 5997 "10100000" // /* MW 9 */ + 5998 "11100110" // /* MW 8 */ + 5999 "00000000" // /* MW 7 */ + 6000 "10001111" // /* MW 6 */ + 6001 "00100010" // /* MW 5 */ + 6002 "01010111" // /* MW 4 */ + 6003 "01101111" // /* MW 3 */ + 6004 "10010001" // /* MW 2 */ + 6005 "10110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 6006 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6007 "00001001" // /* MW 9 */ + 6008 "01000110" // /* MW 8 */ + 6009 "10100010" // /* MW 7 */ + 6010 "11100100" // /* MW 6 */ + 6011 "00000000" // /* MW 5 */ + 6012 "00000110" // /* MW 4 */ + 6013 "01100010" // /* MW 3 */ + 6014 "10010001" // /* MW 2 */ + 6015 "10010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 6016 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6017 "00101001" // /* MW 7 */ + 6018 "00101010" // /* MW 6 */ + 6019 "10100001" // /* MW 5 */ + 6020 "11000110" // /* MW 4 */ + 6021 "00000011" // /* MW 3 */ + 6022 "10010010" // /* MW 2 */ + 6023 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6024 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6025 "00001001" // /* MW 7 */ + 6026 "01101010" // /* MW 6 */ + 6027 "10100011" // /* MW 5 */ + 6028 "11000110" // /* MW 4 */ + 6029 "00000111" // /* MW 3 */ + 6030 "10010010" // /* MW 2 */ + 6031 "00000001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 + 6032 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6033 "00000000" // /* MW 3 */ + 6034 "10001011" // /* MW 2 */ + 6035 "00011111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 750 26 first + 6036 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6037 "00101001" // /* MW 7 */ + 6038 "00101010" // /* MW 6 */ + 6039 "10100001" // /* MW 5 */ + 6040 "11100110" // /* MW 4 */ + 6041 "10100000" // /* MW 3 */ + 6042 "00001011" // /* MW 2 */ + 6043 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 749 26 first + 6044 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6045 "00101001" // /* MW 7 */ + 6046 "00000110" // /* MW 6 */ + 6047 "10100000" // /* MW 5 */ + 6048 "11100110" // /* MW 4 */ + 6049 "10100000" // /* MW 3 */ + 6050 "10001000" // /* MW 2 */ + 6051 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 + 6052 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6053 "00001001" // /* MW 9 */ + 6054 "01101010" // /* MW 8 */ + 6055 "10100011" // /* MW 7 */ + 6056 "11100110" // /* MW 6 */ + 6057 "00000000" // /* MW 5 */ + 6058 "00000101" // /* MW 4 */ + 6059 "00100011" // /* MW 3 */ + 6060 "11110111" // /* MW 2 */ + 6061 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 first + 6062 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6063 "00001001" // /* MW 11 */ + 6064 "01000110" // /* MW 10 */ + 6065 "10100010" // /* MW 9 */ + 6066 "11100110" // /* MW 8 */ + 6067 "10100000" // /* MW 7 */ + 6068 "10000010" // /* MW 6 */ + 6069 "00100101" // /* MW 5 */ + 6070 "11010111" // /* MW 4 */ + 6071 "01101110" // /* MW 3 */ + 6072 "10001001" // /* MW 2 */ + 6073 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 + 6074 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6075 "01110000" // /* MW 7 */ + 6076 "10000000" // /* MW 6 */ + 6077 "11000101" // /* MW 5 */ + 6078 "00000011" // /* MW 4 */ + 6079 "01100000" // /* MW 3 */ + 6080 "10001001" // /* MW 2 */ + 6081 "01100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 6082 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6083 "01110000" // /* MW 7 */ + 6084 "00000000" // /* MW 6 */ + 6085 "10000001" // /* MW 5 */ + 6086 "00000001" // /* MW 4 */ + 6087 "01100000" // /* MW 3 */ + 6088 "01000001" // /* MW 2 */ + 6089 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6090 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6091 "01110000" // /* MW 7 */ + 6092 "01010000" // /* MW 6 */ + 6093 "10000111" // /* MW 5 */ + 6094 "00000000" // /* MW 4 */ + 6095 "11000000" // /* MW 3 */ + 6096 "00010010" // /* MW 2 */ + 6097 "10110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6098 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6099 "01110000" // /* MW 7 */ + 6100 "10010000" // /* MW 6 */ + 6101 "11000111" // /* MW 5 */ + 6102 "00000010" // /* MW 4 */ + 6103 "11000000" // /* MW 3 */ + 6104 "00000010" // /* MW 2 */ + 6105 "10100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 6106 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6107 "01110110" // /* MW 9 */ + 6108 "01100000" // /* MW 8 */ + 6109 "11001000" // /* MW 7 */ + 6110 "00000001" // /* MW 6 */ + 6111 "10010000" // /* MW 5 */ + 6112 "00111011" // /* MW 4 */ + 6113 "01100001" // /* MW 3 */ + 6114 "10010001" // /* MW 2 */ + 6115 "00010011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1287 37 + 6116 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6117 "01110000" // /* MW 7 */ + 6118 "00000000" // /* MW 6 */ + 6119 "10000011" // /* MW 5 */ + 6120 "00000000" // /* MW 4 */ + 6121 "11000000" // /* MW 3 */ + 6122 "00001010" // /* MW 2 */ + 6123 "01100010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1218 20 first +.src_ref 2 "conv2d_bf16.h" 1287 37 first + 6124 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6256 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6256 delay_slots=5 */ + 6125 "01100000" // /* MW 11 */ + 6126 "00000000" // /* MW 10 */ + 6127 "00000000" // /* MW 9 */ + 6128 "00001110" // /* MW 8 */ + 6129 "00000011" // /* MW 7 */ + 6130 "00100100" // /* MW 6 */ + 6131 "00100000" // /* MW 5 */ + 6132 "01010111" // /* MW 4 */ + 6133 "11000000" // /* MW 3 */ + 6134 "00100010" // /* MW 2 */ + 6135 "01010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 738 8 +.delay_slot + 6136 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6137 "01110000" // /* MW 7 */ + 6138 "01100000" // /* MW 6 */ + 6139 "10101001" // /* MW 5 */ + 6140 "00000000" // /* MW 4 */ + 6141 "11000000" // /* MW 3 */ + 6142 "00011010" // /* MW 2 */ + 6143 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6144 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6145 "01110000" // /* MW 7 */ + 6146 "11000000" // /* MW 6 */ + 6147 "10100111" // /* MW 5 */ + 6148 "00000011" // /* MW 4 */ + 6149 "11000000" // /* MW 3 */ + 6150 "00110010" // /* MW 2 */ + 6151 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6152 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6153 "01110110" // /* MW 9 */ + 6154 "01100000" // /* MW 8 */ + 6155 "10110101" // /* MW 7 */ + 6156 "00000000" // /* MW 6 */ + 6157 "10010000" // /* MW 5 */ + 6158 "00101011" // /* MW 4 */ + 6159 "11000101" // /* MW 3 */ + 6160 "00111010" // /* MW 2 */ + 6161 "00010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.delay_slot + 6162 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6163 "01110000" // /* MW 7 */ + 6164 "10000000" // /* MW 6 */ + 6165 "11000010" // /* MW 5 */ + 6166 "00000010" // /* MW 4 */ + 6167 "11000000" // /* MW 3 */ + 6168 "00101010" // /* MW 2 */ + 6169 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.delay_slot + 6170 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6171 "01110000" // /* MW 7 */ + 6172 "11000000" // /* MW 6 */ + 6173 "01001101" // /* MW 5 */ + 6174 "00000000" // /* MW 4 */ + 6175 "01100000" // /* MW 3 */ + 6176 "10001001" // /* MW 2 */ + 6177 "11100001" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6178 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6179 "11101100" // /* MW 3 */ + 6180 "11011100" // /* MW 2 */ + 6181 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6182 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6183 "11101100" // /* MW 3 */ + 6184 "10111100" // /* MW 2 */ + 6185 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6186 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6187 "01110000" // /* MW 7 */ + 6188 "01110110" // /* MW 6 */ + 6189 "10101010" // /* MW 5 */ + 6190 "00000010" // /* MW 4 */ + 6191 "01100000" // /* MW 3 */ + 6192 "01011010" // /* MW 2 */ + 6193 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6194 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6195 "01110000" // /* MW 7 */ + 6196 "01110110" // /* MW 6 */ + 6197 "11011010" // /* MW 5 */ + 6198 "00000001" // /* MW 4 */ + 6199 "01100000" // /* MW 3 */ + 6200 "10111010" // /* MW 2 */ + 6201 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6202 "00111010" // VST x10, [p1]; J #6288 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6288 delay_slots=5 */ + 6203 "00100001" // /* MW 9 */ + 6204 "00000000" // /* MW 8 */ + 6205 "00000000" // /* MW 7 */ + 6206 "00010010" // /* MW 6 */ + 6207 "00000011" // /* MW 5 */ + 6208 "00000000" // /* MW 4 */ + 6209 "01100000" // /* MW 3 */ + 6210 "11010010" // /* MW 2 */ + 6211 "00100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6212 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6213 "01110000" // /* MW 7 */ + 6214 "01110110" // /* MW 6 */ + 6215 "10100010" // /* MW 5 */ + 6216 "00000010" // /* MW 4 */ + 6217 "01100000" // /* MW 3 */ + 6218 "10111010" // /* MW 2 */ + 6219 "00100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6220 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6221 "11101100" // /* MW 3 */ + 6222 "10001100" // /* MW 2 */ + 6223 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6224 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6225 "01110000" // /* MW 7 */ + 6226 "01110110" // /* MW 6 */ + 6227 "10010110" // /* MW 5 */ + 6228 "00000010" // /* MW 4 */ + 6229 "01100000" // /* MW 3 */ + 6230 "11010010" // /* MW 2 */ + 6231 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6232 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6233 "01110000" // /* MW 7 */ + 6234 "01110110" // /* MW 6 */ + 6235 "10001010" // /* MW 5 */ + 6236 "00000000" // /* MW 4 */ + 6237 "01100000" // /* MW 3 */ + 6238 "10111010" // /* MW 2 */ + 6239 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6240 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6241 "00000000" // /* MW 15 */ + 6242 "00000000" // /* MW 14 */ + 6243 "01111000" // /* MW 13 */ + 6244 "10100101" // /* MW 12 */ + 6245 "00000001" // /* MW 11 */ + 6246 "00000000" // /* MW 10 */ + 6247 "00000000" // /* MW 9 */ + 6248 "00000000" // /* MW 8 */ + 6249 "10010011" // /* MW 7 */ + 6250 "10100010" // /* MW 6 */ + 6251 "00100100" // /* MW 5 */ + 6252 "00000000" // /* MW 4 */ + 6253 "11110000" // /* MW 3 */ + 6254 "00101100" // /* MW 2 */ + 6255 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 +.src_ref 4 "vector.hpp" 1152 43 + 6256 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "10100011" // /* MW 3 */ + 6258 "11100000" // /* MW 2 */ + 6259 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6260 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "11100011" // /* MW 3 */ + 6262 "00010100" // /* MW 2 */ + 6263 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6264 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00100011" // /* MW 3 */ + 6266 "00000100" // /* MW 2 */ + 6267 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6268 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "01100011" // /* MW 3 */ + 6270 "00010100" // /* MW 2 */ + 6271 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6272 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "00010011" // /* MW 3 */ + 6274 "00000110" // /* MW 2 */ + 6275 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6276 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11100011" // /* MW 3 */ + 6278 "00010101" // /* MW 2 */ + 6279 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6280 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6281 "01110000" // /* MW 7 */ + 6282 "10100101" // /* MW 6 */ + 6283 "00000001" // /* MW 5 */ + 6284 "00000000" // /* MW 4 */ + 6285 "01100000" // /* MW 3 */ + 6286 "00100100" // /* MW 2 */ + 6287 "10010100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1143 12 first + 6288 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6289 "01011000" // /* MW 11 */ + 6290 "00000000" // /* MW 10 */ + 6291 "01000000" // /* MW 9 */ + 6292 "00000001" // /* MW 8 */ + 6293 "00110101" // /* MW 7 */ + 6294 "00000110" // /* MW 6 */ + 6295 "00100000" // /* MW 5 */ + 6296 "01010111" // /* MW 4 */ + 6297 "01101111" // /* MW 3 */ + 6298 "10010010" // /* MW 2 */ + 6299 "11100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.delay_slot + 6300 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6301 "10000000" // /* MW 3 */ + 6302 "01000100" // /* MW 2 */ + 6303 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.delay_slot + 6304 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6305 "10100000" // /* MW 3 */ + 6306 "01001001" // /* MW 2 */ + 6307 "00011010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 6308 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6309 "00000001" // /* MW 5 */ + 6310 "00011110" // /* MW 4 */ + 6311 "00000101" // /* MW 3 */ + 6312 "01110010" // /* MW 2 */ + 6313 "11101011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.delay_slot + 6314 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "10000000" // /* MW 3 */ + 6316 "01001110" // /* MW 2 */ + 6317 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6319 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6320 "10000100" // J #6992 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6992 delay_slots=5 */ + 6321 "00000000" // /* MW 5 */ + 6322 "00000000" // /* MW 4 */ + 6323 "10101000" // /* MW 3 */ + 6324 "00001101" // /* MW 2 */ + 6325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6327 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6335 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 1364 80 +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6336 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6337 "01111000" // /* MW 11 */ + 6338 "10010000" // /* MW 10 */ + 6339 "10110011" // /* MW 9 */ + 6340 "00001000" // /* MW 8 */ + 6341 "11100001" // /* MW 7 */ + 6342 "00000100" // /* MW 6 */ + 6343 "10001011" // /* MW 5 */ + 6344 "00001100" // /* MW 4 */ + 6345 "00100010" // /* MW 3 */ + 6346 "01111110" // /* MW 2 */ + 6347 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1369 80 + 6348 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6349 "01111000" // /* MW 11 */ + 6350 "01000000" // /* MW 10 */ + 6351 "01100010" // /* MW 9 */ + 6352 "00000011" // /* MW 8 */ + 6353 "11010100" // /* MW 7 */ + 6354 "00011011" // /* MW 6 */ + 6355 "00001011" // /* MW 5 */ + 6356 "01010110" // /* MW 4 */ + 6357 "10000010" // /* MW 3 */ + 6358 "10010000" // /* MW 2 */ + 6359 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 807 26 +.src_ref 2 "conv2d_bf16.h" 808 26 +.src_ref 2 "conv2d_bf16.h" 809 26 +.src_ref 2 "conv2d_bf16.h" 810 26 +.src_ref 2 "conv2d_bf16.h" 1436 26 +.src_ref 2 "conv2d_bf16.h" 1437 26 +.src_ref 2 "conv2d_bf16.h" 1438 26 +.src_ref 2 "conv2d_bf16.h" 1439 26 + 6360 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6361 "01111000" // /* MW 9 */ + 6362 "11010000" // /* MW 8 */ + 6363 "00000101" // /* MW 7 */ + 6364 "10001001" // /* MW 6 */ + 6365 "00110001" // /* MW 5 */ + 6366 "00011001" // /* MW 4 */ + 6367 "00000000" // /* MW 3 */ + 6368 "10010100" // /* MW 2 */ + 6369 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 802 83 +.src_ref 2 "conv2d_bf16.h" 1428 39 + 6370 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6371 "01111000" // /* MW 11 */ + 6372 "10010000" // /* MW 10 */ + 6373 "11000111" // /* MW 9 */ + 6374 "11001010" // /* MW 8 */ + 6375 "00100000" // /* MW 7 */ + 6376 "00000001" // /* MW 6 */ + 6377 "00001011" // /* MW 5 */ + 6378 "01011100" // /* MW 4 */ + 6379 "10000110" // /* MW 3 */ + 6380 "10011000" // /* MW 2 */ + 6381 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 794 8 + 6382 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6383 "01111000" // /* MW 11 */ + 6384 "01010000" // /* MW 10 */ + 6385 "10000111" // /* MW 9 */ + 6386 "00001000" // /* MW 8 */ + 6387 "10010000" // /* MW 7 */ + 6388 "00000001" // /* MW 6 */ + 6389 "00001011" // /* MW 5 */ + 6390 "00000010" // /* MW 4 */ + 6391 "00100101" // /* MW 3 */ + 6392 "10000011" // /* MW 2 */ + 6393 "11111010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 794 8 +.src_ref 2 "conv2d_bf16.h" 1455 20 + 6394 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6395 "01111000" // /* MW 9 */ + 6396 "01010000" // /* MW 8 */ + 6397 "01000101" // /* MW 7 */ + 6398 "00001011" // /* MW 6 */ + 6399 "10000000" // /* MW 5 */ + 6400 "00000001" // /* MW 4 */ + 6401 "00100000" // /* MW 3 */ + 6402 "11010110" // /* MW 2 */ + 6403 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 12 + 6404 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6405 "00010000" // /* MW 9 */ + 6406 "10101000" // /* MW 8 */ + 6407 "00110100" // /* MW 7 */ + 6408 "00000101" // /* MW 6 */ + 6409 "00000000" // /* MW 5 */ + 6410 "00000000" // /* MW 4 */ + 6411 "00100000" // /* MW 3 */ + 6412 "00110110" // /* MW 2 */ + 6413 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 first +.src_ref 2 "conv2d_bf16.h" 1873 + 6414 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6415 "01110010" // /* MW 5 */ + 6416 "11011111" // /* MW 4 */ + 6417 "00100110" // /* MW 3 */ + 6418 "10000111" // /* MW 2 */ + 6419 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6420 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6421 "11000110" // /* MW 3 */ + 6422 "00011101" // /* MW 2 */ + 6423 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 80 first + 6424 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6425 "00000110" // /* MW 3 */ + 6426 "10001010" // /* MW 2 */ + 6427 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 799 87 first + 6428 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6429 "10000110" // /* MW 3 */ + 6430 "10011110" // /* MW 2 */ + 6431 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 83 first + 6432 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "11010110" // /* MW 3 */ + 6434 "00011110" // /* MW 2 */ + 6435 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 83 first + 6436 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "11110110" // /* MW 3 */ + 6438 "11001010" // /* MW 2 */ + 6439 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 66 first + 6440 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "10110110" // /* MW 3 */ + 6442 "00010111" // /* MW 2 */ + 6443 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1443 71 first + 6444 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6445 "10010110" // /* MW 3 */ + 6446 "00000111" // /* MW 2 */ + 6447 "00000011" // /* MW 1 */ + 6448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6449 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1369 89 + 6450 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6451 "00000000" // /* MW 3 */ + 6452 "10011000" // /* MW 2 */ + 6453 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 +.src_ref 2 "conv2d_bf16.h" 1518 37 + 6454 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6455 "00000000" // /* MW 3 */ + 6456 "00000111" // /* MW 2 */ + 6457 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 + 6458 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6459 "00000000" // /* MW 3 */ + 6460 "11011100" // /* MW 2 */ + 6461 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 first + 6462 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6463 "11100000" // /* MW 3 */ + 6464 "00001111" // /* MW 2 */ + 6465 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 89 first + 6466 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6467 "11000000" // /* MW 5 */ + 6468 "00011110" // /* MW 4 */ + 6469 "11101110" // /* MW 3 */ + 6470 "01111111" // /* MW 2 */ + 6471 "11101111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 + 6472 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6473 "01110000" // /* MW 7 */ + 6474 "10010000" // /* MW 6 */ + 6475 "11000111" // /* MW 5 */ + 6476 "00000011" // /* MW 4 */ + 6477 "01100000" // /* MW 3 */ + 6478 "00101011" // /* MW 2 */ + 6479 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1362 31 first +.src_ref 2 "conv2d_bf16.h" 1429 50 +.src_ref 2 "conv2d_bf16.h" 1443 16 first +.loop_nesting 1 + 6480 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6656 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6481 "01100000" // /* MW 13 */ + 6482 "10000001" // /* MW 12 */ + 6483 "01110001" // /* MW 11 */ + 6484 "00000010" // /* MW 10 */ + 6485 "10100000" // /* MW 9 */ + 6486 "10001111" // /* MW 8 */ + 6487 "00000000" // /* MW 7 */ + 6488 "00000000" // /* MW 6 */ + 6489 "00101000" // /* MW 5 */ + 6490 "00101000" // /* MW 4 */ + 6491 "01111010" // /* MW 3 */ + 6492 "10000101" // /* MW 2 */ + 6493 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1364 31 first +.src_ref 2 "conv2d_bf16.h" 1443 16 + 6494 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6704 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6495 "00010000" // /* MW 11 */ + 6496 "00011000" // /* MW 10 */ + 6497 "10111101" // /* MW 9 */ + 6498 "00000101" // /* MW 8 */ + 6499 "00000000" // /* MW 7 */ + 6500 "00000000" // /* MW 6 */ + 6501 "00101000" // /* MW 5 */ + 6502 "00101000" // /* MW 4 */ + 6503 "01111010" // /* MW 3 */ + 6504 "00001101" // /* MW 2 */ + 6505 "11001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1428 39 first +.src_ref 2 "conv2d_bf16.h" 1443 16 first + 6506 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6507 "01001000" // /* MW 11 */ + 6508 "00111111" // /* MW 10 */ + 6509 "10111111" // /* MW 9 */ + 6510 "01101110" // /* MW 8 */ + 6511 "11101001" // /* MW 7 */ + 6512 "00000101" // /* MW 6 */ + 6513 "00101000" // /* MW 5 */ + 6514 "00000101" // /* MW 4 */ + 6515 "01110110" // /* MW 3 */ + 6516 "10000001" // /* MW 2 */ + 6517 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6518 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6519 "01111110" // /* MW 9 */ + 6520 "10010000" // /* MW 8 */ + 6521 "01000111" // /* MW 7 */ + 6522 "00000001" // /* MW 6 */ + 6523 "00010100" // /* MW 5 */ + 6524 "00000001" // /* MW 4 */ + 6525 "01110011" // /* MW 3 */ + 6526 "01011001" // /* MW 2 */ + 6527 "01010101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1367 31 first + 6528 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6529 "00101000" // /* MW 5 */ + 6530 "00000001" // /* MW 4 */ + 6531 "01110110" // /* MW 3 */ + 6532 "10010101" // /* MW 2 */ + 6533 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1369 31 first + 6534 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6535 "10101000" // /* MW 5 */ + 6536 "00100001" // /* MW 4 */ + 6537 "01111010" // /* MW 3 */ + 6538 "00011101" // /* MW 2 */ + 6539 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1372 31 first + 6540 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6541 "00101000" // /* MW 5 */ + 6542 "00101000" // /* MW 4 */ + 6543 "01111010" // /* MW 3 */ + 6544 "10100101" // /* MW 2 */ + 6545 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1374 31 first + 6546 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6547 "00101000" // /* MW 5 */ + 6548 "00101000" // /* MW 4 */ + 6549 "01111010" // /* MW 3 */ + 6550 "00101101" // /* MW 2 */ + 6551 "11001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1377 31 first + 6552 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6553 "10101000" // /* MW 5 */ + 6554 "00000000" // /* MW 4 */ + 6555 "01110110" // /* MW 3 */ + 6556 "10110101" // /* MW 2 */ + 6557 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1379 31 first + 6558 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6559 "00101000" // /* MW 5 */ + 6560 "00000011" // /* MW 4 */ + 6561 "01110110" // /* MW 3 */ + 6562 "00111101" // /* MW 2 */ + 6563 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 first + 6564 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6565 "10101000" // /* MW 5 */ + 6566 "00000011" // /* MW 4 */ + 6567 "01110110" // /* MW 3 */ + 6568 "01000101" // /* MW 2 */ + 6569 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6570 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6571 "11101110" // /* MW 9 */ + 6572 "00101101" // /* MW 8 */ + 6573 "01101001" // /* MW 7 */ + 6574 "00000001" // /* MW 6 */ + 6575 "00010100" // /* MW 5 */ + 6576 "00010010" // /* MW 4 */ + 6577 "01110101" // /* MW 3 */ + 6578 "01001101" // /* MW 2 */ + 6579 "01101000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6580 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6581 "11101110" // /* MW 9 */ + 6582 "00101111" // /* MW 8 */ + 6583 "10101001" // /* MW 7 */ + 6584 "00000010" // /* MW 6 */ + 6585 "00010100" // /* MW 5 */ + 6586 "00010100" // /* MW 4 */ + 6587 "01110101" // /* MW 3 */ + 6588 "10000001" // /* MW 2 */ + 6589 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6590 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6591 "01101001" // /* MW 11 */ + 6592 "00001011" // /* MW 10 */ + 6593 "01001000" // /* MW 9 */ + 6594 "11000010" // /* MW 8 */ + 6595 "11011011" // /* MW 7 */ + 6596 "00010001" // /* MW 6 */ + 6597 "00101010" // /* MW 5 */ + 6598 "00101000" // /* MW 4 */ + 6599 "01111010" // /* MW 3 */ + 6600 "00000001" // /* MW 2 */ + 6601 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6602 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6603 "01101001" // /* MW 9 */ + 6604 "00110101" // /* MW 8 */ + 6605 "01001001" // /* MW 7 */ + 6606 "11000010" // /* MW 6 */ + 6607 "11011111" // /* MW 5 */ + 6608 "00010001" // /* MW 4 */ + 6609 "01110101" // /* MW 3 */ + 6610 "10000001" // /* MW 2 */ + 6611 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6612 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6613 "01101001" // /* MW 3 */ + 6614 "01001001" // /* MW 2 */ + 6615 "01001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6616 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6617 "01101001" // /* MW 3 */ + 6618 "01110101" // /* MW 2 */ + 6619 "01001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.src_ref 2 "conv2d_bf16.h" 1437 26 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6620 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6621 "00111101" // /* MW 9 */ + 6622 "10000100" // /* MW 8 */ + 6623 "10100001" // /* MW 7 */ + 6624 "11000110" // /* MW 6 */ + 6625 "01011111" // /* MW 5 */ + 6626 "10001011" // /* MW 4 */ + 6627 "10101010" // /* MW 3 */ + 6628 "00000000" // /* MW 2 */ + 6629 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1436 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6630 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6631 "00111101" // /* MW 7 */ + 6632 "10000000" // /* MW 6 */ + 6633 "10100000" // /* MW 5 */ + 6634 "00000000" // /* MW 4 */ + 6635 "10010100" // /* MW 3 */ + 6636 "00000001" // /* MW 2 */ + 6637 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1438 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6638 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6639 "00111101" // /* MW 7 */ + 6640 "10001000" // /* MW 6 */ + 6641 "10100010" // /* MW 5 */ + 6642 "00000000" // /* MW 4 */ + 6643 "11010100" // /* MW 3 */ + 6644 "00000001" // /* MW 2 */ + 6645 "00000011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1439 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6646 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6647 "00111101" // /* MW 9 */ + 6648 "10001100" // /* MW 8 */ + 6649 "10100011" // /* MW 7 */ + 6650 "00011101" // /* MW 6 */ + 6651 "00010100" // /* MW 5 */ + 6652 "00010010" // /* MW 4 */ + 6653 "01110101" // /* MW 3 */ + 6654 "00000001" // /* MW 2 */ + 6655 "01010101" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 6656 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6657 "10110111" // /* MW 5 */ + 6658 "00010110" // /* MW 4 */ + 6659 "10000010" // /* MW 3 */ + 6660 "10000010" // /* MW 2 */ + 6661 "10100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6662 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6663 "00001001" // /* MW 9 */ + 6664 "00101010" // /* MW 8 */ + 6665 "10011001" // /* MW 7 */ + 6666 "11000110" // /* MW 6 */ + 6667 "01011111" // /* MW 5 */ + 6668 "00111100" // /* MW 4 */ + 6669 "00101010" // /* MW 3 */ + 6670 "00101000" // /* MW 2 */ + 6671 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6672 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6673 "00001001" // /* MW 9 */ + 6674 "00000100" // /* MW 8 */ + 6675 "10011000" // /* MW 7 */ + 6676 "11000110" // /* MW 6 */ + 6677 "01011011" // /* MW 5 */ + 6678 "10111100" // /* MW 4 */ + 6679 "10101001" // /* MW 3 */ + 6680 "00000000" // /* MW 2 */ + 6681 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6682 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6683 "00001001" // /* MW 7 */ + 6684 "01101000" // /* MW 6 */ + 6685 "10011011" // /* MW 5 */ + 6686 "00000000" // /* MW 4 */ + 6687 "10010100" // /* MW 3 */ + 6688 "00000001" // /* MW 2 */ + 6689 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6690 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6691 "00001001" // /* MW 13 */ + 6692 "01000110" // /* MW 12 */ + 6693 "10011010" // /* MW 11 */ + 6694 "01101100" // /* MW 10 */ + 6695 "00000101" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "00000000" // /* MW 7 */ + 6698 "00000000" // /* MW 6 */ + 6699 "10101000" // /* MW 5 */ + 6700 "00000011" // /* MW 4 */ + 6701 "01110110" // /* MW 3 */ + 6702 "10000001" // /* MW 2 */ + 6703 "00000010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6704 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6705 "00000000" // /* MW 15 */ + 6706 "00000000" // /* MW 14 */ + 6707 "11101000" // /* MW 13 */ + 6708 "10101111" // /* MW 12 */ + 6709 "01000101" // /* MW 11 */ + 6710 "00000001" // /* MW 10 */ + 6711 "00000000" // /* MW 9 */ + 6712 "00000000" // /* MW 8 */ + 6713 "01011011" // /* MW 7 */ + 6714 "00000001" // /* MW 6 */ + 6715 "00101000" // /* MW 5 */ + 6716 "00100100" // /* MW 4 */ + 6717 "01111010" // /* MW 3 */ + 6718 "00000001" // /* MW 2 */ + 6719 "01010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 801 30 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 6720 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6721 "11100000" // /* MW 11 */ + 6722 "10101101" // /* MW 10 */ + 6723 "10000101" // /* MW 9 */ + 6724 "00000000" // /* MW 8 */ + 6725 "10001011" // /* MW 7 */ + 6726 "10011100" // /* MW 6 */ + 6727 "00100101" // /* MW 5 */ + 6728 "10010111" // /* MW 4 */ + 6729 "11111111" // /* MW 3 */ + 6730 "00001100" // /* MW 2 */ + 6731 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.src_ref 2 "conv2d_bf16.h" 1517 32 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6732 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6733 "00001001" // /* MW 11 */ + 6734 "00101010" // /* MW 10 */ + 6735 "10011001" // /* MW 9 */ + 6736 "11000110" // /* MW 8 */ + 6737 "01011111" // /* MW 7 */ + 6738 "00111100" // /* MW 6 */ + 6739 "00100010" // /* MW 5 */ + 6740 "00010111" // /* MW 4 */ + 6741 "01101111" // /* MW 3 */ + 6742 "10010001" // /* MW 2 */ + 6743 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.src_ref 2 "conv2d_bf16.h" 1518 37 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6744 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6745 "00001001" // /* MW 11 */ + 6746 "00000100" // /* MW 10 */ + 6747 "10011000" // /* MW 9 */ + 6748 "11000110" // /* MW 8 */ + 6749 "01011011" // /* MW 7 */ + 6750 "10111100" // /* MW 6 */ + 6751 "00100001" // /* MW 5 */ + 6752 "10010111" // /* MW 4 */ + 6753 "01101111" // /* MW 3 */ + 6754 "10010001" // /* MW 2 */ + 6755 "01110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6756 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6757 "00001001" // /* MW 7 */ + 6758 "01101000" // /* MW 6 */ + 6759 "10011011" // /* MW 5 */ + 6760 "11100110" // /* MW 4 */ + 6761 "10100000" // /* MW 3 */ + 6762 "10001000" // /* MW 2 */ + 6763 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.src_ref 2 "conv2d_bf16.h" 1428 39 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6764 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6765 "00001001" // /* MW 9 */ + 6766 "01000110" // /* MW 8 */ + 6767 "10011010" // /* MW 7 */ + 6768 "11100110" // /* MW 6 */ + 6769 "10000000" // /* MW 5 */ + 6770 "10011011" // /* MW 4 */ + 6771 "00100000" // /* MW 3 */ + 6772 "10110111" // /* MW 2 */ + 6773 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 30 first + 6774 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6775 "01011011" // /* MW 3 */ + 6776 "00001011" // /* MW 2 */ + 6777 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6778 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6779 "01011111" // /* MW 3 */ + 6780 "10001011" // /* MW 2 */ + 6781 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6782 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6783 "00001001" // /* MW 7 */ + 6784 "00000100" // /* MW 6 */ + 6785 "10011000" // /* MW 5 */ + 6786 "11000110" // /* MW 4 */ + 6787 "01011011" // /* MW 3 */ + 6788 "10111100" // /* MW 2 */ + 6789 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6790 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6791 "00001001" // /* MW 7 */ + 6792 "00101010" // /* MW 6 */ + 6793 "10011001" // /* MW 5 */ + 6794 "11000110" // /* MW 4 */ + 6795 "01011111" // /* MW 3 */ + 6796 "00111100" // /* MW 2 */ + 6797 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6798 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6799 "00001001" // /* MW 3 */ + 6800 "01000110" // /* MW 2 */ + 6801 "10011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first + 6802 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6803 "00001001" // /* MW 3 */ + 6804 "01101000" // /* MW 2 */ + 6805 "10011011" // /* MW 1 */ + 6806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6807 "00000000" // /* MW 1 */ + 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6809 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6810 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6811 "00010110" // /* MW 3 */ + 6812 "00010000" // /* MW 2 */ + 6813 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6814 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6815 "10010110" // /* MW 3 */ + 6816 "10010000" // /* MW 2 */ + 6817 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1455 20 first + 6818 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6928 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6928 delay_slots=5 */ + 6819 "01100001" // /* MW 9 */ + 6820 "00000000" // /* MW 8 */ + 6821 "00000000" // /* MW 7 */ + 6822 "01100010" // /* MW 6 */ + 6823 "00000011" // /* MW 5 */ + 6824 "00101010" // /* MW 4 */ + 6825 "11000000" // /* MW 3 */ + 6826 "00011010" // /* MW 2 */ + 6827 "00010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.delay_slot + 6828 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6829 "01010110" // /* MW 3 */ + 6830 "00010000" // /* MW 2 */ + 6831 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6832 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "10010110" // /* MW 3 */ + 6834 "00010001" // /* MW 2 */ + 6835 "00001001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6836 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "11010110" // /* MW 3 */ + 6838 "10010001" // /* MW 2 */ + 6839 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6840 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6841 "00010110" // /* MW 3 */ + 6842 "10010001" // /* MW 2 */ + 6843 "00001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6844 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6845 "01010110" // /* MW 3 */ + 6846 "00010001" // /* MW 2 */ + 6847 "00001100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6848 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "11101100" // /* MW 3 */ + 6850 "11011100" // /* MW 2 */ + 6851 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6852 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "11101100" // /* MW 3 */ + 6854 "10001100" // /* MW 2 */ + 6855 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6856 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6857 "01110000" // /* MW 7 */ + 6858 "01110110" // /* MW 6 */ + 6859 "10101010" // /* MW 5 */ + 6860 "00000010" // /* MW 4 */ + 6861 "01100000" // /* MW 3 */ + 6862 "01011010" // /* MW 2 */ + 6863 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6864 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6865 "01110000" // /* MW 7 */ + 6866 "01110110" // /* MW 6 */ + 6867 "01011010" // /* MW 5 */ + 6868 "00000000" // /* MW 4 */ + 6869 "01100000" // /* MW 3 */ + 6870 "10001010" // /* MW 2 */ + 6871 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6872 "00111010" // VST x10, [p5]; J #6960 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6960 delay_slots=5 */ + 6873 "00100001" // /* MW 9 */ + 6874 "00000000" // /* MW 8 */ + 6875 "00000000" // /* MW 7 */ + 6876 "01100110" // /* MW 6 */ + 6877 "00000011" // /* MW 5 */ + 6878 "00000000" // /* MW 4 */ + 6879 "01100000" // /* MW 3 */ + 6880 "11010010" // /* MW 2 */ + 6881 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6882 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6883 "01110000" // /* MW 7 */ + 6884 "01110110" // /* MW 6 */ + 6885 "10001010" // /* MW 5 */ + 6886 "00000010" // /* MW 4 */ + 6887 "01100000" // /* MW 3 */ + 6888 "10001010" // /* MW 2 */ + 6889 "10100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6890 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6891 "11101100" // /* MW 3 */ + 6892 "10111100" // /* MW 2 */ + 6893 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6894 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6895 "01110000" // /* MW 7 */ + 6896 "01110110" // /* MW 6 */ + 6897 "10010110" // /* MW 5 */ + 6898 "00000010" // /* MW 4 */ + 6899 "01100000" // /* MW 3 */ + 6900 "01010010" // /* MW 2 */ + 6901 "01101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6902 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6903 "01110010" // /* MW 9 */ + 6904 "01110110" // /* MW 8 */ + 6905 "00100010" // /* MW 7 */ + 6906 "00000010" // /* MW 6 */ + 6907 "01010011" // /* MW 5 */ + 6908 "00010100" // /* MW 4 */ + 6909 "11110111" // /* MW 3 */ + 6910 "00101100" // /* MW 2 */ + 6911 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6912 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6913 "00000000" // /* MW 15 */ + 6914 "00000000" // /* MW 14 */ + 6915 "01111000" // /* MW 13 */ + 6916 "10100101" // /* MW 12 */ + 6917 "00000001" // /* MW 11 */ + 6918 "00000000" // /* MW 10 */ + 6919 "00000000" // /* MW 9 */ + 6920 "00000000" // /* MW 8 */ + 6921 "10010011" // /* MW 7 */ + 6922 "11100010" // /* MW 6 */ + 6923 "00100100" // /* MW 5 */ + 6924 "00000000" // /* MW 4 */ + 6925 "11110000" // /* MW 3 */ + 6926 "00101100" // /* MW 2 */ + 6927 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 +.src_ref 4 "vector.hpp" 1152 43 + 6928 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "10100011" // /* MW 3 */ + 6930 "01100000" // /* MW 2 */ + 6931 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6932 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "11100011" // /* MW 3 */ + 6934 "00010100" // /* MW 2 */ + 6935 "00001100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6936 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6937 "00100011" // /* MW 3 */ + 6938 "00000100" // /* MW 2 */ + 6939 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6940 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6941 "01100011" // /* MW 3 */ + 6942 "00010100" // /* MW 2 */ + 6943 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6944 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6945 "10100011" // /* MW 3 */ + 6946 "01100001" // /* MW 2 */ + 6947 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6948 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6949 "11100011" // /* MW 3 */ + 6950 "00010101" // /* MW 2 */ + 6951 "00001111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6952 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6953 "01110000" // /* MW 7 */ + 6954 "10100101" // /* MW 6 */ + 6955 "00000001" // /* MW 5 */ + 6956 "00000000" // /* MW 4 */ + 6957 "01100000" // /* MW 3 */ + 6958 "00100100" // /* MW 2 */ + 6959 "10011100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1337 12 first + 6960 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6961 "01000000" // /* MW 5 */ + 6962 "11110101" // /* MW 4 */ + 6963 "01101110" // /* MW 3 */ + 6964 "11000010" // /* MW 2 */ + 6965 "01100010" // /* MW 1 */ +.delay_slot + 6966 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6967 "10010000" // /* MW 3 */ + 6968 "10001011" // /* MW 2 */ + 6969 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6977 "00000000" // /* MW 15 */ + 6978 "00000000" // /* MW 14 */ + 6979 "01111000" // /* MW 13 */ + 6980 "10100101" // /* MW 12 */ + 6981 "00000001" // /* MW 11 */ + 6982 "00000000" // /* MW 10 */ + 6983 "00000000" // /* MW 9 */ + 6984 "00000000" // /* MW 8 */ + 6985 "01011011" // /* MW 7 */ + 6986 "00000001" // /* MW 6 */ + 6987 "00100000" // /* MW 5 */ + 6988 "00000000" // /* MW 4 */ + 6989 "11110000" // /* MW 3 */ + 6990 "00101100" // /* MW 2 */ + 6991 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6992 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6993 "11110001" // /* MW 3 */ + 6994 "11101101" // /* MW 2 */ + 6995 "00000111" // /* MW 1 */ + 6996 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6997 "10010001" // /* MW 3 */ + 6998 "11110001" // /* MW 2 */ + 6999 "00000111" // /* MW 1 */ + 7000 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7001 "00110001" // /* MW 3 */ + 7002 "11110101" // /* MW 2 */ + 7003 "00000111" // /* MW 1 */ + 7004 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7005 "00011001" // /* MW 3 */ + 7006 "11101011" // /* MW 2 */ + 7007 "00000111" // /* MW 1 */ + 7008 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7009 "10011001" // /* MW 3 */ + 7010 "11111011" // /* MW 2 */ + 7011 "00000111" // /* MW 1 */ + 7012 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7013 "11010001" // /* MW 3 */ + 7014 "11111101" // /* MW 2 */ + 7015 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 first + 7016 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7017 "00000000" // /* MW 3 */ + 7018 "00101000" // /* MW 2 */ + 7019 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 +.delay_slot + 7020 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7021 "00000001" // /* MW 5 */ + 7022 "00000000" // /* MW 4 */ + 7023 "00000000" // /* MW 3 */ + 7024 "11110000" // /* MW 2 */ + 7025 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + 7033 "00000000" // /* MW 1 */ +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 74 first +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 81 4 +.function_start + 7040 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7041 "00010000" // /* MW 9 */ + 7042 "00100000" // /* MW 8 */ + 7043 "00110010" // /* MW 7 */ + 7044 "11110010" // /* MW 6 */ + 7045 "00000001" // /* MW 5 */ + 7046 "00000000" // /* MW 4 */ + 7047 "00000000" // /* MW 3 */ + 7048 "00100000" // /* MW 2 */ + 7049 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 first +.src_ref 7 "superkernels.cpp" 81 4 + 7050 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7051 "01111000" // /* MW 9 */ + 7052 "11010000" // /* MW 8 */ + 7053 "01001011" // /* MW 7 */ + 7054 "00001000" // /* MW 6 */ + 7055 "00010000" // /* MW 5 */ + 7056 "00000000" // /* MW 4 */ + 7057 "11010000" // /* MW 3 */ + 7058 "11000010" // /* MW 2 */ + 7059 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 74 + 7060 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7061 "00000001" // /* MW 5 */ + 7062 "00000000" // /* MW 4 */ + 7063 "00000000" // /* MW 3 */ + 7064 "00001000" // /* MW 2 */ + 7065 "00000000" // /* MW 1 */ + 7066 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7067 "01010101" // /* MW 3 */ + 7068 "11110000" // /* MW 2 */ + 7069 "00001111" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ + 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7075 "00000000" // /* MW 1 */ + 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7077 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 79 16 + 7078 "10000100" // JNZ r16, #7248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7248 delay_slots=5 */ + 7079 "00000001" // /* MW 5 */ + 7080 "01000000" // /* MW 4 */ + 7081 "00101000" // /* MW 3 */ + 7082 "00001110" // /* MW 2 */ + 7083 "10000000" // /* MW 1 */ +.delay_slot + 7084 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7085 "10011101" // /* MW 3 */ + 7086 "11111011" // /* MW 2 */ + 7087 "00001111" // /* MW 1 */ +.delay_slot + 7088 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7089 "00011101" // /* MW 3 */ + 7090 "11111111" // /* MW 2 */ + 7091 "00001111" // /* MW 1 */ +.delay_slot + 7092 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7093 "10011101" // /* MW 3 */ + 7094 "11101101" // /* MW 2 */ + 7095 "00001111" // /* MW 1 */ +.delay_slot + 7096 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7097 "00111101" // /* MW 3 */ + 7098 "11110100" // /* MW 2 */ + 7099 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 7100 "01000100" // MOVXM r15, #509504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7101 "10000000" // /* MW 5 */ + 7102 "10101100" // /* MW 4 */ + 7103 "11000111" // /* MW 3 */ + 7104 "00000111" // /* MW 2 */ + 7105 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7106 "00111010" // MOVS p6, p1; MOVXM p7, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7107 "00010001" // /* MW 9 */ + 7108 "00110010" // /* MW 8 */ + 7109 "10110010" // /* MW 7 */ + 7110 "11110011" // /* MW 6 */ + 7111 "00000001" // /* MW 5 */ + 7112 "00000000" // /* MW 4 */ + 7113 "01100000" // /* MW 3 */ + 7114 "10010001" // /* MW 2 */ + 7115 "11010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7116 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7117 "00010000" // /* MW 11 */ + 7118 "00110000" // /* MW 10 */ + 7119 "10110010" // /* MW 9 */ + 7120 "11110011" // /* MW 8 */ + 7121 "00000001" // /* MW 7 */ + 7122 "00000000" // /* MW 6 */ + 7123 "00001011" // /* MW 5 */ + 7124 "10001111" // /* MW 4 */ + 7125 "11100001" // /* MW 3 */ + 7126 "11000000" // /* MW 2 */ + 7127 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7129 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7131 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7132 "00000100" // JL #2912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2912 delay_slots=5 */ + 7133 "00000001" // /* MW 5 */ + 7134 "00000000" // /* MW 4 */ + 7135 "10110000" // /* MW 3 */ + 7136 "00000101" // /* MW 2 */ + 7137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7139 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7140 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00110001" // /* MW 3 */ + 7142 "00100000" // /* MW 2 */ + 7143 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 7144 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7145 "00000101" // /* MW 3 */ + 7146 "00100000" // /* MW 2 */ + 7147 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 7148 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7149 "01110000" // /* MW 7 */ + 7150 "01100000" // /* MW 6 */ + 7151 "10110000" // /* MW 5 */ + 7152 "00000011" // /* MW 4 */ + 7153 "00110000" // /* MW 3 */ + 7154 "11000010" // /* MW 2 */ + 7155 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 7156 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7157 "01110000" // /* MW 11 */ + 7158 "01100000" // /* MW 10 */ + 7159 "00110010" // /* MW 9 */ + 7160 "00000000" // /* MW 8 */ + 7161 "01011011" // /* MW 7 */ + 7162 "00000001" // /* MW 6 */ + 7163 "00100000" // /* MW 5 */ + 7164 "00000000" // /* MW 4 */ + 7165 "11110000" // /* MW 3 */ + 7166 "00101100" // /* MW 2 */ + 7167 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.return_address + 7168 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7169 "10000101" // /* MW 3 */ + 7170 "01100111" // /* MW 2 */ + 7171 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 +.src_ref 7 "superkernels.cpp" 87 35 first + 7172 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7173 "00010000" // /* MW 9 */ + 7174 "00100010" // /* MW 8 */ + 7175 "10110010" // /* MW 7 */ + 7176 "11110000" // /* MW 6 */ + 7177 "00000001" // /* MW 5 */ + 7178 "00000000" // /* MW 4 */ + 7179 "01010000" // /* MW 3 */ + 7180 "11000001" // /* MW 2 */ + 7181 "01001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 37 first +.src_ref 7 "superkernels.cpp" 89 13 + 7182 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7183 "00010000" // /* MW 9 */ + 7184 "00101110" // /* MW 8 */ + 7185 "00110010" // /* MW 7 */ + 7186 "11110000" // /* MW 6 */ + 7187 "00000001" // /* MW 5 */ + 7188 "00000000" // /* MW 4 */ + 7189 "01010000" // /* MW 3 */ + 7190 "11001111" // /* MW 2 */ + 7191 "01000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 73 + 7192 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00111010" // /* MW 3 */ + 7194 "00000110" // /* MW 2 */ + 7195 "00000010" // /* MW 1 */ + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 110 + 7198 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "01011010" // /* MW 3 */ + 7200 "00010110" // /* MW 2 */ + 7201 "00000010" // /* MW 1 */ + 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7203 "00000000" // /* MW 1 */ + 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7205 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 first +.src_ref 7 "superkernels.cpp" 113 2 + 7206 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7207 "01110000" // /* MW 7 */ + 7208 "01100000" // /* MW 6 */ + 7209 "10110110" // /* MW 5 */ + 7210 "00000000" // /* MW 4 */ + 7211 "00110000" // /* MW 3 */ + 7212 "11000010" // /* MW 2 */ + 7213 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 57 first + 7214 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7215 "00001111" // /* MW 3 */ + 7216 "11100001" // /* MW 2 */ + 7217 "00010100" // /* MW 1 */ + 7218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7219 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 94 + 7220 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7221 "00001111" // /* MW 3 */ + 7222 "01100001" // /* MW 2 */ + 7223 "00010100" // /* MW 1 */ + 7224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7225 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 28 first + 7226 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7227 "00001111" // /* MW 3 */ + 7228 "10100001" // /* MW 2 */ + 7229 "00010100" // /* MW 1 */ + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 13 +.src_ref 7 "superkernels.cpp" 113 2 + 7232 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7233 "00000000" // /* MW 15 */ + 7234 "00000000" // /* MW 14 */ + 7235 "01111000" // /* MW 13 */ + 7236 "01100000" // /* MW 12 */ + 7237 "00110111" // /* MW 11 */ + 7238 "00000000" // /* MW 10 */ + 7239 "00000000" // /* MW 9 */ + 7240 "10000000" // /* MW 8 */ + 7241 "00010001" // /* MW 7 */ + 7242 "00000110" // /* MW 6 */ + 7243 "00100000" // /* MW 5 */ + 7244 "00000000" // /* MW 4 */ + 7245 "11110000" // /* MW 3 */ + 7246 "00101100" // /* MW 2 */ + 7247 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 106 12 +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 7 "superkernels.cpp" 117 6 +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 7248 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7249 "00010000" // /* MW 9 */ + 7250 "00100100" // /* MW 8 */ + 7251 "00110010" // /* MW 7 */ + 7252 "11110011" // /* MW 6 */ + 7253 "00000001" // /* MW 5 */ + 7254 "00000000" // /* MW 4 */ + 7255 "00100000" // /* MW 3 */ + 7256 "10111110" // /* MW 2 */ + 7257 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.src_ref 7 "superkernels.cpp" 108 13 + 7258 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7259 "00010000" // /* MW 9 */ + 7260 "00100110" // /* MW 8 */ + 7261 "00110010" // /* MW 7 */ + 7262 "11110001" // /* MW 6 */ + 7263 "00000001" // /* MW 5 */ + 7264 "00000000" // /* MW 4 */ + 7265 "11010000" // /* MW 3 */ + 7266 "11000010" // /* MW 2 */ + 7267 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 +.src_ref 7 "superkernels.cpp" 108 13 first +.src_ref 7 "superkernels.cpp" 139 6 +.src_ref 7 "superkernels.cpp" 140 14 + 7268 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7269 "00010000" // /* MW 9 */ + 7270 "00100000" // /* MW 8 */ + 7271 "10110010" // /* MW 7 */ + 7272 "11110011" // /* MW 6 */ + 7273 "00000001" // /* MW 5 */ + 7274 "00000000" // /* MW 4 */ + 7275 "11010000" // /* MW 3 */ + 7276 "11000110" // /* MW 2 */ + 7277 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first + 7278 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7279 "01010110" // /* MW 3 */ + 7280 "00000110" // /* MW 2 */ + 7281 "00000111" // /* MW 1 */ + 7282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7283 "00000000" // /* MW 1 */ + 7284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7285 "00000000" // /* MW 1 */ + 7286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7287 "00000000" // /* MW 1 */ + 7288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7289 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 110 6 first +.src_ref 7 "superkernels.cpp" 110 17 first + 7290 "10000100" // JNZ r16, #7376 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7376 delay_slots=5 */ + 7291 "00000001" // /* MW 5 */ + 7292 "01000000" // /* MW 4 */ + 7293 "01101000" // /* MW 3 */ + 7294 "00001110" // /* MW 2 */ + 7295 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 108 13 first +.delay_slot + 7296 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7297 "00000111" // /* MW 3 */ + 7298 "01100010" // /* MW 2 */ + 7299 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.src_ref 7 "superkernels.cpp" 108 13 +.delay_slot + 7300 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7301 "00001110" // /* MW 5 */ + 7302 "01000100" // /* MW 4 */ + 7303 "00111001" // /* MW 3 */ + 7304 "11000110" // /* MW 2 */ + 7305 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.delay_slot + 7306 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7307 "00000111" // /* MW 3 */ + 7308 "00100110" // /* MW 2 */ + 7309 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 +.delay_slot + 7310 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7311 "01110001" // /* MW 3 */ + 7312 "00000110" // /* MW 2 */ + 7313 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.delay_slot + 7314 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7315 "00110001" // /* MW 3 */ + 7316 "00000110" // /* MW 2 */ + 7317 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 7318 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7319 "10000110" // /* MW 3 */ + 7320 "01100111" // /* MW 2 */ + 7321 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 7322 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7323 "01110110" // /* MW 3 */ + 7324 "11111111" // /* MW 2 */ + 7325 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 7326 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7327 "00010110" // /* MW 3 */ + 7328 "11111110" // /* MW 2 */ + 7329 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 7330 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7331 "00110110" // /* MW 3 */ + 7332 "11111110" // /* MW 2 */ + 7333 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7336 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7337 "00010110" // /* MW 3 */ + 7338 "01000110" // /* MW 2 */ + 7339 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7343 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7345 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7347 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7348 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7349 "00000010" // /* MW 3 */ + 7350 "01100001" // /* MW 2 */ + 7351 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7352 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7353 "00010001" // /* MW 3 */ + 7354 "00000110" // /* MW 2 */ + 7355 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 7356 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7357 "11111101" // /* MW 3 */ + 7358 "11100010" // /* MW 2 */ + 7359 "00010111" // /* MW 1 */ + 7360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7361 "00000000" // /* MW 1 */ + 7362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7363 "00000000" // /* MW 1 */ + 7364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7365 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 7366 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7367 "00011000" // /* MW 9 */ + 7368 "00010011" // /* MW 8 */ + 7369 "00000100" // /* MW 7 */ + 7370 "00000000" // /* MW 6 */ + 7371 "01011011" // /* MW 5 */ + 7372 "00000001" // /* MW 4 */ + 7373 "11110000" // /* MW 3 */ + 7374 "00101100" // /* MW 2 */ + 7375 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.src_ref 7 "superkernels.cpp" 113 2 first +.no_stack_arguments + 7376 "00000100" // JL #4624 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4624 delay_slots=5 */ + 7377 "00000001" // /* MW 5 */ + 7378 "00000000" // /* MW 4 */ + 7379 "00001000" // /* MW 3 */ + 7380 "00001001" // /* MW 2 */ + 7381 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7382 "01000100" // MOVXM p3, #509504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7383 "10000000" // /* MW 5 */ + 7384 "11001100" // /* MW 4 */ + 7385 "11000110" // /* MW 3 */ + 7386 "00000111" // /* MW 2 */ + 7387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7393 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7394 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7395 "00011100" // /* MW 13 */ + 7396 "00000000" // /* MW 12 */ + 7397 "00000000" // /* MW 11 */ + 7398 "00000111" // /* MW 10 */ + 7399 "00111101" // /* MW 9 */ + 7400 "01010011" // /* MW 8 */ + 7401 "00000000" // /* MW 7 */ + 7402 "00000000" // /* MW 6 */ + 7403 "10110110" // /* MW 5 */ + 7404 "00000010" // /* MW 4 */ + 7405 "11110000" // /* MW 3 */ + 7406 "00101100" // /* MW 2 */ + 7407 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 first +.src_ref 7 "superkernels.cpp" 117 20 +.return_address + 7408 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7409 "00010000" // /* MW 9 */ + 7410 "00100010" // /* MW 8 */ + 7411 "10110010" // /* MW 7 */ + 7412 "11110000" // /* MW 6 */ + 7413 "00000001" // /* MW 5 */ + 7414 "00000000" // /* MW 4 */ + 7415 "11010000" // /* MW 3 */ + 7416 "11000010" // /* MW 2 */ + 7417 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 20 + 7418 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7419 "00110110" // /* MW 3 */ + 7420 "00000110" // /* MW 2 */ + 7421 "00000001" // /* MW 1 */ + 7422 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7423 "00010001" // /* MW 3 */ + 7424 "11110000" // /* MW 2 */ + 7425 "00000111" // /* MW 1 */ + 7426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7427 "00000000" // /* MW 1 */ + 7428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7429 "00000000" // /* MW 1 */ + 7430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7431 "00000000" // /* MW 1 */ + 7432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7433 "00000000" // /* MW 1 */ + 7434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7435 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 17 + 7436 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7437 "00001000" // /* MW 3 */ + 7438 "01100001" // /* MW 2 */ + 7439 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 + 7440 "10000100" // JNZ r16, #7520 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7520 delay_slots=5 */ + 7441 "00000001" // /* MW 5 */ + 7442 "01000000" // /* MW 4 */ + 7443 "10110000" // /* MW 3 */ + 7444 "00001110" // /* MW 2 */ + 7445 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 7 "superkernels.cpp" 140 14 +.delay_slot + 7446 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7447 "00000001" // /* MW 3 */ + 7448 "00110000" // /* MW 2 */ + 7449 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7457 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 7458 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7459 "00010100" // /* MW 5 */ + 7460 "11001111" // /* MW 4 */ + 7461 "10100010" // /* MW 3 */ + 7462 "00000000" // /* MW 2 */ + 7463 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 7464 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7465 "00110110" // /* MW 3 */ + 7466 "00000110" // /* MW 2 */ + 7467 "00000001" // /* MW 1 */ + 7468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7469 "00000000" // /* MW 1 */ + 7470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7471 "00000000" // /* MW 1 */ + 7472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7473 "00000000" // /* MW 1 */ + 7474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7475 "00000000" // /* MW 1 */ + 7476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7477 "00000000" // /* MW 1 */ + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7479 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 7480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7481 "00001000" // /* MW 3 */ + 7482 "01010001" // /* MW 2 */ + 7483 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 first +.src_ref 1 "io_buffer_main.h" 327 40 first + 7484 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7485 "00100011" // /* MW 5 */ + 7486 "00001110" // /* MW 4 */ + 7487 "11011100" // /* MW 3 */ + 7488 "11000110" // /* MW 2 */ + 7489 "00111100" // /* MW 1 */ + 7490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7491 "00000000" // /* MW 1 */ + 7492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7493 "00000000" // /* MW 1 */ + 7494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7495 "00000000" // /* MW 1 */ + 7496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7497 "00000000" // /* MW 1 */ + 7498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7499 "00000000" // /* MW 1 */ + 7500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7501 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 7502 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7503 "00010001" // /* MW 3 */ + 7504 "00100001" // /* MW 2 */ + 7505 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 7506 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7507 "00011100" // /* MW 13 */ + 7508 "00000000" // /* MW 12 */ + 7509 "00000000" // /* MW 11 */ + 7510 "01010111" // /* MW 10 */ + 7511 "00011010" // /* MW 9 */ + 7512 "01000000" // /* MW 8 */ + 7513 "00000000" // /* MW 7 */ + 7514 "00000000" // /* MW 6 */ + 7515 "00100011" // /* MW 5 */ + 7516 "11001100" // /* MW 4 */ + 7517 "11110011" // /* MW 3 */ + 7518 "00101100" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 +.src_ref 7 "superkernels.cpp" 139 6 first +.src_ref 7 "superkernels.cpp" 139 19 + 7520 "10111010" // LDA r16, [p7]; MOVXM p6, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7521 "00010000" // /* MW 9 */ + 7522 "00101110" // /* MW 8 */ + 7523 "00110010" // /* MW 7 */ + 7524 "11110011" // /* MW 6 */ + 7525 "00000001" // /* MW 5 */ + 7526 "00000000" // /* MW 4 */ + 7527 "11010000" // /* MW 3 */ + 7528 "11000010" // /* MW 2 */ + 7529 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 19 + 7530 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7531 "00110110" // /* MW 3 */ + 7532 "00000110" // /* MW 2 */ + 7533 "00000110" // /* MW 1 */ + 7534 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7535 "10011001" // /* MW 3 */ + 7536 "11111000" // /* MW 2 */ + 7537 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 + 7538 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7539 "00111001" // /* MW 3 */ + 7540 "11110100" // /* MW 2 */ + 7541 "00000111" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 16 + 7550 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00001000" // /* MW 3 */ + 7552 "01100001" // /* MW 2 */ + 7553 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 6 + 7554 "10000100" // JNZ r16, #7584 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7584 delay_slots=5 */ + 7555 "00000001" // /* MW 5 */ + 7556 "01000000" // /* MW 4 */ + 7557 "11010000" // /* MW 3 */ + 7558 "00001110" // /* MW 2 */ + 7559 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7567 "00000000" // /* MW 1 */ +.delay_slot + 7568 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7569 "00100000" // /* MW 3 */ + 7570 "11010000" // /* MW 2 */ + 7571 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 140 14 first + 7572 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7573 "11000001" // /* MW 11 */ + 7574 "10001000" // /* MW 10 */ + 7575 "10000011" // /* MW 9 */ + 7576 "00000011" // /* MW 8 */ + 7577 "00000000" // /* MW 7 */ + 7578 "00000000" // /* MW 6 */ + 7579 "00100000" // /* MW 5 */ + 7580 "00000000" // /* MW 4 */ + 7581 "11110000" // /* MW 3 */ + 7582 "00101100" // /* MW 2 */ + 7583 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7584 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7585 "00011001" // /* MW 3 */ + 7586 "11111111" // /* MW 2 */ + 7587 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 first + 7588 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7589 "00000000" // /* MW 3 */ + 7590 "00101000" // /* MW 2 */ + 7591 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 +.delay_slot + 7592 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7593 "00000001" // /* MW 5 */ + 7594 "00000000" // /* MW 4 */ + 7595 "00000000" // /* MW 3 */ + 7596 "11111000" // /* MW 2 */ + 7597 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7603 "00000000" // /* MW 1 */ +.delay_slot + 7604 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7605 "10001011" // /* MW 3 */ + 7606 "10000100" // /* MW 2 */ +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 7607 "00001111" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 7616 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7617 "00000001" // /* MW 5 */ + 7618 "00100001" // /* MW 4 */ + 7619 "00000000" // /* MW 3 */ + 7620 "00000000" // /* MW 2 */ + 7621 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7622 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "11000000" // /* MW 3 */ + 7624 "01010000" // /* MW 2 */ + 7625 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7626 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7627 "10010000" // /* MW 3 */ + 7628 "01100000" // /* MW 2 */ + 7629 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 7630 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "00010001" // /* MW 3 */ + 7632 "00000100" // /* MW 2 */ + 7633 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 7634 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7635 "00010001" // /* MW 3 */ + 7636 "00010100" // /* MW 2 */ + 7637 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7639 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 7648 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7649 "00101110" // /* MW 3 */ + 7650 "00011100" // /* MW 2 */ + 7651 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 7652 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7653 "00000001" // /* MW 5 */ + 7654 "00000000" // /* MW 4 */ + 7655 "00000000" // /* MW 3 */ + 7656 "00001000" // /* MW 2 */ + 7657 "00000000" // /* MW 1 */ + 7658 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7659 "00111101" // /* MW 3 */ + 7660 "11111000" // /* MW 2 */ + 7661 "00001111" // /* MW 1 */ + 7662 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7663 "11110101" // /* MW 3 */ + 7664 "11111101" // /* MW 2 */ + 7665 "00001111" // /* MW 1 */ + 7666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7667 "00000000" // /* MW 1 */ + 7668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7669 "00000000" // /* MW 1 */ + 7670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7671 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 7672 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7673 "00101001" // /* MW 3 */ + 7674 "00011100" // /* MW 2 */ + 7675 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 7676 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7677 "00101110" // /* MW 3 */ + 7678 "00011100" // /* MW 2 */ + 7679 "00000001" // /* MW 1 */ + 7680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7681 "00000000" // /* MW 1 */ + 7682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7683 "00000000" // /* MW 1 */ + 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7685 "00000000" // /* MW 1 */ + 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7687 "00000000" // /* MW 1 */ + 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7689 "00000000" // /* MW 1 */ + 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7691 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 7692 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7693 "00101001" // /* MW 3 */ + 7694 "00011100" // /* MW 2 */ + 7695 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 7696 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7697 "00101110" // /* MW 3 */ + 7698 "00000100" // /* MW 2 */ + 7699 "00000001" // /* MW 1 */ + 7700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7701 "00000000" // /* MW 1 */ + 7702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7703 "00000000" // /* MW 1 */ + 7704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7705 "00000000" // /* MW 1 */ + 7706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7707 "00000000" // /* MW 1 */ + 7708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7709 "00000000" // /* MW 1 */ + 7710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7711 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 7712 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00101001" // /* MW 3 */ + 7714 "00011100" // /* MW 2 */ + 7715 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 7716 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "00101110" // /* MW 3 */ + 7718 "00010100" // /* MW 2 */ + 7719 "00000001" // /* MW 1 */ + 7720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7721 "00000000" // /* MW 1 */ + 7722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7723 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 7724 "00000100" // JL #7616 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7616 delay_slots=5 */ + 7725 "00000001" // /* MW 5 */ + 7726 "00000000" // /* MW 4 */ + 7727 "11100000" // /* MW 3 */ + 7728 "00001110" // /* MW 2 */ + 7729 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7735 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 7736 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7737 "00101001" // /* MW 3 */ + 7738 "11011100" // /* MW 2 */ + 7739 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.delay_slot + 7740 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7741 "11000000" // /* MW 3 */ + 7742 "11010000" // /* MW 2 */ + 7743 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 7744 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7745 "00001000" // /* MW 9 */ + 7746 "11000100" // /* MW 8 */ + 7747 "00110011" // /* MW 7 */ + 7748 "01101000" // /* MW 6 */ + 7749 "00000000" // /* MW 5 */ + 7750 "00000001" // /* MW 4 */ + 7751 "00100000" // /* MW 3 */ + 7752 "00000111" // /* MW 2 */ + 7753 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 7754 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7755 "01011000" // /* MW 9 */ + 7756 "11111101" // /* MW 8 */ + 7757 "00000111" // /* MW 7 */ + 7758 "00001000" // /* MW 6 */ + 7759 "10000000" // /* MW 5 */ + 7760 "00000001" // /* MW 4 */ + 7761 "10000000" // /* MW 3 */ + 7762 "11100010" // /* MW 2 */ + 7763 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 7764 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7765 "00000001" // /* MW 9 */ + 7766 "10100000" // /* MW 8 */ + 7767 "00000111" // /* MW 7 */ + 7768 "10000000" // /* MW 6 */ + 7769 "00010001" // /* MW 5 */ + 7770 "00001010" // /* MW 4 */ + 7771 "00100000" // /* MW 3 */ + 7772 "10111110" // /* MW 2 */ + 7773 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 7774 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7775 "01001010" // /* MW 3 */ + 7776 "00000110" // /* MW 2 */ + 7777 "00000000" // /* MW 1 */ + 7778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7779 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7781 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7782 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7783 "00010111" // /* MW 3 */ + 7784 "00000010" // /* MW 2 */ + 7785 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7786 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7787 "00000000" // /* MW 3 */ + 7788 "00101000" // /* MW 2 */ + 7789 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7790 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7791 "00000101" // /* MW 3 */ + 7792 "00100010" // /* MW 2 */ + 7793 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7794 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7795 "00000001" // /* MW 5 */ + 7796 "00000000" // /* MW 4 */ + 7797 "00000000" // /* MW 3 */ + 7798 "11111000" // /* MW 2 */ + 7799 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7800 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7801 "00100111" // /* MW 3 */ + 7802 "01110111" // /* MW 2 */ + 7803 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7804 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7805 "10000010" // /* MW 3 */ + 7806 "00100001" // /* MW 2 */ + 7807 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7809 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 81 first +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 +.function_start + 7824 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7825 "01111000" // /* MW 9 */ + 7826 "01100000" // /* MW 8 */ + 7827 "00001000" // /* MW 7 */ + 7828 "11001000" // /* MW 6 */ + 7829 "00010000" // /* MW 5 */ + 7830 "00000000" // /* MW 4 */ + 7831 "10000000" // /* MW 3 */ + 7832 "10000000" // /* MW 2 */ + 7833 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 + 7834 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7835 "00001100" // /* MW 5 */ + 7836 "11000000" // /* MW 4 */ + 7837 "10100000" // /* MW 3 */ + 7838 "00000000" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first + 7840 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7841 "01001010" // /* MW 3 */ + 7842 "00001000" // /* MW 2 */ + 7843 "00000000" // /* MW 1 */ + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ + 7846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7847 "00000000" // /* MW 1 */ + 7848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7849 "00000000" // /* MW 1 */ + 7850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7851 "00000000" // /* MW 1 */ + 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7853 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first + 7854 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7855 "00000000" // /* MW 3 */ + 7856 "00101000" // /* MW 2 */ + 7857 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.delay_slot + 7858 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7859 "00001000" // /* MW 3 */ + 7860 "10000000" // /* MW 2 */ + 7861 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first +.delay_slot + 7862 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7863 "00011101" // /* MW 3 */ + 7864 "00000000" // /* MW 2 */ + 7865 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.src_ref 3 "elementwise_binary_broadcasting.h" 83 23 +.delay_slot + 7866 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7867 "11100000" // /* MW 5 */ + 7868 "00001101" // /* MW 4 */ + 7869 "00110001" // /* MW 3 */ + 7870 "10000010" // /* MW 2 */ + 7871 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.delay_slot + 7872 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00011101" // /* MW 3 */ + 7874 "11000100" // /* MW 2 */ + 7875 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 23 +.delay_slot + 7876 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7877 "01010001" // /* MW 3 */ + 7878 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7879 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_broadcasting.h" 76 +.src_ref 3 "elementwise_binary_broadcasting.h" 76 first +.function_start + 7888 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7889 "00000001" // /* MW 5 */ + 7890 "00000000" // /* MW 4 */ + 7891 "00000000" // /* MW 3 */ + 7892 "00001000" // /* MW 2 */ + 7893 "00000000" // /* MW 1 */ + 7894 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7895 "00111101" // /* MW 3 */ + 7896 "11111100" // /* MW 2 */ + 7897 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first +.no_stack_arguments + 7898 "00000100" // JL #7648 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7648 delay_slots=5 */ + 7899 "00000001" // /* MW 5 */ + 7900 "00000000" // /* MW 4 */ + 7901 "11110000" // /* MW 3 */ + 7902 "00001110" // /* MW 2 */ + 7903 "00000000" // /* MW 1 */ +.delay_slot + 7904 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7905 "10011101" // /* MW 3 */ + 7906 "11111011" // /* MW 2 */ + 7907 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot + 7908 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7909 "11000000" // /* MW 3 */ + 7910 "01100000" // /* MW 2 */ + 7911 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7913 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7916 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7917 "01100111" // /* MW 3 */ + 7918 "00000001" // /* MW 2 */ + 7919 "00000000" // /* MW 1 */ +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7920 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7921 "10011001" // /* MW 3 */ + 7922 "11111011" // /* MW 2 */ + 7923 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7924 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7925 "00111001" // /* MW 3 */ + 7926 "11111100" // /* MW 2 */ + 7927 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7928 "10000100" // J #7824 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7824 delay_slots=5 */ + 7929 "00000000" // /* MW 5 */ + 7930 "00000000" // /* MW 4 */ + 7931 "01001000" // /* MW 3 */ + 7932 "00001111" // /* MW 2 */ + 7933 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7934 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7935 "11000000" // /* MW 3 */ + 7936 "01101110" // /* MW 2 */ + 7937 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first +.delay_slot + 7938 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7939 "00000001" // /* MW 5 */ + 7940 "00000000" // /* MW 4 */ + 7941 "00000000" // /* MW 3 */ + 7942 "11111000" // /* MW 2 */ + 7943 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7949 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 89 first +.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 +.function_start + 7952 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7953 "01010001" // /* MW 5 */ + 7954 "00000000" // /* MW 4 */ + 7955 "11010000" // /* MW 3 */ + 7956 "10000010" // /* MW 2 */ + 7957 "01100111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7958 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7959 "10000001" // /* MW 5 */ + 7960 "11001101" // /* MW 4 */ + 7961 "01011000" // /* MW 3 */ + 7962 "00000101" // /* MW 2 */ + 7963 "01100001" // /* MW 1 */ + 7964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7965 "00000000" // /* MW 1 */ + 7966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7967 "00000000" // /* MW 1 */ + 7968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7969 "00000000" // /* MW 1 */ + 7970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7971 "00000000" // /* MW 1 */ + 7972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7973 "00000000" // /* MW 1 */ + 7974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7975 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 12 +.src_ref 3 "elementwise_binary_broadcasting.h" 102 35 + 7976 "10000100" // JNZ r1, #8032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8032 delay_slots=5 */ + 7977 "00000001" // /* MW 5 */ + 7978 "01000000" // /* MW 4 */ + 7979 "10110000" // /* MW 3 */ + 7980 "00001111" // /* MW 2 */ + 7981 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 +.delay_slot + 7982 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7983 "11101001" // /* MW 3 */ + 7984 "11000100" // /* MW 2 */ + 7985 "00010111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first +.delay_slot + 7986 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "00101101" // /* MW 3 */ + 7988 "00000000" // /* MW 2 */ + 7989 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7991 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7993 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7995 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first + 7996 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7997 "00110010" // /* MW 3 */ + 7998 "00000100" // /* MW 2 */ + 7999 "00000000" // /* MW 1 */ + 8000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8001 "00000000" // /* MW 1 */ + 8002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8003 "00000000" // /* MW 1 */ + 8004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8005 "00000000" // /* MW 1 */ + 8006 "10000100" // J #8064 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8064 delay_slots=5 */ + 8007 "00000000" // /* MW 5 */ + 8008 "00000000" // /* MW 4 */ + 8009 "11000000" // /* MW 3 */ + 8010 "00001111" // /* MW 2 */ + 8011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8015 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 8016 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "01110010" // /* MW 3 */ + 8018 "00000101" // /* MW 2 */ + 8019 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8021 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 8022 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8023 "00000000" // /* MW 9 */ + 8024 "00000000" // /* MW 8 */ + 8025 "00000000" // /* MW 7 */ + 8026 "00000000" // /* MW 6 */ + 8027 "00010011" // /* MW 5 */ + 8028 "00000100" // /* MW 4 */ + 8029 "11110000" // /* MW 3 */ + 8030 "00101100" // /* MW 2 */ + 8031 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 +.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first + 8032 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "00110010" // /* MW 3 */ + 8034 "00000100" // /* MW 2 */ + 8035 "00000001" // /* MW 1 */ + 8036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8037 "00000000" // /* MW 1 */ + 8038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8039 "00000000" // /* MW 1 */ + 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8041 "00000000" // /* MW 1 */ + 8042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8043 "00000000" // /* MW 1 */ + 8044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8045 "00000000" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 8048 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8049 "01110010" // /* MW 3 */ + 8050 "00000101" // /* MW 2 */ + 8051 "00011000" // /* MW 1 */ + 8052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8053 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 8054 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "00000000" // /* MW 7 */ + 8058 "00000000" // /* MW 6 */ + 8059 "00010011" // /* MW 5 */ + 8060 "00000100" // /* MW 4 */ + 8061 "11110001" // /* MW 3 */ + 8062 "00101100" // /* MW 2 */ + 8063 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first + 8064 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8065 "01001000" // /* MW 9 */ + 8066 "00111111" // /* MW 8 */ + 8067 "10111000" // /* MW 7 */ + 8068 "10001010" // /* MW 6 */ + 8069 "00000111" // /* MW 5 */ + 8070 "00000000" // /* MW 4 */ + 8071 "11010000" // /* MW 3 */ + 8072 "10000000" // /* MW 2 */ + 8073 "10001010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 8074 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8075 "00010000" // /* MW 9 */ + 8076 "11111000" // /* MW 8 */ + 8077 "01111111" // /* MW 7 */ + 8078 "00000100" // /* MW 6 */ + 8079 "00000000" // /* MW 5 */ + 8080 "00000000" // /* MW 4 */ + 8081 "11010000" // /* MW 3 */ + 8082 "10010000" // /* MW 2 */ + 8083 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 8084 "01000100" // MOVXM le, #8208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8085 "00100000" // /* MW 5 */ + 8086 "11100000" // /* MW 4 */ + 8087 "00100110" // /* MW 3 */ + 8088 "00000000" // /* MW 2 */ + 8089 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 8090 "01000100" // MOVXM p4, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8091 "11001000" // /* MW 5 */ + 8092 "11001000" // /* MW 4 */ + 8093 "11001000" // /* MW 3 */ + 8094 "00000111" // /* MW 2 */ + 8095 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 8096 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8097 "00100010" // /* MW 3 */ + 8098 "00000100" // /* MW 2 */ + 8099 "00000100" // /* MW 1 */ + 8100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8101 "00000000" // /* MW 1 */ + 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8103 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first + 8104 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8105 "10101011" // /* MW 3 */ + 8106 "00001000" // /* MW 2 */ + 8107 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 189 20 first + 8108 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8109 "00101011" // /* MW 3 */ + 8110 "00101001" // /* MW 2 */ + 8111 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first + 8112 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8113 "00101011" // /* MW 3 */ + 8114 "00001000" // /* MW 2 */ + 8115 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8116 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8117 "00101011" // /* MW 3 */ + 8118 "00101010" // /* MW 2 */ + 8119 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8120 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8121 "00000000" // /* MW 5 */ + 8122 "11110101" // /* MW 4 */ + 8123 "01110000" // /* MW 3 */ + 8124 "00010101" // /* MW 2 */ + 8125 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8126 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8127 "00111101" // /* MW 7 */ + 8128 "00101000" // /* MW 6 */ + 8129 "00000011" // /* MW 5 */ + 8130 "00000100" // /* MW 4 */ + 8131 "01110000" // /* MW 3 */ + 8132 "00100101" // /* MW 2 */ + 8133 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8134 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8135 "00101011" // /* MW 3 */ + 8136 "00001000" // /* MW 2 */ + 8137 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8138 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8139 "00111101" // /* MW 7 */ + 8140 "00010000" // /* MW 6 */ + 8141 "00000100" // /* MW 5 */ + 8142 "00000100" // /* MW 4 */ + 8143 "01110000" // /* MW 3 */ + 8144 "01000101" // /* MW 2 */ + 8145 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8146 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8147 "10101011" // /* MW 3 */ + 8148 "00001000" // /* MW 2 */ + 8149 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8150 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8151 "00111101" // /* MW 7 */ + 8152 "00101000" // /* MW 6 */ + 8153 "00000011" // /* MW 5 */ + 8154 "00000100" // /* MW 4 */ + 8155 "01110000" // /* MW 3 */ + 8156 "00100101" // /* MW 2 */ + 8157 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8158 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8159 "00101011" // /* MW 3 */ + 8160 "00001000" // /* MW 2 */ + 8161 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8162 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8163 "00111101" // /* MW 13 */ + 8164 "00010000" // /* MW 12 */ + 8165 "00000100" // /* MW 11 */ + 8166 "01010111" // /* MW 10 */ + 8167 "00011010" // /* MW 9 */ + 8168 "01000000" // /* MW 8 */ + 8169 "00000000" // /* MW 7 */ + 8170 "00000000" // /* MW 6 */ + 8171 "01000110" // /* MW 5 */ + 8172 "00111011" // /* MW 4 */ + 8173 "01110100" // /* MW 3 */ + 8174 "01000101" // /* MW 2 */ + 8175 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8176 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8177 "10101011" // /* MW 3 */ + 8178 "00001000" // /* MW 2 */ + 8179 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8180 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8181 "00111101" // /* MW 11 */ + 8182 "00101000" // /* MW 10 */ + 8183 "00000011" // /* MW 9 */ + 8184 "10001110" // /* MW 8 */ + 8185 "00010001" // /* MW 7 */ + 8186 "00001111" // /* MW 6 */ + 8187 "00100001" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "01110000" // /* MW 3 */ + 8190 "00100101" // /* MW 2 */ + 8191 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8193 "00000000" // /* MW 15 */ + 8194 "00000000" // /* MW 14 */ + 8195 "01111000" // /* MW 13 */ + 8196 "10100101" // /* MW 12 */ + 8197 "00000001" // /* MW 11 */ + 8198 "00000000" // /* MW 10 */ + 8199 "00000000" // /* MW 9 */ + 8200 "00000000" // /* MW 8 */ + 8201 "01011011" // /* MW 7 */ + 8202 "00000001" // /* MW 6 */ + 8203 "00100000" // /* MW 5 */ + 8204 "00000000" // /* MW 4 */ + 8205 "01110000" // /* MW 3 */ + 8206 "00000101" // /* MW 2 */ + 8207 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8208 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8209 "10000001" // /* MW 15 */ + 8210 "00100000" // /* MW 14 */ + 8211 "01111000" // /* MW 13 */ + 8212 "10100101" // /* MW 12 */ + 8213 "00000001" // /* MW 11 */ + 8214 "00000000" // /* MW 10 */ + 8215 "00000000" // /* MW 9 */ + 8216 "00000000" // /* MW 8 */ + 8217 "10100011" // /* MW 7 */ + 8218 "00011101" // /* MW 6 */ + 8219 "00100010" // /* MW 5 */ + 8220 "00000000" // /* MW 4 */ + 8221 "01110000" // /* MW 3 */ + 8222 "01000101" // /* MW 2 */ + 8223 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8225 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8226 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8227 "00111101" // /* MW 7 */ + 8228 "00101000" // /* MW 6 */ + 8229 "00000011" // /* MW 5 */ + 8230 "00000010" // /* MW 4 */ + 8231 "01100000" // /* MW 3 */ + 8232 "11000100" // /* MW 2 */ + 8233 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8235 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8236 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8237 "00111101" // /* MW 7 */ + 8238 "00010000" // /* MW 6 */ + 8239 "00000100" // /* MW 5 */ + 8240 "00000010" // /* MW 4 */ + 8241 "01100000" // /* MW 3 */ + 8242 "10110100" // /* MW 2 */ + 8243 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8246 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8247 "00000000" // /* MW 5 */ + 8248 "01010000" // /* MW 4 */ + 8249 "01100000" // /* MW 3 */ + 8250 "11000100" // /* MW 2 */ + 8251 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8253 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 8254 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8255 "10100011" // /* MW 3 */ + 8256 "00011101" // /* MW 2 */ + 8257 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8259 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 8260 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8261 "00100011" // /* MW 3 */ + 8262 "00011110" // /* MW 2 */ + 8263 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8265 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first +.function_start + 8272 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8273 "00000001" // /* MW 5 */ + 8274 "00000000" // /* MW 4 */ + 8275 "00000000" // /* MW 3 */ + 8276 "00010000" // /* MW 2 */ + 8277 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 + 8278 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8279 "01110000" // /* MW 7 */ + 8280 "01100000" // /* MW 6 */ + 8281 "00001010" // /* MW 5 */ + 8282 "00000010" // /* MW 4 */ + 8283 "10110000" // /* MW 3 */ + 8284 "10000111" // /* MW 2 */ + 8285 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 + 8286 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8287 "00000000" // /* MW 7 */ + 8288 "00000011" // /* MW 6 */ + 8289 "10110100" // /* MW 5 */ + 8290 "00000001" // /* MW 4 */ + 8291 "01100000" // /* MW 3 */ + 8292 "10010001" // /* MW 2 */ + 8293 "01010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 + 8294 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8295 "10000001" // /* MW 5 */ + 8296 "00100001" // /* MW 4 */ + 8297 "01011000" // /* MW 3 */ + 8298 "11101101" // /* MW 2 */ + 8299 "01100101" // /* MW 1 */ + 8300 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8301 "11000001" // /* MW 5 */ + 8302 "10101011" // /* MW 4 */ + 8303 "01011000" // /* MW 3 */ + 8304 "11001010" // /* MW 2 */ + 8305 "01110011" // /* MW 1 */ + 8306 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8307 "11000000" // /* MW 3 */ + 8308 "01101000" // /* MW 2 */ + 8309 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 8310 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8311 "00101011" // /* MW 3 */ + 8312 "00000111" // /* MW 2 */ + 8313 "00001000" // /* MW 1 */ + 8314 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8315 "01010111" // /* MW 3 */ + 8316 "00000110" // /* MW 2 */ + 8317 "00000000" // /* MW 1 */ + 8318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8319 "00000000" // /* MW 1 */ + 8320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first +.no_stack_arguments + 8322 "00000100" // JL #7952 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7952 delay_slots=5 */ + 8323 "00000001" // /* MW 5 */ + 8324 "00000000" // /* MW 4 */ + 8325 "10001000" // /* MW 3 */ + 8326 "00001111" // /* MW 2 */ + 8327 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.delay_slot + 8328 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8329 "11000000" // /* MW 3 */ + 8330 "01010000" // /* MW 2 */ + 8331 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8333 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first +.delay_slot + 8334 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8335 "00010010" // /* MW 3 */ + 8336 "00100101" // /* MW 2 */ + 8337 "00010100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8338 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8339 "01000001" // /* MW 5 */ + 8340 "11010010" // /* MW 4 */ + 8341 "01000010" // /* MW 3 */ + 8342 "00100000" // /* MW 2 */ + 8343 "10001100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8344 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8345 "01110000" // /* MW 7 */ + 8346 "00010000" // /* MW 6 */ + 8347 "00110100" // /* MW 5 */ + 8348 "00000000" // /* MW 4 */ + 8349 "01100000" // /* MW 3 */ + 8350 "00101011" // /* MW 2 */ + 8351 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.return_address + 8352 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8353 "00111001" // /* MW 3 */ + 8354 "11111100" // /* MW 2 */ + 8355 "00000111" // /* MW 1 */ + 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8357 "00000000" // /* MW 1 */ + 8358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8359 "00000000" // /* MW 1 */ + 8360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8361 "00000000" // /* MW 1 */ + 8362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8363 "00000000" // /* MW 1 */ + 8364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8365 "00000000" // /* MW 1 */ + 8366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8367 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first + 8368 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8369 "00000000" // /* MW 3 */ + 8370 "00101000" // /* MW 2 */ + 8371 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.delay_slot + 8372 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8373 "00000001" // /* MW 5 */ + 8374 "00000000" // /* MW 4 */ + 8375 "00000000" // /* MW 3 */ + 8376 "11110000" // /* MW 2 */ + 8377 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8385 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 147 first +.src_ref 7 "superkernels.cpp" 152 6 +.function_start + 8400 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8401 "10000000" // /* MW 5 */ + 8402 "11001000" // /* MW 4 */ + 8403 "11000110" // /* MW 3 */ + 8404 "00000111" // /* MW 2 */ + 8405 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 first + 8406 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8407 "11000001" // /* MW 5 */ + 8408 "10110101" // /* MW 4 */ + 8409 "11011000" // /* MW 3 */ + 8410 "11000010" // /* MW 2 */ + 8411 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 147 + 8412 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8413 "00000001" // /* MW 5 */ + 8414 "00000000" // /* MW 4 */ + 8415 "00000000" // /* MW 3 */ + 8416 "00001000" // /* MW 2 */ + 8417 "00000000" // /* MW 1 */ + 8418 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8419 "01110000" // /* MW 7 */ + 8420 "11010000" // /* MW 6 */ + 8421 "00001011" // /* MW 5 */ + 8422 "00000000" // /* MW 4 */ + 8423 "10110000" // /* MW 3 */ + 8424 "01100011" // /* MW 2 */ + 8425 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 + 8426 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8427 "00010001" // /* MW 9 */ + 8428 "00101000" // /* MW 8 */ + 8429 "00110010" // /* MW 7 */ + 8430 "11110011" // /* MW 6 */ + 8431 "00000001" // /* MW 5 */ + 8432 "00000000" // /* MW 4 */ + 8433 "10110000" // /* MW 3 */ + 8434 "10000010" // /* MW 2 */ + 8435 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 8436 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8437 "11000000" // /* MW 3 */ + 8438 "11010100" // /* MW 2 */ + 8439 "00011011" // /* MW 1 */ + 8440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8441 "00000000" // /* MW 1 */ + 8442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8443 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 +.src_ref 7 "superkernels.cpp" 152 16 + 8444 "10000100" // JNZ r16, #8608 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8608 delay_slots=5 */ + 8445 "00000001" // /* MW 5 */ + 8446 "01000000" // /* MW 4 */ + 8447 "11010000" // /* MW 3 */ + 8448 "00010000" // /* MW 2 */ + 8449 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 22 first +.delay_slot + 8450 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8451 "10010000" // /* MW 3 */ + 8452 "01100010" // /* MW 2 */ + 8453 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 30 +.delay_slot + 8454 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8455 "11111011" // /* MW 3 */ + 8456 "01100011" // /* MW 2 */ + 8457 "00010100" // /* MW 1 */ +.delay_slot + 8458 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8459 "00111101" // /* MW 3 */ + 8460 "11110100" // /* MW 2 */ + 8461 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 8462 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8463 "01110000" // /* MW 7 */ + 8464 "01100000" // /* MW 6 */ + 8465 "00110000" // /* MW 5 */ + 8466 "00000011" // /* MW 4 */ + 8467 "00110000" // /* MW 3 */ + 8468 "11000110" // /* MW 2 */ + 8469 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 +.src_ref 7 "superkernels.cpp" 166 2 +.delay_slot + 8470 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8471 "00000000" // /* MW 5 */ + 8472 "11001010" // /* MW 4 */ + 8473 "11000000" // /* MW 3 */ + 8474 "00000111" // /* MW 2 */ + 8475 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8476 "01000100" // MOVXM p2, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8477 "11001000" // /* MW 5 */ + 8478 "11001000" // /* MW 4 */ + 8479 "11000100" // /* MW 3 */ + 8480 "00000111" // /* MW 2 */ + 8481 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8482 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8483 "00010000" // /* MW 9 */ + 8484 "00110000" // /* MW 8 */ + 8485 "00110010" // /* MW 7 */ + 8486 "11110001" // /* MW 6 */ + 8487 "00000001" // /* MW 5 */ + 8488 "00000000" // /* MW 4 */ + 8489 "11100000" // /* MW 3 */ + 8490 "11000000" // /* MW 2 */ + 8491 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8493 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8494 "00000100" // JL #7888 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7888 delay_slots=5 */ + 8495 "00000001" // /* MW 5 */ + 8496 "00000000" // /* MW 4 */ + 8497 "01101000" // /* MW 3 */ + 8498 "00001111" // /* MW 2 */ + 8499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8503 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8504 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "00110001" // /* MW 3 */ + 8506 "00100000" // /* MW 2 */ + 8507 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8508 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8509 "00000101" // /* MW 3 */ + 8510 "00100000" // /* MW 2 */ + 8511 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8512 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8513 "00000000" // /* MW 15 */ + 8514 "00000000" // /* MW 14 */ + 8515 "01111000" // /* MW 13 */ + 8516 "10100101" // /* MW 12 */ + 8517 "00000001" // /* MW 11 */ + 8518 "00000000" // /* MW 10 */ + 8519 "00000000" // /* MW 9 */ + 8520 "10000000" // /* MW 8 */ + 8521 "00010001" // /* MW 7 */ + 8522 "00000110" // /* MW 6 */ + 8523 "00100010" // /* MW 5 */ + 8524 "00000000" // /* MW 4 */ + 8525 "11110000" // /* MW 3 */ + 8526 "00101100" // /* MW 2 */ + 8527 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 +.return_address + 8528 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8529 "10100000" // /* MW 5 */ + 8530 "11001000" // /* MW 4 */ + 8531 "11000100" // /* MW 3 */ + 8532 "00000111" // /* MW 2 */ + 8533 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 first +.src_ref 7 "superkernels.cpp" 159 65 + 8534 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8535 "00010000" // /* MW 9 */ + 8536 "10000000" // /* MW 8 */ + 8537 "00110010" // /* MW 7 */ + 8538 "11110001" // /* MW 6 */ + 8539 "00000001" // /* MW 5 */ + 8540 "00000000" // /* MW 4 */ + 8541 "11010000" // /* MW 3 */ + 8542 "11000010" // /* MW 2 */ + 8543 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 +.src_ref 7 "superkernels.cpp" 159 65 +.src_ref 7 "superkernels.cpp" 166 2 + 8544 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8545 "00010000" // /* MW 9 */ + 8546 "10000000" // /* MW 8 */ + 8547 "00110010" // /* MW 7 */ + 8548 "11110001" // /* MW 6 */ + 8549 "00000001" // /* MW 5 */ + 8550 "00000000" // /* MW 4 */ + 8551 "11010000" // /* MW 3 */ + 8552 "11000110" // /* MW 2 */ + 8553 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 first +.src_ref 7 "superkernels.cpp" 159 16 +.src_ref 7 "superkernels.cpp" 164 47 + 8554 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8555 "00010000" // /* MW 9 */ + 8556 "00101010" // /* MW 8 */ + 8557 "10110010" // /* MW 7 */ + 8558 "11110000" // /* MW 6 */ + 8559 "00000001" // /* MW 5 */ + 8560 "00000000" // /* MW 4 */ + 8561 "01010000" // /* MW 3 */ + 8562 "11001011" // /* MW 2 */ + 8563 "01001010" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ + 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8567 "00000000" // /* MW 1 */ + 8568 "10000100" // J #8624 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8624 delay_slots=5 */ + 8569 "00000000" // /* MW 5 */ + 8570 "00000000" // /* MW 4 */ + 8571 "11011000" // /* MW 3 */ + 8572 "00010000" // /* MW 2 */ + 8573 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 +.delay_slot + 8574 "01000100" // MOVXM p0, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8575 "10111000" // /* MW 5 */ + 8576 "11001000" // /* MW 4 */ + 8577 "11000000" // /* MW 3 */ + 8578 "00000111" // /* MW 2 */ + 8579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8581 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 27 first +.delay_slot + 8582 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8583 "00001111" // /* MW 3 */ + 8584 "01100001" // /* MW 2 */ + 8585 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 first +.delay_slot + 8586 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8587 "10100011" // /* MW 5 */ + 8588 "00001100" // /* MW 4 */ + 8589 "11110000" // /* MW 3 */ + 8590 "00101100" // /* MW 2 */ + 8591 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 16 first +.delay_slot + 8592 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8593 "00000000" // /* MW 15 */ + 8594 "00000000" // /* MW 14 */ + 8595 "01111000" // /* MW 13 */ + 8596 "10100101" // /* MW 12 */ + 8597 "00000001" // /* MW 11 */ + 8598 "00000000" // /* MW 10 */ + 8599 "00000000" // /* MW 9 */ + 8600 "10000000" // /* MW 8 */ + 8601 "00010001" // /* MW 7 */ + 8602 "00000110" // /* MW 6 */ + 8603 "00100001" // /* MW 5 */ + 8604 "00000000" // /* MW 4 */ + 8605 "11110000" // /* MW 3 */ + 8606 "00101100" // /* MW 2 */ + 8607 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 164 47 +.src_ref 7 "superkernels.cpp" 166 2 + 8608 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8609 "00000000" // /* MW 15 */ + 8610 "00000000" // /* MW 14 */ + 8611 "00010000" // /* MW 13 */ + 8612 "00101010" // /* MW 12 */ + 8613 "10110010" // /* MW 11 */ + 8614 "11110000" // /* MW 10 */ + 8615 "00000001" // /* MW 9 */ + 8616 "00000000" // /* MW 8 */ + 8617 "10001011" // /* MW 7 */ + 8618 "10000000" // /* MW 6 */ + 8619 "00100010" // /* MW 5 */ + 8620 "00000000" // /* MW 4 */ + 8621 "11110000" // /* MW 3 */ + 8622 "00101100" // /* MW 2 */ + 8623 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 8624 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8625 "00000000" // /* MW 7 */ + 8626 "11000011" // /* MW 6 */ + 8627 "10110011" // /* MW 5 */ + 8628 "00000011" // /* MW 4 */ + 8629 "01100000" // /* MW 3 */ + 8630 "10010001" // /* MW 2 */ + 8631 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 8632 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8633 "00010000" // /* MW 9 */ + 8634 "00100000" // /* MW 8 */ + 8635 "00110010" // /* MW 7 */ + 8636 "11110000" // /* MW 6 */ + 8637 "00000001" // /* MW 5 */ + 8638 "00000000" // /* MW 4 */ + 8639 "11010000" // /* MW 3 */ + 8640 "11101110" // /* MW 2 */ + 8641 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8642 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8643 "00010110" // /* MW 3 */ + 8644 "11111110" // /* MW 2 */ + 8645 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8646 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00110110" // /* MW 3 */ + 8648 "11111110" // /* MW 2 */ + 8649 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 8650 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8651 "01010110" // /* MW 3 */ + 8652 "01000110" // /* MW 2 */ + 8653 "00000111" // /* MW 1 */ + 8654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8655 "00000000" // /* MW 1 */ + 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8657 "00000000" // /* MW 1 */ + 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8659 "00000000" // /* MW 1 */ + 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8661 "00000000" // /* MW 1 */ + 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8663 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 8664 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8665 "00000010" // /* MW 3 */ + 8666 "01100001" // /* MW 2 */ + 8667 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 8668 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8669 "00010001" // /* MW 3 */ + 8670 "00000110" // /* MW 2 */ + 8671 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 8672 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8673 "11111101" // /* MW 3 */ + 8674 "11100000" // /* MW 2 */ + 8675 "00010111" // /* MW 1 */ + 8676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8677 "00000000" // /* MW 1 */ + 8678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8679 "00000000" // /* MW 1 */ + 8680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8681 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8682 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8683 "00001000" // /* MW 3 */ + 8684 "10010011" // /* MW 2 */ + 8685 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 + 8686 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8687 "10000001" // /* MW 5 */ + 8688 "10101101" // /* MW 4 */ + 8689 "10100111" // /* MW 3 */ + 8690 "00000000" // /* MW 2 */ + 8691 "00000100" // /* MW 1 */ + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ + 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8695 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first + 8696 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8697 "00110110" // /* MW 3 */ + 8698 "00000110" // /* MW 2 */ + 8699 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 8700 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8701 "10000001" // /* MW 5 */ + 8702 "11011101" // /* MW 4 */ + 8703 "11011100" // /* MW 3 */ + 8704 "11001010" // /* MW 2 */ + 8705 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 47 first + 8706 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8707 "01110110" // /* MW 3 */ + 8708 "00000110" // /* MW 2 */ + 8709 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8710 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8711 "10011110" // /* MW 3 */ + 8712 "01011100" // /* MW 2 */ + 8713 "00000111" // /* MW 1 */ + 8714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8715 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 166 2 first +.no_stack_arguments + 8716 "00000100" // JL #8272 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8272 delay_slots=5 */ + 8717 "00000001" // /* MW 5 */ + 8718 "00000000" // /* MW 4 */ + 8719 "00101000" // /* MW 3 */ + 8720 "00010000" // /* MW 2 */ + 8721 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first +.delay_slot + 8724 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8725 "00000111" // /* MW 3 */ + 8726 "01100010" // /* MW 2 */ + 8727 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.delay_slot + 8728 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8729 "00110001" // /* MW 3 */ + 8730 "00000110" // /* MW 2 */ + 8731 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 first +.delay_slot + 8732 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8733 "00001101" // /* MW 3 */ + 8734 "11100001" // /* MW 2 */ + 8735 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 +.delay_slot + 8736 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8737 "00000000" // /* MW 15 */ + 8738 "00000000" // /* MW 14 */ + 8739 "10101000" // /* MW 13 */ + 8740 "10100000" // /* MW 12 */ + 8741 "00110100" // /* MW 11 */ + 8742 "00000000" // /* MW 10 */ + 8743 "00000000" // /* MW 9 */ + 8744 "00000000" // /* MW 8 */ + 8745 "01011011" // /* MW 7 */ + 8746 "00000001" // /* MW 6 */ + 8747 "00100000" // /* MW 5 */ + 8748 "00000000" // /* MW 4 */ + 8749 "11110000" // /* MW 3 */ + 8750 "00101100" // /* MW 2 */ + 8751 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 +.src_ref 7 "superkernels.cpp" 169 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 8752 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8753 "00010000" // /* MW 9 */ + 8754 "00100000" // /* MW 8 */ + 8755 "00110010" // /* MW 7 */ + 8756 "11110011" // /* MW 6 */ + 8757 "00000001" // /* MW 5 */ + 8758 "00000000" // /* MW 4 */ + 8759 "11010000" // /* MW 3 */ + 8760 "11000110" // /* MW 2 */ + 8761 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 8762 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8763 "00000101" // /* MW 3 */ + 8764 "00100000" // /* MW 2 */ + 8765 "00010000" // /* MW 1 */ + 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8767 "00000000" // /* MW 1 */ + 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8769 "00000000" // /* MW 1 */ + 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8771 "00000000" // /* MW 1 */ + 8772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8773 "00000000" // /* MW 1 */ + 8774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8775 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 8776 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8777 "00001000" // /* MW 3 */ + 8778 "01010001" // /* MW 2 */ + 8779 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 8780 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8781 "00010000" // /* MW 9 */ + 8782 "00101110" // /* MW 8 */ + 8783 "00110010" // /* MW 7 */ + 8784 "11110001" // /* MW 6 */ + 8785 "00000001" // /* MW 5 */ + 8786 "00000000" // /* MW 4 */ + 8787 "11010000" // /* MW 3 */ + 8788 "11001110" // /* MW 2 */ + 8789 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 first + 8790 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8791 "00110110" // /* MW 3 */ + 8792 "00000110" // /* MW 2 */ + 8793 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 + 8794 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8795 "01010110" // /* MW 3 */ + 8796 "00000110" // /* MW 2 */ + 8797 "00000010" // /* MW 1 */ + 8798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8799 "00000000" // /* MW 1 */ + 8800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8801 "00000000" // /* MW 1 */ + 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8803 "00000000" // /* MW 1 */ + 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8805 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 8806 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8807 "00110001" // /* MW 3 */ + 8808 "00100001" // /* MW 2 */ + 8809 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 8810 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8811 "00010001" // /* MW 3 */ + 8812 "11100110" // /* MW 2 */ + 8813 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 16 first + 8814 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "00101000" // /* MW 3 */ + 8816 "01100001" // /* MW 2 */ + 8817 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 + 8818 "10000100" // JNZ r16, #8848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8848 delay_slots=5 */ + 8819 "00000001" // /* MW 5 */ + 8820 "01000000" // /* MW 4 */ + 8821 "01001000" // /* MW 3 */ + 8822 "00010001" // /* MW 2 */ + 8823 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8825 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8829 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 + 8834 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8835 "00000001" // /* MW 3 */ + 8836 "00100000" // /* MW 2 */ + 8837 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 first + 8838 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8839 "00000000" // /* MW 9 */ + 8840 "00000000" // /* MW 8 */ + 8841 "00000000" // /* MW 7 */ + 8842 "10000000" // /* MW 6 */ + 8843 "00010001" // /* MW 5 */ + 8844 "00000110" // /* MW 4 */ + 8845 "11110110" // /* MW 3 */ + 8846 "00101100" // /* MW 2 */ + 8847 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 171 + 8848 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8849 "00111001" // /* MW 3 */ + 8850 "11110100" // /* MW 2 */ + 8851 "00000111" // /* MW 1 */ + 8852 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8853 "00011001" // /* MW 3 */ + 8854 "11111011" // /* MW 2 */ + 8855 "00000111" // /* MW 1 */ + 8856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8857 "00000000" // /* MW 1 */ + 8858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8859 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8861 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8862 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8863 "11110001" // /* MW 3 */ + 8864 "11111101" // /* MW 2 */ + 8865 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8867 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8868 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8869 "00000000" // /* MW 3 */ + 8870 "00101000" // /* MW 2 */ + 8871 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8872 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8873 "10100000" // /* MW 3 */ + 8874 "01100111" // /* MW 2 */ + 8875 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 +.delay_slot + 8876 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8877 "00000001" // /* MW 5 */ + 8878 "00000000" // /* MW 4 */ + 8879 "00000000" // /* MW 3 */ + 8880 "11111000" // /* MW 2 */ + 8881 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 8887 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 3 "elementwise_unary.h" 124 first +.src_ref 3 "elementwise_unary.h" 126 24 first +.function_start + 8896 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8897 "00101110" // /* MW 3 */ + 8898 "00011100" // /* MW 2 */ + 8899 "00000001" // /* MW 1 */ + 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8901 "00000000" // /* MW 1 */ + 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8903 "00000000" // /* MW 1 */ + 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8905 "00000000" // /* MW 1 */ + 8906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8907 "00000000" // /* MW 1 */ + 8908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8909 "00000000" // /* MW 1 */ + 8910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8911 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 126 22 first + 8912 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8913 "00101001" // /* MW 3 */ + 8914 "00011100" // /* MW 2 */ + 8915 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 24 first + 8916 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8917 "00101110" // /* MW 3 */ + 8918 "00011100" // /* MW 2 */ + 8919 "00000001" // /* MW 1 */ + 8920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8921 "00000000" // /* MW 1 */ + 8922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8923 "00000000" // /* MW 1 */ + 8924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8925 "00000000" // /* MW 1 */ + 8926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8927 "00000000" // /* MW 1 */ + 8928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8929 "00000000" // /* MW 1 */ + 8930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8931 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 22 + 8932 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8933 "00101001" // /* MW 3 */ + 8934 "00011100" // /* MW 2 */ + 8935 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 24 first + 8936 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8937 "00101110" // /* MW 3 */ + 8938 "01101100" // /* MW 2 */ + 8939 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8940 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8941 "00010010" // /* MW 3 */ + 8942 "00000100" // /* MW 2 */ + 8943 "00000001" // /* MW 1 */ + 8944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8945 "00000000" // /* MW 1 */ + 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8947 "00000000" // /* MW 1 */ + 8948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8949 "00000000" // /* MW 1 */ + 8950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8951 "00000000" // /* MW 1 */ + 8952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8953 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 22 first + 8954 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8955 "00101001" // /* MW 3 */ + 8956 "01101100" // /* MW 2 */ + 8957 "00001000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8958 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8959 "00010111" // /* MW 3 */ + 8960 "00000100" // /* MW 2 */ + 8961 "00000000" // /* MW 1 */ + 8962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8963 "00000000" // /* MW 1 */ + 8964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8965 "00000000" // /* MW 1 */ + 8966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8967 "00000000" // /* MW 1 */ + 8968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8969 "00000000" // /* MW 1 */ + 8970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8971 "00000000" // /* MW 1 */ + 8972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8973 "00000000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 first + 8974 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8975 "00010010" // /* MW 3 */ + 8976 "00100100" // /* MW 2 */ + 8977 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 + 8978 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8979 "00010111" // /* MW 3 */ + 8980 "00010100" // /* MW 2 */ + 8981 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 130 4 first + 8982 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8983 "00000000" // /* MW 3 */ + 8984 "00101000" // /* MW 2 */ + 8985 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8989 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8991 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8993 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 8995 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 136 first +.src_ref 3 "elementwise_unary.h" 142 37 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 171 19 +.function_start + 9008 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #9136 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9009 "00010000" // /* MW 11 */ + 9010 "11011000" // /* MW 10 */ + 9011 "01111001" // /* MW 9 */ + 9012 "00001000" // /* MW 8 */ + 9013 "00000000" // /* MW 7 */ + 9014 "00000000" // /* MW 6 */ + 9015 "01101000" // /* MW 5 */ + 9016 "00111010" // /* MW 4 */ + 9017 "10000000" // /* MW 3 */ + 9018 "11000010" // /* MW 2 */ + 9019 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 142 78 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 190 19 first + 9020 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9184 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9021 "00010000" // /* MW 11 */ + 9022 "11110000" // /* MW 10 */ + 9023 "10111001" // /* MW 9 */ + 9024 "00001001" // /* MW 8 */ + 9025 "00000000" // /* MW 7 */ + 9026 "00000000" // /* MW 6 */ + 9027 "01101000" // /* MW 5 */ + 9028 "00111001" // /* MW 4 */ + 9029 "00000000" // /* MW 3 */ + 9030 "01010001" // /* MW 2 */ + 9031 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 + 9032 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9033 "11000000" // /* MW 3 */ + 9034 "00010100" // /* MW 2 */ + 9035 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 first + 9036 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9037 "00010000" // /* MW 3 */ + 9038 "01100000" // /* MW 2 */ + 9039 "00011010" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 103 16 first + 9040 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9041 "01010010" // /* MW 3 */ + 9042 "00011100" // /* MW 2 */ + 9043 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 142 37 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9044 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9045 "00010110" // /* MW 3 */ + 9046 "00000000" // /* MW 2 */ + 9047 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 8 "clip_impl.h" 104 16 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9048 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9049 "01101000" // /* MW 5 */ + 9050 "00111010" // /* MW 4 */ + 9051 "01010000" // /* MW 3 */ + 9052 "10000110" // /* MW 2 */ + 9053 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9055 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9057 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9059 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9060 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9061 "10110100" // /* MW 3 */ + 9062 "00011100" // /* MW 2 */ + 9063 "00111000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9064 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9065 "01110010" // /* MW 3 */ + 9066 "00001001" // /* MW 2 */ + 9067 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 142 78 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9068 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9069 "01111000" // /* MW 9 */ + 9070 "00110110" // /* MW 8 */ + 9071 "01010000" // /* MW 7 */ + 9072 "11101101" // /* MW 6 */ + 9073 "00011000" // /* MW 5 */ + 9074 "00000001" // /* MW 4 */ + 9075 "01101000" // /* MW 3 */ + 9076 "00111010" // /* MW 2 */ + 9077 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 154 8 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9078 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9079 "11111110" // /* MW 3 */ + 9080 "01111000" // /* MW 2 */ + 9081 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9082 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9083 "01110010" // /* MW 3 */ + 9084 "10000101" // /* MW 2 */ + 9085 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9086 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9087 "10101100" // /* MW 3 */ + 9088 "10101000" // /* MW 2 */ + 9089 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9090 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9091 "01100000" // /* MW 13 */ + 9092 "00101011" // /* MW 12 */ + 9093 "00000000" // /* MW 11 */ + 9094 "11001111" // /* MW 10 */ + 9095 "00000110" // /* MW 9 */ + 9096 "00110001" // /* MW 8 */ + 9097 "00000000" // /* MW 7 */ + 9098 "00000000" // /* MW 6 */ + 9099 "01101000" // /* MW 5 */ + 9100 "00111001" // /* MW 4 */ + 9101 "11110000" // /* MW 3 */ + 9102 "00101100" // /* MW 2 */ + 9103 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9104 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9105 "00000000" // /* MW 15 */ + 9106 "00000000" // /* MW 14 */ + 9107 "01111000" // /* MW 13 */ + 9108 "01010110" // /* MW 12 */ + 9109 "11011000" // /* MW 11 */ + 9110 "00000001" // /* MW 10 */ + 9111 "00000000" // /* MW 9 */ + 9112 "00000000" // /* MW 8 */ + 9113 "11010011" // /* MW 7 */ + 9114 "00011100" // /* MW 6 */ + 9115 "00100001" // /* MW 5 */ + 9116 "00000000" // /* MW 4 */ + 9117 "11110000" // /* MW 3 */ + 9118 "00101100" // /* MW 2 */ + 9119 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9120 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9121 "00000000" // /* MW 15 */ + 9122 "00000000" // /* MW 14 */ + 9123 "01111000" // /* MW 13 */ + 9124 "00110110" // /* MW 12 */ + 9125 "01010000" // /* MW 11 */ + 9126 "00000001" // /* MW 10 */ + 9127 "00000000" // /* MW 9 */ + 9128 "00000000" // /* MW 8 */ + 9129 "01011011" // /* MW 7 */ + 9130 "00000001" // /* MW 6 */ + 9131 "00100000" // /* MW 5 */ + 9132 "00000000" // /* MW 4 */ + 9133 "11110000" // /* MW 3 */ + 9134 "00101100" // /* MW 2 */ + 9135 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9136 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9137 "00000000" // /* MW 15 */ + 9138 "00000000" // /* MW 14 */ + 9139 "01111000" // /* MW 13 */ + 9140 "01010110" // /* MW 12 */ + 9141 "11010100" // /* MW 11 */ + 9142 "00000000" // /* MW 10 */ + 9143 "00000000" // /* MW 9 */ + 9144 "00000000" // /* MW 8 */ + 9145 "11010011" // /* MW 7 */ + 9146 "00011101" // /* MW 6 */ + 9147 "01101001" // /* MW 5 */ + 9148 "00111010" // /* MW 4 */ + 9149 "11110000" // /* MW 3 */ + 9150 "00101100" // /* MW 2 */ + 9151 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9152 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9153 "00000000" // /* MW 15 */ + 9154 "00000000" // /* MW 14 */ + 9155 "01111000" // /* MW 13 */ + 9156 "00110110" // /* MW 12 */ + 9157 "10001000" // /* MW 11 */ + 9158 "00000001" // /* MW 10 */ + 9159 "00000000" // /* MW 9 */ + 9160 "00000000" // /* MW 8 */ + 9161 "01011011" // /* MW 7 */ + 9162 "00000001" // /* MW 6 */ + 9163 "01101000" // /* MW 5 */ + 9164 "00111001" // /* MW 4 */ + 9165 "11110000" // /* MW 3 */ + 9166 "00101100" // /* MW 2 */ + 9167 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9168 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9169 "00000000" // /* MW 15 */ + 9170 "00000000" // /* MW 14 */ + 9171 "01111000" // /* MW 13 */ + 9172 "01010110" // /* MW 12 */ + 9173 "11011000" // /* MW 11 */ + 9174 "00000001" // /* MW 10 */ + 9175 "00000000" // /* MW 9 */ + 9176 "00000000" // /* MW 8 */ + 9177 "11010011" // /* MW 7 */ + 9178 "00011100" // /* MW 6 */ + 9179 "00100001" // /* MW 5 */ + 9180 "00000000" // /* MW 4 */ + 9181 "11110000" // /* MW 3 */ + 9182 "00101100" // /* MW 2 */ + 9183 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.src_ref 4 "max_min.hpp" 20 104 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9184 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9185 "00000000" // /* MW 15 */ + 9186 "00000000" // /* MW 14 */ + 9187 "01111000" // /* MW 13 */ + 9188 "00110110" // /* MW 12 */ + 9189 "01010000" // /* MW 11 */ + 9190 "00000001" // /* MW 10 */ + 9191 "00000000" // /* MW 9 */ + 9192 "00000000" // /* MW 8 */ + 9193 "01011011" // /* MW 7 */ + 9194 "00000001" // /* MW 6 */ + 9195 "00100000" // /* MW 5 */ + 9196 "00000000" // /* MW 4 */ + 9197 "11110000" // /* MW 3 */ + 9198 "00101100" // /* MW 2 */ + 9199 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9200 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9201 "01110000" // /* MW 7 */ + 9202 "01010110" // /* MW 6 */ + 9203 "11010100" // /* MW 5 */ + 9204 "00000000" // /* MW 4 */ + 9205 "01100000" // /* MW 3 */ + 9206 "10111010" // /* MW 2 */ + 9207 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "01101100" // /* MW 3 */ + 9210 "00010000" // /* MW 2 */ + 9211 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first + 9212 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9213 "01110000" // /* MW 7 */ + 9214 "01010110" // /* MW 6 */ + 9215 "11011000" // /* MW 5 */ + 9216 "00000001" // /* MW 4 */ + 9217 "01100000" // /* MW 3 */ + 9218 "10011010" // /* MW 2 */ + 9219 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 158 4 first + 9220 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9221 "11011001" // /* MW 5 */ + 9222 "01000000" // /* MW 4 */ + 9223 "00000101" // /* MW 3 */ + 9224 "00000000" // /* MW 2 */ + 9225 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9226 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9227 "01110000" // /* MW 7 */ + 9228 "01010110" // /* MW 6 */ + 9229 "11010100" // /* MW 5 */ + 9230 "00000000" // /* MW 4 */ + 9231 "01100000" // /* MW 3 */ + 9232 "10111010" // /* MW 2 */ + 9233 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 9234 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9235 "01101100" // /* MW 3 */ + 9236 "00010000" // /* MW 2 */ + 9237 "00011011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.delay_slot + 9238 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9239 "10101100" // /* MW 3 */ + 9240 "10110000" // /* MW 2 */ + 9241 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.delay_slot + 9242 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9243 "11010011" // /* MW 3 */ + 9244 "00011100" // /* MW 2 */ + 9245 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9246 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9247 "11010011" // /* MW 3 */ + 9248 "00011101" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 9249 "00001001" // /* MW 1 */ +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 176 first +.src_ref 7 "superkernels.cpp" 181 6 +.function_start + 9264 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9265 "10000000" // /* MW 5 */ + 9266 "11001000" // /* MW 4 */ + 9267 "11000110" // /* MW 3 */ + 9268 "00000111" // /* MW 2 */ + 9269 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 first + 9270 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9271 "11000001" // /* MW 5 */ + 9272 "10110101" // /* MW 4 */ + 9273 "11011000" // /* MW 3 */ + 9274 "11000010" // /* MW 2 */ + 9275 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 176 + 9276 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9277 "00000001" // /* MW 5 */ + 9278 "00000000" // /* MW 4 */ + 9279 "00000000" // /* MW 3 */ + 9280 "00001000" // /* MW 2 */ + 9281 "00000000" // /* MW 1 */ + 9282 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9283 "01110000" // /* MW 7 */ + 9284 "11010000" // /* MW 6 */ + 9285 "00001011" // /* MW 5 */ + 9286 "00000000" // /* MW 4 */ + 9287 "10110000" // /* MW 3 */ + 9288 "01100011" // /* MW 2 */ + 9289 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 + 9290 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9291 "00010001" // /* MW 9 */ + 9292 "00101000" // /* MW 8 */ + 9293 "00110010" // /* MW 7 */ + 9294 "11110011" // /* MW 6 */ + 9295 "00000001" // /* MW 5 */ + 9296 "00000000" // /* MW 4 */ + 9297 "10110000" // /* MW 3 */ + 9298 "10000010" // /* MW 2 */ + 9299 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 9300 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9301 "11000000" // /* MW 3 */ + 9302 "11010100" // /* MW 2 */ + 9303 "00011011" // /* MW 1 */ + 9304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9305 "00000000" // /* MW 1 */ + 9306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9307 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 +.src_ref 7 "superkernels.cpp" 181 16 + 9308 "10000100" // JNZ r16, #9472 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9472 delay_slots=5 */ + 9309 "00000001" // /* MW 5 */ + 9310 "01000000" // /* MW 4 */ + 9311 "10000000" // /* MW 3 */ + 9312 "00010010" // /* MW 2 */ + 9313 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 22 first +.delay_slot + 9314 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9315 "10010000" // /* MW 3 */ + 9316 "01100010" // /* MW 2 */ + 9317 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 30 +.delay_slot + 9318 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9319 "11111011" // /* MW 3 */ + 9320 "01100011" // /* MW 2 */ + 9321 "00010100" // /* MW 1 */ +.delay_slot + 9322 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9323 "00111101" // /* MW 3 */ + 9324 "11110100" // /* MW 2 */ + 9325 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 9326 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9327 "01110000" // /* MW 7 */ + 9328 "01100000" // /* MW 6 */ + 9329 "00110000" // /* MW 5 */ + 9330 "00000011" // /* MW 4 */ + 9331 "00110000" // /* MW 3 */ + 9332 "11000110" // /* MW 2 */ + 9333 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 +.src_ref 7 "superkernels.cpp" 195 2 +.delay_slot + 9334 "01000100" // MOVXM p0, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9335 "00000000" // /* MW 5 */ + 9336 "11001100" // /* MW 4 */ + 9337 "11000000" // /* MW 3 */ + 9338 "00000111" // /* MW 2 */ + 9339 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9340 "01000100" // MOVXM p2, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9341 "11001000" // /* MW 5 */ + 9342 "11001000" // /* MW 4 */ + 9343 "11000100" // /* MW 3 */ + 9344 "00000111" // /* MW 2 */ + 9345 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9346 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9347 "00010000" // /* MW 9 */ + 9348 "00110000" // /* MW 8 */ + 9349 "00110010" // /* MW 7 */ + 9350 "11110001" // /* MW 6 */ + 9351 "00000001" // /* MW 5 */ + 9352 "00000000" // /* MW 4 */ + 9353 "11100000" // /* MW 3 */ + 9354 "11000000" // /* MW 2 */ + 9355 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9357 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9358 "00000100" // JL #8896 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8896 delay_slots=5 */ + 9359 "00000001" // /* MW 5 */ + 9360 "00000000" // /* MW 4 */ + 9361 "01100000" // /* MW 3 */ + 9362 "00010001" // /* MW 2 */ + 9363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9367 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9368 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9369 "00110001" // /* MW 3 */ + 9370 "00100000" // /* MW 2 */ + 9371 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9372 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9373 "00000101" // /* MW 3 */ + 9374 "00100000" // /* MW 2 */ + 9375 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 9376 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9377 "00000000" // /* MW 15 */ + 9378 "00000000" // /* MW 14 */ + 9379 "01111000" // /* MW 13 */ + 9380 "10100101" // /* MW 12 */ + 9381 "00000001" // /* MW 11 */ + 9382 "00000000" // /* MW 10 */ + 9383 "00000000" // /* MW 9 */ + 9384 "10000000" // /* MW 8 */ + 9385 "00010001" // /* MW 7 */ + 9386 "00000110" // /* MW 6 */ + 9387 "00100010" // /* MW 5 */ + 9388 "00000000" // /* MW 4 */ + 9389 "11110000" // /* MW 3 */ + 9390 "00101100" // /* MW 2 */ + 9391 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 +.return_address + 9392 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9393 "10100000" // /* MW 5 */ + 9394 "11001000" // /* MW 4 */ + 9395 "11000100" // /* MW 3 */ + 9396 "00000111" // /* MW 2 */ + 9397 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 first +.src_ref 7 "superkernels.cpp" 188 43 + 9398 "10111010" // LDA r16, [p2]; MOVXM p2, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9399 "00010000" // /* MW 9 */ + 9400 "00000000" // /* MW 8 */ + 9401 "00110011" // /* MW 7 */ + 9402 "11110001" // /* MW 6 */ + 9403 "00000001" // /* MW 5 */ + 9404 "00000000" // /* MW 4 */ + 9405 "11010000" // /* MW 3 */ + 9406 "11000010" // /* MW 2 */ + 9407 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 +.src_ref 7 "superkernels.cpp" 188 43 +.src_ref 7 "superkernels.cpp" 195 2 + 9408 "10111010" // LDA r17, [p2]; MOVXM p2, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9409 "00010000" // /* MW 9 */ + 9410 "00000000" // /* MW 8 */ + 9411 "00110011" // /* MW 7 */ + 9412 "11110001" // /* MW 6 */ + 9413 "00000001" // /* MW 5 */ + 9414 "00000000" // /* MW 4 */ + 9415 "11010000" // /* MW 3 */ + 9416 "11000110" // /* MW 2 */ + 9417 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 first +.src_ref 7 "superkernels.cpp" 188 16 +.src_ref 7 "superkernels.cpp" 193 47 + 9418 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9419 "00010000" // /* MW 9 */ + 9420 "00101010" // /* MW 8 */ + 9421 "10110010" // /* MW 7 */ + 9422 "11110000" // /* MW 6 */ + 9423 "00000001" // /* MW 5 */ + 9424 "00000000" // /* MW 4 */ + 9425 "01010000" // /* MW 3 */ + 9426 "11001011" // /* MW 2 */ + 9427 "01001000" // /* MW 1 */ + 9428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9429 "00000000" // /* MW 1 */ + 9430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9431 "00000000" // /* MW 1 */ + 9432 "10000100" // J #9488 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9488 delay_slots=5 */ + 9433 "00000000" // /* MW 5 */ + 9434 "00000000" // /* MW 4 */ + 9435 "10001000" // /* MW 3 */ + 9436 "00010010" // /* MW 2 */ + 9437 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 +.delay_slot + 9438 "01000100" // MOVXM p0, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9439 "10111000" // /* MW 5 */ + 9440 "11001000" // /* MW 4 */ + 9441 "11000000" // /* MW 3 */ + 9442 "00000111" // /* MW 2 */ + 9443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9445 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 27 first +.delay_slot + 9446 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9447 "00001111" // /* MW 3 */ + 9448 "01100001" // /* MW 2 */ + 9449 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 first +.delay_slot + 9450 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9451 "10100011" // /* MW 5 */ + 9452 "00001100" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 16 first +.delay_slot + 9456 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9457 "00000000" // /* MW 15 */ + 9458 "00000000" // /* MW 14 */ + 9459 "01111000" // /* MW 13 */ + 9460 "10100101" // /* MW 12 */ + 9461 "00000001" // /* MW 11 */ + 9462 "00000000" // /* MW 10 */ + 9463 "00000000" // /* MW 9 */ + 9464 "10000000" // /* MW 8 */ + 9465 "00010001" // /* MW 7 */ + 9466 "00000110" // /* MW 6 */ + 9467 "00100001" // /* MW 5 */ + 9468 "00000000" // /* MW 4 */ + 9469 "11110000" // /* MW 3 */ + 9470 "00101100" // /* MW 2 */ + 9471 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 193 47 +.src_ref 7 "superkernels.cpp" 195 2 + 9472 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9473 "00000000" // /* MW 15 */ + 9474 "00000000" // /* MW 14 */ + 9475 "00010000" // /* MW 13 */ + 9476 "00101010" // /* MW 12 */ + 9477 "10110010" // /* MW 11 */ + 9478 "11110000" // /* MW 10 */ + 9479 "00000001" // /* MW 9 */ + 9480 "00000000" // /* MW 8 */ + 9481 "10001011" // /* MW 7 */ + 9482 "10000000" // /* MW 6 */ + 9483 "00100010" // /* MW 5 */ + 9484 "00000000" // /* MW 4 */ + 9485 "11110000" // /* MW 3 */ + 9486 "00101100" // /* MW 2 */ + 9487 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 9488 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9489 "00000000" // /* MW 7 */ + 9490 "11000011" // /* MW 6 */ + 9491 "10110011" // /* MW 5 */ + 9492 "00000011" // /* MW 4 */ + 9493 "01100000" // /* MW 3 */ + 9494 "10010001" // /* MW 2 */ + 9495 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 9496 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9497 "00010000" // /* MW 9 */ + 9498 "00100000" // /* MW 8 */ + 9499 "00110010" // /* MW 7 */ + 9500 "11110000" // /* MW 6 */ + 9501 "00000001" // /* MW 5 */ + 9502 "00000000" // /* MW 4 */ + 9503 "11010000" // /* MW 3 */ + 9504 "11101110" // /* MW 2 */ + 9505 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 9506 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9507 "00010110" // /* MW 3 */ + 9508 "11111110" // /* MW 2 */ + 9509 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 9510 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00110110" // /* MW 3 */ + 9512 "11111110" // /* MW 2 */ + 9513 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 9514 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9515 "01010110" // /* MW 3 */ + 9516 "01000110" // /* MW 2 */ + 9517 "00000111" // /* MW 1 */ + 9518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9519 "00000000" // /* MW 1 */ + 9520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9521 "00000000" // /* MW 1 */ + 9522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9523 "00000000" // /* MW 1 */ + 9524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9525 "00000000" // /* MW 1 */ + 9526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9527 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 9528 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9529 "00000010" // /* MW 3 */ + 9530 "01100001" // /* MW 2 */ + 9531 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 9532 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9533 "00010001" // /* MW 3 */ + 9534 "00000110" // /* MW 2 */ + 9535 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 9536 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9537 "11111101" // /* MW 3 */ + 9538 "11100000" // /* MW 2 */ + 9539 "00010111" // /* MW 1 */ + 9540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9541 "00000000" // /* MW 1 */ + 9542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9543 "00000000" // /* MW 1 */ + 9544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9545 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 9546 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9547 "00001000" // /* MW 3 */ + 9548 "10010011" // /* MW 2 */ + 9549 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 + 9550 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9551 "10000001" // /* MW 5 */ + 9552 "10101101" // /* MW 4 */ + 9553 "10100111" // /* MW 3 */ + 9554 "00000000" // /* MW 2 */ + 9555 "00000100" // /* MW 1 */ + 9556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9557 "00000000" // /* MW 1 */ + 9558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9559 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first + 9560 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9561 "00110110" // /* MW 3 */ + 9562 "00000110" // /* MW 2 */ + 9563 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 9564 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9565 "10000001" // /* MW 5 */ + 9566 "11011101" // /* MW 4 */ + 9567 "11011100" // /* MW 3 */ + 9568 "11001010" // /* MW 2 */ + 9569 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 47 first + 9570 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9571 "01110110" // /* MW 3 */ + 9572 "00000110" // /* MW 2 */ + 9573 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 9574 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9575 "10011110" // /* MW 3 */ + 9576 "01011100" // /* MW 2 */ + 9577 "00000111" // /* MW 1 */ + 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9579 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 195 2 first +.no_stack_arguments + 9580 "00000100" // JL #9008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9008 delay_slots=5 */ + 9581 "00000001" // /* MW 5 */ + 9582 "00000000" // /* MW 4 */ + 9583 "10011000" // /* MW 3 */ + 9584 "00010001" // /* MW 2 */ + 9585 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9587 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first +.delay_slot + 9588 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9589 "00000111" // /* MW 3 */ + 9590 "01100010" // /* MW 2 */ + 9591 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.delay_slot + 9592 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9593 "00110001" // /* MW 3 */ + 9594 "00000110" // /* MW 2 */ + 9595 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 first +.delay_slot + 9596 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9597 "00001101" // /* MW 3 */ + 9598 "11100001" // /* MW 2 */ + 9599 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 +.delay_slot + 9600 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9601 "00000000" // /* MW 15 */ + 9602 "00000000" // /* MW 14 */ + 9603 "10101000" // /* MW 13 */ + 9604 "10100000" // /* MW 12 */ + 9605 "00110100" // /* MW 11 */ + 9606 "00000000" // /* MW 10 */ + 9607 "00000000" // /* MW 9 */ + 9608 "00000000" // /* MW 8 */ + 9609 "01011011" // /* MW 7 */ + 9610 "00000001" // /* MW 6 */ + 9611 "00100000" // /* MW 5 */ + 9612 "00000000" // /* MW 4 */ + 9613 "11110000" // /* MW 3 */ + 9614 "00101100" // /* MW 2 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 +.src_ref 7 "superkernels.cpp" 198 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 9616 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9617 "00010000" // /* MW 9 */ + 9618 "00100000" // /* MW 8 */ + 9619 "00110010" // /* MW 7 */ + 9620 "11110011" // /* MW 6 */ + 9621 "00000001" // /* MW 5 */ + 9622 "00000000" // /* MW 4 */ + 9623 "11010000" // /* MW 3 */ + 9624 "11000110" // /* MW 2 */ + 9625 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 9626 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9627 "00000101" // /* MW 3 */ + 9628 "00100000" // /* MW 2 */ + 9629 "00010000" // /* MW 1 */ + 9630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9631 "00000000" // /* MW 1 */ + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ + 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9637 "00000000" // /* MW 1 */ + 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9639 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9640 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9641 "00001000" // /* MW 3 */ + 9642 "01010001" // /* MW 2 */ + 9643 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9644 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9645 "00010000" // /* MW 9 */ + 9646 "00101110" // /* MW 8 */ + 9647 "00110010" // /* MW 7 */ + 9648 "11110001" // /* MW 6 */ + 9649 "00000001" // /* MW 5 */ + 9650 "00000000" // /* MW 4 */ + 9651 "11010000" // /* MW 3 */ + 9652 "11001110" // /* MW 2 */ + 9653 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 first + 9654 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9655 "00110110" // /* MW 3 */ + 9656 "00000110" // /* MW 2 */ + 9657 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 + 9658 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9659 "01010110" // /* MW 3 */ + 9660 "00000110" // /* MW 2 */ + 9661 "00000010" // /* MW 1 */ + 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9663 "00000000" // /* MW 1 */ + 9664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9665 "00000000" // /* MW 1 */ + 9666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9667 "00000000" // /* MW 1 */ + 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 9670 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9671 "00110001" // /* MW 3 */ + 9672 "00100001" // /* MW 2 */ + 9673 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9674 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9675 "00010001" // /* MW 3 */ + 9676 "11100110" // /* MW 2 */ + 9677 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 16 first + 9678 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9679 "00101000" // /* MW 3 */ + 9680 "01100001" // /* MW 2 */ + 9681 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 + 9682 "10000100" // JNZ r16, #9712 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9712 delay_slots=5 */ + 9683 "00000001" // /* MW 5 */ + 9684 "01000000" // /* MW 4 */ + 9685 "11111000" // /* MW 3 */ + 9686 "00010010" // /* MW 2 */ + 9687 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9693 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9695 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9697 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 + 9698 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9699 "00000001" // /* MW 3 */ + 9700 "00100000" // /* MW 2 */ + 9701 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 first + 9702 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9703 "00000000" // /* MW 9 */ + 9704 "00000000" // /* MW 8 */ + 9705 "00000000" // /* MW 7 */ + 9706 "10000000" // /* MW 6 */ + 9707 "00010001" // /* MW 5 */ + 9708 "00000110" // /* MW 4 */ + 9709 "11110110" // /* MW 3 */ + 9710 "00101100" // /* MW 2 */ + 9711 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 200 + 9712 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9713 "00111001" // /* MW 3 */ + 9714 "11110100" // /* MW 2 */ + 9715 "00000111" // /* MW 1 */ + 9716 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9717 "00011001" // /* MW 3 */ + 9718 "11111011" // /* MW 2 */ + 9719 "00000111" // /* MW 1 */ + 9720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9721 "00000000" // /* MW 1 */ + 9722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9723 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9725 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9726 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9727 "11110001" // /* MW 3 */ + 9728 "11111101" // /* MW 2 */ + 9729 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9731 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9732 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9733 "00000000" // /* MW 3 */ + 9734 "00101000" // /* MW 2 */ + 9735 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9736 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9737 "10100000" // /* MW 3 */ + 9738 "01100111" // /* MW 2 */ + 9739 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 +.delay_slot + 9740 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9741 "00000001" // /* MW 5 */ + 9742 "00000000" // /* MW 4 */ + 9743 "00000000" // /* MW 3 */ + 9744 "11111000" // /* MW 2 */ + 9745 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9747 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9751 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 9760 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9761 "01011000" // /* MW 9 */ + 9762 "00000000" // /* MW 8 */ + 9763 "00001000" // /* MW 7 */ + 9764 "00001011" // /* MW 6 */ + 9765 "00100000" // /* MW 5 */ + 9766 "00001000" // /* MW 4 */ + 9767 "11010000" // /* MW 3 */ + 9768 "10000101" // /* MW 2 */ + 9769 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 9770 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9771 "00000001" // /* MW 3 */ + 9772 "10000000" // /* MW 2 */ + 9773 "00010111" // /* MW 1 */ + 9774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9775 "00000000" // /* MW 1 */ + 9776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9777 "00000000" // /* MW 1 */ + 9778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9779 "00000000" // /* MW 1 */ + 9780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9781 "00000000" // /* MW 1 */ + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 9784 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9785 "00101001" // /* MW 3 */ + 9786 "00011100" // /* MW 2 */ + 9787 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 9788 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9789 "00101110" // /* MW 3 */ + 9790 "00011100" // /* MW 2 */ + 9791 "00000001" // /* MW 1 */ + 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9793 "00000000" // /* MW 1 */ + 9794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9795 "00000000" // /* MW 1 */ + 9796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9797 "00000000" // /* MW 1 */ + 9798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9799 "00000000" // /* MW 1 */ + 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9801 "00000000" // /* MW 1 */ + 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9803 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 9804 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9805 "00101001" // /* MW 3 */ + 9806 "00011100" // /* MW 2 */ + 9807 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 9808 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9809 "00101110" // /* MW 3 */ + 9810 "00000100" // /* MW 2 */ + 9811 "00000001" // /* MW 1 */ + 9812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9813 "00000000" // /* MW 1 */ + 9814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9815 "00000000" // /* MW 1 */ + 9816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9817 "00000000" // /* MW 1 */ + 9818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9819 "00000000" // /* MW 1 */ + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ + 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9823 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 9824 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9825 "00101001" // /* MW 3 */ + 9826 "00011100" // /* MW 2 */ + 9827 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 9828 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9829 "01110110" // /* MW 3 */ + 9830 "00010100" // /* MW 2 */ + 9831 "00000001" // /* MW 1 */ + 9832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9833 "00000000" // /* MW 1 */ + 9834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9835 "00000000" // /* MW 1 */ + 9836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9837 "00000000" // /* MW 1 */ + 9838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9839 "00000000" // /* MW 1 */ + 9840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9841 "00000000" // /* MW 1 */ + 9842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9843 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9844 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9845 "01110001" // /* MW 3 */ + 9846 "01001100" // /* MW 2 */ + 9847 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9848 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9849 "00010111" // /* MW 3 */ + 9850 "00000100" // /* MW 2 */ + 9851 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9852 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9853 "00000000" // /* MW 3 */ + 9854 "00101000" // /* MW 2 */ + 9855 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9856 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9857 "00000000" // /* MW 5 */ + 9858 "10111110" // /* MW 4 */ + 9859 "11110000" // /* MW 3 */ + 9860 "00000000" // /* MW 2 */ + 9861 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9862 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9863 "00010100" // /* MW 3 */ + 9864 "11000010" // /* MW 2 */ + 9865 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9866 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9867 "00100111" // /* MW 3 */ + 9868 "01110110" // /* MW 2 */ + 9869 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9870 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9871 "10000010" // /* MW 3 */ + 9872 "00000001" // /* MW 2 */ + 9873 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9875 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 9888 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000001" // /* MW 5 */ + 9890 "00000000" // /* MW 4 */ + 9891 "00000000" // /* MW 3 */ + 9892 "00001000" // /* MW 2 */ + 9893 "00000000" // /* MW 1 */ + 9894 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9895 "00111101" // /* MW 3 */ + 9896 "11111000" // /* MW 2 */ + 9897 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 9898 "00000100" // JL #9760 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9760 delay_slots=5 */ + 9899 "00000001" // /* MW 5 */ + 9900 "00000000" // /* MW 4 */ + 9901 "00010000" // /* MW 3 */ + 9902 "00010011" // /* MW 2 */ + 9903 "00000000" // /* MW 1 */ +.delay_slot + 9904 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9905 "10011101" // /* MW 3 */ + 9906 "11111111" // /* MW 2 */ + 9907 "00001111" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot + 9908 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9909 "11000000" // /* MW 3 */ + 9910 "01100000" // /* MW 2 */ + 9911 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9913 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9916 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9917 "01100111" // /* MW 3 */ + 9918 "00000001" // /* MW 2 */ + 9919 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.return_address + 9920 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9921 "00111001" // /* MW 3 */ + 9922 "11111000" // /* MW 2 */ + 9923 "00000111" // /* MW 1 */ + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ + 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9929 "00000000" // /* MW 1 */ + 9930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9931 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9933 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9934 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9935 "10011001" // /* MW 3 */ + 9936 "11111111" // /* MW 2 */ + 9937 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9938 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9939 "00000000" // /* MW 3 */ + 9940 "00101000" // /* MW 2 */ + 9941 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9947 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9948 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9949 "00001001" // /* MW 3 */ + 9950 "00100000" // /* MW 2 */ + 9951 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "mul_impl.h" 193 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9952 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9953 "01110001" // /* MW 9 */ + 9954 "00000000" // /* MW 8 */ + 9955 "00000000" // /* MW 7 */ + 9956 "00000000" // /* MW 6 */ + 9957 "11111110" // /* MW 5 */ + 9958 "00111111" // /* MW 4 */ + 9959 "00110000" // /* MW 3 */ + 9960 "11000010" // /* MW 2 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9961 "11101000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 3 "elementwise_binary_shared.h" 107 first +.src_ref 3 "elementwise_binary_shared.h" 119 37 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.function_start + 9968 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9969 "11000000" // /* MW 3 */ + 9970 "00010110" // /* MW 2 */ + 9971 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first + 9972 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9973 "00000111" // /* MW 3 */ + 9974 "01100000" // /* MW 2 */ + 9975 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 122 22 first + 9976 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9977 "01010010" // /* MW 3 */ + 9978 "00011100" // /* MW 2 */ + 9979 "00000011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 15 first + 9980 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9981 "10010110" // /* MW 3 */ + 9982 "00000100" // /* MW 2 */ + 9983 "00000011" // /* MW 1 */ + 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9985 "00000000" // /* MW 1 */ + 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9987 "00000000" // /* MW 1 */ + 9988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9989 "00000000" // /* MW 1 */ + 9990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9991 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9992 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9993 "00001001" // /* MW 3 */ + 9994 "00000110" // /* MW 2 */ + 9995 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 107 + 9996 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9997 "00000001" // /* MW 5 */ + 9998 "00000000" // /* MW 4 */ + 9999 "00000000" // /* MW 3 */ + 10000 "00010000" // /* MW 2 */ + 10001 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 10002 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10003 "01001100" // /* MW 3 */ + 10004 "11000110" // /* MW 2 */ + 10005 "00010000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 +.src_ref 3 "elementwise_binary_shared.h" 124 8 + 10006 "10111010" // MOVA r1, #0; JNZ r3, #10160 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10160 delay_slots=5 */ + 10007 "01100000" // /* MW 9 */ + 10008 "00000000" // /* MW 8 */ + 10009 "00010000" // /* MW 7 */ + 10010 "11110110" // /* MW 6 */ + 10011 "00000100" // /* MW 5 */ + 10012 "00000110" // /* MW 4 */ + 10013 "00000000" // /* MW 3 */ + 10014 "00000001" // /* MW 2 */ + 10015 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 first +.delay_slot + 10016 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10017 "01110010" // /* MW 3 */ + 10018 "00000101" // /* MW 2 */ + 10019 "00011000" // /* MW 1 */ +.delay_slot + 10020 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10021 "11000000" // /* MW 3 */ + 10022 "01011110" // /* MW 2 */ + 10023 "00011000" // /* MW 1 */ +.delay_slot + 10024 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10025 "11100000" // /* MW 3 */ + 10026 "01100101" // /* MW 2 */ + 10027 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10028 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10029 "10000001" // /* MW 5 */ + 10030 "11011101" // /* MW 4 */ + 10031 "00001010" // /* MW 3 */ + 10032 "11110010" // /* MW 2 */ + 10033 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first +.delay_slot + 10034 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10035 "00010011" // /* MW 3 */ + 10036 "00000100" // /* MW 2 */ + 10037 "00001111" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.src_ref 3 "elementwise_binary_shared.h" 131 19 + 10038 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10039 "01110010" // /* MW 9 */ + 10040 "10111001" // /* MW 8 */ + 10041 "00000100" // /* MW 7 */ + 10042 "00000000" // /* MW 6 */ + 10043 "00001011" // /* MW 5 */ + 10044 "10000000" // /* MW 4 */ + 10045 "10000100" // /* MW 3 */ + 10046 "10000010" // /* MW 2 */ + 10047 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 126 34 first +.src_ref 3 "elementwise_binary_shared.h" 131 19 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 10048 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10049 "00000001" // /* MW 5 */ + 10050 "00000001" // /* MW 4 */ + 10051 "01010100" // /* MW 3 */ + 10052 "00000001" // /* MW 2 */ + 10053 "10000000" // /* MW 1 */ + 10054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10055 "00000000" // /* MW 1 */ + 10056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10057 "00000000" // /* MW 1 */ + 10058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10059 "00000000" // /* MW 1 */ + 10060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10061 "00000000" // /* MW 1 */ + 10062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10063 "00000000" // /* MW 1 */ + 10064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10065 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 131 12 +.src_ref 3 "elementwise_binary_shared.h" 131 35 + 10066 "10000100" // JNZ r0, #10112 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10112 delay_slots=5 */ + 10067 "00000001" // /* MW 5 */ + 10068 "01000000" // /* MW 4 */ + 10069 "11000000" // /* MW 3 */ + 10070 "00010011" // /* MW 2 */ + 10071 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 10072 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10073 "00000000" // /* MW 3 */ + 10074 "00000000" // /* MW 2 */ + 10075 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10076 "01000100" // MOVXM p4, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10077 "11001000" // /* MW 5 */ + 10078 "11001000" // /* MW 4 */ + 10079 "11001000" // /* MW 3 */ + 10080 "00000111" // /* MW 2 */ + 10081 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10085 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10087 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 10088 "10111010" // MOVA m1, #0; J #10128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */ + 10089 "00100000" // /* MW 9 */ + 10090 "00000000" // /* MW 8 */ + 10091 "00000000" // /* MW 7 */ + 10092 "11110010" // /* MW 6 */ + 10093 "00000100" // /* MW 5 */ + 10094 "00000000" // /* MW 4 */ + 10095 "10000000" // /* MW 3 */ + 10096 "00000100" // /* MW 2 */ + 10097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 10106 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10107 "00100110" // /* MW 5 */ + 10108 "00001000" // /* MW 4 */ + 10109 "11110000" // /* MW 3 */ + 10110 "00101100" // /* MW 2 */ + 10111 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 10112 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10113 "10000000" // /* MW 3 */ + 10114 "00000000" // /* MW 2 */ + 10115 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 10116 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10117 "01010000" // /* MW 11 */ + 10118 "00000000" // /* MW 10 */ + 10119 "00000000" // /* MW 9 */ + 10120 "00000001" // /* MW 8 */ + 10121 "00010011" // /* MW 7 */ + 10122 "00000100" // /* MW 6 */ + 10123 "00100001" // /* MW 5 */ + 10124 "00000000" // /* MW 4 */ + 10125 "11110000" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 10128 "10000100" // J #10288 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10288 delay_slots=5 */ + 10129 "00000000" // /* MW 5 */ + 10130 "00000000" // /* MW 4 */ + 10131 "00011000" // /* MW 3 */ + 10132 "00010100" // /* MW 2 */ + 10133 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 10134 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10135 "01110000" // /* MW 7 */ + 10136 "01100000" // /* MW 6 */ + 10137 "10110000" // /* MW 5 */ + 10138 "00000011" // /* MW 4 */ + 10139 "01100000" // /* MW 3 */ + 10140 "10010001" // /* MW 2 */ + 10141 "00010011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10143 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10148 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10149 "10000001" // /* MW 11 */ + 10150 "10101101" // /* MW 10 */ + 10151 "00000000" // /* MW 9 */ + 10152 "00000000" // /* MW 8 */ + 10153 "00000000" // /* MW 7 */ + 10154 "00000000" // /* MW 6 */ + 10155 "00100000" // /* MW 5 */ + 10156 "00000000" // /* MW 4 */ + 10157 "11110000" // /* MW 3 */ + 10158 "00101100" // /* MW 2 */ + 10159 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 +.src_ref 3 "elementwise_binary_shared.h" 150 97 + 10160 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10161 "00001101" // /* MW 3 */ + 10162 "00000100" // /* MW 2 */ + 10163 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 97 first + 10164 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10165 "01000111" // /* MW 3 */ + 10166 "10000100" // /* MW 2 */ + 10167 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10168 "10000100" // JNZ r2, #10208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10208 delay_slots=5 */ + 10169 "00000001" // /* MW 5 */ + 10170 "01000000" // /* MW 4 */ + 10171 "11110000" // /* MW 3 */ + 10172 "00010011" // /* MW 2 */ + 10173 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10174 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10175 "00000000" // /* MW 5 */ + 10176 "00100000" // /* MW 4 */ + 10177 "00000000" // /* MW 3 */ + 10178 "10000000" // /* MW 2 */ + 10179 "00111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10180 "01000100" // MOVXM p4, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10181 "11001000" // /* MW 5 */ + 10182 "11001000" // /* MW 4 */ + 10183 "11001000" // /* MW 3 */ + 10184 "00000111" // /* MW 2 */ + 10185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10192 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "00010000" // /* MW 13 */ + 10196 "00000000" // /* MW 12 */ + 10197 "00001000" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "11100000" // /* MW 9 */ + 10200 "00101111" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "00100000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "11110000" // /* MW 3 */ + 10206 "00101100" // /* MW 2 */ + 10207 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10208 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10209 "01011000" // /* MW 9 */ + 10210 "10111110" // /* MW 8 */ + 10211 "01000111" // /* MW 7 */ + 10212 "00000000" // /* MW 6 */ + 10213 "11010010" // /* MW 5 */ + 10214 "00000010" // /* MW 4 */ + 10215 "01010000" // /* MW 3 */ + 10216 "10000000" // /* MW 2 */ + 10217 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10218 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10219 "10000000" // /* MW 3 */ + 10220 "00000000" // /* MW 2 */ + 10221 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10222 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10223 "00000000" // /* MW 3 */ + 10224 "00000000" // /* MW 2 */ + 10225 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 171 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10226 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10227 "10000000" // /* MW 3 */ + 10228 "00000000" // /* MW 2 */ + 10229 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10231 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10232 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10233 "00010001" // /* MW 3 */ + 10234 "00000000" // /* MW 2 */ + 10235 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10236 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10237 "00100101" // /* MW 5 */ + 10238 "00000001" // /* MW 4 */ + 10239 "11100010" // /* MW 3 */ + 10240 "00000010" // /* MW 2 */ + 10241 "10100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10242 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10243 "10000000" // /* MW 3 */ + 10244 "00111010" // /* MW 2 */ + 10245 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10246 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10247 "10010110" // /* MW 3 */ + 10248 "01000000" // /* MW 2 */ + 10249 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10251 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10252 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10253 "00000001" // /* MW 3 */ + 10254 "00000001" // /* MW 2 */ + 10255 "00011000" // /* MW 1 */ + 10256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10257 "00000000" // /* MW 1 */ + 10258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10259 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10260 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10261 "00010010" // /* MW 3 */ + 10262 "00000000" // /* MW 2 */ + 10263 "00000101" // /* MW 1 */ + 10264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10265 "00000000" // /* MW 1 */ + 10266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10267 "00000000" // /* MW 1 */ + 10268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10269 "00000000" // /* MW 1 */ + 10270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10271 "00000000" // /* MW 1 */ + 10272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10273 "00000000" // /* MW 1 */ + 10274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10275 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 10276 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10277 "01110010" // /* MW 3 */ + 10278 "00000001" // /* MW 2 */ + 10279 "00011000" // /* MW 1 */ + 10280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10281 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 10282 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10283 "01100110" // /* MW 5 */ + 10284 "11111000" // /* MW 4 */ + 10285 "11111111" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 166 4 first +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first + 10288 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10400 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10289 "00010000" // /* MW 11 */ + 10290 "01010000" // /* MW 10 */ + 10291 "01111100" // /* MW 9 */ + 10292 "00001000" // /* MW 8 */ + 10293 "00000000" // /* MW 7 */ + 10294 "00000000" // /* MW 6 */ + 10295 "11101000" // /* MW 5 */ + 10296 "01010000" // /* MW 4 */ + 10297 "11011110" // /* MW 3 */ + 10298 "10001010" // /* MW 2 */ + 10299 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10300 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10301 "00010000" // /* MW 11 */ + 10302 "01101000" // /* MW 10 */ + 10303 "10111100" // /* MW 9 */ + 10304 "00001001" // /* MW 8 */ + 10305 "00000000" // /* MW 7 */ + 10306 "00000000" // /* MW 6 */ + 10307 "01101000" // /* MW 5 */ + 10308 "10010000" // /* MW 4 */ + 10309 "00000010" // /* MW 3 */ + 10310 "01100011" // /* MW 2 */ + 10311 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 177 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10312 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10313 "11110001" // /* MW 7 */ + 10314 "00000000" // /* MW 6 */ + 10315 "11101000" // /* MW 5 */ + 10316 "01010000" // /* MW 4 */ + 10317 "01111110" // /* MW 3 */ + 10318 "00000101" // /* MW 2 */ + 10319 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10320 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10321 "01101000" // /* MW 5 */ + 10322 "10010000" // /* MW 4 */ + 10323 "01010010" // /* MW 3 */ + 10324 "10010000" // /* MW 2 */ + 10325 "10000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10327 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10328 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10329 "00101011" // /* MW 3 */ + 10330 "00001000" // /* MW 2 */ + 10331 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10333 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10334 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10335 "00111101" // /* MW 3 */ + 10336 "10000100" // /* MW 2 */ + 10337 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10338 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10339 "00000001" // /* MW 7 */ + 10340 "00000010" // /* MW 6 */ + 10341 "00000001" // /* MW 5 */ + 10342 "10000110" // /* MW 4 */ + 10343 "01111110" // /* MW 3 */ + 10344 "01110001" // /* MW 2 */ + 10345 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10346 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10347 "11101000" // /* MW 5 */ + 10348 "01010000" // /* MW 4 */ + 10349 "01111110" // /* MW 3 */ + 10350 "00000011" // /* MW 2 */ + 10351 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10352 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10353 "00000000" // /* MW 15 */ + 10354 "00000000" // /* MW 14 */ + 10355 "01111000" // /* MW 13 */ + 10356 "10100101" // /* MW 12 */ + 10357 "00000001" // /* MW 11 */ + 10358 "00000000" // /* MW 10 */ + 10359 "11010100" // /* MW 9 */ + 10360 "00001001" // /* MW 8 */ + 10361 "01011011" // /* MW 7 */ + 10362 "00000001" // /* MW 6 */ + 10363 "00100000" // /* MW 5 */ + 10364 "00000000" // /* MW 4 */ + 10365 "01110000" // /* MW 3 */ + 10366 "00000101" // /* MW 2 */ + 10367 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10369 "00000000" // /* MW 15 */ + 10370 "00000000" // /* MW 14 */ + 10371 "01111000" // /* MW 13 */ + 10372 "10100101" // /* MW 12 */ + 10373 "00000001" // /* MW 11 */ + 10374 "00000000" // /* MW 10 */ + 10375 "00000000" // /* MW 9 */ + 10376 "00000000" // /* MW 8 */ + 10377 "01011011" // /* MW 7 */ + 10378 "00000001" // /* MW 6 */ + 10379 "00100000" // /* MW 5 */ + 10380 "00000000" // /* MW 4 */ + 10381 "11110000" // /* MW 3 */ + 10382 "00101100" // /* MW 2 */ + 10383 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10384 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10385 "00010000" // /* MW 15 */ + 10386 "00001000" // /* MW 14 */ + 10387 "01111000" // /* MW 13 */ + 10388 "10100101" // /* MW 12 */ + 10389 "00000001" // /* MW 11 */ + 10390 "00000000" // /* MW 10 */ + 10391 "00000000" // /* MW 9 */ + 10392 "00000000" // /* MW 8 */ + 10393 "01011011" // /* MW 7 */ + 10394 "00000001" // /* MW 6 */ + 10395 "00100000" // /* MW 5 */ + 10396 "00000000" // /* MW 4 */ + 10397 "11110000" // /* MW 3 */ + 10398 "00101100" // /* MW 2 */ + 10399 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10400 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10401 "00000000" // /* MW 15 */ + 10402 "00000000" // /* MW 14 */ + 10403 "01111000" // /* MW 13 */ + 10404 "10100101" // /* MW 12 */ + 10405 "00000001" // /* MW 11 */ + 10406 "00000000" // /* MW 10 */ + 10407 "00000000" // /* MW 9 */ + 10408 "00000000" // /* MW 8 */ + 10409 "01011011" // /* MW 7 */ + 10410 "00000001" // /* MW 6 */ + 10411 "11101000" // /* MW 5 */ + 10412 "01010000" // /* MW 4 */ + 10413 "01111110" // /* MW 3 */ + 10414 "00000011" // /* MW 2 */ + 10415 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10416 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10417 "00000000" // /* MW 15 */ + 10418 "00000000" // /* MW 14 */ + 10419 "01111000" // /* MW 13 */ + 10420 "10100101" // /* MW 12 */ + 10421 "00000001" // /* MW 11 */ + 10422 "00000000" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "10100011" // /* MW 7 */ + 10426 "00011100" // /* MW 6 */ + 10427 "00100010" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "01110000" // /* MW 3 */ + 10430 "00000101" // /* MW 2 */ + 10431 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10432 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10433 "00000000" // /* MW 15 */ + 10434 "00000000" // /* MW 14 */ + 10435 "01111000" // /* MW 13 */ + 10436 "10100101" // /* MW 12 */ + 10437 "00000001" // /* MW 11 */ + 10438 "00000000" // /* MW 10 */ + 10439 "00000000" // /* MW 9 */ + 10440 "00000000" // /* MW 8 */ + 10441 "01011011" // /* MW 7 */ + 10442 "00000001" // /* MW 6 */ + 10443 "00100000" // /* MW 5 */ + 10444 "00000000" // /* MW 4 */ + 10445 "11110000" // /* MW 3 */ + 10446 "00101100" // /* MW 2 */ + 10447 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10448 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10449 "00010000" // /* MW 15 */ + 10450 "00001000" // /* MW 14 */ + 10451 "01111000" // /* MW 13 */ + 10452 "10100101" // /* MW 12 */ + 10453 "00000001" // /* MW 11 */ + 10454 "00000000" // /* MW 10 */ + 10455 "00000000" // /* MW 9 */ + 10456 "00000000" // /* MW 8 */ + 10457 "01011011" // /* MW 7 */ + 10458 "00000001" // /* MW 6 */ + 10459 "00100000" // /* MW 5 */ + 10460 "00000000" // /* MW 4 */ + 10461 "11110000" // /* MW 3 */ + 10462 "00101100" // /* MW 2 */ + 10463 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10464 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10465 "00000001" // /* MW 5 */ + 10466 "00000000" // /* MW 4 */ + 10467 "00000000" // /* MW 3 */ + 10468 "11110000" // /* MW 2 */ + 10469 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10470 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10471 "10100011" // /* MW 3 */ + 10472 "00011100" // /* MW 2 */ + 10473 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10475 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10476 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10477 "00000001" // /* MW 3 */ + 10478 "00000010" // /* MW 2 */ + 10479 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10481 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10482 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10483 "00000000" // /* MW 3 */ + 10484 "00101000" // /* MW 2 */ + 10485 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10486 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10487 "10100011" // /* MW 3 */ + 10488 "00011100" // /* MW 2 */ + 10489 "00001010" // /* MW 1 */ +.delay_slot + 10490 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10491 "10100000" // /* MW 3 */ + 10492 "01100000" // /* MW 2 */ + 10493 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10495 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.delay_slot + 10496 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10497 "10100011" // /* MW 3 */ + 10498 "00011100" // /* MW 2 */ + 10499 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 10501 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 4 "vector.hpp" 538 13 +.src_ref 3 "elementwise_binary_shared.h" 237 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.function_start + 10512 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10513 "01110010" // /* MW 9 */ + 10514 "11110000" // /* MW 8 */ + 10515 "01100000" // /* MW 7 */ + 10516 "00000000" // /* MW 6 */ + 10517 "10001011" // /* MW 5 */ + 10518 "10001000" // /* MW 4 */ + 10519 "10000011" // /* MW 3 */ + 10520 "10000010" // /* MW 2 */ + 10521 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 first +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 + 10522 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10523 "10000001" // /* MW 5 */ + 10524 "11000101" // /* MW 4 */ + 10525 "01010100" // /* MW 3 */ + 10526 "00000001" // /* MW 2 */ + 10527 "01000000" // /* MW 1 */ + 10528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10529 "00000000" // /* MW 1 */ + 10530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10531 "00000000" // /* MW 1 */ + 10532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10533 "00000000" // /* MW 1 */ + 10534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10535 "00000000" // /* MW 1 */ + 10536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10537 "00000000" // /* MW 1 */ + 10538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10539 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 244 12 +.src_ref 3 "elementwise_binary_shared.h" 244 35 + 10540 "10000100" // JZ r0, #10608 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10608 delay_slots=5 */ + 10541 "00000001" // /* MW 5 */ + 10542 "00000000" // /* MW 4 */ + 10543 "10111000" // /* MW 3 */ + 10544 "00010100" // /* MW 2 */ + 10545 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 237 +.delay_slot + 10546 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10547 "00000001" // /* MW 5 */ + 10548 "00000000" // /* MW 4 */ + 10549 "00000000" // /* MW 3 */ + 10550 "00001000" // /* MW 2 */ + 10551 "00000000" // /* MW 1 */ +.delay_slot + 10552 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10553 "11100000" // /* MW 3 */ + 10554 "01010101" // /* MW 2 */ + 10555 "00011000" // /* MW 1 */ +.delay_slot + 10556 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10557 "11100000" // /* MW 3 */ + 10558 "01100000" // /* MW 2 */ + 10559 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 10560 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10561 "00101011" // /* MW 3 */ + 10562 "00000111" // /* MW 2 */ + 10563 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10565 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 247 12 first +.no_stack_arguments + 10566 "00000100" // JL #9968 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 10567 "00000001" // /* MW 5 */ + 10568 "00000000" // /* MW 4 */ + 10569 "01111000" // /* MW 3 */ + 10570 "00010011" // /* MW 2 */ + 10571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10580 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10581 "10000001" // /* MW 11 */ + 10582 "10101101" // /* MW 10 */ + 10583 "00000000" // /* MW 9 */ + 10584 "00000000" // /* MW 8 */ + 10585 "00000000" // /* MW 7 */ + 10586 "00000000" // /* MW 6 */ + 10587 "00100000" // /* MW 5 */ + 10588 "00000000" // /* MW 4 */ + 10589 "11110000" // /* MW 3 */ + 10590 "00101100" // /* MW 2 */ + 10591 "00000000" // /* MW 1 */ +.return_address + 10592 "10000100" // J #10640 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10640 delay_slots=5 */ + 10593 "00000000" // /* MW 5 */ + 10594 "00000000" // /* MW 4 */ + 10595 "11001000" // /* MW 3 */ + 10596 "00010100" // /* MW 2 */ + 10597 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10607 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 3 "elementwise_binary_shared.h" 245 12 first +.no_stack_arguments + 10608 "00000100" // JL #9968 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 10609 "00000001" // /* MW 5 */ + 10610 "00000000" // /* MW 4 */ + 10611 "01111000" // /* MW 3 */ + 10612 "00010011" // /* MW 2 */ + 10613 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.delay_slot + 10614 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10615 "01110000" // /* MW 7 */ + 10616 "01100000" // /* MW 6 */ + 10617 "10110000" // /* MW 5 */ + 10618 "00000000" // /* MW 4 */ + 10619 "01100000" // /* MW 3 */ + 10620 "10010001" // /* MW 2 */ + 10621 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10625 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10627 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10628 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10629 "10000001" // /* MW 11 */ + 10630 "10101101" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "00000000" // /* MW 8 */ + 10633 "00000000" // /* MW 7 */ + 10634 "00000000" // /* MW 6 */ + 10635 "00100000" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.return_address + 10640 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10641 "10000000" // /* MW 3 */ + 10642 "01110001" // /* MW 2 */ + 10643 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 first + 10644 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10645 "00000000" // /* MW 3 */ + 10646 "00101000" // /* MW 2 */ + 10647 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.delay_slot + 10648 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10649 "00000001" // /* MW 5 */ + 10650 "00000000" // /* MW 4 */ + 10651 "00000000" // /* MW 3 */ + 10652 "11111000" // /* MW 2 */ + 10653 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 10661 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 205 first +.src_ref 7 "superkernels.cpp" 210 6 +.function_start + 10672 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10673 "10000000" // /* MW 5 */ + 10674 "11001000" // /* MW 4 */ + 10675 "11000110" // /* MW 3 */ + 10676 "00000111" // /* MW 2 */ + 10677 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 first + 10678 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10679 "11000001" // /* MW 5 */ + 10680 "10110101" // /* MW 4 */ + 10681 "11011000" // /* MW 3 */ + 10682 "11000010" // /* MW 2 */ + 10683 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 205 + 10684 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10685 "00000001" // /* MW 5 */ + 10686 "00000000" // /* MW 4 */ + 10687 "00000000" // /* MW 3 */ + 10688 "00001000" // /* MW 2 */ + 10689 "00000000" // /* MW 1 */ + 10690 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10691 "01110000" // /* MW 7 */ + 10692 "11010000" // /* MW 6 */ + 10693 "00001011" // /* MW 5 */ + 10694 "00000000" // /* MW 4 */ + 10695 "10110000" // /* MW 3 */ + 10696 "01100011" // /* MW 2 */ + 10697 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 + 10698 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10699 "00010001" // /* MW 9 */ + 10700 "00101000" // /* MW 8 */ + 10701 "00110010" // /* MW 7 */ + 10702 "11110011" // /* MW 6 */ + 10703 "00000001" // /* MW 5 */ + 10704 "00000000" // /* MW 4 */ + 10705 "10110000" // /* MW 3 */ + 10706 "10000010" // /* MW 2 */ + 10707 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 10708 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10709 "11000000" // /* MW 3 */ + 10710 "11010100" // /* MW 2 */ + 10711 "00011011" // /* MW 1 */ + 10712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10713 "00000000" // /* MW 1 */ + 10714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10715 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 +.src_ref 7 "superkernels.cpp" 210 16 + 10716 "10000100" // JNZ r16, #10880 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10880 delay_slots=5 */ + 10717 "00000001" // /* MW 5 */ + 10718 "01000000" // /* MW 4 */ + 10719 "01000000" // /* MW 3 */ + 10720 "00010101" // /* MW 2 */ + 10721 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 22 first +.delay_slot + 10722 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10723 "10010000" // /* MW 3 */ + 10724 "01100010" // /* MW 2 */ + 10725 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 30 +.delay_slot + 10726 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10727 "11111011" // /* MW 3 */ + 10728 "01100011" // /* MW 2 */ + 10729 "00010100" // /* MW 1 */ +.delay_slot + 10730 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10731 "00111101" // /* MW 3 */ + 10732 "11110100" // /* MW 2 */ + 10733 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 10734 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10735 "01110000" // /* MW 7 */ + 10736 "01100000" // /* MW 6 */ + 10737 "00110000" // /* MW 5 */ + 10738 "00000011" // /* MW 4 */ + 10739 "00110000" // /* MW 3 */ + 10740 "11000110" // /* MW 2 */ + 10741 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 +.src_ref 7 "superkernels.cpp" 224 2 +.delay_slot + 10742 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10743 "10000000" // /* MW 5 */ + 10744 "11001010" // /* MW 4 */ + 10745 "11000000" // /* MW 3 */ + 10746 "00000111" // /* MW 2 */ + 10747 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10748 "01000100" // MOVXM p2, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10749 "11001000" // /* MW 5 */ + 10750 "11001000" // /* MW 4 */ + 10751 "11000100" // /* MW 3 */ + 10752 "00000111" // /* MW 2 */ + 10753 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10754 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10755 "00010000" // /* MW 9 */ + 10756 "00110000" // /* MW 8 */ + 10757 "00110010" // /* MW 7 */ + 10758 "11110001" // /* MW 6 */ + 10759 "00000001" // /* MW 5 */ + 10760 "00000000" // /* MW 4 */ + 10761 "11100000" // /* MW 3 */ + 10762 "11000000" // /* MW 2 */ + 10763 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10765 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10766 "00000100" // JL #9888 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */ + 10767 "00000001" // /* MW 5 */ + 10768 "00000000" // /* MW 4 */ + 10769 "01010000" // /* MW 3 */ + 10770 "00010011" // /* MW 2 */ + 10771 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10776 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00110001" // /* MW 3 */ + 10778 "00100000" // /* MW 2 */ + 10779 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 10780 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00000101" // /* MW 3 */ + 10782 "00100000" // /* MW 2 */ + 10783 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 10784 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10785 "00000000" // /* MW 15 */ + 10786 "00000000" // /* MW 14 */ + 10787 "01111000" // /* MW 13 */ + 10788 "10100101" // /* MW 12 */ + 10789 "00000001" // /* MW 11 */ + 10790 "00000000" // /* MW 10 */ + 10791 "00000000" // /* MW 9 */ + 10792 "10000000" // /* MW 8 */ + 10793 "00010001" // /* MW 7 */ + 10794 "00000110" // /* MW 6 */ + 10795 "00100010" // /* MW 5 */ + 10796 "00000000" // /* MW 4 */ + 10797 "11110000" // /* MW 3 */ + 10798 "00101100" // /* MW 2 */ + 10799 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 +.return_address + 10800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10801 "10100000" // /* MW 5 */ + 10802 "11001000" // /* MW 4 */ + 10803 "11000100" // /* MW 3 */ + 10804 "00000111" // /* MW 2 */ + 10805 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 first +.src_ref 7 "superkernels.cpp" 217 65 + 10806 "10111010" // LDA r16, [p2]; MOVXM p2, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10807 "00010000" // /* MW 9 */ + 10808 "10100000" // /* MW 8 */ + 10809 "00110010" // /* MW 7 */ + 10810 "11110001" // /* MW 6 */ + 10811 "00000001" // /* MW 5 */ + 10812 "00000000" // /* MW 4 */ + 10813 "11010000" // /* MW 3 */ + 10814 "11000010" // /* MW 2 */ + 10815 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 +.src_ref 7 "superkernels.cpp" 217 65 +.src_ref 7 "superkernels.cpp" 224 2 + 10816 "10111010" // LDA r17, [p2]; MOVXM p2, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10817 "00010000" // /* MW 9 */ + 10818 "10100000" // /* MW 8 */ + 10819 "00110010" // /* MW 7 */ + 10820 "11110001" // /* MW 6 */ + 10821 "00000001" // /* MW 5 */ + 10822 "00000000" // /* MW 4 */ + 10823 "11010000" // /* MW 3 */ + 10824 "11000110" // /* MW 2 */ + 10825 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 first +.src_ref 7 "superkernels.cpp" 217 16 +.src_ref 7 "superkernels.cpp" 222 47 + 10826 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10827 "00010000" // /* MW 9 */ + 10828 "00101010" // /* MW 8 */ + 10829 "10110010" // /* MW 7 */ + 10830 "11110000" // /* MW 6 */ + 10831 "00000001" // /* MW 5 */ + 10832 "00000000" // /* MW 4 */ + 10833 "01010000" // /* MW 3 */ + 10834 "11001011" // /* MW 2 */ + 10835 "01001010" // /* MW 1 */ + 10836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10837 "00000000" // /* MW 1 */ + 10838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10839 "00000000" // /* MW 1 */ + 10840 "10000100" // J #10896 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10896 delay_slots=5 */ + 10841 "00000000" // /* MW 5 */ + 10842 "00000000" // /* MW 4 */ + 10843 "01001000" // /* MW 3 */ + 10844 "00010101" // /* MW 2 */ + 10845 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 +.delay_slot + 10846 "01000100" // MOVXM p0, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10847 "10111000" // /* MW 5 */ + 10848 "11001000" // /* MW 4 */ + 10849 "11000000" // /* MW 3 */ + 10850 "00000111" // /* MW 2 */ + 10851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10853 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 27 first +.delay_slot + 10854 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10855 "00001111" // /* MW 3 */ + 10856 "01100001" // /* MW 2 */ + 10857 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 first +.delay_slot + 10858 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10859 "10100011" // /* MW 5 */ + 10860 "00001100" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 16 first +.delay_slot + 10864 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10865 "00000000" // /* MW 15 */ + 10866 "00000000" // /* MW 14 */ + 10867 "01111000" // /* MW 13 */ + 10868 "10100101" // /* MW 12 */ + 10869 "00000001" // /* MW 11 */ + 10870 "00000000" // /* MW 10 */ + 10871 "00000000" // /* MW 9 */ + 10872 "10000000" // /* MW 8 */ + 10873 "00010001" // /* MW 7 */ + 10874 "00000110" // /* MW 6 */ + 10875 "00100001" // /* MW 5 */ + 10876 "00000000" // /* MW 4 */ + 10877 "11110000" // /* MW 3 */ + 10878 "00101100" // /* MW 2 */ + 10879 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 222 47 +.src_ref 7 "superkernels.cpp" 224 2 + 10880 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10881 "00000000" // /* MW 15 */ + 10882 "00000000" // /* MW 14 */ + 10883 "00010000" // /* MW 13 */ + 10884 "00101010" // /* MW 12 */ + 10885 "10110010" // /* MW 11 */ + 10886 "11110000" // /* MW 10 */ + 10887 "00000001" // /* MW 9 */ + 10888 "00000000" // /* MW 8 */ + 10889 "10001011" // /* MW 7 */ + 10890 "10000000" // /* MW 6 */ + 10891 "00100010" // /* MW 5 */ + 10892 "00000000" // /* MW 4 */ + 10893 "11110000" // /* MW 3 */ + 10894 "00101100" // /* MW 2 */ + 10895 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 10896 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10897 "00000000" // /* MW 7 */ + 10898 "11000011" // /* MW 6 */ + 10899 "10110011" // /* MW 5 */ + 10900 "00000011" // /* MW 4 */ + 10901 "01100000" // /* MW 3 */ + 10902 "10010001" // /* MW 2 */ + 10903 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 10904 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10905 "00010000" // /* MW 9 */ + 10906 "00100000" // /* MW 8 */ + 10907 "00110010" // /* MW 7 */ + 10908 "11110000" // /* MW 6 */ + 10909 "00000001" // /* MW 5 */ + 10910 "00000000" // /* MW 4 */ + 10911 "11010000" // /* MW 3 */ + 10912 "11101110" // /* MW 2 */ + 10913 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 10914 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10915 "00010110" // /* MW 3 */ + 10916 "11111110" // /* MW 2 */ + 10917 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 10918 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00110110" // /* MW 3 */ + 10920 "11111110" // /* MW 2 */ + 10921 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 10922 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "01010110" // /* MW 3 */ + 10924 "01000110" // /* MW 2 */ + 10925 "00000111" // /* MW 1 */ + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ + 10928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10929 "00000000" // /* MW 1 */ + 10930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10931 "00000000" // /* MW 1 */ + 10932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10933 "00000000" // /* MW 1 */ + 10934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 10936 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10937 "00000010" // /* MW 3 */ + 10938 "01100001" // /* MW 2 */ + 10939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 10940 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10941 "00010001" // /* MW 3 */ + 10942 "00000110" // /* MW 2 */ + 10943 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 10944 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10945 "11111101" // /* MW 3 */ + 10946 "11100000" // /* MW 2 */ + 10947 "00010111" // /* MW 1 */ + 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10949 "00000000" // /* MW 1 */ + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 10954 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10955 "00001000" // /* MW 3 */ + 10956 "10010011" // /* MW 2 */ + 10957 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 + 10958 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10959 "10000001" // /* MW 5 */ + 10960 "10101101" // /* MW 4 */ + 10961 "10100111" // /* MW 3 */ + 10962 "00000000" // /* MW 2 */ + 10963 "00000100" // /* MW 1 */ + 10964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10965 "00000000" // /* MW 1 */ + 10966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10967 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first + 10968 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10969 "00110110" // /* MW 3 */ + 10970 "00000110" // /* MW 2 */ + 10971 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 10972 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10973 "10000001" // /* MW 5 */ + 10974 "11011101" // /* MW 4 */ + 10975 "11011100" // /* MW 3 */ + 10976 "11001010" // /* MW 2 */ + 10977 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 47 first + 10978 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10979 "01110110" // /* MW 3 */ + 10980 "00000110" // /* MW 2 */ + 10981 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 10982 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10983 "10011110" // /* MW 3 */ + 10984 "01011100" // /* MW 2 */ + 10985 "00000111" // /* MW 1 */ + 10986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10987 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 224 2 first +.no_stack_arguments + 10988 "00000100" // JL #10512 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */ + 10989 "00000001" // /* MW 5 */ + 10990 "00000000" // /* MW 4 */ + 10991 "10001000" // /* MW 3 */ + 10992 "00010100" // /* MW 2 */ + 10993 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first +.delay_slot + 10996 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10997 "00000111" // /* MW 3 */ + 10998 "01100010" // /* MW 2 */ + 10999 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.delay_slot + 11000 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11001 "00110001" // /* MW 3 */ + 11002 "00000110" // /* MW 2 */ + 11003 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 first +.delay_slot + 11004 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11005 "00001101" // /* MW 3 */ + 11006 "11100001" // /* MW 2 */ + 11007 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 +.delay_slot + 11008 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11009 "00000000" // /* MW 15 */ + 11010 "00000000" // /* MW 14 */ + 11011 "10101000" // /* MW 13 */ + 11012 "10100000" // /* MW 12 */ + 11013 "00110100" // /* MW 11 */ + 11014 "00000000" // /* MW 10 */ + 11015 "00000000" // /* MW 9 */ + 11016 "00000000" // /* MW 8 */ + 11017 "01011011" // /* MW 7 */ + 11018 "00000001" // /* MW 6 */ + 11019 "00100000" // /* MW 5 */ + 11020 "00000000" // /* MW 4 */ + 11021 "11110000" // /* MW 3 */ + 11022 "00101100" // /* MW 2 */ + 11023 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 +.src_ref 7 "superkernels.cpp" 227 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 11024 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11025 "00010000" // /* MW 9 */ + 11026 "00100000" // /* MW 8 */ + 11027 "00110010" // /* MW 7 */ + 11028 "11110011" // /* MW 6 */ + 11029 "00000001" // /* MW 5 */ + 11030 "00000000" // /* MW 4 */ + 11031 "11010000" // /* MW 3 */ + 11032 "11000110" // /* MW 2 */ + 11033 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 11034 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11035 "00000101" // /* MW 3 */ + 11036 "00100000" // /* MW 2 */ + 11037 "00010000" // /* MW 1 */ + 11038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11039 "00000000" // /* MW 1 */ + 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11041 "00000000" // /* MW 1 */ + 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11043 "00000000" // /* MW 1 */ + 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11045 "00000000" // /* MW 1 */ + 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11047 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11048 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11049 "00001000" // /* MW 3 */ + 11050 "01010001" // /* MW 2 */ + 11051 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 11052 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11053 "00010000" // /* MW 9 */ + 11054 "00101110" // /* MW 8 */ + 11055 "00110010" // /* MW 7 */ + 11056 "11110001" // /* MW 6 */ + 11057 "00000001" // /* MW 5 */ + 11058 "00000000" // /* MW 4 */ + 11059 "11010000" // /* MW 3 */ + 11060 "11001110" // /* MW 2 */ + 11061 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 first + 11062 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11063 "00110110" // /* MW 3 */ + 11064 "00000110" // /* MW 2 */ + 11065 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 + 11066 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11067 "01010110" // /* MW 3 */ + 11068 "00000110" // /* MW 2 */ + 11069 "00000010" // /* MW 1 */ + 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11071 "00000000" // /* MW 1 */ + 11072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11073 "00000000" // /* MW 1 */ + 11074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11075 "00000000" // /* MW 1 */ + 11076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11077 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 11078 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11079 "00110001" // /* MW 3 */ + 11080 "00100001" // /* MW 2 */ + 11081 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 11082 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11083 "00010001" // /* MW 3 */ + 11084 "11100110" // /* MW 2 */ + 11085 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 16 first + 11086 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11087 "00101000" // /* MW 3 */ + 11088 "01100001" // /* MW 2 */ + 11089 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 + 11090 "10000100" // JNZ r16, #11120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11120 delay_slots=5 */ + 11091 "00000001" // /* MW 5 */ + 11092 "01000000" // /* MW 4 */ + 11093 "10111000" // /* MW 3 */ + 11094 "00010101" // /* MW 2 */ + 11095 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11105 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 + 11106 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11107 "00000001" // /* MW 3 */ + 11108 "00100000" // /* MW 2 */ + 11109 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 first + 11110 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11111 "00000000" // /* MW 9 */ + 11112 "00000000" // /* MW 8 */ + 11113 "00000000" // /* MW 7 */ + 11114 "10000000" // /* MW 6 */ + 11115 "00010001" // /* MW 5 */ + 11116 "00000110" // /* MW 4 */ + 11117 "11110110" // /* MW 3 */ + 11118 "00101100" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 229 + 11120 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11121 "00111001" // /* MW 3 */ + 11122 "11110100" // /* MW 2 */ + 11123 "00000111" // /* MW 1 */ + 11124 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11125 "00011001" // /* MW 3 */ + 11126 "11111011" // /* MW 2 */ + 11127 "00000111" // /* MW 1 */ + 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11129 "00000000" // /* MW 1 */ + 11130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11131 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11133 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11134 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11135 "11110001" // /* MW 3 */ + 11136 "11111101" // /* MW 2 */ + 11137 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11139 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11140 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11141 "00000000" // /* MW 3 */ + 11142 "00101000" // /* MW 2 */ + 11143 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11144 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11145 "10100000" // /* MW 3 */ + 11146 "01100111" // /* MW 2 */ + 11147 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 +.delay_slot + 11148 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11149 "00000001" // /* MW 5 */ + 11150 "00000000" // /* MW 4 */ + 11151 "00000000" // /* MW 3 */ + 11152 "11111000" // /* MW 2 */ + 11153 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11155 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 11159 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 11168 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11169 "01011000" // /* MW 9 */ + 11170 "00000000" // /* MW 8 */ + 11171 "00001000" // /* MW 7 */ + 11172 "00001011" // /* MW 6 */ + 11173 "00100000" // /* MW 5 */ + 11174 "00001000" // /* MW 4 */ + 11175 "11010000" // /* MW 3 */ + 11176 "10000101" // /* MW 2 */ + 11177 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 11178 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11179 "00000001" // /* MW 3 */ + 11180 "10000000" // /* MW 2 */ + 11181 "00010111" // /* MW 1 */ + 11182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11183 "00000000" // /* MW 1 */ + 11184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11185 "00000000" // /* MW 1 */ + 11186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11187 "00000000" // /* MW 1 */ + 11188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11189 "00000000" // /* MW 1 */ + 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 11192 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11193 "00101001" // /* MW 3 */ + 11194 "00011100" // /* MW 2 */ + 11195 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 11196 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11197 "00101110" // /* MW 3 */ + 11198 "00011100" // /* MW 2 */ + 11199 "00000001" // /* MW 1 */ + 11200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11201 "00000000" // /* MW 1 */ + 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11203 "00000000" // /* MW 1 */ + 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11205 "00000000" // /* MW 1 */ + 11206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11207 "00000000" // /* MW 1 */ + 11208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11209 "00000000" // /* MW 1 */ + 11210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11211 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 11212 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11213 "00101001" // /* MW 3 */ + 11214 "00011100" // /* MW 2 */ + 11215 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 11216 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11217 "00101110" // /* MW 3 */ + 11218 "00000100" // /* MW 2 */ + 11219 "00000001" // /* MW 1 */ + 11220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11221 "00000000" // /* MW 1 */ + 11222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11223 "00000000" // /* MW 1 */ + 11224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11225 "00000000" // /* MW 1 */ + 11226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11227 "00000000" // /* MW 1 */ + 11228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11229 "00000000" // /* MW 1 */ + 11230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11231 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 11232 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11233 "00101001" // /* MW 3 */ + 11234 "00011100" // /* MW 2 */ + 11235 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 11236 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11237 "01110110" // /* MW 3 */ + 11238 "00010100" // /* MW 2 */ + 11239 "00000001" // /* MW 1 */ + 11240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11241 "00000000" // /* MW 1 */ + 11242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11243 "00000000" // /* MW 1 */ + 11244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11245 "00000000" // /* MW 1 */ + 11246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11247 "00000000" // /* MW 1 */ + 11248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11249 "00000000" // /* MW 1 */ + 11250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11251 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11252 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11253 "01110001" // /* MW 3 */ + 11254 "01001100" // /* MW 2 */ + 11255 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11256 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11257 "00010111" // /* MW 3 */ + 11258 "00000100" // /* MW 2 */ + 11259 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11260 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11261 "00000000" // /* MW 3 */ + 11262 "00101000" // /* MW 2 */ + 11263 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11264 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11265 "00000000" // /* MW 5 */ + 11266 "10111110" // /* MW 4 */ + 11267 "11110000" // /* MW 3 */ + 11268 "00000000" // /* MW 2 */ + 11269 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11270 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11271 "00010100" // /* MW 3 */ + 11272 "11000010" // /* MW 2 */ + 11273 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11274 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11275 "00100111" // /* MW 3 */ + 11276 "01110110" // /* MW 2 */ + 11277 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11278 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "10000010" // /* MW 3 */ + 11280 "00000001" // /* MW 2 */ + 11281 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11283 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 11296 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11297 "00000001" // /* MW 5 */ + 11298 "00000000" // /* MW 4 */ + 11299 "00000000" // /* MW 3 */ + 11300 "00001000" // /* MW 2 */ + 11301 "00000000" // /* MW 1 */ + 11302 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11303 "00111101" // /* MW 3 */ + 11304 "11111000" // /* MW 2 */ + 11305 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 11306 "00000100" // JL #11168 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11168 delay_slots=5 */ + 11307 "00000001" // /* MW 5 */ + 11308 "00000000" // /* MW 4 */ + 11309 "11010000" // /* MW 3 */ + 11310 "00010101" // /* MW 2 */ + 11311 "00000000" // /* MW 1 */ +.delay_slot + 11312 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "10100000" // /* MW 3 */ + 11314 "00010111" // /* MW 2 */ + 11315 "00011000" // /* MW 1 */ +.delay_slot + 11316 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00010101" // /* MW 3 */ + 11318 "11111100" // /* MW 2 */ + 11319 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.delay_slot + 11320 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "11000000" // /* MW 3 */ + 11322 "11010000" // /* MW 2 */ + 11323 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11327 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 11328 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11329 "00001000" // /* MW 9 */ + 11330 "11000100" // /* MW 8 */ + 11331 "00110011" // /* MW 7 */ + 11332 "01101000" // /* MW 6 */ + 11333 "00000000" // /* MW 5 */ + 11334 "00000001" // /* MW 4 */ + 11335 "00100000" // /* MW 3 */ + 11336 "00000111" // /* MW 2 */ + 11337 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 11338 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11339 "01011000" // /* MW 9 */ + 11340 "11111101" // /* MW 8 */ + 11341 "00000111" // /* MW 7 */ + 11342 "00001000" // /* MW 6 */ + 11343 "10000000" // /* MW 5 */ + 11344 "00000001" // /* MW 4 */ + 11345 "10000000" // /* MW 3 */ + 11346 "11100010" // /* MW 2 */ + 11347 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 11348 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11349 "00000001" // /* MW 9 */ + 11350 "10100000" // /* MW 8 */ + 11351 "00000111" // /* MW 7 */ + 11352 "10000000" // /* MW 6 */ + 11353 "00010001" // /* MW 5 */ + 11354 "00001010" // /* MW 4 */ + 11355 "00100000" // /* MW 3 */ + 11356 "10111110" // /* MW 2 */ + 11357 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 11358 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11359 "01001010" // /* MW 3 */ + 11360 "00000110" // /* MW 2 */ + 11361 "00000000" // /* MW 1 */ + 11362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11363 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11365 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11366 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11367 "00010111" // /* MW 3 */ + 11368 "00000010" // /* MW 2 */ + 11369 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11370 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11371 "00000000" // /* MW 3 */ + 11372 "00101000" // /* MW 2 */ + 11373 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11374 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11375 "00000101" // /* MW 3 */ + 11376 "00100010" // /* MW 2 */ + 11377 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11378 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11379 "00000001" // /* MW 5 */ + 11380 "00000000" // /* MW 4 */ + 11381 "00000000" // /* MW 3 */ + 11382 "11111000" // /* MW 2 */ + 11383 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11384 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "00100111" // /* MW 3 */ + 11386 "01110111" // /* MW 2 */ + 11387 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11388 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "10000010" // /* MW 3 */ + 11390 "00100001" // /* MW 2 */ + 11391 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11393 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_shared.h" 227 first +.src_ref 3 "elementwise_binary_shared.h" 232 8 first +.tail_call +.function_start + 11408 "10000100" // J #9968 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 11409 "00000000" // /* MW 5 */ + 11410 "00000000" // /* MW 4 */ + 11411 "01111000" // /* MW 3 */ + 11412 "00010011" // /* MW 2 */ + 11413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 11423 "00000000" // /* MW 1 */ +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_add1d _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 332 first +.src_ref 7 "superkernels.cpp" 337 6 +.function_start + 11424 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11425 "10000000" // /* MW 5 */ + 11426 "11001000" // /* MW 4 */ + 11427 "11001000" // /* MW 3 */ + 11428 "00000111" // /* MW 2 */ + 11429 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 337 6 first + 11430 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11431 "11000001" // /* MW 5 */ + 11432 "10110101" // /* MW 4 */ + 11433 "11011000" // /* MW 3 */ + 11434 "11000010" // /* MW 2 */ + 11435 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 332 + 11436 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11437 "00000001" // /* MW 5 */ + 11438 "00000000" // /* MW 4 */ + 11439 "00000000" // /* MW 3 */ + 11440 "00001000" // /* MW 2 */ + 11441 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 334 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 11442 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11443 "01111001" // /* MW 9 */ + 11444 "01100000" // /* MW 8 */ + 11445 "11001010" // /* MW 7 */ + 11446 "10000001" // /* MW 6 */ + 11447 "00010100" // /* MW 5 */ + 11448 "00100011" // /* MW 4 */ + 11449 "10110000" // /* MW 3 */ + 11450 "00111010" // /* MW 2 */ + 11451 "11111111" // /* MW 1 */ + 11452 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11453 "01110000" // /* MW 7 */ + 11454 "11010000" // /* MW 6 */ + 11455 "00001011" // /* MW 5 */ + 11456 "00000000" // /* MW 4 */ + 11457 "10110000" // /* MW 3 */ + 11458 "10000011" // /* MW 2 */ + 11459 "11111101" // /* MW 1 */ + 11460 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11461 "00010101" // /* MW 3 */ + 11462 "11111100" // /* MW 2 */ + 11463 "00001111" // /* MW 1 */ + 11464 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11465 "00111101" // /* MW 3 */ + 11466 "11110000" // /* MW 2 */ + 11467 "00001111" // /* MW 1 */ + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 337 6 first +.src_ref 7 "superkernels.cpp" 337 16 first + 11470 "10000100" // JNZ r16, #11616 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11616 delay_slots=5 */ + 11471 "00000001" // /* MW 5 */ + 11472 "01000000" // /* MW 4 */ + 11473 "10110000" // /* MW 3 */ + 11474 "00010110" // /* MW 2 */ + 11475 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 334 30 first +.delay_slot + 11476 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11477 "11111011" // /* MW 3 */ + 11478 "01100011" // /* MW 2 */ + 11479 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 334 11 +.delay_slot + 11480 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11481 "10100000" // /* MW 5 */ + 11482 "11001000" // /* MW 4 */ + 11483 "11000100" // /* MW 3 */ + 11484 "00000111" // /* MW 2 */ + 11485 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 334 11 +.delay_slot + 11486 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11487 "01110000" // /* MW 7 */ + 11488 "01100000" // /* MW 6 */ + 11489 "00110111" // /* MW 5 */ + 11490 "00000001" // /* MW 4 */ + 11491 "00110000" // /* MW 3 */ + 11492 "11000110" // /* MW 2 */ + 11493 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 11494 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11495 "11000000" // /* MW 3 */ + 11496 "11010110" // /* MW 2 */ + 11497 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 340 4 +.src_ref 7 "superkernels.cpp" 342 28 +.src_ref 7 "superkernels.cpp" 344 42 +.src_ref 7 "superkernels.cpp" 356 2 +.delay_slot + 11498 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11499 "00010001" // /* MW 9 */ + 11500 "11000000" // /* MW 8 */ + 11501 "10110010" // /* MW 7 */ + 11502 "11110011" // /* MW 6 */ + 11503 "00000001" // /* MW 5 */ + 11504 "00000000" // /* MW 4 */ + 11505 "10110000" // /* MW 3 */ + 11506 "10100011" // /* MW 2 */ + 11507 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 340 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11508 "00111010" // MOVS p0, p7; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11509 "00010001" // /* MW 9 */ + 11510 "00110010" // /* MW 8 */ + 11511 "00110010" // /* MW 7 */ + 11512 "11110001" // /* MW 6 */ + 11513 "00000001" // /* MW 5 */ + 11514 "00000000" // /* MW 4 */ + 11515 "01100000" // /* MW 3 */ + 11516 "10010001" // /* MW 2 */ + 11517 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11518 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11519 "00010000" // /* MW 9 */ + 11520 "00110000" // /* MW 8 */ + 11521 "00110010" // /* MW 7 */ + 11522 "11110001" // /* MW 6 */ + 11523 "00000001" // /* MW 5 */ + 11524 "00000000" // /* MW 4 */ + 11525 "11100000" // /* MW 3 */ + 11526 "11000000" // /* MW 2 */ + 11527 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 340 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11530 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 11531 "00000001" // /* MW 5 */ + 11532 "00000000" // /* MW 4 */ + 11533 "00010000" // /* MW 3 */ + 11534 "00010110" // /* MW 2 */ + 11535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11537 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11539 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11540 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "00110001" // /* MW 3 */ + 11542 "00100000" // /* MW 2 */ + 11543 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11544 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11545 "00000101" // /* MW 3 */ + 11546 "00100000" // /* MW 2 */ + 11547 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11548 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11549 "00010001" // /* MW 3 */ + 11550 "00000110" // /* MW 2 */ + 11551 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 344 18 +.src_ref 7 "superkernels.cpp" 344 42 first +.return_address + 11552 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11553 "00010000" // /* MW 9 */ + 11554 "00101000" // /* MW 8 */ + 11555 "10110010" // /* MW 7 */ + 11556 "11110000" // /* MW 6 */ + 11557 "00000001" // /* MW 5 */ + 11558 "00000000" // /* MW 4 */ + 11559 "11010000" // /* MW 3 */ + 11560 "11000010" // /* MW 2 */ + 11561 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 344 16 +.src_ref 7 "superkernels.cpp" 344 18 +.src_ref 7 "superkernels.cpp" 353 48 + 11562 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11563 "00010000" // /* MW 9 */ + 11564 "00101010" // /* MW 8 */ + 11565 "10110010" // /* MW 7 */ + 11566 "11110001" // /* MW 6 */ + 11567 "00000001" // /* MW 5 */ + 11568 "00000000" // /* MW 4 */ + 11569 "11010000" // /* MW 3 */ + 11570 "11000110" // /* MW 2 */ + 11571 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 342 28 first +.src_ref 7 "superkernels.cpp" 345 16 +.src_ref 7 "superkernels.cpp" 354 48 + 11572 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "00010000" // /* MW 9 */ + 11574 "00101100" // /* MW 8 */ + 11575 "10110010" // /* MW 7 */ + 11576 "11110000" // /* MW 6 */ + 11577 "00000001" // /* MW 5 */ + 11578 "00000000" // /* MW 4 */ + 11579 "01010000" // /* MW 3 */ + 11580 "11001011" // /* MW 2 */ + 11581 "11101010" // /* MW 1 */ + 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11583 "00000000" // /* MW 1 */ + 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11585 "00000000" // /* MW 1 */ + 11586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11587 "00000000" // /* MW 1 */ + 11588 "10000100" // J #11632 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11632 delay_slots=5 */ + 11589 "00000000" // /* MW 5 */ + 11590 "00000000" // /* MW 4 */ + 11591 "10111000" // /* MW 3 */ + 11592 "00010110" // /* MW 2 */ + 11593 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 342 13 +.delay_slot + 11594 "01000100" // MOVXM p2, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11595 "10111000" // /* MW 5 */ + 11596 "11001000" // /* MW 4 */ + 11597 "11000100" // /* MW 3 */ + 11598 "00000111" // /* MW 2 */ + 11599 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 344 27 first +.delay_slot + 11600 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11601 "00001111" // /* MW 3 */ + 11602 "01100001" // /* MW 2 */ + 11603 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 342 13 first +.delay_slot + 11604 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11605 "01010001" // /* MW 3 */ + 11606 "00000110" // /* MW 2 */ + 11607 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 344 16 first +.delay_slot + 11608 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11609 "00010001" // /* MW 3 */ + 11610 "00000110" // /* MW 2 */ + 11611 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 345 16 first +.delay_slot + 11612 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11613 "00010001" // /* MW 3 */ + 11614 "00000110" // /* MW 2 */ + 11615 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 353 48 + 11616 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11617 "10101000" // /* MW 5 */ + 11618 "11001000" // /* MW 4 */ + 11619 "11000110" // /* MW 3 */ + 11620 "00000111" // /* MW 2 */ + 11621 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 354 48 + 11622 "10111010" // NOPA; MOVXM p1, #509016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11623 "00010000" // /* MW 9 */ + 11624 "00101100" // /* MW 8 */ + 11625 "10110010" // /* MW 7 */ + 11626 "11110000" // /* MW 6 */ + 11627 "00000001" // /* MW 5 */ + 11628 "00000000" // /* MW 4 */ + 11629 "11110000" // /* MW 3 */ + 11630 "00101100" // /* MW 2 */ + 11631 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 11632 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11633 "10000110" // /* MW 3 */ + 11634 "01100111" // /* MW 2 */ + 11635 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 348 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 11636 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11637 "00010000" // /* MW 9 */ + 11638 "00100000" // /* MW 8 */ + 11639 "00110010" // /* MW 7 */ + 11640 "11110001" // /* MW 6 */ + 11641 "00000001" // /* MW 5 */ + 11642 "00000000" // /* MW 4 */ + 11643 "11010000" // /* MW 3 */ + 11644 "11101110" // /* MW 2 */ + 11645 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11646 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11647 "00010110" // /* MW 3 */ + 11648 "11111110" // /* MW 2 */ + 11649 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11650 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11651 "00110110" // /* MW 3 */ + 11652 "11111110" // /* MW 2 */ + 11653 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 348 2 first + 11654 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11655 "01010110" // /* MW 3 */ + 11656 "00000110" // /* MW 2 */ + 11657 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 11658 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11659 "01110110" // /* MW 3 */ + 11660 "01000110" // /* MW 2 */ + 11661 "00000000" // /* MW 1 */ + 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11663 "00000000" // /* MW 1 */ + 11664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11665 "00000000" // /* MW 1 */ + 11666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11667 "00000000" // /* MW 1 */ + 11668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11670 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11671 "00000010" // /* MW 3 */ + 11672 "01100001" // /* MW 2 */ + 11673 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 348 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 11674 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11675 "00001110" // /* MW 5 */ + 11676 "01000000" // /* MW 4 */ + 11677 "00111001" // /* MW 3 */ + 11678 "11000010" // /* MW 2 */ + 11679 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 348 2 + 11680 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11681 "00010001" // /* MW 3 */ + 11682 "00000110" // /* MW 2 */ + 11683 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 11684 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11685 "11111101" // /* MW 3 */ + 11686 "11100000" // /* MW 2 */ + 11687 "00010111" // /* MW 1 */ + 11688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11689 "00000000" // /* MW 1 */ + 11690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11691 "00000000" // /* MW 1 */ + 11692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11693 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 11694 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11695 "00001000" // /* MW 3 */ + 11696 "11010011" // /* MW 2 */ + 11697 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 11698 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11699 "00000110" // /* MW 3 */ + 11700 "01100111" // /* MW 2 */ + 11701 "00011010" // /* MW 1 */ + 11702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11703 "00000000" // /* MW 1 */ + 11704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11705 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 11706 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11707 "01110110" // /* MW 3 */ + 11708 "11111111" // /* MW 2 */ + 11709 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11710 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11711 "00110110" // /* MW 3 */ + 11712 "11111110" // /* MW 2 */ + 11713 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11714 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11715 "01010110" // /* MW 3 */ + 11716 "11111110" // /* MW 2 */ + 11717 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 11718 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11719 "01110110" // /* MW 3 */ + 11720 "01010110" // /* MW 2 */ + 11721 "00000010" // /* MW 1 */ + 11722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11723 "00000000" // /* MW 1 */ + 11724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11725 "00000000" // /* MW 1 */ + 11726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11727 "00000000" // /* MW 1 */ + 11728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11729 "00000000" // /* MW 1 */ + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11731 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11732 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11733 "00010010" // /* MW 3 */ + 11734 "10100011" // /* MW 2 */ + 11735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 11736 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11737 "00110001" // /* MW 3 */ + 11738 "00000110" // /* MW 2 */ + 11739 "00001010" // /* MW 1 */ + 11740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11741 "00000000" // /* MW 1 */ + 11742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11743 "00000000" // /* MW 1 */ + 11744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11745 "00000000" // /* MW 1 */ + 11746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11747 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 11748 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11749 "00001000" // /* MW 3 */ + 11750 "11010011" // /* MW 2 */ + 11751 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 353 46 +.src_ref 7 "superkernels.cpp" 354 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 11752 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11753 "01111001" // /* MW 9 */ + 11754 "01100000" // /* MW 8 */ + 11755 "11001110" // /* MW 7 */ + 11756 "00101001" // /* MW 6 */ + 11757 "00000000" // /* MW 5 */ + 11758 "00000001" // /* MW 4 */ + 11759 "01100000" // /* MW 3 */ + 11760 "00010001" // /* MW 2 */ + 11761 "11010001" // /* MW 1 */ + 11762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11763 "00000000" // /* MW 1 */ + 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11765 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 11766 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11767 "00011001" // /* MW 3 */ + 11768 "11101110" // /* MW 2 */ + 11769 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 353 48 first + 11770 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11771 "00111011" // /* MW 5 */ + 11772 "11011000" // /* MW 4 */ + 11773 "11011111" // /* MW 3 */ + 11774 "11000110" // /* MW 2 */ + 11775 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 354 48 first +.src_ref 7 "superkernels.cpp" 356 2 + 11776 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11777 "10000001" // /* MW 5 */ + 11778 "11011101" // /* MW 4 */ + 11779 "11010110" // /* MW 3 */ + 11780 "11010010" // /* MW 2 */ + 11781 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11782 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11783 "01010110" // /* MW 3 */ + 11784 "01001110" // /* MW 2 */ + 11785 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11786 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11787 "00011110" // /* MW 3 */ + 11788 "01011101" // /* MW 2 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11790 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11791 "11000000" // /* MW 3 */ + 11792 "01100000" // /* MW 2 */ + 11793 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11795 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11796 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11797 "01110110" // /* MW 3 */ + 11798 "00000110" // /* MW 2 */ + 11799 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11801 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 356 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11802 "00000100" // JL #11408 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11408 delay_slots=5 */ + 11803 "00000001" // /* MW 5 */ + 11804 "00000000" // /* MW 4 */ + 11805 "01001000" // /* MW 3 */ + 11806 "00010110" // /* MW 2 */ + 11807 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11808 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11809 "11000000" // /* MW 3 */ + 11810 "11010100" // /* MW 2 */ + 11811 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 353 46 first +.delay_slot + 11812 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00001101" // /* MW 3 */ + 11814 "01100011" // /* MW 2 */ + 11815 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 354 46 first +.delay_slot + 11816 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11817 "00001101" // /* MW 3 */ + 11818 "00100001" // /* MW 2 */ + 11819 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 354 46 +.delay_slot + 11820 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11821 "01000001" // /* MW 3 */ + 11822 "01101001" // /* MW 2 */ + 11823 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 353 46 first +.delay_slot + 11824 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11825 "00000000" // /* MW 15 */ + 11826 "00000000" // /* MW 14 */ + 11827 "10101000" // /* MW 13 */ + 11828 "11100010" // /* MW 12 */ + 11829 "00110100" // /* MW 11 */ + 11830 "00000000" // /* MW 10 */ + 11831 "00000000" // /* MW 9 */ + 11832 "00000000" // /* MW 8 */ + 11833 "01011011" // /* MW 7 */ + 11834 "00000001" // /* MW 6 */ + 11835 "00100000" // /* MW 5 */ + 11836 "00000000" // /* MW 4 */ + 11837 "11110000" // /* MW 3 */ + 11838 "00101100" // /* MW 2 */ + 11839 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 11840 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11841 "01111000" // /* MW 9 */ + 11842 "11010000" // /* MW 8 */ + 11843 "10110011" // /* MW 7 */ + 11844 "00101000" // /* MW 6 */ + 11845 "00000000" // /* MW 5 */ + 11846 "00000001" // /* MW 4 */ + 11847 "11010000" // /* MW 3 */ + 11848 "11000110" // /* MW 2 */ + 11849 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 19 + 11850 "01000100" // MOVXM p6, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11851 "10111000" // /* MW 5 */ + 11852 "11001000" // /* MW 4 */ + 11853 "11001100" // /* MW 3 */ + 11854 "00000111" // /* MW 2 */ + 11855 "00000000" // /* MW 1 */ + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ + 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11859 "00000000" // /* MW 1 */ + 11860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11861 "00000000" // /* MW 1 */ + 11862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11863 "00000000" // /* MW 1 */ + 11864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11865 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11866 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11867 "00001000" // /* MW 3 */ + 11868 "01010001" // /* MW 2 */ + 11869 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 11870 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11871 "00110110" // /* MW 3 */ + 11872 "11110110" // /* MW 2 */ + 11873 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 11874 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11875 "00011001" // /* MW 3 */ + 11876 "11101101" // /* MW 2 */ + 11877 "00000111" // /* MW 1 */ + 11878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11879 "00000000" // /* MW 1 */ + 11880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11881 "00000000" // /* MW 1 */ + 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11883 "00000000" // /* MW 1 */ + 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11885 "00000000" // /* MW 1 */ + 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11887 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 11888 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11889 "00010001" // /* MW 3 */ + 11890 "00100011" // /* MW 2 */ + 11891 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 11892 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11893 "01100011" // /* MW 5 */ + 11894 "11101100" // /* MW 4 */ + 11895 "11010011" // /* MW 3 */ + 11896 "11000110" // /* MW 2 */ + 11897 "01001010" // /* MW 1 */ + 11898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11899 "00000000" // /* MW 1 */ + 11900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11901 "00000000" // /* MW 1 */ + 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11903 "00000000" // /* MW 1 */ + 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11905 "00000000" // /* MW 1 */ + 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11907 "00000000" // /* MW 1 */ + 11908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11910 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11911 "00001000" // /* MW 3 */ + 11912 "01010001" // /* MW 2 */ + 11913 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 6 +.src_ref 7 "superkernels.cpp" 361 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 11914 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11915 "00010000" // /* MW 9 */ + 11916 "00100000" // /* MW 8 */ + 11917 "10110010" // /* MW 7 */ + 11918 "11110000" // /* MW 6 */ + 11919 "00000001" // /* MW 5 */ + 11920 "00000000" // /* MW 4 */ + 11921 "11010000" // /* MW 3 */ + 11922 "11001110" // /* MW 2 */ + 11923 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 19 first + 11924 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "01010110" // /* MW 3 */ + 11926 "00000110" // /* MW 2 */ + 11927 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 6 + 11928 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "00110110" // /* MW 3 */ + 11930 "00000110" // /* MW 2 */ + 11931 "00000001" // /* MW 1 */ + 11932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11933 "00000000" // /* MW 1 */ + 11934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11935 "00000000" // /* MW 1 */ + 11936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11937 "00000000" // /* MW 1 */ + 11938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11939 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 11940 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11941 "00110001" // /* MW 3 */ + 11942 "00100001" // /* MW 2 */ + 11943 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 11944 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11945 "00010001" // /* MW 3 */ + 11946 "11100110" // /* MW 2 */ + 11947 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 16 first + 11948 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11949 "00101000" // /* MW 3 */ + 11950 "01100001" // /* MW 2 */ + 11951 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 6 + 11952 "10000100" // JNZ r16, #11984 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11984 delay_slots=5 */ + 11953 "00000001" // /* MW 5 */ + 11954 "01000000" // /* MW 4 */ + 11955 "01101000" // /* MW 3 */ + 11956 "00010111" // /* MW 2 */ + 11957 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11959 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11961 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11963 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11965 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11967 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 361 14 + 11968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11969 "00000001" // /* MW 3 */ + 11970 "00100000" // /* MW 2 */ + 11971 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 361 14 first + 11972 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11973 "11000001" // /* MW 11 */ + 11974 "00001000" // /* MW 10 */ + 11975 "10000011" // /* MW 9 */ + 11976 "00000000" // /* MW 8 */ + 11977 "00000000" // /* MW 7 */ + 11978 "00000000" // /* MW 6 */ + 11979 "00100000" // /* MW 5 */ + 11980 "00000000" // /* MW 4 */ + 11981 "11110000" // /* MW 3 */ + 11982 "00101100" // /* MW 2 */ + 11983 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 363 + 11984 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11985 "00111001" // /* MW 3 */ + 11986 "11110000" // /* MW 2 */ + 11987 "00000111" // /* MW 1 */ + 11988 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11989 "11110001" // /* MW 3 */ + 11990 "11111101" // /* MW 2 */ + 11991 "00000111" // /* MW 1 */ + 11992 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11993 "10011001" // /* MW 3 */ + 11994 "11110111" // /* MW 2 */ + 11995 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 11996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11997 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 11998 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11999 "11010001" // /* MW 3 */ + 12000 "11111001" // /* MW 2 */ + 12001 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12003 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12005 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 363 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12006 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12007 "00000000" // /* MW 3 */ + 12008 "00101000" // /* MW 2 */ + 12009 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12010 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12011 "00001011" // /* MW 3 */ + 12012 "10001110" // /* MW 2 */ + 12013 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 363 +.delay_slot + 12014 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12015 "00000001" // /* MW 5 */ + 12016 "00000000" // /* MW 4 */ + 12017 "00000000" // /* MW 3 */ + 12018 "11111000" // /* MW 2 */ + 12019 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 12025 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 12032 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 12033 "00000001" // /* MW 5 */ + 12034 "00100001" // /* MW 4 */ + 12035 "00000000" // /* MW 3 */ + 12036 "00000000" // /* MW 2 */ + 12037 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 12038 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "11000000" // /* MW 3 */ + 12040 "01010000" // /* MW 2 */ + 12041 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 12042 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12043 "10010000" // /* MW 3 */ + 12044 "01100000" // /* MW 2 */ + 12045 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 12046 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12047 "00010001" // /* MW 3 */ + 12048 "00000100" // /* MW 2 */ + 12049 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 12050 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12051 "00010001" // /* MW 3 */ + 12052 "00010100" // /* MW 2 */ + 12053 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 12055 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 12064 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12065 "00101110" // /* MW 3 */ + 12066 "00011100" // /* MW 2 */ + 12067 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 12068 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12069 "00000001" // /* MW 5 */ + 12070 "00000000" // /* MW 4 */ + 12071 "00000000" // /* MW 3 */ + 12072 "00001000" // /* MW 2 */ + 12073 "00000000" // /* MW 1 */ + 12074 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12075 "00111101" // /* MW 3 */ + 12076 "11111100" // /* MW 2 */ + 12077 "00001111" // /* MW 1 */ + 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12079 "00000000" // /* MW 1 */ + 12080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12081 "00000000" // /* MW 1 */ + 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12083 "00000000" // /* MW 1 */ + 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12085 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 12086 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12087 "00101001" // /* MW 3 */ + 12088 "00011100" // /* MW 2 */ + 12089 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 12090 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12091 "00101110" // /* MW 3 */ + 12092 "00011100" // /* MW 2 */ + 12093 "00000001" // /* MW 1 */ + 12094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12095 "00000000" // /* MW 1 */ + 12096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12097 "00000000" // /* MW 1 */ + 12098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12099 "00000000" // /* MW 1 */ + 12100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12101 "00000000" // /* MW 1 */ + 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12103 "00000000" // /* MW 1 */ + 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12105 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 12106 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00101001" // /* MW 3 */ + 12108 "00011100" // /* MW 2 */ + 12109 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 12110 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "00101110" // /* MW 3 */ + 12112 "00000100" // /* MW 2 */ + 12113 "00000001" // /* MW 1 */ + 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12115 "00000000" // /* MW 1 */ + 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12117 "00000000" // /* MW 1 */ + 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12119 "00000000" // /* MW 1 */ + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ + 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12125 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 12126 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12127 "00101001" // /* MW 3 */ + 12128 "00011100" // /* MW 2 */ + 12129 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 12130 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12131 "00101110" // /* MW 3 */ + 12132 "00010100" // /* MW 2 */ + 12133 "00000001" // /* MW 1 */ + 12134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12135 "00000000" // /* MW 1 */ + 12136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12137 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 12138 "00000100" // JL #12032 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */ + 12139 "00000001" // /* MW 5 */ + 12140 "00000000" // /* MW 4 */ + 12141 "10000000" // /* MW 3 */ + 12142 "00010111" // /* MW 2 */ + 12143 "00000000" // /* MW 1 */ +.delay_slot + 12144 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12145 "10011101" // /* MW 3 */ + 12146 "11111011" // /* MW 2 */ + 12147 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12149 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12151 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 12152 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12153 "00101001" // /* MW 3 */ + 12154 "11011100" // /* MW 2 */ + 12155 "00001000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot + 12156 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12157 "11000000" // /* MW 3 */ + 12158 "01100000" // /* MW 2 */ + 12159 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.return_address + 12160 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12161 "00111001" // /* MW 3 */ + 12162 "11111100" // /* MW 2 */ + 12163 "00000111" // /* MW 1 */ + 12164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12165 "00000000" // /* MW 1 */ + 12166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12167 "00000000" // /* MW 1 */ + 12168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12169 "00000000" // /* MW 1 */ + 12170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12173 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12174 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12175 "10011001" // /* MW 3 */ + 12176 "11111011" // /* MW 2 */ + 12177 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12178 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12179 "00000000" // /* MW 3 */ + 12180 "00101000" // /* MW 2 */ + 12181 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12187 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12188 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12189 "00000001" // /* MW 3 */ + 12190 "00100000" // /* MW 2 */ + 12191 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "mul_impl.h" 134 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12192 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12193 "01110001" // /* MW 9 */ + 12194 "00000000" // /* MW 8 */ + 12195 "00000000" // /* MW 7 */ + 12196 "00000000" // /* MW 6 */ + 12197 "11111110" // /* MW 5 */ + 12198 "00111111" // /* MW 4 */ + 12199 "00110000" // /* MW 3 */ + 12200 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 12201 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 149 first +.src_ref 3 "elementwise_binary.h" 156 37 +.src_ref 3 "elementwise_binary.h" 168 8 first +.function_start + 12208 "10111010" // MOVA m0, #32; MOVXM ls, #12384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12209 "00010000" // /* MW 9 */ + 12210 "00110000" // /* MW 8 */ + 12211 "01111000" // /* MW 7 */ + 12212 "00001100" // /* MW 6 */ + 12213 "00000000" // /* MW 5 */ + 12214 "00000000" // /* MW 4 */ + 12215 "10000000" // /* MW 3 */ + 12216 "00000000" // /* MW 2 */ + 12217 "00000100" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 37 first +.src_ref 3 "elementwise_binary.h" 168 8 first + 12218 "10111010" // LDA r3, [p3], m0; MOVXM le, #12400 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12219 "00010000" // /* MW 9 */ + 12220 "00111000" // /* MW 8 */ + 12221 "10111000" // /* MW 7 */ + 12222 "00001101" // /* MW 6 */ + 12223 "00000000" // /* MW 5 */ + 12224 "00000000" // /* MW 4 */ + 12225 "11010000" // /* MW 3 */ + 12226 "00001110" // /* MW 2 */ + 12227 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 12228 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12229 "01011000" // /* MW 9 */ + 12230 "00111100" // /* MW 8 */ + 12231 "00001011" // /* MW 7 */ + 12232 "01001000" // /* MW 6 */ + 12233 "00010111" // /* MW 5 */ + 12234 "00111110" // /* MW 4 */ + 12235 "11010000" // /* MW 3 */ + 12236 "10010000" // /* MW 2 */ + 12237 "01100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 12238 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12239 "00010000" // /* MW 9 */ + 12240 "00110010" // /* MW 8 */ + 12241 "00110010" // /* MW 7 */ + 12242 "11110010" // /* MW 6 */ + 12243 "00000001" // /* MW 5 */ + 12244 "00000000" // /* MW 4 */ + 12245 "11010000" // /* MW 3 */ + 12246 "10000000" // /* MW 2 */ + 12247 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 12248 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12249 "01000010" // /* MW 3 */ + 12250 "00000100" // /* MW 2 */ + 12251 "00000100" // /* MW 1 */ + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ + 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12257 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 12258 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12259 "00011101" // /* MW 3 */ + 12260 "11000010" // /* MW 2 */ + 12261 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 168 8 +.src_ref 3 "elementwise_binary.h" 187 20 first + 12262 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12263 "11111001" // /* MW 5 */ + 12264 "11100001" // /* MW 4 */ + 12265 "10001010" // /* MW 3 */ + 12266 "00001110" // /* MW 2 */ + 12267 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12268 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12269 "01101000" // /* MW 5 */ + 12270 "01010000" // /* MW 4 */ + 12271 "01110000" // /* MW 3 */ + 12272 "00010011" // /* MW 2 */ + 12273 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12274 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12275 "10000000" // /* MW 7 */ + 12276 "10111010" // /* MW 6 */ + 12277 "11101000" // /* MW 5 */ + 12278 "01010000" // /* MW 4 */ + 12279 "01110000" // /* MW 3 */ + 12280 "00011011" // /* MW 2 */ + 12281 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12282 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12283 "01101000" // /* MW 5 */ + 12284 "01010000" // /* MW 4 */ + 12285 "01110000" // /* MW 3 */ + 12286 "00010011" // /* MW 2 */ + 12287 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12288 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12289 "11101000" // /* MW 5 */ + 12290 "01010000" // /* MW 4 */ + 12291 "01110000" // /* MW 3 */ + 12292 "00011011" // /* MW 2 */ + 12293 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12294 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12295 "10011011" // /* MW 3 */ + 12296 "00001000" // /* MW 2 */ + 12297 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12298 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12299 "01101000" // /* MW 5 */ + 12300 "01010000" // /* MW 4 */ + 12301 "01110000" // /* MW 3 */ + 12302 "00011011" // /* MW 2 */ + 12303 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12304 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12305 "11101000" // /* MW 5 */ + 12306 "01010000" // /* MW 4 */ + 12307 "01110000" // /* MW 3 */ + 12308 "00010011" // /* MW 2 */ + 12309 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12310 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12311 "01000001" // /* MW 9 */ + 12312 "11100010" // /* MW 8 */ + 12313 "00000000" // /* MW 7 */ + 12314 "00011101" // /* MW 6 */ + 12315 "00110100" // /* MW 5 */ + 12316 "00101000" // /* MW 4 */ + 12317 "01110000" // /* MW 3 */ + 12318 "00011011" // /* MW 2 */ + 12319 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12320 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12321 "01100001" // /* MW 9 */ + 12322 "11100000" // /* MW 8 */ + 12323 "00000001" // /* MW 7 */ + 12324 "00011101" // /* MW 6 */ + 12325 "01110100" // /* MW 5 */ + 12326 "00101000" // /* MW 4 */ + 12327 "01110000" // /* MW 3 */ + 12328 "00010011" // /* MW 2 */ + 12329 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12330 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12331 "01000001" // /* MW 9 */ + 12332 "11100010" // /* MW 8 */ + 12333 "00000000" // /* MW 7 */ + 12334 "00011101" // /* MW 6 */ + 12335 "00110100" // /* MW 5 */ + 12336 "00101000" // /* MW 4 */ + 12337 "01110000" // /* MW 3 */ + 12338 "00011011" // /* MW 2 */ + 12339 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12340 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12341 "01100001" // /* MW 9 */ + 12342 "11100000" // /* MW 8 */ + 12343 "00000001" // /* MW 7 */ + 12344 "00011101" // /* MW 6 */ + 12345 "01110100" // /* MW 5 */ + 12346 "00101000" // /* MW 4 */ + 12347 "01110000" // /* MW 3 */ + 12348 "00010011" // /* MW 2 */ + 12349 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12350 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12351 "01000001" // /* MW 9 */ + 12352 "11100010" // /* MW 8 */ + 12353 "00000000" // /* MW 7 */ + 12354 "00011101" // /* MW 6 */ + 12355 "00110100" // /* MW 5 */ + 12356 "00101000" // /* MW 4 */ + 12357 "01110000" // /* MW 3 */ + 12358 "00011011" // /* MW 2 */ + 12359 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12360 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12361 "01100001" // /* MW 9 */ + 12362 "11100000" // /* MW 8 */ + 12363 "00000001" // /* MW 7 */ + 12364 "00011101" // /* MW 6 */ + 12365 "01110100" // /* MW 5 */ + 12366 "00101000" // /* MW 4 */ + 12367 "01110000" // /* MW 3 */ + 12368 "00010011" // /* MW 2 */ + 12369 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12370 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12371 "01000001" // /* MW 13 */ + 12372 "11100010" // /* MW 12 */ + 12373 "00000000" // /* MW 11 */ + 12374 "10001100" // /* MW 10 */ + 12375 "01110000" // /* MW 9 */ + 12376 "00001000" // /* MW 8 */ + 12377 "00000000" // /* MW 7 */ + 12378 "00000000" // /* MW 6 */ + 12379 "01101000" // /* MW 5 */ + 12380 "01010000" // /* MW 4 */ + 12381 "01110000" // /* MW 3 */ + 12382 "00011011" // /* MW 2 */ + 12383 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 12384 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12385 "00000011" // /* MW 15 */ + 12386 "00001111" // /* MW 14 */ + 12387 "01111000" // /* MW 13 */ + 12388 "10100101" // /* MW 12 */ + 12389 "00000001" // /* MW 11 */ + 12390 "00000000" // /* MW 10 */ + 12391 "00000000" // /* MW 9 */ + 12392 "00000000" // /* MW 8 */ + 12393 "10100011" // /* MW 7 */ + 12394 "00011100" // /* MW 6 */ + 12395 "11101010" // /* MW 5 */ + 12396 "01010000" // /* MW 4 */ + 12397 "01110000" // /* MW 3 */ + 12398 "00010011" // /* MW 2 */ + 12399 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12400 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12401 "00010010" // /* MW 15 */ + 12402 "00000111" // /* MW 14 */ + 12403 "01111000" // /* MW 13 */ + 12404 "10100101" // /* MW 12 */ + 12405 "00000001" // /* MW 11 */ + 12406 "00000000" // /* MW 10 */ + 12407 "00000000" // /* MW 9 */ + 12408 "00000000" // /* MW 8 */ + 12409 "00100011" // /* MW 7 */ + 12410 "00011100" // /* MW 6 */ + 12411 "01101010" // /* MW 5 */ + 12412 "01010000" // /* MW 4 */ + 12413 "01110000" // /* MW 3 */ + 12414 "00011011" // /* MW 2 */ + 12415 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 12416 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12417 "01100001" // /* MW 7 */ + 12418 "11100000" // /* MW 6 */ + 12419 "00000001" // /* MW 5 */ + 12420 "00000010" // /* MW 4 */ + 12421 "01100000" // /* MW 3 */ + 12422 "10010100" // /* MW 2 */ + 12423 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12424 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12425 "01000001" // /* MW 7 */ + 12426 "11100010" // /* MW 6 */ + 12427 "00000000" // /* MW 5 */ + 12428 "00000010" // /* MW 4 */ + 12429 "01100000" // /* MW 3 */ + 12430 "10000100" // /* MW 2 */ + 12431 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12432 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12433 "01100001" // /* MW 7 */ + 12434 "11100000" // /* MW 6 */ + 12435 "00000001" // /* MW 5 */ + 12436 "00000010" // /* MW 4 */ + 12437 "01100000" // /* MW 3 */ + 12438 "10010100" // /* MW 2 */ + 12439 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12440 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12441 "01000001" // /* MW 7 */ + 12442 "11100010" // /* MW 6 */ + 12443 "00000000" // /* MW 5 */ + 12444 "00000010" // /* MW 4 */ + 12445 "01100000" // /* MW 3 */ + 12446 "10000100" // /* MW 2 */ + 12447 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12448 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12449 "01100001" // /* MW 7 */ + 12450 "11100000" // /* MW 6 */ + 12451 "00000001" // /* MW 5 */ + 12452 "00000010" // /* MW 4 */ + 12453 "01100000" // /* MW 3 */ + 12454 "10010100" // /* MW 2 */ + 12455 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12456 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12457 "01000001" // /* MW 7 */ + 12458 "11100010" // /* MW 6 */ + 12459 "00000000" // /* MW 5 */ + 12460 "00000010" // /* MW 4 */ + 12461 "01100000" // /* MW 3 */ + 12462 "10000100" // /* MW 2 */ + 12463 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12464 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12465 "01100001" // /* MW 7 */ + 12466 "11100000" // /* MW 6 */ + 12467 "00000001" // /* MW 5 */ + 12468 "00000010" // /* MW 4 */ + 12469 "01100000" // /* MW 3 */ + 12470 "10010100" // /* MW 2 */ + 12471 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12472 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12473 "00100011" // /* MW 3 */ + 12474 "00011100" // /* MW 2 */ + 12475 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 172 4 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12476 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 12477 "00000000" // /* MW 5 */ + 12478 "01010000" // /* MW 4 */ + 12479 "01100000" // /* MW 3 */ + 12480 "10010100" // /* MW 2 */ + 12481 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12482 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12483 "00100011" // /* MW 3 */ + 12484 "00011100" // /* MW 2 */ + 12485 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12486 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12487 "10100011" // /* MW 3 */ + 12488 "00011100" // /* MW 2 */ + 12489 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 12490 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12491 "00100011" // /* MW 3 */ + 12492 "00011100" // /* MW 2 */ + 12493 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 12494 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12495 "10100011" // /* MW 3 */ + 12496 "00011100" // /* MW 2 */ + 12497 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 12499 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 369 first +.src_ref 7 "superkernels.cpp" 374 6 +.function_start + 12512 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12513 "10000000" // /* MW 5 */ + 12514 "11001000" // /* MW 4 */ + 12515 "11001000" // /* MW 3 */ + 12516 "00000111" // /* MW 2 */ + 12517 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first + 12518 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12519 "11000001" // /* MW 5 */ + 12520 "10110101" // /* MW 4 */ + 12521 "11011000" // /* MW 3 */ + 12522 "11000010" // /* MW 2 */ + 12523 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 369 + 12524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12525 "00000001" // /* MW 5 */ + 12526 "00000000" // /* MW 4 */ + 12527 "00000000" // /* MW 3 */ + 12528 "00001000" // /* MW 2 */ + 12529 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 12530 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12531 "01111001" // /* MW 9 */ + 12532 "01100000" // /* MW 8 */ + 12533 "11001010" // /* MW 7 */ + 12534 "10000001" // /* MW 6 */ + 12535 "00010100" // /* MW 5 */ + 12536 "00100011" // /* MW 4 */ + 12537 "10110000" // /* MW 3 */ + 12538 "00111010" // /* MW 2 */ + 12539 "11111111" // /* MW 1 */ + 12540 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12541 "01110000" // /* MW 7 */ + 12542 "11010000" // /* MW 6 */ + 12543 "00001011" // /* MW 5 */ + 12544 "00000000" // /* MW 4 */ + 12545 "10110000" // /* MW 3 */ + 12546 "10000011" // /* MW 2 */ + 12547 "11111101" // /* MW 1 */ + 12548 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12549 "00010101" // /* MW 3 */ + 12550 "11111100" // /* MW 2 */ + 12551 "00001111" // /* MW 1 */ + 12552 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12553 "00111101" // /* MW 3 */ + 12554 "11110000" // /* MW 2 */ + 12555 "00001111" // /* MW 1 */ + 12556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12557 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first +.src_ref 7 "superkernels.cpp" 374 16 first + 12558 "10000100" // JNZ r16, #12704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12704 delay_slots=5 */ + 12559 "00000001" // /* MW 5 */ + 12560 "01000000" // /* MW 4 */ + 12561 "11010000" // /* MW 3 */ + 12562 "00011000" // /* MW 2 */ + 12563 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 30 first +.delay_slot + 12564 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12565 "11111011" // /* MW 3 */ + 12566 "01100011" // /* MW 2 */ + 12567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 12568 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12569 "10100000" // /* MW 5 */ + 12570 "11001000" // /* MW 4 */ + 12571 "11000100" // /* MW 3 */ + 12572 "00000111" // /* MW 2 */ + 12573 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 12574 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12575 "01110000" // /* MW 7 */ + 12576 "01100000" // /* MW 6 */ + 12577 "00110111" // /* MW 5 */ + 12578 "00000001" // /* MW 4 */ + 12579 "00110000" // /* MW 3 */ + 12580 "11000110" // /* MW 2 */ + 12581 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 12582 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12583 "11000000" // /* MW 3 */ + 12584 "11010110" // /* MW 2 */ + 12585 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 7 "superkernels.cpp" 379 28 +.src_ref 7 "superkernels.cpp" 381 42 +.src_ref 7 "superkernels.cpp" 393 2 +.delay_slot + 12586 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12587 "00010001" // /* MW 9 */ + 12588 "11100000" // /* MW 8 */ + 12589 "10110010" // /* MW 7 */ + 12590 "11110011" // /* MW 6 */ + 12591 "00000001" // /* MW 5 */ + 12592 "00000000" // /* MW 4 */ + 12593 "10110000" // /* MW 3 */ + 12594 "10100011" // /* MW 2 */ + 12595 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12596 "00111010" // MOVS p0, p7; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12597 "00010001" // /* MW 9 */ + 12598 "00110010" // /* MW 8 */ + 12599 "00110010" // /* MW 7 */ + 12600 "11110001" // /* MW 6 */ + 12601 "00000001" // /* MW 5 */ + 12602 "00000000" // /* MW 4 */ + 12603 "01100000" // /* MW 3 */ + 12604 "10010001" // /* MW 2 */ + 12605 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12606 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12607 "00010000" // /* MW 9 */ + 12608 "00110000" // /* MW 8 */ + 12609 "00110010" // /* MW 7 */ + 12610 "11110001" // /* MW 6 */ + 12611 "00000001" // /* MW 5 */ + 12612 "00000000" // /* MW 4 */ + 12613 "11100000" // /* MW 3 */ + 12614 "11000000" // /* MW 2 */ + 12615 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12617 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12618 "00000100" // JL #12064 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12064 delay_slots=5 */ + 12619 "00000001" // /* MW 5 */ + 12620 "00000000" // /* MW 4 */ + 12621 "10010000" // /* MW 3 */ + 12622 "00010111" // /* MW 2 */ + 12623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12625 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12627 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12628 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12629 "00110001" // /* MW 3 */ + 12630 "00100000" // /* MW 2 */ + 12631 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 12632 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12633 "00000101" // /* MW 3 */ + 12634 "00100000" // /* MW 2 */ + 12635 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 12636 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12637 "00010001" // /* MW 3 */ + 12638 "00000110" // /* MW 2 */ + 12639 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 381 42 first +.return_address + 12640 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12641 "00010000" // /* MW 9 */ + 12642 "00101000" // /* MW 8 */ + 12643 "10110010" // /* MW 7 */ + 12644 "11110000" // /* MW 6 */ + 12645 "00000001" // /* MW 5 */ + 12646 "00000000" // /* MW 4 */ + 12647 "11010000" // /* MW 3 */ + 12648 "11000010" // /* MW 2 */ + 12649 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 390 48 + 12650 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12651 "00010000" // /* MW 9 */ + 12652 "00101010" // /* MW 8 */ + 12653 "10110010" // /* MW 7 */ + 12654 "11110001" // /* MW 6 */ + 12655 "00000001" // /* MW 5 */ + 12656 "00000000" // /* MW 4 */ + 12657 "11010000" // /* MW 3 */ + 12658 "11000110" // /* MW 2 */ + 12659 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 28 first +.src_ref 7 "superkernels.cpp" 382 16 +.src_ref 7 "superkernels.cpp" 391 48 + 12660 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12661 "00010000" // /* MW 9 */ + 12662 "00101100" // /* MW 8 */ + 12663 "10110010" // /* MW 7 */ + 12664 "11110000" // /* MW 6 */ + 12665 "00000001" // /* MW 5 */ + 12666 "00000000" // /* MW 4 */ + 12667 "01010000" // /* MW 3 */ + 12668 "11001011" // /* MW 2 */ + 12669 "11101010" // /* MW 1 */ + 12670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12671 "00000000" // /* MW 1 */ + 12672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12673 "00000000" // /* MW 1 */ + 12674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12675 "00000000" // /* MW 1 */ + 12676 "10000100" // J #12720 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12720 delay_slots=5 */ + 12677 "00000000" // /* MW 5 */ + 12678 "00000000" // /* MW 4 */ + 12679 "11011000" // /* MW 3 */ + 12680 "00011000" // /* MW 2 */ + 12681 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 +.delay_slot + 12682 "01000100" // MOVXM p2, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12683 "10111000" // /* MW 5 */ + 12684 "11001000" // /* MW 4 */ + 12685 "11000100" // /* MW 3 */ + 12686 "00000111" // /* MW 2 */ + 12687 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 27 first +.delay_slot + 12688 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12689 "00001111" // /* MW 3 */ + 12690 "01100001" // /* MW 2 */ + 12691 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 first +.delay_slot + 12692 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12693 "01010001" // /* MW 3 */ + 12694 "00000110" // /* MW 2 */ + 12695 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 first +.delay_slot + 12696 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12697 "00010001" // /* MW 3 */ + 12698 "00000110" // /* MW 2 */ + 12699 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 382 16 first +.delay_slot + 12700 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12701 "00010001" // /* MW 3 */ + 12702 "00000110" // /* MW 2 */ + 12703 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 390 48 + 12704 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12705 "10101000" // /* MW 5 */ + 12706 "11001000" // /* MW 4 */ + 12707 "11000110" // /* MW 3 */ + 12708 "00000111" // /* MW 2 */ + 12709 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 + 12710 "10111010" // NOPA; MOVXM p1, #509016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12711 "00010000" // /* MW 9 */ + 12712 "00101100" // /* MW 8 */ + 12713 "10110010" // /* MW 7 */ + 12714 "11110000" // /* MW 6 */ + 12715 "00000001" // /* MW 5 */ + 12716 "00000000" // /* MW 4 */ + 12717 "11110000" // /* MW 3 */ + 12718 "00101100" // /* MW 2 */ + 12719 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 12720 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12721 "10000110" // /* MW 3 */ + 12722 "01100111" // /* MW 2 */ + 12723 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 12724 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12725 "00010000" // /* MW 9 */ + 12726 "00100000" // /* MW 8 */ + 12727 "00110010" // /* MW 7 */ + 12728 "11110001" // /* MW 6 */ + 12729 "00000001" // /* MW 5 */ + 12730 "00000000" // /* MW 4 */ + 12731 "11010000" // /* MW 3 */ + 12732 "11101110" // /* MW 2 */ + 12733 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 12734 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12735 "00010110" // /* MW 3 */ + 12736 "11111110" // /* MW 2 */ + 12737 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 12738 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12739 "00110110" // /* MW 3 */ + 12740 "11111110" // /* MW 2 */ + 12741 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first + 12742 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12743 "01010110" // /* MW 3 */ + 12744 "00000110" // /* MW 2 */ + 12745 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 12746 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12747 "01110110" // /* MW 3 */ + 12748 "01000110" // /* MW 2 */ + 12749 "00000000" // /* MW 1 */ + 12750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12751 "00000000" // /* MW 1 */ + 12752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12753 "00000000" // /* MW 1 */ + 12754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12755 "00000000" // /* MW 1 */ + 12756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12757 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 12758 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12759 "00000010" // /* MW 3 */ + 12760 "01100001" // /* MW 2 */ + 12761 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 12762 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12763 "00001110" // /* MW 5 */ + 12764 "01000000" // /* MW 4 */ + 12765 "00111001" // /* MW 3 */ + 12766 "11000010" // /* MW 2 */ + 12767 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 + 12768 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12769 "00010001" // /* MW 3 */ + 12770 "00000110" // /* MW 2 */ + 12771 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 12772 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12773 "11111101" // /* MW 3 */ + 12774 "11100000" // /* MW 2 */ + 12775 "00010111" // /* MW 1 */ + 12776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12777 "00000000" // /* MW 1 */ + 12778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12779 "00000000" // /* MW 1 */ + 12780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12781 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12782 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12783 "00001000" // /* MW 3 */ + 12784 "11010011" // /* MW 2 */ + 12785 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 12786 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12787 "00000110" // /* MW 3 */ + 12788 "01100111" // /* MW 2 */ + 12789 "00011010" // /* MW 1 */ + 12790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12791 "00000000" // /* MW 1 */ + 12792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12793 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 12794 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12795 "01110110" // /* MW 3 */ + 12796 "11111111" // /* MW 2 */ + 12797 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 12798 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12799 "00110110" // /* MW 3 */ + 12800 "11111110" // /* MW 2 */ + 12801 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 12802 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12803 "01010110" // /* MW 3 */ + 12804 "11111110" // /* MW 2 */ + 12805 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 12806 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12807 "01110110" // /* MW 3 */ + 12808 "01010110" // /* MW 2 */ + 12809 "00000010" // /* MW 1 */ + 12810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12811 "00000000" // /* MW 1 */ + 12812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12813 "00000000" // /* MW 1 */ + 12814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12815 "00000000" // /* MW 1 */ + 12816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12817 "00000000" // /* MW 1 */ + 12818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12819 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 12820 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12821 "00010010" // /* MW 3 */ + 12822 "10100011" // /* MW 2 */ + 12823 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 12824 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12825 "00110001" // /* MW 3 */ + 12826 "00000110" // /* MW 2 */ + 12827 "00001010" // /* MW 1 */ + 12828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12829 "00000000" // /* MW 1 */ + 12830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12831 "00000000" // /* MW 1 */ + 12832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12833 "00000000" // /* MW 1 */ + 12834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12835 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12836 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12837 "00001000" // /* MW 3 */ + 12838 "11010011" // /* MW 2 */ + 12839 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 +.src_ref 7 "superkernels.cpp" 391 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 12840 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12841 "01111001" // /* MW 9 */ + 12842 "01100000" // /* MW 8 */ + 12843 "11001110" // /* MW 7 */ + 12844 "00101001" // /* MW 6 */ + 12845 "00000000" // /* MW 5 */ + 12846 "00000001" // /* MW 4 */ + 12847 "01100000" // /* MW 3 */ + 12848 "00010001" // /* MW 2 */ + 12849 "11010001" // /* MW 1 */ + 12850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12851 "00000000" // /* MW 1 */ + 12852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12853 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 12854 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12855 "00011001" // /* MW 3 */ + 12856 "11101110" // /* MW 2 */ + 12857 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 48 first + 12858 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12859 "00111011" // /* MW 5 */ + 12860 "11011000" // /* MW 4 */ + 12861 "11011111" // /* MW 3 */ + 12862 "11000110" // /* MW 2 */ + 12863 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 first +.src_ref 7 "superkernels.cpp" 393 2 + 12864 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12865 "10000001" // /* MW 5 */ + 12866 "11011101" // /* MW 4 */ + 12867 "11010110" // /* MW 3 */ + 12868 "11010010" // /* MW 2 */ + 12869 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12870 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12871 "01010110" // /* MW 3 */ + 12872 "01001110" // /* MW 2 */ + 12873 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12874 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12875 "00011110" // /* MW 3 */ + 12876 "01011101" // /* MW 2 */ + 12877 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12878 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12879 "11000000" // /* MW 3 */ + 12880 "01100000" // /* MW 2 */ + 12881 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12883 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12884 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12885 "01110110" // /* MW 3 */ + 12886 "00000110" // /* MW 2 */ + 12887 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12889 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 393 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12890 "00000100" // JL #12208 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12208 delay_slots=5 */ + 12891 "00000001" // /* MW 5 */ + 12892 "00000000" // /* MW 4 */ + 12893 "11011000" // /* MW 3 */ + 12894 "00010111" // /* MW 2 */ + 12895 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12896 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12897 "11000000" // /* MW 3 */ + 12898 "11010100" // /* MW 2 */ + 12899 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12900 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12901 "00001101" // /* MW 3 */ + 12902 "01100011" // /* MW 2 */ + 12903 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 first +.delay_slot + 12904 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12905 "00001101" // /* MW 3 */ + 12906 "00100001" // /* MW 2 */ + 12907 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 +.delay_slot + 12908 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12909 "01000001" // /* MW 3 */ + 12910 "01101001" // /* MW 2 */ + 12911 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12912 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12913 "00000000" // /* MW 15 */ + 12914 "00000000" // /* MW 14 */ + 12915 "10101000" // /* MW 13 */ + 12916 "11100010" // /* MW 12 */ + 12917 "00110100" // /* MW 11 */ + 12918 "00000000" // /* MW 10 */ + 12919 "00000000" // /* MW 9 */ + 12920 "00000000" // /* MW 8 */ + 12921 "01011011" // /* MW 7 */ + 12922 "00000001" // /* MW 6 */ + 12923 "00100000" // /* MW 5 */ + 12924 "00000000" // /* MW 4 */ + 12925 "11110000" // /* MW 3 */ + 12926 "00101100" // /* MW 2 */ + 12927 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 12928 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12929 "01111000" // /* MW 9 */ + 12930 "11010000" // /* MW 8 */ + 12931 "10110011" // /* MW 7 */ + 12932 "00101000" // /* MW 6 */ + 12933 "00000000" // /* MW 5 */ + 12934 "00000001" // /* MW 4 */ + 12935 "11010000" // /* MW 3 */ + 12936 "11000110" // /* MW 2 */ + 12937 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 + 12938 "01000100" // MOVXM p6, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12939 "10111000" // /* MW 5 */ + 12940 "11001000" // /* MW 4 */ + 12941 "11001100" // /* MW 3 */ + 12942 "00000111" // /* MW 2 */ + 12943 "00000000" // /* MW 1 */ + 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12945 "00000000" // /* MW 1 */ + 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12947 "00000000" // /* MW 1 */ + 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12949 "00000000" // /* MW 1 */ + 12950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12951 "00000000" // /* MW 1 */ + 12952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12953 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12954 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12955 "00001000" // /* MW 3 */ + 12956 "01010001" // /* MW 2 */ + 12957 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 12958 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12959 "00110110" // /* MW 3 */ + 12960 "11110110" // /* MW 2 */ + 12961 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 12962 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12963 "00011001" // /* MW 3 */ + 12964 "11101101" // /* MW 2 */ + 12965 "00000111" // /* MW 1 */ + 12966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12967 "00000000" // /* MW 1 */ + 12968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12969 "00000000" // /* MW 1 */ + 12970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12971 "00000000" // /* MW 1 */ + 12972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12973 "00000000" // /* MW 1 */ + 12974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12975 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 12976 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12977 "00010001" // /* MW 3 */ + 12978 "00100011" // /* MW 2 */ + 12979 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 12980 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12981 "01100011" // /* MW 5 */ + 12982 "11101100" // /* MW 4 */ + 12983 "11010011" // /* MW 3 */ + 12984 "11000110" // /* MW 2 */ + 12985 "01001010" // /* MW 1 */ + 12986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12987 "00000000" // /* MW 1 */ + 12988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12989 "00000000" // /* MW 1 */ + 12990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12991 "00000000" // /* MW 1 */ + 12992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12993 "00000000" // /* MW 1 */ + 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12995 "00000000" // /* MW 1 */ + 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12997 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12998 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12999 "00001000" // /* MW 3 */ + 13000 "01010001" // /* MW 2 */ + 13001 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 +.src_ref 7 "superkernels.cpp" 398 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 13002 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13003 "00010000" // /* MW 9 */ + 13004 "00100000" // /* MW 8 */ + 13005 "10110010" // /* MW 7 */ + 13006 "11110000" // /* MW 6 */ + 13007 "00000001" // /* MW 5 */ + 13008 "00000000" // /* MW 4 */ + 13009 "11010000" // /* MW 3 */ + 13010 "11001110" // /* MW 2 */ + 13011 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 first + 13012 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13013 "01010110" // /* MW 3 */ + 13014 "00000110" // /* MW 2 */ + 13015 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 13016 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13017 "00110110" // /* MW 3 */ + 13018 "00000110" // /* MW 2 */ + 13019 "00000001" // /* MW 1 */ + 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13021 "00000000" // /* MW 1 */ + 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13023 "00000000" // /* MW 1 */ + 13024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13025 "00000000" // /* MW 1 */ + 13026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13027 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 13028 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13029 "00110001" // /* MW 3 */ + 13030 "00100001" // /* MW 2 */ + 13031 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 13032 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13033 "00010001" // /* MW 3 */ + 13034 "11100110" // /* MW 2 */ + 13035 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 16 first + 13036 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13037 "00101000" // /* MW 3 */ + 13038 "01100001" // /* MW 2 */ + 13039 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 13040 "10000100" // JNZ r16, #13072 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13072 delay_slots=5 */ + 13041 "00000001" // /* MW 5 */ + 13042 "01000000" // /* MW 4 */ + 13043 "10001000" // /* MW 3 */ + 13044 "00011001" // /* MW 2 */ + 13045 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13049 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13055 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 + 13056 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13057 "00000001" // /* MW 3 */ + 13058 "00100000" // /* MW 2 */ + 13059 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 first + 13060 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13061 "11000001" // /* MW 11 */ + 13062 "00001000" // /* MW 10 */ + 13063 "10000011" // /* MW 9 */ + 13064 "00000000" // /* MW 8 */ + 13065 "00000000" // /* MW 7 */ + 13066 "00000000" // /* MW 6 */ + 13067 "00100000" // /* MW 5 */ + 13068 "00000000" // /* MW 4 */ + 13069 "11110000" // /* MW 3 */ + 13070 "00101100" // /* MW 2 */ + 13071 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 400 + 13072 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13073 "00111001" // /* MW 3 */ + 13074 "11110000" // /* MW 2 */ + 13075 "00000111" // /* MW 1 */ + 13076 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13077 "11110001" // /* MW 3 */ + 13078 "11111101" // /* MW 2 */ + 13079 "00000111" // /* MW 1 */ + 13080 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13081 "10011001" // /* MW 3 */ + 13082 "11110111" // /* MW 2 */ + 13083 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13085 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 13086 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13087 "11010001" // /* MW 3 */ + 13088 "11111001" // /* MW 2 */ + 13089 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13091 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13093 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13094 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13095 "00000000" // /* MW 3 */ + 13096 "00101000" // /* MW 2 */ + 13097 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13098 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13099 "00001011" // /* MW 3 */ + 13100 "10001110" // /* MW 2 */ + 13101 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 +.delay_slot + 13102 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13103 "00000001" // /* MW 5 */ + 13104 "00000000" // /* MW 4 */ + 13105 "00000000" // /* MW 3 */ + 13106 "11111000" // /* MW 2 */ + 13107 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13111 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 13113 "00000000" // /* MW 1 */ +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function _b881_wrapper _Z13_b881_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 21 first +.src_ref 0 "0_0_reloadable0.cc" 23 79 +.function_start + 13120 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13121 "11000000" // /* MW 3 */ + 13122 "01100000" // /* MW 2 */ + 13123 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 23 79 first + 13124 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13125 "00011110" // /* MW 3 */ + 13126 "00101100" // /* MW 2 */ + 13127 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 25 81 first + 13128 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13129 "00011110" // /* MW 3 */ + 13130 "11110101" // /* MW 2 */ + 13131 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 24 47 first + 13132 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13133 "10011110" // /* MW 3 */ + 13134 "00000100" // /* MW 2 */ + 13135 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 22 4 first +.tail_call + 13136 "10000100" // J #10672 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10672 delay_slots=5 */ + 13137 "00000000" // /* MW 5 */ + 13138 "00000000" // /* MW 4 */ + 13139 "11011000" // /* MW 3 */ + 13140 "00010100" // /* MW 2 */ + 13141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13143 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13149 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 + 13151 "00000000" // /* MW 1 */ +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj +.function setup_transposeshuffle_params _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj +.src_ref 8 "transposeshuffle_params.h" 93 first +.src_ref 8 "transposeshuffle_params.h" 102 18 first +.function_start + 13152 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13153 "00101110" // /* MW 3 */ + 13154 "00011100" // /* MW 2 */ + 13155 "00000001" // /* MW 1 */ + 13156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13157 "00000000" // /* MW 1 */ + 13158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13159 "00000000" // /* MW 1 */ + 13160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13161 "00000000" // /* MW 1 */ + 13162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13163 "00000000" // /* MW 1 */ + 13164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13165 "00000000" // /* MW 1 */ + 13166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13167 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 first + 13168 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13169 "00101001" // /* MW 3 */ + 13170 "00011100" // /* MW 2 */ + 13171 "00001000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 18 + 13172 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13173 "00101110" // /* MW 3 */ + 13174 "00011100" // /* MW 2 */ + 13175 "00000001" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 18 + 13176 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13177 "00001110" // /* MW 3 */ + 13178 "00011100" // /* MW 2 */ + 13179 "00000001" // /* MW 1 */ + 13180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13181 "00000000" // /* MW 1 */ + 13182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13183 "00000000" // /* MW 1 */ + 13184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13185 "00000000" // /* MW 1 */ + 13186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13187 "00000000" // /* MW 1 */ + 13188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13189 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 + 13190 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13191 "00101001" // /* MW 3 */ + 13192 "00011100" // /* MW 2 */ + 13193 "00001000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 + 13194 "10011000" // ST eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13195 "00001001" // /* MW 3 */ + 13196 "00011100" // /* MW 2 */ + 13197 "00001000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 18 + 13198 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13199 "00101110" // /* MW 3 */ + 13200 "00000100" // /* MW 2 */ + 13201 "00000001" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 18 + 13202 "10011000" // LDA eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13203 "00001110" // /* MW 3 */ + 13204 "00010100" // /* MW 2 */ + 13205 "00000001" // /* MW 1 */ + 13206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13207 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 111 first + 13208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13209 "00000000" // /* MW 3 */ + 13210 "00101000" // /* MW 2 */ + 13211 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13213 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13217 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 first +.delay_slot + 13218 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13219 "00101001" // /* MW 3 */ + 13220 "00000100" // /* MW 2 */ + 13221 "00001000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 +.delay_slot + 13222 "10011000" // ST eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13223 "00001001" // /* MW 3 */ + 13224 "00010100" // /* MW 2 */ +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj___func_end0 + 13225 "00001000" // /* MW 1 */ +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params +.function transposeshuffle _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params +.src_ref 8 "transposeshuffle.h" 71 first +.src_ref 8 "transposeshuffle.h" 78 8 +.src_ref 8 "transposeshuffle.h" 78 14 +.src_ref 8 "transposeshuffle.h" 78 23 +.function_start + 13232 "11100100" // MOVX r1, #22; MOV r2, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13233 "10000001" // /* MW 5 */ + 13234 "00101001" // /* MW 4 */ + 13235 "00100001" // /* MW 3 */ + 13236 "01001011" // /* MW 2 */ + 13237 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 78 14 first + 13238 "00011000" // ADD.NC p2, r2, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13239 "00000110" // /* MW 3 */ + 13240 "01100001" // /* MW 2 */ + 13241 "00011010" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 81 26 first + 13242 "10011000" // LDA r2, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13243 "01010110" // /* MW 3 */ + 13244 "00010100" // /* MW 2 */ + 13245 "00000010" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 78 14 first + 13246 "10011000" // LDA r27, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13247 "01110110" // /* MW 3 */ + 13248 "00000111" // /* MW 2 */ + 13249 "00000010" // /* MW 1 */ + 13250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13251 "00000000" // /* MW 1 */ + 13252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13253 "00000000" // /* MW 1 */ + 13254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13255 "00000000" // /* MW 1 */ + 13256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13257 "00000000" // /* MW 1 */ + 13258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13259 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 81 4 first +.src_ref 8 "transposeshuffle.h" 81 19 first + 13260 "10000100" // JZ r2, #13680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13680 delay_slots=5 */ + 13261 "00000001" // /* MW 5 */ + 13262 "00000000" // /* MW 4 */ + 13263 "10111000" // /* MW 3 */ + 13264 "00011010" // /* MW 2 */ + 13265 "00010000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 78 8 +.src_ref 8 "transposeshuffle.h" 78 23 +.delay_slot + 13266 "00011000" // MOVX r0, #29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13267 "01110101" // /* MW 3 */ + 13268 "00000000" // /* MW 2 */ + 13269 "00010000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 78 8 first +.src_ref 8 "transposeshuffle.h" 78 23 first +.delay_slot + 13270 "00011000" // SEL.EQZ r0, r1, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13271 "00000010" // /* MW 3 */ + 13272 "01000000" // /* MW 2 */ + 13273 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13275 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13279 "00000000" // /* MW 1 */ + 13280 "00011000" // MOVX r1, #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13281 "00101001" // /* MW 3 */ + 13282 "00000010" // /* MW 2 */ + 13283 "00010000" // /* MW 1 */ + 13284 "10011000" // LTU r1, r2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13285 "00011100" // /* MW 3 */ + 13286 "10000010" // /* MW 2 */ + 13287 "00010000" // /* MW 1 */ + 13288 "10000100" // JNZ r1, #13536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13536 delay_slots=5 */ + 13289 "00000001" // /* MW 5 */ + 13290 "01000000" // /* MW 4 */ + 13291 "01110000" // /* MW 3 */ + 13292 "00011010" // /* MW 2 */ + 13293 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13297 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13299 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13303 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 8 "transposeshuffle.h" 81 4 first +.src_ref 8 "transposeshuffle.h" 83 46 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13304 "00111010" // VLDB x0, [p0], #64; MOVXM ls, #13440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13305 "00010000" // /* MW 9 */ + 13306 "01000000" // /* MW 8 */ + 13307 "01111010" // /* MW 7 */ + 13308 "00001100" // /* MW 6 */ + 13309 "00000000" // /* MW 5 */ + 13310 "00000000" // /* MW 4 */ + 13311 "01101000" // /* MW 3 */ + 13312 "00111000" // /* MW 2 */ + 13313 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 81 4 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13314 "01111110" // NOPA; VLDB x0, [p0], #64; NOPS; MOVXM le, #13440 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 13315 "01100000" // /* MW 13 */ + 13316 "00101011" // /* MW 12 */ + 13317 "00000000" // /* MW 11 */ + 13318 "00000010" // /* MW 10 */ + 13319 "01001000" // /* MW 9 */ + 13320 "10110111" // /* MW 8 */ + 13321 "00000001" // /* MW 7 */ + 13322 "00000000" // /* MW 6 */ + 13323 "01101000" // /* MW 5 */ + 13324 "00111000" // /* MW 4 */ + 13325 "11110000" // /* MW 3 */ + 13326 "00101100" // /* MW 2 */ + 13327 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 81 4 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13328 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; ADD.NC lc, r2, #-9; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13329 "00000000" // /* MW 15 */ + 13330 "00000000" // /* MW 14 */ + 13331 "11001000" // /* MW 13 */ + 13332 "10111101" // /* MW 12 */ + 13333 "10111000" // /* MW 11 */ + 13334 "00000010" // /* MW 10 */ + 13335 "00000000" // /* MW 9 */ + 13336 "00000000" // /* MW 8 */ + 13337 "01011011" // /* MW 7 */ + 13338 "00000001" // /* MW 6 */ + 13339 "01101000" // /* MW 5 */ + 13340 "00111000" // /* MW 4 */ + 13341 "11110000" // /* MW 3 */ + 13342 "00101100" // /* MW 2 */ + 13343 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13344 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13345 "00000000" // /* MW 15 */ + 13346 "00000000" // /* MW 14 */ + 13347 "01111000" // /* MW 13 */ + 13348 "10100101" // /* MW 12 */ + 13349 "00000001" // /* MW 11 */ + 13350 "00000000" // /* MW 10 */ + 13351 "00000000" // /* MW 9 */ + 13352 "00000000" // /* MW 8 */ + 13353 "01011011" // /* MW 7 */ + 13354 "00000001" // /* MW 6 */ + 13355 "01101000" // /* MW 5 */ + 13356 "00111000" // /* MW 4 */ + 13357 "11110000" // /* MW 3 */ + 13358 "00101100" // /* MW 2 */ + 13359 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13360 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13361 "00000000" // /* MW 15 */ + 13362 "00000000" // /* MW 14 */ + 13363 "01111000" // /* MW 13 */ + 13364 "10100101" // /* MW 12 */ + 13365 "00000001" // /* MW 11 */ + 13366 "00000000" // /* MW 10 */ + 13367 "00000000" // /* MW 9 */ + 13368 "00000000" // /* MW 8 */ + 13369 "01011011" // /* MW 7 */ + 13370 "00000001" // /* MW 6 */ + 13371 "01101000" // /* MW 5 */ + 13372 "00111000" // /* MW 4 */ + 13373 "11110000" // /* MW 3 */ + 13374 "00101100" // /* MW 2 */ + 13375 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13376 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13377 "00000000" // /* MW 15 */ + 13378 "00000000" // /* MW 14 */ + 13379 "01111000" // /* MW 13 */ + 13380 "10100101" // /* MW 12 */ + 13381 "00000001" // /* MW 11 */ + 13382 "00000000" // /* MW 10 */ + 13383 "00000000" // /* MW 9 */ + 13384 "00000000" // /* MW 8 */ + 13385 "01011011" // /* MW 7 */ + 13386 "00000001" // /* MW 6 */ + 13387 "01101000" // /* MW 5 */ + 13388 "00111000" // /* MW 4 */ + 13389 "11110000" // /* MW 3 */ + 13390 "00101100" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13392 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13393 "00000000" // /* MW 15 */ + 13394 "00000000" // /* MW 14 */ + 13395 "01111000" // /* MW 13 */ + 13396 "10100101" // /* MW 12 */ + 13397 "00000001" // /* MW 11 */ + 13398 "00000000" // /* MW 10 */ + 13399 "00000000" // /* MW 9 */ + 13400 "00000000" // /* MW 8 */ + 13401 "01011011" // /* MW 7 */ + 13402 "00000001" // /* MW 6 */ + 13403 "01101000" // /* MW 5 */ + 13404 "00111000" // /* MW 4 */ + 13405 "11110000" // /* MW 3 */ + 13406 "00101100" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.src_ref 8 "transposeshuffle.h" 84 13 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13408 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13409 "00000000" // /* MW 15 */ + 13410 "00000000" // /* MW 14 */ + 13411 "11101000" // /* MW 13 */ + 13412 "00000000" // /* MW 12 */ + 13413 "00000000" // /* MW 11 */ + 13414 "00000000" // /* MW 10 */ + 13415 "00000000" // /* MW 9 */ + 13416 "00000000" // /* MW 8 */ + 13417 "01011011" // /* MW 7 */ + 13418 "00000001" // /* MW 6 */ + 13419 "01101000" // /* MW 5 */ + 13420 "00111000" // /* MW 4 */ + 13421 "11110000" // /* MW 3 */ + 13422 "00101100" // /* MW 2 */ + 13423 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 8 "transposeshuffle.h" 83 46 first +.src_ref 8 "transposeshuffle.h" 84 13 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13424 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13425 "00000000" // /* MW 15 */ + 13426 "00000000" // /* MW 14 */ + 13427 "11101000" // /* MW 13 */ + 13428 "00000000" // /* MW 12 */ + 13429 "00000000" // /* MW 11 */ + 13430 "00000000" // /* MW 10 */ + 13431 "00000000" // /* MW 9 */ + 13432 "00000000" // /* MW 8 */ + 13433 "01011011" // /* MW 7 */ + 13434 "00000001" // /* MW 6 */ + 13435 "01101000" // /* MW 5 */ + 13436 "00111000" // /* MW 4 */ + 13437 "11110000" // /* MW 3 */ + 13438 "00101100" // /* MW 2 */ + 13439 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 8 "transposeshuffle.h" 83 46 +.src_ref 8 "transposeshuffle.h" 84 13 first +.src_ref 8 "transposeshuffle.h" 85 46 first +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13440 "11100001" // NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13441 "00000000" // /* MW 15 */ + 13442 "00000000" // /* MW 14 */ + 13443 "11101000" // /* MW 13 */ + 13444 "00000000" // /* MW 12 */ + 13445 "00000000" // /* MW 11 */ + 13446 "00000000" // /* MW 10 */ + 13447 "00000000" // /* MW 9 */ + 13448 "10000000" // /* MW 8 */ + 13449 "00000110" // /* MW 7 */ + 13450 "00011100" // /* MW 6 */ + 13451 "01101001" // /* MW 5 */ + 13452 "00111000" // /* MW 4 */ + 13453 "11110000" // /* MW 3 */ + 13454 "00101100" // /* MW 2 */ + 13455 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13456 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13457 "11100000" // /* MW 7 */ + 13458 "00000000" // /* MW 6 */ + 13459 "00000000" // /* MW 5 */ + 13460 "00000000" // /* MW 4 */ + 13461 "11010000" // /* MW 3 */ + 13462 "10000000" // /* MW 2 */ + 13463 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13464 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13465 "11100000" // /* MW 7 */ + 13466 "00000000" // /* MW 6 */ + 13467 "00000000" // /* MW 5 */ + 13468 "00000000" // /* MW 4 */ + 13469 "11010000" // /* MW 3 */ + 13470 "10000000" // /* MW 2 */ + 13471 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13472 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13473 "11100000" // /* MW 7 */ + 13474 "00000000" // /* MW 6 */ + 13475 "00000000" // /* MW 5 */ + 13476 "00000000" // /* MW 4 */ + 13477 "11010000" // /* MW 3 */ + 13478 "10000000" // /* MW 2 */ + 13479 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.src_ref 8 "transposeshuffle.h" 88 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13480 "00111010" // VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 13481 "11101001" // /* MW 9 */ + 13482 "00000000" // /* MW 8 */ + 13483 "00000000" // /* MW 7 */ + 13484 "00000000" // /* MW 6 */ + 13485 "01000000" // /* MW 5 */ + 13486 "00000001" // /* MW 4 */ + 13487 "11010000" // /* MW 3 */ + 13488 "10000000" // /* MW 2 */ + 13489 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 8 "transposeshuffle.h" 84 13 first +.src_ref 8 "transposeshuffle.h" 85 46 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13490 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13491 "11100000" // /* MW 7 */ + 13492 "00000000" // /* MW 6 */ + 13493 "00000000" // /* MW 5 */ + 13494 "00000000" // /* MW 4 */ + 13495 "11010000" // /* MW 3 */ + 13496 "10000000" // /* MW 2 */ + 13497 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13498 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13499 "11100000" // /* MW 7 */ + 13500 "00000000" // /* MW 6 */ + 13501 "00000000" // /* MW 5 */ + 13502 "00000000" // /* MW 4 */ + 13503 "11010000" // /* MW 3 */ + 13504 "10000000" // /* MW 2 */ + 13505 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13506 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13507 "11100000" // /* MW 7 */ + 13508 "00000000" // /* MW 6 */ + 13509 "00000000" // /* MW 5 */ + 13510 "00000000" // /* MW 4 */ + 13511 "11010000" // /* MW 3 */ + 13512 "10000000" // /* MW 2 */ + 13513 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 85 46 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13514 "00001100" // NOPA; VST bmll0, [p1], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13515 "00001101" // /* MW 5 */ + 13516 "00111000" // /* MW 4 */ + 13517 "11110010" // /* MW 3 */ + 13518 "00101100" // /* MW 2 */ + 13519 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 85 46 +.delay_slot + 13520 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13521 "00000000" // /* MW 15 */ + 13522 "00000000" // /* MW 14 */ + 13523 "01111000" // /* MW 13 */ + 13524 "10100101" // /* MW 12 */ + 13525 "00000001" // /* MW 11 */ + 13526 "00000000" // /* MW 10 */ + 13527 "00000000" // /* MW 9 */ + 13528 "10000000" // /* MW 8 */ + 13529 "00000110" // /* MW 7 */ + 13530 "00011100" // /* MW 6 */ + 13531 "00100001" // /* MW 5 */ + 13532 "00000000" // /* MW 4 */ + 13533 "11110000" // /* MW 3 */ + 13534 "00101100" // /* MW 2 */ + 13535 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_304 +.src_ref 8 "transposeshuffle.h" 81 4 first + 13536 "11111000" // MOV lc, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13537 "00100000" // /* MW 3 */ + 13538 "01110001" // /* MW 2 */ + 13539 "00011101" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 81 4 + 13540 "01000100" // MOVXM ls, #13552 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13541 "11100000" // /* MW 5 */ + 13542 "11101001" // /* MW 4 */ + 13543 "00110001" // /* MW 3 */ + 13544 "00000000" // /* MW 2 */ + 13545 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 81 4 + 13546 "01000100" // MOVXM le, #13664 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13547 "11000000" // /* MW 5 */ + 13548 "11101010" // /* MW 4 */ + 13549 "00110110" // /* MW 3 */ + 13550 "00000000" // /* MW 2 */ + 13551 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 8 "transposeshuffle.h" 83 46 first +.begin_of_loop +.loop_nesting 1 + 13552 "00011000" // VLDB x0, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13553 "00110100" // /* MW 3 */ + 13554 "00011100" // /* MW 2 */ + 13555 "00111000" // /* MW 1 */ + 13556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13557 "00000000" // /* MW 1 */ + 13558 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13559 "01111110" // /* MW 9 */ + 13560 "10100101" // /* MW 8 */ + 13561 "00000001" // /* MW 7 */ + 13562 "00000000" // /* MW 6 */ + 13563 "00010000" // /* MW 5 */ + 13564 "00000000" // /* MW 4 */ + 13565 "11110000" // /* MW 3 */ + 13566 "00101100" // /* MW 2 */ + 13567 "00000000" // /* MW 1 */ + 13568 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13569 "00000000" // /* MW 15 */ + 13570 "00000000" // /* MW 14 */ + 13571 "01111000" // /* MW 13 */ + 13572 "10100101" // /* MW 12 */ + 13573 "00000001" // /* MW 11 */ + 13574 "00000000" // /* MW 10 */ + 13575 "00000000" // /* MW 9 */ + 13576 "00000000" // /* MW 8 */ + 13577 "01011011" // /* MW 7 */ + 13578 "00000001" // /* MW 6 */ + 13579 "00100000" // /* MW 5 */ + 13580 "00000000" // /* MW 4 */ + 13581 "11110000" // /* MW 3 */ + 13582 "00101100" // /* MW 2 */ + 13583 "00000000" // /* MW 1 */ + 13584 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13585 "00000000" // /* MW 15 */ + 13586 "00000000" // /* MW 14 */ + 13587 "01111000" // /* MW 13 */ + 13588 "10100101" // /* MW 12 */ + 13589 "00000001" // /* MW 11 */ + 13590 "00000000" // /* MW 10 */ + 13591 "00000000" // /* MW 9 */ + 13592 "00000000" // /* MW 8 */ + 13593 "01011011" // /* MW 7 */ + 13594 "00000001" // /* MW 6 */ + 13595 "00100000" // /* MW 5 */ + 13596 "00000000" // /* MW 4 */ + 13597 "11110000" // /* MW 3 */ + 13598 "00101100" // /* MW 2 */ + 13599 "00000000" // /* MW 1 */ + 13600 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13601 "00000000" // /* MW 15 */ + 13602 "00000000" // /* MW 14 */ + 13603 "01111000" // /* MW 13 */ + 13604 "10100101" // /* MW 12 */ + 13605 "00000001" // /* MW 11 */ + 13606 "00000000" // /* MW 10 */ + 13607 "00000000" // /* MW 9 */ + 13608 "00000000" // /* MW 8 */ + 13609 "01011011" // /* MW 7 */ + 13610 "00000001" // /* MW 6 */ + 13611 "00100000" // /* MW 5 */ + 13612 "00000000" // /* MW 4 */ + 13613 "11110000" // /* MW 3 */ + 13614 "00101100" // /* MW 2 */ + 13615 "00000000" // /* MW 1 */ + 13616 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13617 "00000000" // /* MW 15 */ + 13618 "00000000" // /* MW 14 */ + 13619 "01111000" // /* MW 13 */ + 13620 "10100101" // /* MW 12 */ + 13621 "00000001" // /* MW 11 */ + 13622 "00000000" // /* MW 10 */ + 13623 "00000000" // /* MW 9 */ + 13624 "00000000" // /* MW 8 */ + 13625 "01011011" // /* MW 7 */ + 13626 "00000001" // /* MW 6 */ + 13627 "00100000" // /* MW 5 */ + 13628 "00000000" // /* MW 4 */ + 13629 "11110000" // /* MW 3 */ + 13630 "00101100" // /* MW 2 */ + 13631 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 84 13 first + 13632 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13633 "00000000" // /* MW 15 */ + 13634 "00000000" // /* MW 14 */ + 13635 "11101000" // /* MW 13 */ + 13636 "00000000" // /* MW 12 */ + 13637 "00000000" // /* MW 11 */ + 13638 "00000000" // /* MW 10 */ + 13639 "00000000" // /* MW 9 */ + 13640 "00000000" // /* MW 8 */ + 13641 "01011011" // /* MW 7 */ + 13642 "00000001" // /* MW 6 */ + 13643 "00100000" // /* MW 5 */ + 13644 "00000000" // /* MW 4 */ + 13645 "11110000" // /* MW 3 */ + 13646 "00101100" // /* MW 2 */ + 13647 "00000000" // /* MW 1 */ + 13648 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13649 "00000000" // /* MW 15 */ + 13650 "00000000" // /* MW 14 */ + 13651 "01111000" // /* MW 13 */ + 13652 "10100101" // /* MW 12 */ + 13653 "00000001" // /* MW 11 */ + 13654 "00000000" // /* MW 10 */ + 13655 "00000000" // /* MW 9 */ + 13656 "00000000" // /* MW 8 */ + 13657 "01011011" // /* MW 7 */ + 13658 "00000001" // /* MW 6 */ + 13659 "00100000" // /* MW 5 */ + 13660 "00000000" // /* MW 4 */ + 13661 "11110000" // /* MW 3 */ + 13662 "00101100" // /* MW 2 */ + 13663 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_432 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 8 "transposeshuffle.h" 85 46 first +.end_of_loop + 13664 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13665 "00000000" // /* MW 15 */ + 13666 "00000000" // /* MW 14 */ + 13667 "01111000" // /* MW 13 */ + 13668 "10100101" // /* MW 12 */ + 13669 "00000001" // /* MW 11 */ + 13670 "00000000" // /* MW 10 */ + 13671 "00000000" // /* MW 9 */ + 13672 "10000000" // /* MW 8 */ + 13673 "00000110" // /* MW 7 */ + 13674 "00011100" // /* MW 6 */ + 13675 "00100001" // /* MW 5 */ + 13676 "00000000" // /* MW 4 */ + 13677 "11110000" // /* MW 3 */ + 13678 "00101100" // /* MW 2 */ + 13679 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_448 +.src_ref 8 "transposeshuffle.h" 88 first +.loop_nesting 0 + 13680 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13681 "00000000" // /* MW 3 */ + 13682 "00101000" // /* MW 2 */ + 13683 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params___func_end0 + 13693 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj +.function transpose4d_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 9 "transpose4d_adf_wrapper.cpp" 78 +.src_ref 9 "transpose4d_adf_wrapper.cpp" 78 first +.function_start + 13696 "00111010" // MOVS p3, p1; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13697 "01110001" // /* MW 9 */ + 13698 "00000000" // /* MW 8 */ + 13699 "00000000" // /* MW 7 */ + 13700 "00000000" // /* MW 6 */ + 13701 "00000010" // /* MW 5 */ + 13702 "00000000" // /* MW 4 */ + 13703 "01100000" // /* MW 3 */ + 13704 "10010001" // /* MW 2 */ + 13705 "01110000" // /* MW 1 */ + 13706 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13707 "00111101" // /* MW 3 */ + 13708 "11111100" // /* MW 2 */ + 13709 "00001111" // /* MW 1 */ +.src_ref 9 "transpose4d_adf_wrapper.cpp" 80 4 first +.no_stack_arguments + 13710 "00000100" // JL #13152 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13152 delay_slots=5 */ + 13711 "00000001" // /* MW 5 */ + 13712 "00000000" // /* MW 4 */ + 13713 "10110000" // /* MW 3 */ + 13714 "00011001" // /* MW 2 */ + 13715 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 9 "transpose4d_adf_wrapper.cpp" 80 4 +.delay_slot + 13716 "00000010" // MOVS p2, p0; MOV p1, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13717 "01110000" // /* MW 7 */ + 13718 "01100000" // /* MW 6 */ + 13719 "10110010" // /* MW 5 */ + 13720 "00000000" // /* MW 4 */ + 13721 "01100000" // /* MW 3 */ + 13722 "00010001" // /* MW 2 */ + 13723 "01010000" // /* MW 1 */ +.src_ref 9 "transpose4d_adf_wrapper.cpp" 80 4 +.delay_slot + 13724 "00111010" // ST p7, [sp, #-12]; MOVXM p0, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13725 "00010001" // /* MW 9 */ + 13726 "01100000" // /* MW 8 */ + 13727 "00110010" // /* MW 7 */ + 13728 "11110000" // /* MW 6 */ + 13729 "00000001" // /* MW 5 */ + 13730 "00000000" // /* MW 4 */ + 13731 "10110000" // /* MW 3 */ + 13732 "11110011" // /* MW 2 */ + 13733 "11111110" // /* MW 1 */ +.delay_slot + 13734 "10011000" // ST p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13735 "00011101" // /* MW 3 */ + 13736 "11111011" // /* MW 2 */ + 13737 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 13738 "11010100" // NOPA; MOV p6, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13739 "10000001" // /* MW 5 */ + 13740 "11001101" // /* MW 4 */ + 13741 "11111100" // /* MW 3 */ + 13742 "00101100" // /* MW 2 */ + 13743 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 13744 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p7, p2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13745 "00000000" // /* MW 15 */ + 13746 "00000000" // /* MW 14 */ + 13747 "01111000" // /* MW 13 */ + 13748 "01100000" // /* MW 12 */ + 13749 "10110010" // /* MW 11 */ + 13750 "00000011" // /* MW 10 */ + 13751 "00000000" // /* MW 9 */ + 13752 "00000000" // /* MW 8 */ + 13753 "01011011" // /* MW 7 */ + 13754 "00000001" // /* MW 6 */ + 13755 "00100000" // /* MW 5 */ + 13756 "00000000" // /* MW 4 */ + 13757 "11110000" // /* MW 3 */ + 13758 "00101100" // /* MW 2 */ + 13759 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 125 25 +.return_address + 13760 "10111010" // LDA p7, [sp, #-12]; MOVS p1, p6; MOV p0, p7 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13761 "01110010" // /* MW 9 */ + 13762 "01100000" // /* MW 8 */ + 13763 "00110111" // /* MW 7 */ + 13764 "00000000" // /* MW 6 */ + 13765 "10001011" // /* MW 5 */ + 13766 "10011000" // /* MW 4 */ + 13767 "00100001" // /* MW 3 */ + 13768 "11110011" // /* MW 2 */ + 13769 "11111110" // /* MW 1 */ + 13770 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13771 "00011001" // /* MW 3 */ + 13772 "11111011" // /* MW 2 */ + 13773 "00000111" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 99 11 + 13774 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13775 "00111001" // /* MW 3 */ + 13776 "11111100" // /* MW 2 */ + 13777 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 13778 "10011000" // LDA p0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13779 "00011110" // /* MW 3 */ + 13780 "00000100" // /* MW 2 */ + 13781 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 13782 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13783 "10011110" // /* MW 3 */ + 13784 "00000100" // /* MW 2 */ + 13785 "00000001" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 99 11 first +.tail_call + 13786 "10000100" // J #13232 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13232 delay_slots=5 */ + 13787 "00000000" // /* MW 5 */ + 13788 "00000000" // /* MW 4 */ + 13789 "11011000" // /* MW 3 */ + 13790 "00011001" // /* MW 2 */ + 13791 "00000000" // /* MW 1 */ +.src_ref 9 "transpose4d_adf_wrapper.cpp" 82 first +.delay_slot + 13792 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13793 "00000001" // /* MW 5 */ + 13794 "00000000" // /* MW 4 */ + 13795 "00000000" // /* MW 3 */ + 13796 "11111000" // /* MW 2 */ + 13797 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13799 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 99 11 +.delay_slot + 13800 "01000100" // MOVXM p2, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13801 "10000000" // /* MW 5 */ + 13802 "11001001" // /* MW 4 */ + 13803 "11000100" // /* MW 3 */ + 13804 "00000111" // /* MW 2 */ + 13805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13807 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj___func_end0 + 13809 "00000000" // /* MW 1 */ +.label __Z13_b719_wrapperPPv___func_begin0 +.label _Z13_b719_wrapperPPv +.function _b719_wrapper _Z13_b719_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 29 first +.src_ref 0 "0_0_reloadable0.cc" 31 79 +.function_start + 13824 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13825 "11000000" // /* MW 3 */ + 13826 "01100000" // /* MW 2 */ + 13827 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 31 79 first + 13828 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13829 "00011110" // /* MW 3 */ + 13830 "00011100" // /* MW 2 */ + 13831 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 33 46 first + 13832 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13833 "00011110" // /* MW 3 */ + 13834 "00010101" // /* MW 2 */ + 13835 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 32 80 first + 13836 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13837 "10011110" // /* MW 3 */ + 13838 "00000100" // /* MW 2 */ + 13839 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 30 4 first +.tail_call + 13840 "10000100" // J #13696 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13696 delay_slots=5 */ + 13841 "00000000" // /* MW 5 */ + 13842 "00000000" // /* MW 4 */ + 13843 "11000000" // /* MW 3 */ + 13844 "00011010" // /* MW 2 */ + 13845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13847 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b719_wrapperPPv__end +.label __Z13_b719_wrapperPPv___func_end0 + 13855 "00000000" // /* MW 1 */ +.label __Z13_b886_wrapperPPv___func_begin0 +.label _Z13_b886_wrapperPPv +.function _b886_wrapper _Z13_b886_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 37 first +.src_ref 0 "0_0_reloadable0.cc" 39 79 +.function_start + 13856 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13857 "11000000" // /* MW 3 */ + 13858 "01100000" // /* MW 2 */ + 13859 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 39 79 first + 13860 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13861 "00011110" // /* MW 3 */ + 13862 "00111100" // /* MW 2 */ + 13863 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 40 47 first + 13864 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13865 "10011110" // /* MW 3 */ + 13866 "11101100" // /* MW 2 */ + 13867 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 42 81 first + 13868 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13869 "10011110" // /* MW 3 */ + 13870 "00010101" // /* MW 2 */ + 13871 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 41 80 first + 13872 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13873 "00011110" // /* MW 3 */ + 13874 "00000101" // /* MW 2 */ + 13875 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 38 4 first +.tail_call + 13876 "10000100" // J #11424 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11424 delay_slots=5 */ + 13877 "00000000" // /* MW 5 */ + 13878 "00000000" // /* MW 4 */ + 13879 "01010000" // /* MW 3 */ + 13880 "00010110" // /* MW 2 */ + 13881 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13889 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b886_wrapperPPv__end +.label __Z13_b886_wrapperPPv___func_end0 + 13891 "00000000" // /* MW 1 */ +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function _b891_wrapper _Z13_b891_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 46 first +.src_ref 0 "0_0_reloadable0.cc" 48 79 +.function_start + 13904 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13905 "11000000" // /* MW 3 */ + 13906 "01100000" // /* MW 2 */ + 13907 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 48 79 first + 13908 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13909 "00011110" // /* MW 3 */ + 13910 "00111100" // /* MW 2 */ + 13911 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 49 47 first + 13912 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13913 "10011110" // /* MW 3 */ + 13914 "11101100" // /* MW 2 */ + 13915 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 51 81 first + 13916 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13917 "10011110" // /* MW 3 */ + 13918 "00010101" // /* MW 2 */ + 13919 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 50 80 first + 13920 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13921 "00011110" // /* MW 3 */ + 13922 "00000101" // /* MW 2 */ + 13923 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 47 4 first +.tail_call + 13924 "10000100" // J #12512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=12512 delay_slots=5 */ + 13925 "00000000" // /* MW 5 */ + 13926 "00000000" // /* MW 4 */ + 13927 "01110000" // /* MW 3 */ + 13928 "00011000" // /* MW 2 */ + 13929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + 13939 "00000000" // /* MW 1 */ +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function _b896_wrapper _Z13_b896_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 55 first +.src_ref 0 "0_0_reloadable0.cc" 57 79 +.function_start + 13952 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13953 "11000000" // /* MW 3 */ + 13954 "01100000" // /* MW 2 */ + 13955 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 57 79 first + 13956 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13957 "00011110" // /* MW 3 */ + 13958 "00011100" // /* MW 2 */ + 13959 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 58 79 first + 13960 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13961 "10011110" // /* MW 3 */ + 13962 "00101100" // /* MW 2 */ + 13963 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 60 81 first + 13964 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13965 "10011110" // /* MW 3 */ + 13966 "11110101" // /* MW 2 */ + 13967 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 59 47 first + 13968 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13969 "00011110" // /* MW 3 */ + 13970 "00000101" // /* MW 2 */ + 13971 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 56 4 first +.tail_call + 13972 "10000100" // J #7040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7040 delay_slots=5 */ + 13973 "00000000" // /* MW 5 */ + 13974 "00000000" // /* MW 4 */ + 13975 "11000000" // /* MW 3 */ + 13976 "00001101" // /* MW 2 */ + 13977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13979 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13981 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + 13987 "00000000" // /* MW 1 */ +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function _b901_wrapper _Z13_b901_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 64 first +.src_ref 0 "0_0_reloadable0.cc" 66 79 +.function_start + 14000 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14001 "11000000" // /* MW 3 */ + 14002 "01100000" // /* MW 2 */ + 14003 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 66 79 first + 14004 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14005 "00011110" // /* MW 3 */ + 14006 "00101100" // /* MW 2 */ + 14007 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 68 81 first + 14008 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14009 "00011110" // /* MW 3 */ + 14010 "11110101" // /* MW 2 */ + 14011 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 67 47 first + 14012 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14013 "10011110" // /* MW 3 */ + 14014 "00000100" // /* MW 2 */ + 14015 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 65 4 first +.tail_call + 14016 "10000100" // J #8400 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8400 delay_slots=5 */ + 14017 "00000000" // /* MW 5 */ + 14018 "00000000" // /* MW 4 */ + 14019 "01101000" // /* MW 3 */ + 14020 "00010000" // /* MW 2 */ + 14021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 + 14031 "00000000" // /* MW 1 */ +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function _b906_wrapper _Z13_b906_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 72 first +.src_ref 0 "0_0_reloadable0.cc" 74 79 +.function_start + 14032 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14033 "11000000" // /* MW 3 */ + 14034 "01100000" // /* MW 2 */ + 14035 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 74 79 first + 14036 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14037 "00011110" // /* MW 3 */ + 14038 "00101100" // /* MW 2 */ + 14039 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 76 81 first + 14040 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14041 "00011110" // /* MW 3 */ + 14042 "11110101" // /* MW 2 */ + 14043 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 75 47 first + 14044 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14045 "10011110" // /* MW 3 */ + 14046 "00000100" // /* MW 2 */ + 14047 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 73 4 first +.tail_call + 14048 "10000100" // J #9264 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9264 delay_slots=5 */ + 14049 "00000000" // /* MW 5 */ + 14050 "00000000" // /* MW 4 */ + 14051 "00011000" // /* MW 3 */ + 14052 "00010010" // /* MW 2 */ + 14053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14057 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14059 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 + 14063 "00000000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 9 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.cmico b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.cmico new file mode 100644 index 0000000000000000000000000000000000000000..f377058758269f564988080a1597f499edc1b997 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.cmico @@ -0,0 +1 @@ ++Mdec diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.lst b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.lst new file mode 100644 index 0000000000000000000000000000000000000000..ca5a46b8ce16faa30431f6ad067a245af6add294 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.lst @@ -0,0 +1,4879 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:44:07 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable0 me + +// Release: ipp V-2024.06-TGT-241219 + +.text_segment PM 2352 +.entry_point +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function_start + 2352 0x00 0xc2 0xd0 0xe9 0xe0 0x2c LDA r16, [p0]; NEZ r26, r1 + 2358 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 2364 0x0f 0xef 0x1d 0x98 ST p6, [sp, #-20] + 2368 0xfe 0x3a 0xb0 0x01 0xc8 0xd0 0x70 0x02 ST r14, [sp, #-16]; MOV r14, r3 + 2376 0xff 0x3e 0xb0 0x01 0xe8 0x50 0x70 0x02 ST r15, [sp, #-8]; MOV r15, r1 + 2384 0x0f 0xf7 0x9d 0x98 ST p7, [sp, #-12] + 2388 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 2392 0x1e 0x68 0x02 0x18 ADD.NC p6, r16, #4 + 2396 0x06 0x1e 0x16 0x98 LDA r16, [p6], #4 + 2400 0x06 0x3e 0x56 0x98 LDA r18, [p6], #12 + 2404 0x06 0xee 0x36 0x98 LDA r17, [p6], #-8 + 2408 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2412 0x00 0x00 NOPX + 2414 0x00 0x00 NOPX + 2416 0x00 0x00 NOPX + 2418 0x00 0x00 NOPX + 2420 0x00 0x00 NOPX + 2422 0x00 0x00 NOPX + 2424 0x14 0x21 0x22 0x18 SEL.EQZ r16, r16, r18, r27 + 2428 0x0e 0xd6 0x11 0x98 ST r16, [p6, #-12] + 2432 0xfc 0x1f 0xa0 0x35 0x39 0xe4 MOVX r16, #-1; MOV el0, r26 + 2438 0x00 0x00 NOPX + 2440 0x00 0x00 NOPX + 2442 0x00 0x00 NOPX + 2444 0x14 0x57 0x08 0x18 ACQ.COND r17, r16, r26 + 2448 0x04 0x41 0x29 0xa0 0x05 0x64 MOVX r17, #2; MOV r19, #1 + 2454 0xd5 0x23 0xb9 0x21 0x81 0xe4 LSHL r20, r26, r17; MOV r18, p0 + 2460 0x9c 0x9f 0x9c 0xd2 0xa2 0xa4 LTU r18, r19, r15; ADD.NC p6, r18, r20 + 2466 0xc0 0xd2 0xd7 0xe6 0x95 0x82 0x6e 0x60 0x72 0xba LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 + 2476 0xfd 0x4a 0xb0 0x03 0x4c 0x90 0x70 0x02 ST r18, [sp, #-24]; MOV r26, r18 + 2484 0x00 0x00 NOPX + 2486 0x00 0x00 NOPX + 2488 0x00 0x00 NOPX + 2490 0x00 0x00 NOPX + 2492 0x00 0x00 NOPX + 2494 0x1e 0x6a 0x02 0x18 ADD.NC p6, r20, #4 + 2498 0x06 0x1e 0x96 0x98 LDA r20, [p6], #4 + 2502 0x06 0x3e 0xd6 0x98 LDA r22, [p6], #12 + 2506 0x06 0xee 0xb6 0x98 LDA r21, [p6], #-8 + 2510 0x06 0x07 0x76 0x98 LDA r27, [p6] + 2514 0x00 0x00 NOPX + 2516 0x00 0x00 NOPX + 2518 0x00 0x00 NOPX + 2520 0x00 0x00 NOPX + 2522 0x00 0x00 NOPX + 2524 0x00 0x00 NOPX + 2526 0x15 0x29 0x62 0x18 SEL.EQZ r20, r20, r22, r27 + 2530 0x0e 0xd6 0x91 0x98 ST r20, [p6, #-12] + 2534 0x00 0x00 NOPX + 2536 0x00 0x00 NOPX + 2538 0x00 0x00 NOPX + 2540 0x00 0x00 NOPX + 2542 0x15 0x57 0x08 0x18 ACQ.COND r21, r16, r26 + 2546 0x14 0xa5 0x1d 0x98 LSHL r18, r18, r17 + 2550 0x14 0xa3 0xb9 0xb3 0x92 0xa4 LSHL r18, r2, r17; ADD.NC r19, r19, r18 + 2556 0x76 0x9e 0x0c 0xd3 0x92 0xa4 NEZ r26, r14; ADD.NC p6, r19, r18 + 2562 0xc0 0xca 0xdf 0xc6 0xab 0x0c LDA r18, [p6]; ST r26, [sp, #-32] + 2568 0x00 0x00 NOPX + 2570 0x00 0x00 NOPX + 2572 0x00 0x00 NOPX + 2574 0x00 0x00 NOPX + 2576 0x00 0x00 NOPX + 2578 0x00 0x00 NOPX + 2580 0x1f 0x69 0x02 0x18 ADD.NC p7, r18, #4 + 2584 0x07 0x3e 0x76 0x98 LDA r19, [p7], #12 + 2588 0x07 0xee 0x56 0x98 LDA r18, [p7], #-8 + 2592 0x07 0x1e 0x96 0x98 LDA r20, [p7], #4 + 2596 0x07 0x07 0x76 0x98 LDA r27, [p7] + 2600 0x00 0x00 NOPX + 2602 0x00 0x00 NOPX + 2604 0x00 0x00 NOPX + 2606 0x00 0x00 NOPX + 2608 0x00 0x00 NOPX + 2610 0x00 0x00 NOPX + 2612 0x14 0xe7 0x42 0x18 SEL.EQZ r19, r19, r20, r27 + 2616 0x0f 0xd6 0x71 0x98 ST r19, [p7, #-12] + 2620 0x00 0x00 NOPX + 2622 0x00 0x00 NOPX + 2624 0x00 0x00 NOPX + 2626 0x00 0x00 NOPX + 2628 0x14 0x97 0x08 0x18 ACQ.COND r18, r16, r26 + 2632 0x10 0x21 0x1d 0x98 LSHL r16, r0, r17 + 2636 0x18 0x88 0x20 0xf8 MOV dj0, r16 + 2640 0x00 0x07 0xce 0xc9 0x00 0x44 MOVXM p7, #509056 + 2646 0xe0 0x13 0xdf 0xb8 0x5b 0x0c LDA p1, [p7, dj0]; ST el0, [sp, #-36] + 2652 0x00 0x00 NOPX + 2654 0x00 0x00 NOPX + 2656 0x00 0x00 NOPX + 2658 0x00 0x00 NOPX + 2660 0x00 0x00 NOPX + 2662 0x00 0x00 NOPX +.no_stack_arguments + 2664 0x10 0x30 0x40 0x18 JL p1 +.delay_slot + 2668 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 2672 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2674 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2676 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 2678 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM +.return_address + 2688 0xe0 0xc6 0xd0 0x40 0x0a 0x2c LDA r17, [p7]; MOVX r16, #1 + 2694 0x07 0xdf 0x51 0x18 LDA r26, [sp, #-36] + 2698 0x07 0xe4 0x41 0x18 LDA dj0, [sp, #-28] + 2702 0x07 0xe8 0x29 0x18 LDA el0, [sp, #-24] + 2706 0x07 0xe0 0x09 0x18 LDA eh0, [sp, #-32] + 2710 0x00 0x00 NOPX + 2712 0x00 0x00 NOPX + 2714 0x18 0x68 0x88 0x18 ADD.NC p0, r17, #16 + 2718 0x00 0x06 0x36 0x98 LDA r17, [p0] + 2722 0x00 0x00 NOPX + 2724 0x00 0x00 NOPX + 2726 0x00 0x00 NOPX + 2728 0x00 0x00 NOPX + 2730 0x00 0x00 NOPX + 2732 0x00 0x00 NOPX + 2734 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2738 0x1e 0xc6 0xdd 0xaf 0x41 0xd4 LDA r17, [p0, #-4]; MOV r27, r15 + 2744 0xe0 0x4a 0xdd 0x40 0x39 0xd4 LDA r18, [p7, dj0]; MOV r26, el0 + 2750 0x00 0x00 NOPX + 2752 0x00 0x00 NOPX + 2754 0x00 0x00 NOPX + 2756 0x00 0x00 NOPX + 2758 0x00 0x00 NOPX + 2760 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2764 0x8c 0x66 0x4e 0xd2 0x10 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 + 2770 0xe0 0xc6 0xd1 0xec 0x63 0x0c LDA r17, [p7]; ST r17, [p0, #-4] + 2776 0x00 0x00 NOPX + 2778 0x00 0x00 NOPX + 2780 0x00 0x00 NOPX + 2782 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2784 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2786 0x1e 0xa1 0x1c 0xf8 MOV r26, eh0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2790 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2794 0xfe 0xc6 0xdd 0xc0 0x39 0xd4 LDA r17, [p7, #-4]; MOV r27, el0 + 2800 0x06 0x06 0x56 0x98 LDA r18, [p6] + 2804 0x00 0x00 NOPX + 2806 0x00 0x00 NOPX + 2808 0x00 0x00 NOPX + 2810 0x00 0x00 NOPX + 2812 0x00 0x00 NOPX + 2814 0x14 0x27 0x11 0x98 SUB r19, r16, r17 + 2818 0x8c 0x66 0x40 0xd2 0x14 0x24 SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 + 2824 0x00 0xc6 0xdf 0xec 0x63 0x0c LDA r17, [p0]; ST r17, [p7, #-4] + 2830 0x00 0x00 NOPX + 2832 0x00 0x00 NOPX + 2834 0x00 0x00 NOPX + 2836 0x00 0x00 NOPX + 2838 0x00 0x00 NOPX + 2840 0x00 0x00 NOPX + 2842 0x14 0x55 0x08 0x18 REL.COND r17, r16, r26 + 2846 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 2850 0x00 0xe6 0x36 0x98 LDA r17, [p0, #-8] + 2854 0x07 0xef 0x19 0x18 LDA p6, [sp, #-20] +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2858 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2862 0x07 0xf1 0xd1 0x18 LDA r14, [sp, #-16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2866 0x07 0xf9 0xf1 0x18 LDA r15, [sp, #-8] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2870 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2876 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2880 0x14 0x21 0x11 0x98 SUB r16, r16, r17 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2884 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2886 0x1e 0xd7 0x20 0xf8 MOV r27, r14 +.delay_slot + 2890 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.delay_slot + 2894 0x08 0xe6 0x11 0x98 ST r16, [p0, #-8] +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + +.text_segment PM 2912 +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function_start + 2912 0x03 0x85 0xd0 0x00 0x40 0x88 0x49 0x60 0x78 0xba LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 + 2922 0x03 0x81 0xd0 0x3e 0x57 0xe9 0x30 0x82 0x48 0xba LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 + 2932 0xff 0x81 0x00 0x00 0x02 0x00 0x00 0x00 0x70 0xba MOVA r1, #-4; PADDXM [sp], #64 + 2942 0x01 0x86 0x07 0xfd 0xb5 0x81 0x00 0x28 0x00 0x10 0x58 0x76 MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 + 2954 0x00 0x63 0x07 0xf9 0xd5 0xbf 0x57 0xaa 0x88 0x0f 0x58 0x76 MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 + 2966 0xfe 0xbe 0xb0 0x60 0x02 0x5c ST r15, [sp, #-12]; MOVX r24, #0 + 2972 0x00 0x00 NOPX + 2974 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 2978 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 2982 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 2986 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 2990 0x00 0x00 NOPX + 2992 0x00 0x00 NOPX + 2994 0x00 0x00 NOPX + 2996 0x00 0x00 NOPX + 2998 0x00 0x00 NOPX + 3000 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 3004 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 3008 0x00 0x1c 0x2e 0x98 LDA el0, [p0], #4 + 3012 0x00 0x1c 0x0e 0x98 LDA eh0, [p0], #4 + 3016 0x00 0x00 NOPX + 3018 0x00 0x00 NOPX + 3020 0x00 0x00 NOPX + 3022 0x00 0x00 NOPX + 3024 0x00 0x00 NOPX + 3026 0x09 0x1c 0x29 0x98 ST el0, [p1], #4 + 3030 0x09 0x1c 0x09 0x98 ST eh0, [p1], #4 + 3034 0x00 0x04 0x0e 0x98 LDA eh0, [p0] + 3038 0x00 0x14 0x2e 0x98 LDA el0, [p0, #4] + 3042 0x00 0x00 NOPX + 3044 0x00 0x00 NOPX + 3046 0x00 0x00 NOPX + 3048 0x00 0x00 NOPX + 3050 0x00 0x00 NOPX + 3052 0x09 0x04 0x09 0x98 ST eh0, [p1] + 3056 0x09 0x14 0x29 0x98 ST el0, [p1, #4] + 3060 0x02 0xdd 0xaa 0x98 LDA.u8 r13, [p2], #-3 + 3064 0x02 0x1e 0x2a 0x98 LDA.u8 r17, [p2], #1 + 3068 0x02 0xbd 0xca 0x98 LDA.u8 r14, [p2], #-5 + 3072 0x02 0xfd 0xfa 0x98 LDA.u16 r15, [p2], #-2 + 3076 0x02 0x0a 0x6a 0x98 LDA.u8 r19, [p2], m0 + 3080 0x02 0xac 0xea 0x98 LDA.u8 r7, [p2], #-6 + 3084 0x00 0x00 NOPX + 3086 0x13 0x42 0x1d 0x98 LSHL r1, r13, r1 + 3090 0x0c 0x20 0xf9 0x31 0x01 0x24 EQ r16, r1, r16; ADD.NC r18, r17, #1 + 3096 0x14 0xa4 0x5d 0x98 LSHL r18, r18, r5 + 3100 0x13 0xf6 0x47 0x98 EQ r27, r15, r4 + 3104 0xc1 0x4a 0x40 0xb7 0x39 0xe4 SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 + 3110 0x14 0x7b 0x22 0x18 SEL.EQZ r29, r17, r18, r27 + 3114 0x11 0xcc 0x67 0x98 EQ r6, r7, r6 + 3118 0x11 0xb7 0x04 0x98 AND r27, r6, r16 + 3122 0x7b 0xeb 0xbc 0xbb 0x41 0xe4 LSHL r15, r15, r21; MOV r25, r27 + 3128 0xfd 0xbe 0xb3 0x9b 0x04 0x5c ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 + 3134 0xc8 0x06 0x48 0x40 0x01 0x84 JNZ r25, #3216 +.delay_slot + 3140 0x11 0xb6 0x47 0x98 EQ r27, r6, r4 +.delay_slot + 3144 0x13 0x71 0x44 0x98 AND r24, r13, r20 +.delay_slot + 3148 0x14 0xfc 0x5d 0x98 LSHL r30, r19, r5 +.delay_slot + 3152 0x16 0xe8 0x4d 0x98 LSHL r20, r27, r4 +.delay_slot + 3156 0x11 0x8c 0x32 0x18 SEL.EQZ r6, r6, r3, r27 + 3160 0xd8 0x06 0x48 0x40 0x01 0x84 JNZ r27, #3216 +.delay_slot +.swstall delay_slot + 3166 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3168 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3170 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3172 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3174 0x00 0x00 NOPX + 3176 0x00 0x2f 0x00 0x00 0x01 0x98 0x00 0x00 0x20 0xba MOVA r15, #1; J #3264 +.delay_slot + 3186 0x00 0x1a 0x00 0x3e 0x57 0xab 0x88 0x0c 0x58 0xba MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 +.delay_slot + 3196 0x05 0x42 0x21 0x20 0x41 0x64 MOVX r21, #4; MOV r2, #16 +.delay_slot + 3202 0x10 0x1a 0x0d 0x18 MOVX r13, #3 +.delay_slot + 3206 0x10 0x0e 0x3d 0x18 MOVX r7, #15 +.delay_slot + 3210 0x00 0x2c 0xff 0x91 0xe2 0x2c NOPA; MOVX r4, #-4 +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 3216 0x02 0x02 0x81 0x82 0x0b 0x01 0x50 0x88 0x8f 0xfc 0x58 0x76 MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 + 3228 0x20 0x18 0xe0 0x01 0xa0 0x0b 0x88 0x0c 0x58 0xba ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 + 3238 0x02 0x02 0x00 0x3e 0x57 0xa9 0xe8 0x01 0x58 0xba MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 + 3248 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x71 0xe9 0xa8 0x03 0x58 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 + 3264 0x5d 0xc5 0x50 0x1b 0xb3 0x3c 0x00 0x3c 0x58 0xba LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 + 3274 0x41 0x05 0x50 0x03 0x2d 0x12 0x87 0xcd 0x58 0xba LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 + 3284 0x00 0x57 0x00 0x3b 0xda 0x91 0x80 0x37 0x58 0xba MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 + 3294 0x01 0x03 0x00 0x2b 0xb0 0x3d 0x07 0xbc 0x58 0xba MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 + 3304 0x40 0x10 0x00 0x1f 0x6c 0x6c 0x80 0x70 0x58 0xba MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 + 3314 0xb5 0x92 0x08 0x1e 0x5d 0x64 EXTEND.u8 r22, r22; MOV m4, #-105 + 3320 0xfe 0x5a 0xb0 0x2d 0x61 0x6f 0x80 0x31 0x59 0x3a ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 + 3330 0xf7 0xba 0x3c 0x1f 0x05 0x64 SUB r30, r30, r29; MOV m6, #-63 + 3336 0x13 0xc2 0x11 0x98 SUB r1, r15, r1 + 3340 0x8f 0xc3 0xf0 0xa0 0x1d 0x64 MUL r31, r17, r1; MOV r1, #7 + 3346 0x16 0xa3 0x21 0x98 SUB r17, r26, r18 + 3350 0x17 0xfe 0x1d 0x98 LSHL r31, r31, r1 + 3354 0x55 0x7e 0x30 0x3b 0xf1 0xee 0x80 0x57 0x59 0x3a ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 + 3364 0x4d 0x55 0x50 0x2f 0x30 0x3d 0x87 0xb2 0x58 0xba LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3374 0xfd 0x4e 0xb9 0xcc 0x7b 0x5c ST r19, [sp, #-24]; LSHL r19, r19, r3 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3380 0x49 0x54 0xe0 0x3f 0x6b 0x2d 0x00 0xf6 0x58 0xba ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3390 0x45 0x43 0x50 0x27 0x38 0x10 0x87 0x50 0x58 0xba LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3400 0x15 0xfe 0x67 0x98 EQ r31, r23, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3404 0x10 0xe0 0x67 0x98 EQ r16, r3, r6 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3408 0x17 0xf7 0x05 0x98 OR r27, r31, r16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3412 0x11 0xeb 0x54 0x98 AND r21, r7, r21 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3416 0xf7 0xa3 0xd8 0xa0 0x61 0x64 ASHL r30, r30, r17; MOV r17, #24 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3422 0xfc 0x42 0xb0 0x1f 0x29 0x6f 0xcf 0x80 0x49 0x3a ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 + 3432 0x43 0xea 0x3f 0x46 0x3b 0x5c ST r26, [p2], #4; LSHL r17, r30, r17 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3438 0x51 0x6a 0x30 0x02 0x00 0xa8 0x50 0x02 ST r26, [p2], m4; MOV m4, #168 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3446 0x5d 0x49 0x57 0xe7 0xf5 0xa7 0xb0 0x2c 0x0d 0xce 0x78 0x76 LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3458 0x16 0xe3 0x15 0x98 OR r17, r27, r17 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3462 0x16 0xb7 0x81 0x98 SUB r27, r26, r24 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3466 0x14 0xb0 0x90 0x18 EXTEND.u8 r24, r18 + 3470 0x00 0x00 NOPX + 3472 0x00 0x00 NOPX + 3474 0x00 0x00 NOPX + 3476 0x13 0xe5 0x21 0x98 SUB r18, r15, r18 + 3480 0x0a 0xca 0x51 0x98 ST r18, [p2], m6 + 3484 0x02 0xaa 0x4a 0x98 LDA.u8 r18, [p2], m5 + 3488 0x00 0x00 NOPX + 3490 0x00 0x00 NOPX + 3492 0x00 0x00 NOPX + 3494 0x00 0x00 NOPX + 3496 0x00 0x00 NOPX + 3498 0x00 0x00 NOPX + 3500 0x14 0xa4 0xe1 0x98 SUB r18, r18, r14 + 3504 0x14 0xa5 0xbe 0x98 ASHL r18, r18, r27 + 3508 0x14 0xa4 0x2d 0x98 LSHL r18, r18, r2 + 3512 0x00 0x01 0x0d 0xa0 0x00 0x44 MOVXM r27, #65536 + 3518 0x16 0xe5 0x20 0x98 ADD r18, r27, r18 + 3522 0x00 0xff 0x0d 0xa0 0x00 0x44 MOVXM r27, #16711680 + 3528 0xde 0xe4 0x99 0x3f 0xc1 0x64 AND r27, r27, r18; MOV r18, #-16 + 3534 0xde 0xe2 0xb8 0xbf 0xe1 0x64 OR r27, r27, r17; MOV r17, #-8 + 3540 0x43 0xee 0x39 0xce 0x3b 0x5c ST r27, [p2], #4; LSHL r19, r19, r17 + 3546 0x16 0xb5 0x31 0x98 SUB r26, r26, r19 + 3550 0x15 0x29 0xad 0x98 LSHL r20, r20, r26 + 3554 0x13 0xb5 0x65 0x98 OR r26, r14, r22 + 3558 0x4d 0x6a 0x3f 0x69 0x20 0x5c ST r26, [p2], m3; EXTEND.u8 r26, r30 + 3564 0x49 0x65 0x50 0x37 0x49 0x6f 0xce 0xa8 0xa8 0xba LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3574 0xb5 0xa3 0xb8 0xa3 0xf9 0x64 LSHL r22, r22, r17; MOV r17, #254 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3580 0x45 0x64 0xed 0x6b 0x1f 0x2c ST.s8 r25, [p2], m1; MUL r26, r26, r24 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3586 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3588 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3590 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3592 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3594 0x86 0x5f 0xbd 0xb5 0xca 0xa4 LSHL r25, r16, r15; ADD.NC r27, r21, r25 + 3600 0xf8 0x07 0x48 0x40 0x01 0x84 JNZ r31, #3728 +.delay_slot + 3606 0x9d 0x41 0xed 0xbb 0xf2 0xa4 ADD r21, r19, #3; ADD.NC r27, r27, r30 +.delay_slot + 3612 0x16 0xeb 0x5d 0x98 LSHL r21, r27, r21 +.delay_slot + 3616 0x16 0x63 0x14 0x98 AND r17, r25, r17 +.delay_slot + 3620 0x51 0x46 0x30 0x0d 0xbe 0x3e 0x28 0x01 0x59 0x3a ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 +.delay_slot + 3630 0x18 0x9b 0x9c 0xf8 MOV el1, r27 + 3634 0x07 0xe3 0x91 0x18 LDA r28, [sp, #-32] + 3638 0x00 0x00 NOPX + 3640 0x00 0x00 NOPX + 3642 0x00 0x00 NOPX + 3644 0x00 0x00 NOPX + 3646 0x00 0x00 NOPX + 3648 0x00 0x00 NOPX + 3650 0xe0 0x07 0x48 0x40 0x01 0x84 JNZ r28, #3728 +.delay_slot +.swstall delay_slot + 3656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3658 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3660 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3662 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 3664 0x00 0x00 NOPX + 3666 0x00 0xbc 0x00 0x01 0x10 0x8b 0x28 0x40 0x58 0xba MOVA r28, #5; MOVX r17, #4; MOV r25, #64 + 3676 0x14 0x7e 0xd2 0x18 SEL.EQZ r31, r17, r13, r27 + 3680 0x16 0x76 0x67 0x98 EQ r27, r25, r6 + 3684 0xff 0x38 0x4f 0xa0 0x01 0x64 SEL.EQZ r28, r31, r28, r27; MOV r31, #0 + 3690 0x10 0x32 0x50 0x18 EXTEND.s8 r25, r0 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3694 0x16 0x7d 0xef 0x98 MUL r30, r25, r30 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3698 0xce 0xe3 0x5d 0xc4 0x39 0xe4 LT r27, r25, r17; MOV r27, el1 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3704 0x13 0xe3 0x82 0x18 SEL.EQZ r17, r15, r24, r27 + 3708 0x14 0x63 0xef 0x98 MUL r17, r17, r30 + 3712 0x17 0xf9 0xc1 0x98 SUB r28, r31, r28 + 3716 0x14 0x63 0xce 0x98 ASHL r17, r17, r28 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3720 0x00 0x23 0x14 0x81 0x00 0x00 0x1c 0x22 EXTEND.u8 r17, r17; NOPV +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3728 0x00 0x19 0x00 0x3f 0xc7 0xeb 0x70 0x0e 0x78 0xba MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3738 0x16 0x7f 0xc2 0x18 SEL.EQZ r31, r25, r28, r27 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3742 0xfd 0x6e 0x20 0x21 0x04 0x83 0x4f 0x74 0xa8 0xba LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3752 0x09 0x1e 0x00 0x29 0x44 0x83 0xa8 0x09 0x58 0xba MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3762 0x17 0x73 0xe2 0x18 SEL.EQZ r25, r29, r30, r27 + 3766 0x15 0xf9 0x88 0x98 NE r28, r23, r24 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3770 0x17 0x7b 0x3d 0x98 LSHL r29, r29, r19 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3774 0xfd 0xde 0x20 0x00 0x00 0x03 0x0a 0x04 0x10 0xba LDA r23, [sp, #-20]; MOVXM r24, #1032 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3784 0xcc 0xe7 0xbf 0x3a 0xff 0x24 LSHL r19, r25, r19; ADD.NC r30, r26, #-1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3790 0x14 0xcf 0xe6 0x18 MAC r7, r7, r19, r30 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3794 0xb4 0xd2 0x0b 0xa8 0x29 0x64 EXTEND.u8 r19, r22; MOV r23, #522 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3800 0xbd 0xb0 0x4d 0x21 0x01 0x64 SEL.EQZ r22, r23, r24, r27; MOV r26, #64 + 3806 0x31 0xb5 0x1d 0xc2 0x39 0xe4 NE r6, r6, r26; MOV r27, eh0 + 3812 0x11 0xcf 0x24 0x98 AND r7, r7, r18 + 3816 0xbd 0xde 0x4d 0xa6 0x41 0xe4 SEL.EQZ r23, r23, r15, r27; MOV r27, r6 + 3822 0x29 0x08 0x49 0x20 0x7d 0x64 SEL.EQZ r4, r5, r4, r27; MOV r18, #31 + 3828 0x15 0xef 0x24 0x98 AND r23, r23, r18 + 3832 0xdc 0x1e 0x00 0x20 0x42 0x6e 0x4f 0x70 0x58 0xba MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 + 3842 0x17 0xbd 0x22 0x18 SEL.EQZ r30, r30, r18, r27 + 3846 0x12 0x05 0x00 0x2f 0xa9 0xfe 0x09 0x20 0x58 0xba MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 + 3856 0x14 0x20 0x52 0x18 SEL.EQZ r16, r16, r5, r27 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3860 0x8e 0x09 0xfd 0xbd 0xf2 0xa4 MUL r24, r17, r4; ADD.NC r27, r29, r30 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3866 0x84 0x3f 0xbd 0xc4 0x39 0xe4 LSHL r16, r16, r31; MOV r27, el1 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3872 0xfb 0xee 0xb7 0x6b 0x5f 0x5c ST r27, [sp, #-36]; MUL r26, r14, r26 + 3878 0x10 0x84 0x32 0x18 SEL.EQZ r2, r2, r3, r27 + 3882 0x13 0x7e 0x0c 0x98 LTU r31, r13, r0 + 3886 0x15 0x31 0x8f 0x98 MUL r24, r20, r24 + 3890 0x17 0xf7 0xc5 0x98 OR r27, r31, r28 + 3894 0x10 0x03 0x07 0xee 0x95 0xb7 0xc0 0xee 0x89 0x00 0x58 0x76 MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 + 3906 0x1d 0x28 0x40 0xb7 0x39 0xe4 SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 + 3912 0x00 0x00 0x0f 0xac 0x0c 0x44 MOVXM r31, #1542 + 3918 0xfd 0x12 0xb0 0x1f 0xb0 0x3c 0x89 0x3f 0xc9 0x3a ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 + 3928 0xed 0x8c 0x82 0x1c 0x91 0xad 0xff 0x92 0xcc 0x7f 0xc8 0x76 MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 + 3940 0x4d 0x5a 0x30 0x2b 0x57 0xef 0x70 0x8e 0x79 0x3a ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 + 3950 0x02 0xd9 0x02 0x1f 0x51 0xa9 0x4e 0x0e 0x00 0x58 0x58 0x76 MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3962 0x51 0x62 0x3f 0xe3 0x24 0x5c ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3968 0xfc 0x6e 0x22 0xef 0x91 0xab 0xce 0x0f 0x69 0x90 0x78 0x76 LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3980 0x22 0xf3 0x00 0x29 0xce 0x12 0x8c 0xff 0xc8 0xba MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3990 0xf7 0x1d 0x00 0x3b 0xea 0x73 0x70 0x8e 0x78 0xba MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4000 0xfc 0xee 0x2e 0xca 0x44 0x2c LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4006 0x04 0x1f 0x00 0x3f 0x39 0x93 0x69 0x90 0x78 0xba MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4016 0x17 0xc4 0x22 0x18 SEL.EQZ r2, r31, r2, r27 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4020 0x10 0xeb 0x51 0x98 SUB r21, r3, r21 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4024 0x47 0x8e 0x30 0x04 0x27 0x90 0x6f 0xc0 0x59 0x3a ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4034 0x17 0x38 0x32 0x18 SEL.EQZ r28, r28, r3, r27 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 4038 0x15 0xfe 0x7f 0x98 MUL r31, r23, r7 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 4042 0xfb 0xc6 0x2e 0x0c 0x64 0x2c LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4048 0x5d 0x8e 0x30 0x35 0x29 0x7c 0x80 0x28 0x59 0x3a ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4058 0x45 0x56 0x31 0x0d 0xe4 0x5c ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4064 0xfe 0x02 0x20 0x06 0x38 0xfe 0xa9 0xfc 0xa8 0xba LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4074 0xfd 0xc6 0x22 0x1f 0x11 0x80 0x05 0x06 0x06 0x7a LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4084 0x4f 0xce 0x30 0x00 0x00 0x3e 0x6f 0xf8 0x11 0x3a ST r19, [p2], #28; MOVXM r19, #65520 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4094 0x43 0xd2 0x30 0x3f 0x49 0xa4 0x4b 0xbf 0xc9 0x3a ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4104 0x43 0xc6 0x31 0x56 0x9c 0x5c ST r17, [p2], #4; MSC r21, r21, r2, r20 + 4110 0x43 0x8a 0x3f 0x7a 0x81 0x5c ST r2, [p2], #4; ADD r30, r30, r20 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 4116 0x43 0xfa 0x38 0x73 0xe3 0x5c ST r30, [p2], #4; SUB r28, r16, r31 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 4122 0x43 0xd6 0x30 0x2d 0xf8 0x30 0x60 0x00 0x59 0x3a ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4132 0x43 0x8c 0x30 0x3e 0x20 0x7e 0x2c 0x7f 0xc9 0x3a ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 + 4142 0x43 0x8c 0x38 0xb8 0x0c 0x5c ST dc0, [p2], #4; MAC r14, r14, r17, r0 + 4148 0x43 0xda 0x30 0x27 0x01 0x24 0x48 0x00 0x59 0x3a ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 + 4158 0x43 0xf2 0x30 0x05 0x1f 0x8f 0x70 0x0e 0x79 0x3a ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 + 4168 0x43 0x92 0x3b 0xb9 0xdf 0x5c ST r4, [p2], #4; MUL r14, r23, r14 + 4174 0x43 0xc6 0x30 0x21 0x0f 0x8c 0x08 0x06 0x59 0x3a ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4184 0x09 0x00 0x02 0x1e 0x11 0x9b 0x00 0x13 0x69 0x90 0x78 0x76 MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4196 0xfd 0x16 0x20 0x14 0xa4 0x2c LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4202 0x11 0x63 0xaf 0x98 MUL r17, r5, r26 + 4206 0x43 0x8c 0x30 0x07 0x08 0x6d 0x07 0xc8 0x59 0x3a ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 + 4216 0xfc 0x63 0x02 0x48 0x61 0xa0 0xf7 0xed 0xa8 0xc1 0xc8 0x76 MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 + 4228 0x41 0x0a 0x36 0xc0 0x7b 0x5c ST r2, [p2], m0; LSHL r16, r13, r3 + 4234 0x43 0xda 0x38 0x8e 0x41 0x5c ST r22, [p2], #4; ADD r3, r17, r18 + 4240 0x43 0xca 0x38 0xc8 0x9c 0x5c ST r18, [p2], #4; MSC r18, r18, r17, r4 + 4246 0x43 0x92 0x32 0x94 0xdb 0x5c ST r4, [p2], #4; LSHL r5, r5, r6 + 4252 0x43 0x8e 0x30 0x1a 0x38 0x04 0x0f 0xfd 0x59 0x3a ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 + 4262 0x10 0xc0 0x0e 0x98 ASHL r0, r3, r0 + 4266 0x43 0xca 0x37 0x10 0x1f 0x5c ST r18, [p2], #4; MUL r4, r14, r0 + 4272 0x43 0x8c 0x30 0x0c 0x3b 0x5c ST dc0, [p2], #4; LSHL r3, r0, r1 + 4278 0xff 0xb6 0x22 0x1c 0x61 0x80 0x03 0xc6 0x31 0xfa LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 + 4288 0xff 0x3a 0x22 0x1c 0x91 0xba 0x70 0x30 0x28 0x3f 0xc8 0x76 LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 + 4300 0xfe 0xbe 0x22 0x1c 0x31 0x80 0x01 0x41 0xaf 0xfa LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 + 4310 0x43 0x8e 0x30 0x50 0x00 0x5c ST r3, [p2], #4; RET lr +.delay_slot + 4316 0x0a 0x5c 0xf1 0x98 ST r7, [p2], #20 +.delay_slot + 4320 0x0a 0x1c 0x11 0x98 ST r0, [p2], #4 +.delay_slot + 4324 0x0a 0x1c 0x51 0x98 ST r2, [p2], #4 +.delay_slot + 4328 0x0a 0x04 0x51 0x98 ST r2, [p2] +.delay_slot + 4332 0x42 0x8a 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r2, [p2, #4]; PADDXM [sp], #-64 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + +.text_segment PM 4352 +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function_start + 4352 0x00 0x03 0x82 0x84 0x8b 0x01 0x80 0x08 0x0a 0x60 0x78 0x76 MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 + 4364 0x00 0x06 0x88 0x28 0x28 0x34 0x01 0x36 0x00 0x21 0x20 0x09 0x60 0x7e MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 + 4378 0x63 0x94 0xd0 0x00 0x01 0xf2 0x32 0x32 0x10 0xba LDA dn1, [p3], #4; MOVXM p4, #509028 + 4388 0x63 0x90 0xd0 0x00 0x00 0x04 0x78 0xc8 0x10 0xba LDA m1, [p3], #4; MOVXM ls, #4496 + 4398 0x60 0x80 0xd0 0x00 0x00 0x05 0xb8 0xe0 0x10 0xba LDA m0, [p3]; MOVXM le, #4544 + 4408 0x7a 0x82 0xd1 0x00 0x01 0x54 LDA r0, [p3, #-12]; MOV dj0, #0 + 4414 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 4418 0x00 0x00 NOPX + 4420 0x00 0x00 NOPX + 4422 0x00 0x0a 0x80 0x85 0x01 0xf4 VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 + 4428 0x3e 0x30 0x14 0x18 VLDB.POP.512.2D x0, [p0, lf0, r24, d1] +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4432 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4436 0x00 0x0a 0x8a 0xe0 0xfd 0x34 VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4442 0xc6 0x02 0x80 0xf5 0x00 0x1c VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4448 0x3c 0x14 0x14 0x18 VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4452 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4454 0x00 0x2c 0xf0 0x00 0x54 0x00 0x01 0xa5 0x7e 0xba NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4464 0x00 0x2c 0xfc 0x60 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4480 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4496 0x00 0x2c 0xf8 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4512 0x00 0x2c 0xf0 0x00 0xad 0x80 0x03 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 0x00 0x2c 0xfc 0x60 0x29 0x00 0x03 0x00 0x00 0x00 0x01 0xc5 0x78 0x00 0x00 0xe1 NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4544 0x00 0x2c 0xf0 0x00 0x23 0x00 0x03 0x00 0x00 0x00 0x40 0xc5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4560 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4562 0x0d 0x80 0x03 0x18 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4566 0x20 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4574 0x18 0x81 0x8a 0xf8 VCONV.fp32.bf16 cmh0, x0 + 4578 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 4582 0xb0 0x00 0x60 0x00 0x01 0xc5 0x70 0x02 VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 + 4590 0x20 0x00 0x60 0x00 0x40 0xc5 0x70 0x02 VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 + 4598 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] + 4602 0xb0 0x00 0x60 0x50 0x00 0x5c VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr +.delay_slot + 4608 0x09 0x00 0x03 0x18 VST.FLUSH.512.CONV [p2, sf, r26] +.delay_slot + 4612 0x0b 0x00 0x03 0x18 VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] +.delay_slot +.swstall delay_slot + 4616 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4618 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4620 0x00 0x00 NOPX +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + +.text_segment PM 4624 +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function_start + 4624 0xf5 0xe0 0x86 0x3f 0x20 0x00 0x80 0x00 0x00 0x0e 0x91 0x11 0x60 0x7e MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 + 4638 0x00 0x73 0x07 0xf1 0x95 0xbf 0xc5 0x0a 0x2b 0x60 0x78 0x76 MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 + 4650 0x00 0x19 0x07 0xda 0x35 0x81 0x10 0x29 0x34 0x47 0x08 0x76 MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 + 4662 0x40 0xca 0xd7 0xf5 0x35 0x80 0x40 0x03 0xa8 0x00 0x10 0x76 LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 + 4674 0x0b 0x18 0x87 0xfd 0xd5 0x80 0x7f 0xff 0xef 0xff 0x90 0x76 MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 + 4686 0x00 0xb4 0x07 0xe1 0xb5 0x81 0x61 0x0a 0x07 0xec 0x58 0x76 MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 + 4698 0x01 0x95 0x07 0xed 0xf5 0x87 0x77 0xca 0x87 0xc4 0x58 0x76 MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 + 4710 0xff 0x73 0xb0 0x03 0x80 0x40 0x50 0x02 ST p7, [sp, #-8]; MOV m7, #64 + 4718 0x0f 0xe4 0x3d 0x98 ST lr, [sp, #-28] + 4722 0x00 0x00 NOPX + 4724 0x17 0x59 0x20 0x98 ADD r12, r29, r18 + 4728 0x41 0x32 0x36 0x77 0x9b 0x5c ST r12, [p2], m0; LSHL r29, r12, r28 + 4734 0x5b 0xf9 0x5e 0xf2 0x2f 0x2c LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 + 4740 0x02 0xc9 0x2a 0x98 LDA.u8 r9, [p2], m6 + 4744 0x00 0x00 NOPX + 4746 0x00 0x00 NOPX + 4748 0x00 0x00 NOPX + 4750 0x00 0x00 NOPX + 4752 0x00 0x00 NOPX + 4754 0x17 0x77 0xec 0x98 LTU r27, r29, r30 + 4758 0x16 0x5d 0x32 0x18 SEL.EQZ r14, r25, r19, r27 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4762 0x17 0xf6 0xcc 0x98 LTU r27, r31, r12 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4766 0x51 0x70 0xee 0xb7 0xcf 0x2c ST.s8 r28, [p2], m4; EQ r13, r29, r30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4772 0x13 0x7f 0x1d 0x98 LSHL r31, r13, r17 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4776 0x16 0x58 0xe2 0x18 SEL.EQZ r12, r25, r14, r27 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4780 0x17 0xf9 0xc5 0x98 OR r28, r31, r28 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4784 0x8e 0xfd 0x9e 0x3c 0x62 0xa4 LTU r27, r17, r30; ADD.NC r28, r28, r12 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4790 0x16 0x79 0xc2 0x18 SEL.EQZ r28, r25, r28, r27 + 4794 0x14 0x7f 0xcc 0x98 LTU r31, r17, r28 + 4798 0x55 0x7e 0x3e 0xf7 0xd1 0x5c ST r31, [p2], m5; NE r29, r29, r30 + 4804 0x5d 0x79 0x54 0xb2 0x31 0x2c LDA.u8 r30, [p2], m7; NE r12, r9, r17 + 4810 0x00 0x00 NOPX + 4812 0x00 0x00 NOPX + 4814 0x00 0x00 NOPX + 4816 0x00 0x00 NOPX + 4818 0x00 0x00 NOPX + 4820 0x00 0x00 NOPX + 4822 0xf5 0xad 0x1f 0xbe 0xfc 0x24 NE r22, r30, r22; ADD.NC r31, r30, #-4 + 4828 0x60 0x09 0x90 0x40 0x01 0x84 JNZ r12, #4896 +.delay_slot + 4834 0x17 0x93 0x48 0x98 NE r9, r30, r20 +.delay_slot + 4838 0x17 0xfe 0x90 0x18 EXTEND.u8 r31, r31 +.delay_slot + 4842 0x12 0x6d 0x64 0x98 AND r22, r9, r22 +.delay_slot + 4846 0x17 0xef 0x7c 0x98 LTU r23, r31, r23 +.delay_slot + 4850 0x15 0xe1 0x64 0x98 AND r16, r23, r22 + 4854 0xe8 0x09 0x90 0x40 0x01 0x84 JNZ r29, #4896 +.delay_slot + 4860 0x0f 0xeb 0x1d 0x98 ST p6, [sp, #-24] +.delay_slot +.swstall delay_slot + 4864 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4866 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4868 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4870 0x00 0x00 NOPX + 4872 0x00 0x3b 0x00 0x00 0x02 0x6a 0x00 0x00 0x20 0xba MOVA r27, #1; J #4944 +.delay_slot + 4882 0x18 0x19 0x9c 0xf8 MOV el0, r25 +.delay_slot + 4886 0x10 0x26 0x05 0x18 MOVX r19, #1 +.delay_slot +.swstall delay_slot + 4890 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4892 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4894 0x00 0x00 NOPX +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 4896 0x00 0x95 0x07 0xeb 0x1d 0xab 0xbf 0x3c 0x0c 0xce 0x78 0x76 MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 + 4908 0x17 0xab 0x5d 0x98 LSHL r21, r30, r21 + 4912 0x15 0x6b 0x92 0x18 SEL.EQZ r21, r21, r25, r27 + 4916 0x14 0xf7 0xe7 0x98 EQ r27, r19, r30 + 4920 0xac 0xf2 0x4d 0xb0 0x41 0xe4 SEL.EQZ r19, r21, r25, r27; MOV r27, r16 + 4926 0x16 0x67 0x32 0x18 SEL.EQZ r19, r25, r19, r27 + 4930 0x17 0x29 0x44 0x98 AND r20, r28, r20 + 4934 0x15 0x36 0xf0 0x18 NEZ r27, r20 + 4938 0x00 0x2c 0xf9 0xcf 0x8b 0x2c NOPA; OR r19, r19, r28 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 + 4944 0x01 0x90 0x82 0x6f 0x71 0xba 0x02 0x70 0x10 0x00 0x60 0x76 MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4992 +.delay_slot + 4956 0x02 0x8a 0x67 0x18 ST.s8 r19, [p2], m4 +.delay_slot +.swstall delay_slot + 4960 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4962 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4964 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 4966 0x00 0x00 NOPX + 4968 0x00 0xff 0xfa 0x3f 0xfe 0x44 MOVXM r20, #16777215 + 4974 0x14 0xa5 0x44 0x98 AND r18, r18, r20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4978 0x00 0x2c 0xf6 0xec 0xa3 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r18, [p3, #28]; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4992 0x51 0xd2 0xd0 0x27 0x44 0x82 0xcf 0xfd 0x58 0xba LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5002 0x00 0x52 0x00 0x29 0x5f 0xfa 0x00 0x24 0x58 0xba MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5012 0x51 0x5a 0xd7 0xd0 0x2d 0xab 0x6b 0x26 0x07 0xcc 0x58 0x76 LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5024 0x51 0x5e 0xd7 0xde 0xd5 0xbf 0x37 0xea 0x00 0xc4 0x58 0x76 LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 + 5036 0x02 0xff 0xb6 0x98 LDA r29, [p2], #-4 + 5040 0x02 0x8b 0xf6 0x98 LDA r31, [p2], m4 + 5044 0x01 0x06 0xb6 0x98 LDA r21, [p1] + 5048 0x00 0xd2 0xda 0x26 0x5b 0x2c LDA r20, [p0]; LSHL r9, r20, r18 + 5054 0x04 0x07 0xd6 0x98 LDA r30, [p4] + 5058 0x15 0xad 0x2d 0x98 LSHL r22, r22, r18 + 5062 0x00 0x00 NOPX + 5064 0x17 0x67 0x3e 0x98 ASHL r19, r29, r19 + 5068 0x17 0xe3 0x18 0x98 NE r17, r31, r17 + 5072 0x88 0x0a 0x20 0x40 0x01 0x84 JNZ r17, #5184 +.delay_slot + 5078 0xbd 0xa5 0xba 0xb5 0xb2 0xa4 LSHL r22, r23, r18; ADD.NC r21, r21, r22 +.delay_slot + 5084 0x9d 0x65 0xb0 0x95 0xb2 0xa4 LSHL r21, r19, r18; ADD.NC dn0, r21, r22 +.delay_slot + 5090 0xfa 0x84 0xb0 0x01 0xca 0x68 0xa0 0x02 ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 +.delay_slot + 5098 0x1b 0xd0 0x80 0xf8 MOV r15, dn0 +.delay_slot + 5102 0x1e 0x6a 0xf9 0x58 ADD.NC p6, r21, r30 + 5106 0x00 0x07 0xce 0xc8 0xc8 0x44 MOVXM p7, #509028 + 5112 0xe0 0xc4 0x50 0xb4 0x80 0x2c LDA.s8 r17, [p7]; MOVX vaddSign0, #1 + 5118 0x00 0x00 NOPX + 5120 0xff 0x7f 0x0a 0x20 0x00 0x44 MOVXM r20, #-8454144 + 5126 0x18 0x02 0x91 0x78 VINSERT.32 x0, x0, #0, r20 + 5130 0x1d 0x15 0xe0 0xf8 MOV r20, sp +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5134 0x1f 0x6a 0x5f 0x18 ADD.NC p7, r20, #-66 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5138 0xe0 0xc6 0xe0 0x01 0x25 0xd4 ST.s16 r17, [p7]; VMOV bmll0, x0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5144 0x14 0x7a 0x80 0x18 MOVX crRnd, r17 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5148 0x08 0x40 0x16 0x18 VCONV.bf16.fp32 wl0, bmll0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5152 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5154 0x1c 0x41 0x01 0xb8 VEXTRACT.16 r17, x0, #0, vaddSign0 + 5158 0x00 0x00 NOPX + 5160 0x00 0x00 NOPX + 5162 0x07 0x06 0x32 0x98 LDA.s16 r17, [p7] + 5166 0x00 0x00 NOPX + 5168 0x00 0x00 NOPX + 5170 0x00 0x00 NOPX + 5172 0x00 0x00 NOPX + 5174 0x00 0x00 NOPX + 5176 0x00 0x00 NOPX + 5178 0x00 0x2c 0xff 0xa4 0x6b 0x0c NOPA; ST r17, [sp, #-48] +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5184 0x0b 0x90 0x81 0x8e 0x0b 0x00 0x01 0xf1 0xb2 0x32 0x10 0x76 MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509028 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5196 0x51 0x45 0x50 0x84 0x8b 0x33 0x19 0x92 0x68 0x0b 0x58 0x76 LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5208 0x61 0x96 0x00 0x39 0xb9 0x65 0xaa 0x60 0x78 0xba MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5218 0x03 0x06 0x67 0x18 ST.s8 r19, [p3] +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5222 0x00 0x00 NOPX +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5224 0x00 0x08 0x80 0x00 0x01 0x04 JL #4352 +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5230 0x14 0x6b 0x2d 0x98 LSHL r21, r17, r18 +.delay_slot + 5234 0x1f 0x6a 0xf9 0x58 ADD.NC p7, r21, r30 +.delay_slot + 5238 0x16 0x63 0x11 0x98 SUB r17, r25, r17 +.delay_slot + 5242 0x8c 0x65 0xba 0x2c 0x35 0x64 LSHL r17, r17, r18; MOV r20, #781 +.delay_slot + 5248 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2c 0x9a 0x11 0x8b 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV +.return_address + 5264 0x07 0xd4 0x99 0x18 LDA p1, [sp, #-44] +.no_stack_arguments + 5268 0x00 0x08 0x80 0x00 0x01 0x04 JL #4352 +.delay_slot +.swstall delay_slot + 5274 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5276 0x00 0x00 NOPX +.delay_slot + 5278 0x1b 0x56 0x90 0x18 ADD.NC r13, r13, #32 +.delay_slot + 5282 0x1a 0x66 0xa0 0xf8 MOV p2, r13 +.delay_slot + 5286 0x00 0x2c 0xf0 0x8f 0x0b 0x00 0x00 0x00 0x00 0x7a NOPA; MOVS p0, r15; NOPX +.return_address + 5296 0xd6 0x9a 0x80 0x01 0x37 0xea 0x33 0x63 0x08 0xba MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 + 5306 0x83 0x84 0xd0 0x34 0x62 0x2c LDA dn0, [p4], #4; MOVX r13, #12 + 5312 0x04 0x1c 0x46 0x98 LDA dj0, [p4], #4 + 5316 0x04 0x1e 0x26 0x98 LDA dn4, [p4], #4 + 5320 0x04 0x1e 0x46 0x98 LDA dj4, [p4], #4 + 5324 0x04 0x1c 0x06 0x98 LDA m0, [p4], #4 + 5328 0x04 0x1c 0x66 0x98 LDA dc0, [p4], #4 + 5332 0x04 0x1e 0x66 0x98 LDA dc4, [p4], #4 + 5336 0x04 0x1e 0xd6 0x98 LDA r22, [p4], #4 + 5340 0x04 0x1e 0x36 0x98 LDA r17, [p4], #4 + 5344 0x04 0x1f 0x96 0x98 LDA r28, [p4], #4 + 5348 0x04 0x1e 0xb6 0x98 LDA r21, [p4], #4 + 5352 0x04 0x1e 0xf6 0x98 LDA r23, [p4], #4 + 5356 0x04 0x1d 0x9e 0x98 LDA p3, [p4], #4 + 5360 0x04 0x1d 0x26 0x98 LDA dn2, [p4], #4 + 5364 0x04 0x1c 0xa6 0x98 LDA dn1, [p4], #4 + 5368 0x04 0x1c 0xc6 0x98 LDA dj1, [p4], #4 + 5372 0x04 0x1e 0xa6 0x98 LDA dn5, [p4], #4 + 5376 0x04 0x1f 0xd6 0x98 LDA r30, [p4], #4 + 5380 0x04 0x1f 0xb6 0x98 LDA r29, [p4], #4 + 5384 0x04 0x1c 0xe6 0x98 LDA dc1, [p4], #4 + 5388 0x04 0xc2 0x4a 0x98 LDA.u8 r18, [p4, dj6] + 5392 0x07 0xd2 0x91 0x18 LDA r20, [sp, #-48] + 5396 0x04 0x04 0x56 0x98 LDA r2, [p4] + 5400 0x00 0x00 NOPX + 5402 0x00 0x00 NOPX + 5404 0x00 0x00 NOPX + 5406 0x00 0x00 NOPX + 5408 0x14 0xe7 0x2c 0x98 LTU r19, r19, r18 + 5412 0x98 0x0c 0x60 0x40 0x01 0x84 JNZ r19, #6336 +.delay_slot + 5418 0x00 0x07 0xc4 0xc8 0xc8 0x44 MOVXM p2, #509028 +.delay_slot + 5424 0x02 0x05 0xa7 0x18 ST.s8 r13, [p2] +.delay_slot + 5428 0x1c 0xd1 0x72 0xf8 VBCST.16 x9, r20 +.delay_slot +.swstall delay_slot + 5432 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 5434 0x00 0x00 NOPX + 5436 0xfb 0x43 0x20 0x1b 0xb9 0x3f 0x80 0x84 0x58 0xba LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 + 5446 0x00 0x13 0x00 0x3d 0x20 0x0a 0x00 0x3c 0x58 0xba MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 + 5456 0xf8 0x14 0x80 0x01 0xa0 0x0b 0xe4 0xd0 0x78 0xba MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 + 5466 0xef 0x98 0x82 0x1c 0x4b 0x1b 0xd4 0x01 0xa7 0xc0 0x78 0x76 MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5478 0xfa 0x96 0x26 0x1c 0x4b 0x01 0xf7 0x89 0xe8 0x07 0x58 0x76 LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5490 0xfb 0xca 0x20 0x00 0x00 0x05 0x32 0xf0 0x10 0xba LDA r18, [sp, #-36]; MOVXM p2, #5600 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5500 0xfc 0x36 0x20 0x34 0x69 0x12 0x8b 0x0c 0x58 0xba LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 + 5510 0xfc 0x87 0x29 0xd7 0x20 0x01 0x90 0x0b 0x08 0x00 0x58 0xb6 LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 + 5522 0x04 0x88 0x16 0x98 LDA r0, [p4], m4 + 5526 0x04 0xab 0x26 0x98 LDA dn6, [p4], m5 + 5530 0x04 0x2f 0x76 0x98 LDA r27, [p4], #8 + 5534 0x04 0x1e 0x86 0x98 LDA m5, [p4], #4 + 5538 0x04 0x8a 0xc6 0x98 LDA dj5, [p4], m4 + 5542 0x04 0x9e 0x06 0x98 LDA m4, [p4], #-28 + 5546 0x04 0x1c 0x36 0x98 LDA r1, [p4], #4 + 5550 0x99 0x02 0xdd 0x06 0x02 0x94 LDA r0, [p4], m6; ADD.NC dj6, r6, r0 + 5556 0x04 0x14 0x76 0x98 LDA r3, [p4, #4] + 5560 0x04 0x04 0x96 0x98 LDA r4, [p4] + 5564 0x19 0xda 0x00 0xf8 MOV r7, m5 + 5568 0x1a 0x83 0x99 0x58 ADD.NC dj2, r7, r6 + 5572 0x1c 0x1b 0x00 0xf8 MOV r16, dj5 + 5576 0x1a 0x0d 0x99 0x58 ADD.NC m2, r27, r6 + 5580 0x1e 0x03 0xe0 0x18 ADD.NC m6, r7, #-64 + 5584 0x18 0xff 0xee 0x10 0xc0 0x24 ADD r3, r3, #-1; ADD.NC m7, r16, #-64 + 5590 0x00 0x2c 0xf0 0x00 0x10 0x00 0x82 0x80 0x7e 0xba NOPA; NOPB; MOV m1, dj2 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.loop_nesting 1 + 5600 0xc3 0x85 0x71 0x85 0x0b 0x04 0xe7 0xec 0x33 0x90 0x78 0x76 VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 + 5612 0x22 0x81 0x78 0x28 0x2b 0x0e 0x4b 0x02 0x33 0x98 0xa0 0xf6 VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 + 5624 0xa0 0x39 0x78 0x28 0x2f 0x5a 0x4b 0x03 0xc6 0x80 0x70 0xf6 VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 + 5636 0xd9 0x0d 0x74 0x03 0x2b 0x53 0x0b 0x01 0x82 0x00 0x70 0xf6 VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 + 5648 0x71 0x41 0x74 0x12 0xd4 0x01 0xc0 0x00 0x5e 0xba VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 + 5658 0xc3 0x95 0x78 0x28 0x28 0x00 0x00 0x05 0xbb 0x90 0x10 0xb6 VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5920 + 5670 0xdd 0x1d 0x78 0x28 0x28 0x00 0x00 0x04 0x7b 0x78 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5872 + 5682 0x80 0xb5 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] + 5688 0xc3 0xa5 0x78 0x22 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] + 5694 0xd9 0x2d 0x78 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5700 0x22 0x81 0x78 0x28 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5706 0x83 0xbd 0x74 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5712 0x80 0xcd 0x74 0x11 0x14 0x02 0x9a 0xc3 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5722 0x80 0xc5 0x78 0x28 0x2c 0x98 0x8b 0x01 0x9a 0xc1 0xe0 0xf6 VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5734 0x02 0x81 0x73 0x00 0x54 0x1d 0x48 0x14 0xe9 0x4a VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 0xcf 0x35 0x76 0x94 0x96 0x00 0x00 0x5c 0x58 0x07 0x49 0x2c 0xe9 0x6e VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5758 0x82 0xbd 0x7a 0x38 0x96 0x00 0x00 0x4c 0x90 0x3e 0x4a 0x55 0x09 0x6e VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5772 0x71 0x01 0x74 0x98 0x96 0x00 0x00 0x54 0x90 0x1e 0xf8 0x60 0x3d 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5786 0x22 0x81 0x70 0x04 0xf9 0x64 0x3d 0x62 VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5794 0xa0 0x09 0x70 0x04 0xfa 0x88 0x3d 0x62 VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5802 0x71 0x01 0x70 0x04 0x4b 0x6d 0x09 0x62 VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5810 0x22 0x81 0x74 0x01 0x28 0x3c VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5816 0x3c 0x11 0x14 0x18 VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5820 0xa0 0x09 0x78 0x28 0x2d 0x72 0x7d 0x82 0xfb 0x8c 0x3d 0x66 VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5832 0x71 0x01 0x74 0x14 0x14 0x1d 0xa0 0x06 0x29 0x4a VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5842 0x22 0x81 0x74 0x01 0x28 0x00 0x00 0x58 0xaa 0x0f 0xa2 0x46 0x09 0x4e VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5872 0xa0 0x09 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x01 0x49 0x01 0xed 0x1b 0x50 0x4b VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5888 0x71 0x01 0x78 0x28 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x00 0x31 0x4b VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5904 0x22 0x81 0x74 0x01 0x28 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x7d 0x12 0x30 0x4b VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5920 0x00 0x2c 0xf8 0x22 0x28 0x01 0x5b 0x00 0x00 0x00 0xc9 0x03 0xed 0x09 0x51 0x4b NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.loop_nesting 1 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5936 0xa0 0x09 0x7c 0xbc 0x96 0x00 0x00 0x54 0x90 0x1e 0xa3 0x6a 0x09 0x6e VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5950 0x71 0x01 0x7e 0x1c 0x96 0x00 0x00 0x7c 0x38 0x07 0xa0 0x06 0x29 0x6e VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5964 0x61 0x91 0x61 0x55 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5974 0x6a 0xc1 0x61 0x92 0x07 0xc4 0xa1 0x2a 0x29 0x4a MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5984 0xeb 0x81 0x62 0x92 0x03 0xc4 0xa3 0x6a 0x09 0x4a MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5994 0xb3 0x91 0x6f 0x57 0x22 0x8f 0x00 0xe6 0xa0 0x06 0x29 0x66 PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 6006 0x93 0x91 0x62 0x06 0x00 0xe4 0xa2 0x46 0x09 0x4a MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 6016 0x02 0x92 0x03 0xc6 0xa1 0x2a 0x29 0x62 VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6024 0x01 0x92 0x07 0xc6 0xa3 0x6a 0x09 0x62 VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 + 6032 0x1f 0x8b 0x00 0xf8 MOV dj7, dj5 + 6036 0x03 0x0b 0xa0 0xe6 0xa1 0x2a 0x29 0x62 MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 + 6044 0x03 0x88 0xa0 0xe6 0xa0 0x06 0x29 0x62 MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 + 6052 0x00 0xf7 0x23 0x05 0x00 0xe6 0xa3 0x6a 0x09 0x4a PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 + 6062 0x71 0x89 0x6e 0xd7 0x25 0x82 0xa0 0xe6 0xa2 0x46 0x09 0x66 PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 + 6074 0x62 0x89 0x60 0x03 0xc5 0x80 0x70 0x02 MOVS dc3, dc5; MOV dj7, dj5 + 6082 0xa0 0x41 0x60 0x01 0x81 0x00 0x70 0x02 MOVS dc5, r2; MOV m3, m1 + 6090 0xb2 0x12 0xc0 0x00 0x87 0x50 0x70 0x02 VCONV.bf16.fp32 x11, cml1; MOV m1, r29 + 6098 0xa2 0x02 0xc0 0x02 0xc7 0x90 0x70 0x02 VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 + 6106 0x13 0x91 0x61 0x3b 0x90 0x01 0xc8 0x60 0x76 0xba PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 + 6116 0x62 0x0a 0xc0 0x00 0x83 0x00 0x70 0x02 VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 + 6124 0x52 0x22 0xc0 0x57 0x20 0x24 0x03 0x0e 0x00 0x00 0x60 0x36 PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6256 +.delay_slot + 6136 0x72 0x1a 0xc0 0x00 0xa9 0x60 0x70 0x02 VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 +.delay_slot + 6144 0x82 0x32 0xc0 0x03 0xa7 0xc0 0x70 0x02 VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 +.delay_slot + 6152 0x12 0x3a 0xc5 0x2b 0x90 0x00 0xb5 0x60 0x76 0xba PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 +.delay_slot + 6162 0x22 0x2a 0xc0 0x02 0xc2 0x80 0x70 0x02 VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 +.delay_slot + 6170 0xe1 0x89 0x60 0x00 0x4d 0xc0 0x70 0x02 MOVS dc7, dc3; MOV r2, dc5 + 6178 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 6182 0x1b 0xbc 0xec 0xf8 VMAX_LT.bf16 x7, r16, x7, x9 + 6186 0x3c 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 + 6194 0xa2 0xba 0x60 0x01 0xda 0x76 0x70 0x02 VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 + 6202 0x20 0xd2 0x60 0x00 0x03 0x12 0x00 0x00 0x21 0x3a VST x10, [p1]; J #6288 +.delay_slot + 6212 0x22 0xba 0x60 0x02 0xa2 0x76 0x70 0x02 VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 +.delay_slot + 6220 0x1b 0x8c 0xec 0xf8 VMAX_LT.bf16 x7, r16, x1, x9 +.delay_slot + 6224 0x00 0xd2 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 +.delay_slot + 6232 0x02 0xba 0x60 0x00 0x8a 0x76 0x70 0x02 VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 +.delay_slot + 6240 0x00 0x2c 0xf0 0x00 0x24 0xa2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 + 6256 0x09 0xe0 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p1, dj7] + 6260 0x0d 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p5, #64] + 6264 0x09 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p1] + 6268 0x09 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p1, #64] + 6272 0x08 0x06 0x13 0x18 VST x8, [p0] + 6276 0x08 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p0, #64] + 6280 0x94 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 + 6288 0xe2 0x92 0x6f 0x57 0x20 0x06 0x35 0x01 0x40 0x00 0x58 0x36 PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 +.delay_slot + 6300 0x1b 0x44 0x80 0xf8 MOV dn3, dn2 +.delay_slot + 6304 0x1a 0x49 0xa0 0xf8 MOV dn2, r19 +.delay_slot + 6308 0xeb 0x72 0x05 0x1e 0x01 0xf4 PADDB.3D [p7], d2; MOV dj2, dj7 +.delay_slot + 6314 0x1a 0x4e 0x80 0xf8 MOV dn2, dn7 +.delay_slot +.swstall delay_slot + 6318 0x00 0x00 NOPX +.loop_nesting 0 + 6320 0x00 0x0d 0xa8 0x00 0x00 0x84 J #6992 +.delay_slot +.swstall delay_slot + 6326 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6328 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6330 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6332 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6334 0x00 0x00 NOPX +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 + 6336 0xfb 0x7e 0x22 0x0c 0x8b 0x04 0xe1 0x08 0xb3 0x90 0x78 0x76 LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 + 6348 0x07 0x90 0x82 0x56 0x0b 0x1b 0xd4 0x03 0x62 0x40 0x78 0x76 MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 + 6360 0x07 0x94 0x00 0x19 0x31 0x89 0x05 0xd0 0x78 0xba MOVA r20, #60; MOVX r19, #780; MOV m2, r23 + 6370 0xef 0x98 0x86 0x5c 0x0b 0x01 0x20 0xca 0xc7 0x90 0x78 0x76 MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 + 6382 0xfa 0x83 0x25 0x02 0x0b 0x01 0x90 0x08 0x87 0x50 0x78 0x76 LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 + 6394 0xfb 0xd6 0x20 0x01 0x80 0x0b 0x45 0x50 0x78 0xba LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 + 6404 0xfc 0x36 0x20 0x00 0x00 0x05 0x34 0xa8 0x10 0xba LDA r13, [sp, #-32]; MOVXM p2, #6480 + 6414 0xfc 0x87 0x26 0xdf 0x72 0x94 LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 + 6420 0x03 0x1d 0xc6 0x98 LDA dj3, [p3], #4 + 6424 0x03 0x8a 0x06 0x98 LDA m4, [p3], m4 + 6428 0x03 0x9e 0x86 0x98 LDA m5, [p3], #-28 + 6432 0x03 0x1e 0xd6 0x98 LDA r22, [p3], #4 + 6436 0x03 0xca 0xf6 0x98 LDA r23, [p3], m6 + 6440 0x03 0x17 0xb6 0x98 LDA r29, [p3, #4] + 6444 0x03 0x07 0x96 0x98 LDA r28, [p3] + 6448 0x00 0x00 NOPX + 6450 0x1f 0x98 0x00 0xf8 MOV r30, m4 + 6454 0x1e 0x07 0x00 0xf8 MOV m6, dj3 + 6458 0x1f 0xdc 0x00 0xf8 MOV r31, m6 + 6462 0x1b 0x0f 0xe0 0x18 ADD.NC m3, r31, #-64 + 6466 0xef 0x7f 0xee 0x1e 0xc0 0x24 ADD r29, r29, #-1; ADD.NC m7, r30, #-64 + 6472 0x00 0x2b 0x60 0x03 0xc7 0x90 0x70 0x02 NOPS; MOV dj7, r30 +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.loop_nesting 1 + 6480 0xc3 0x85 0x7a 0x28 0x28 0x00 0x00 0x8f 0xa0 0x02 0x71 0x81 0x60 0x7e VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6656 + 6494 0xcd 0x0d 0x7a 0x28 0x28 0x00 0x00 0x05 0xbd 0x18 0x10 0xb6 VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6704 + 6506 0x02 0x81 0x76 0x05 0x28 0x05 0xe9 0x6e 0xbf 0x3f 0x48 0xb6 VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 + 6518 0x55 0x59 0x73 0x01 0x14 0x01 0x47 0x90 0x7e 0xba VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 + 6528 0xc3 0x95 0x76 0x01 0x28 0x3c VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] + 6534 0xdd 0x1d 0x7a 0x21 0xa8 0x3c VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] + 6540 0xc3 0xa5 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] + 6546 0xcd 0x2d 0x7a 0x28 0x28 0x3c VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] + 6552 0xc3 0xb5 0x76 0x00 0xa8 0x3c VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] + 6558 0xdd 0x3d 0x76 0x03 0x28 0x3c VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] + 6564 0x68 0x45 0x76 0x03 0xa8 0x3c VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] + 6570 0x68 0x4d 0x75 0x12 0x14 0x01 0x69 0x2d 0xee 0xba VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 + 6580 0x02 0x81 0x75 0x14 0x14 0x02 0xa9 0x2f 0xee 0xba VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 + 6590 0x55 0x01 0x7a 0x28 0x2a 0x11 0xdb 0xc2 0x48 0x0b 0x69 0x66 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 + 6602 0x02 0x81 0x75 0x11 0xdf 0xc2 0x49 0x35 0x69 0x4a VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 + 6612 0x4a 0x49 0x69 0x48 VMAC.f dm2, dm2, ex4, ex11, r9 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6616 0x4b 0x75 0x69 0x48 VMAC.f dm3, dm3, ex10, ex11, r9 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6620 0x06 0x00 0xaa 0x8b 0x5f 0xc6 0xa1 0x84 0x3d 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6630 0x03 0x01 0x94 0x00 0xa0 0x80 0x3d 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6638 0x03 0x01 0xd4 0x00 0xa2 0x88 0x3d 0x62 VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6646 0x55 0x01 0x75 0x12 0x14 0x1d 0xa3 0x8c 0x3d 0x4a VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.loop_nesting 2 +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6656 0xa2 0x82 0x82 0x16 0xb7 0xb4 VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6662 0x0a 0x28 0x2a 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x4a VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6672 0x06 0x00 0xa9 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x4a VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6682 0x03 0x01 0x94 0x00 0x9b 0x68 0x09 0x62 VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6690 0x02 0x81 0x76 0x03 0xa8 0x00 0x00 0x00 0x05 0x6c 0x9a 0x46 0x09 0x6e VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6704 0x55 0x01 0x7a 0x24 0x28 0x01 0x5b 0x00 0x00 0x01 0x45 0xaf 0xe8 0x00 0x00 0xe1 VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV +.loop_nesting 1 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6720 0x07 0x0c 0xff 0x97 0x25 0x9c 0x8b 0x00 0x85 0xad 0xe0 0xf6 PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6732 0x93 0x91 0x6f 0x17 0x22 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x66 PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6744 0x73 0x91 0x6f 0x97 0x21 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x66 PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6756 0x02 0x88 0xa0 0xe6 0x9b 0x68 0x09 0x62 MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6764 0x02 0xb7 0x20 0x9b 0x80 0xe6 0x9a 0x46 0x09 0x4a PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 + 6774 0x19 0x0b 0x5b 0xd8 VSHUFFLE ex2, ex1, ex6, r22 + 6778 0x1a 0x8b 0x5f 0xd8 VSHUFFLE ex5, ex1, ex6, r23 + 6782 0x01 0xbc 0x5b 0xc6 0x98 0x04 0x09 0x62 VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 + 6790 0x02 0x3c 0x5f 0xc6 0x99 0x2a 0x09 0x62 VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 + 6798 0x9a 0x46 0x09 0x48 VMAC.f dm2, dm2, ex3, ex0, r19 + 6802 0x9b 0x68 0x09 0x48 VMAC.f dm3, dm3, ex4, ex0, r19 + 6806 0x00 0x00 NOPX + 6808 0x00 0x00 NOPX + 6810 0x0d 0x10 0x16 0x18 VCONV.bf16.fp32 x10, cml0 + 6814 0x0d 0x90 0x96 0x18 VCONV.bf16.fp32 x11, cml1 + 6818 0x12 0x1a 0xc0 0x2a 0x03 0x62 0x00 0x00 0x61 0x3a VCONV.bf16.fp32 x1, cmh1; JZ r21, #6928 +.delay_slot + 6828 0x0b 0x10 0x56 0x18 VCONV.bf16.fp32 x6, cmh0 +.delay_slot + 6832 0x09 0x11 0x96 0x18 VCONV.bf16.fp32 x2, cml3 +.delay_slot + 6836 0x0b 0x91 0xd6 0x18 VCONV.bf16.fp32 x7, cmh3 +.delay_slot + 6840 0x0a 0x91 0x16 0x18 VCONV.bf16.fp32 x5, cml2 +.delay_slot + 6844 0x0c 0x11 0x56 0x18 VCONV.bf16.fp32 x8, cmh2 + 6848 0x1d 0xdc 0xec 0xf8 VMAX_LT.bf16 x11, r16, x11, x9 + 6852 0x18 0x8c 0xec 0xf8 VMAX_LT.bf16 x1, r16, x1, x9 + 6856 0xac 0x5a 0x60 0x02 0xaa 0x76 0x70 0x02 VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 + 6864 0x82 0x8a 0x60 0x00 0x5a 0x76 0x70 0x02 VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 + 6872 0xa0 0xd2 0x60 0x00 0x03 0x66 0x00 0x00 0x21 0x3a VST x10, [p5]; J #6960 +.delay_slot + 6882 0xa2 0x8a 0x60 0x02 0x8a 0x76 0x70 0x02 VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 +.delay_slot + 6890 0x18 0xbc 0xec 0xf8 VMAX_LT.bf16 x1, r16, x7, x9 +.delay_slot + 6894 0x6c 0x52 0x60 0x02 0x96 0x76 0x70 0x02 VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 +.delay_slot + 6902 0x00 0x2c 0xf7 0x14 0x53 0x02 0x22 0x76 0x72 0xba NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 +.delay_slot + 6912 0x00 0x2c 0xf0 0x00 0x24 0xe2 0x93 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 + 6928 0x0d 0x60 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p5, dj3] + 6932 0x0c 0x14 0xe3 0x18 VST.CONV.bf16.fp32 cmh1, [p4, #64] + 6936 0x0d 0x04 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p5] + 6940 0x0d 0x14 0x63 0x18 VST.CONV.bf16.fp32 cmh0, [p5, #64] + 6944 0x0b 0x61 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p3, dj3] + 6948 0x0f 0x15 0xe3 0x18 VST.CONV.bf16.fp32 cmh3, [p7, #64] + 6952 0x9c 0x24 0x60 0x00 0x01 0xa5 0x70 0x02 VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 + 6960 0x62 0xc2 0x6e 0xf5 0x40 0x5c VST x8, [p3, #64]; JNZD r29, r29, p2 +.delay_slot + 6966 0x3f 0x8b 0x90 0x18 PADDB [p7], m4 +.delay_slot +.swstall delay_slot + 6970 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6972 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6974 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 6976 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6992 0x07 0xed 0xf1 0x18 LDA r15, [sp, #-20] + 6996 0x07 0xf1 0x91 0x18 LDA r12, [sp, #-16] + 7000 0x07 0xf5 0x31 0x18 LDA r9, [sp, #-12] + 7004 0x07 0xeb 0x19 0x18 LDA p6, [sp, #-24] + 7008 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] + 7012 0x07 0xfd 0xd1 0x18 LDA r14, [sp, #-4] + 7016 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7020 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 7026 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7028 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7030 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7032 0x00 0x00 NOPX +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + +.text_segment PM 7040 +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 7040 0x00 0x20 0x00 0x00 0x01 0xf2 0x32 0x20 0x10 0xba MOVA r0, #1; MOVXM p4, #508992 + 7050 0x80 0xc2 0xd0 0x00 0x10 0x08 0x4b 0xd0 0x78 0xba LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 + 7060 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7066 0x0f 0xf0 0x55 0x98 ST r2, [sp, #-16] + 7070 0x00 0x00 NOPX + 7072 0x00 0x00 NOPX + 7074 0x00 0x00 NOPX + 7076 0x00 0x00 NOPX + 7078 0x80 0x0e 0x28 0x40 0x01 0x84 JNZ r16, #7248 +.delay_slot + 7084 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 7088 0x0f 0xff 0x1d 0x98 ST p6, [sp, #-4] +.delay_slot + 7092 0x0f 0xed 0x9d 0x98 ST p3, [sp, #-20] +.delay_slot + 7096 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 7100 0x00 0x07 0xc7 0xac 0x80 0x44 MOVXM r15, #509504 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7106 0xd0 0x91 0x60 0x00 0x01 0xf3 0xb2 0x32 0x11 0x3a MOVS p6, p1; MOVXM p7, #509028 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7116 0xe0 0xc0 0xe1 0x8f 0x0b 0x00 0x01 0xf3 0xb2 0x30 0x10 0x76 ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509024 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7128 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7130 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7132 0x00 0x05 0xb0 0x00 0x01 0x04 JL #2912 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7138 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7140 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 7144 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 7148 0xe0 0xc2 0x30 0x03 0xb0 0x60 0x70 0x02 ST r16, [p7]; MOV p7, p0 +.delay_slot + 7156 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x32 0x60 0x70 0xf6 NOPA; NOPB; NOPS; MOV p0, p2 +.return_address + 7168 0x1a 0x67 0x85 0x98 ADD.NC p2, r15, #11 + 7172 0x4f 0xc1 0x50 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA.u8 r16, [p2], #7; MOVXM p1, #508996 + 7182 0x43 0xcf 0x50 0x00 0x01 0xf0 0x32 0x2e 0x10 0xba LDA.u16 r19, [p2], #2; MOVXM p0, #509020 + 7192 0x02 0x06 0x3a 0x98 LDA.u16 r17, [p2] + 7196 0x00 0x00 NOPX + 7198 0x02 0x16 0x5a 0x98 LDA.u16 r18, [p2, #2] + 7202 0x00 0x00 NOPX + 7204 0x00 0x00 NOPX + 7206 0x20 0xc2 0x30 0x00 0xb6 0x60 0x70 0x02 ST r16, [p1]; MOV p1, p6 + 7214 0x14 0xe1 0x0f 0x98 MUL r16, r19, r16 + 7218 0x00 0x00 NOPX + 7220 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 + 7224 0x00 0x00 NOPX + 7226 0x14 0xa1 0x0f 0x98 MUL r16, r18, r16 + 7230 0x00 0x00 NOPX + 7232 0x00 0x2c 0xf0 0x00 0x20 0x06 0x11 0x80 0x00 0x00 0x37 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 7248 0xfd 0xbe 0x20 0x00 0x01 0xf3 0x32 0x24 0x10 0xba LDA r15, [sp, #-20]; MOVXM p6, #509000 + 7258 0xc0 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x26 0x10 0xba LDA r16, [p6]; MOVXM p2, #509004 + 7268 0x40 0xc6 0xd0 0x00 0x01 0xf3 0xb2 0x20 0x10 0xba LDA r17, [p2]; MOVXM p7, #508992 + 7278 0x07 0x06 0x56 0x98 LDA r18, [p7] + 7282 0x00 0x00 NOPX + 7284 0x00 0x00 NOPX + 7286 0x00 0x00 NOPX + 7288 0x00 0x00 NOPX + 7290 0x80 0x0e 0x68 0x40 0x01 0x84 JNZ r16, #7376 +.delay_slot + 7296 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 7300 0x40 0xc6 0x39 0x44 0x0e 0x5c ST r17, [p2]; ADD r17, r18, #1 +.delay_slot + 7306 0x14 0x26 0x07 0x18 ADD r19, r16, #1 +.delay_slot + 7310 0x0e 0x06 0x71 0x98 ST r19, [p6] +.delay_slot + 7314 0x0f 0x06 0x31 0x98 ST r17, [p7] + 7318 0x1a 0x67 0x86 0x18 ADD.NC p2, r15, #12 + 7322 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 7326 0x02 0xfe 0x16 0x98 LDA r16, [p2], #-4 + 7330 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7334 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7336 0x02 0x46 0x16 0x98 LDA r16, [p2, #16] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7340 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7342 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7344 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7346 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7348 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7352 0x0a 0x06 0x11 0x98 ST r16, [p2] + 7356 0x17 0xe2 0xfd 0x18 MOVX r17, #-1 + 7360 0x00 0x00 NOPX + 7362 0x00 0x00 NOPX + 7364 0x00 0x00 NOPX + 7366 0x00 0x2c 0xf0 0x01 0x5b 0x00 0x04 0x13 0x18 0x7a NOPA; NOPS; ACQ r16, r17 +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.no_stack_arguments + 7376 0x00 0x09 0x08 0x00 0x01 0x04 JL #4624 +.delay_slot + 7382 0x00 0x07 0xc6 0xcc 0x80 0x44 MOVXM p3, #509504 +.delay_slot +.swstall delay_slot + 7388 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7390 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7392 0x00 0x00 NOPX +.delay_slot + 7394 0x00 0x2c 0xf0 0x02 0xb6 0x00 0x00 0x53 0x3d 0x07 0x00 0x00 0x1c 0x2e NOPA; NOPS; MOV p2, r15; NOPV +.return_address + 7408 0xc0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x22 0x10 0xba LDA r16, [p6]; MOVXM p1, #508996 + 7418 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7422 0x07 0xf0 0x11 0x18 LDA r0, [sp, #-16] + 7426 0x00 0x00 NOPX + 7428 0x00 0x00 NOPX + 7430 0x00 0x00 NOPX + 7432 0x00 0x00 NOPX + 7434 0x00 0x00 NOPX + 7436 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 7440 0x80 0x0e 0xb0 0x40 0x01 0x84 JNZ r16, #7520 +.delay_slot + 7446 0x10 0x30 0x01 0x18 MOVX r24, #0 +.delay_slot +.swstall delay_slot + 7450 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7452 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7454 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7456 0x00 0x00 NOPX + 7458 0x04 0x00 0xa2 0xcf 0x14 0x24 MOVX r16, #1; ADD.NC p1, r15, #20 + 7464 0x01 0x06 0x36 0x98 LDA r17, [p1] + 7468 0x00 0x00 NOPX + 7470 0x00 0x00 NOPX + 7472 0x00 0x00 NOPX + 7474 0x00 0x00 NOPX + 7476 0x00 0x00 NOPX + 7478 0x00 0x00 NOPX + 7480 0x14 0x51 0x08 0x18 REL r17, r16 + 7484 0x3c 0xc6 0xdc 0x0e 0x23 0x0c LDA r17, [p1, #-8]; ST r24, [p6] + 7490 0x00 0x00 NOPX + 7492 0x00 0x00 NOPX + 7494 0x00 0x00 NOPX + 7496 0x00 0x00 NOPX + 7498 0x00 0x00 NOPX + 7500 0x00 0x00 NOPX + 7502 0x14 0x21 0x11 0x98 SUB r16, r16, r17 + 7506 0x00 0x2c 0xf3 0xcc 0x23 0x00 0x00 0x40 0x1a 0x57 0x00 0x00 0x1c 0x2e NOPA; ST r16, [p1, #-8]; NOPM; NOPV +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 + 7520 0xe0 0xc2 0xd0 0x00 0x01 0xf3 0x32 0x2e 0x10 0xba LDA r16, [p7]; MOVXM p6, #509020 + 7530 0x06 0x06 0x36 0x98 LDA r17, [p6] + 7534 0x07 0xf8 0x99 0x18 LDA p1, [sp, #-8] + 7538 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 7542 0x00 0x00 NOPX + 7544 0x00 0x00 NOPX + 7546 0x00 0x00 NOPX + 7548 0x00 0x00 NOPX + 7550 0x14 0x61 0x08 0x98 NE r16, r17, r16 + 7554 0x80 0x0e 0xd0 0x40 0x01 0x84 JNZ r16, #7584 +.delay_slot +.swstall delay_slot + 7560 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7562 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7564 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7566 0x00 0x00 NOPX +.delay_slot + 7568 0x1b 0xd0 0x20 0xf8 MOV r15, r0 + 7572 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x03 0x83 0x88 0xc1 0x36 NOPA; NOPB; ST r24, [p7]; NOPX +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7584 0x07 0xff 0x19 0x18 LDA p6, [sp, #-4] + 7588 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7592 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7598 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7600 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7602 0x00 0x00 NOPX +.delay_slot + 7604 0x0f 0x84 0x8b 0x18 MOVS p7, p1 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 7616 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function_start + 7616 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64 +.delay_slot + 7622 0x18 0x50 0xc0 0xf8 MOV r1, p0 +.delay_slot + 7626 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32 +.delay_slot + 7630 0x08 0x04 0x11 0x98 ST r0, [p0] +.delay_slot + 7634 0x08 0x14 0x11 0x98 ST r0, [p0, #4] +.delay_slot +.swstall delay_slot + 7638 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 7648 +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 7648 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7652 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7658 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] + 7662 0x0f 0xfd 0xf5 0x98 ST r15, [sp, #-4] + 7666 0x00 0x00 NOPX + 7668 0x00 0x00 NOPX + 7670 0x00 0x00 NOPX + 7672 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7676 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 7680 0x00 0x00 NOPX + 7682 0x00 0x00 NOPX + 7684 0x00 0x00 NOPX + 7686 0x00 0x00 NOPX + 7688 0x00 0x00 NOPX + 7690 0x00 0x00 NOPX + 7692 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7696 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 7700 0x00 0x00 NOPX + 7702 0x00 0x00 NOPX + 7704 0x00 0x00 NOPX + 7706 0x00 0x00 NOPX + 7708 0x00 0x00 NOPX + 7710 0x00 0x00 NOPX + 7712 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 7716 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 7720 0x00 0x00 NOPX + 7722 0x00 0x00 NOPX +.no_stack_arguments + 7724 0x00 0x0e 0xe0 0x00 0x01 0x04 JL #7616 +.delay_slot +.swstall delay_slot + 7730 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7732 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7734 0x00 0x00 NOPX +.delay_slot + 7736 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 7740 0x1b 0xd0 0xc0 0xf8 MOV r15, p0 +.return_address + 7744 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 7754 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 7764 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 7774 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 7778 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7780 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7782 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7786 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7790 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7794 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7800 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7804 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 7808 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 7824 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function_start + 7824 0x02 0x80 0x80 0x00 0x10 0xc8 0x08 0x60 0x78 0xba MOVA m0, #20; MOVX r1, #6; MOV r0, p0 + 7834 0x00 0x00 0xa0 0xc0 0x0c 0x24 MOVX r0, #1; ADD.NC p0, r0, #12 + 7840 0x00 0x08 0x4a 0x98 LDA.u8 r2, [p0], m0 + 7844 0x00 0x00 NOPX + 7846 0x00 0x00 NOPX + 7848 0x00 0x00 NOPX + 7850 0x00 0x00 NOPX + 7852 0x00 0x00 NOPX + 7854 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 7858 0x10 0x80 0x08 0x98 NE r0, r2, r0 +.delay_slot + 7862 0x10 0x00 0x1d 0x98 LSHL r0, r0, r1 +.delay_slot + 7866 0x02 0x82 0x31 0x0d 0xe0 0x5c ST r0, [p0, #4]; NEZ r3, r2 +.delay_slot + 7872 0x10 0xc4 0x1d 0x98 LSHL r2, r3, r1 +.delay_slot + 7876 0x08 0x04 0x51 0x98 ST r2, [p0] +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 7888 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 7888 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 7894 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 7898 0x00 0x0e 0xf0 0x00 0x01 0x04 JL #7648 +.delay_slot + 7904 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot + 7908 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 7912 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7914 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7916 0x00 0x01 0x67 0x98 NOPA +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7920 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7924 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7928 0x00 0x0f 0x48 0x00 0x00 0x84 J #7824 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7934 0x18 0x6e 0xc0 0xf8 MOV p0, p7 +.delay_slot + 7938 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 7944 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7946 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7948 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 7952 +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function_start + 7952 0x67 0x82 0xd0 0x00 0x51 0x54 LDA r0, [p3], #12; MOV m0, #20 + 7958 0x61 0x05 0x58 0xcd 0x81 0xd4 LDA.u8 r1, [p3], m0; MOV p4, p3 + 7964 0x00 0x00 NOPX + 7966 0x00 0x00 NOPX + 7968 0x00 0x00 NOPX + 7970 0x00 0x00 NOPX + 7972 0x00 0x00 NOPX + 7974 0x00 0x00 NOPX + 7976 0x08 0x0f 0xb0 0x40 0x01 0x84 JNZ r1, #8032 +.delay_slot + 7982 0x17 0xc4 0xe9 0x18 MOVX r2, #-6 +.delay_slot + 7986 0x10 0x00 0x2d 0x98 LSHL r0, r0, r2 +.delay_slot +.swstall delay_slot + 7990 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7992 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 7994 0x00 0x00 NOPX + 7996 0x00 0x04 0x32 0x98 LDA.s16 r1, [p0] + 8000 0x00 0x00 NOPX + 8002 0x00 0x00 NOPX + 8004 0x00 0x00 NOPX + 8006 0x00 0x0f 0xc0 0x00 0x00 0x84 J #8064 +.delay_slot +.swstall delay_slot + 8012 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8014 0x00 0x00 NOPX +.delay_slot + 8016 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot +.swstall delay_slot + 8020 0x00 0x00 NOPX +.delay_slot + 8022 0x00 0x2c 0xf0 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p0]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 + 8032 0x01 0x04 0x32 0x98 LDA.s16 r1, [p1] + 8036 0x00 0x00 NOPX + 8038 0x00 0x00 NOPX + 8040 0x00 0x00 NOPX + 8042 0x00 0x00 NOPX + 8044 0x00 0x00 NOPX + 8046 0x00 0x00 NOPX + 8048 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 + 8052 0x00 0x00 NOPX + 8054 0x00 0x2c 0xf1 0x04 0x13 0x00 0x00 0x00 0x00 0x7a NOPA; VST x0, [p1]; NOPX +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 + 8064 0x8a 0x80 0xd0 0x00 0x07 0x8a 0xb8 0x3f 0x48 0xba LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 + 8074 0x62 0x90 0xd0 0x00 0x00 0x04 0x7f 0xf8 0x10 0xba LDA m1, [p3, #4]; MOVXM ls, #8176 + 8084 0x00 0x00 0x26 0xe0 0x20 0x44 MOVXM le, #8208 + 8090 0x00 0x07 0xc8 0xc8 0xc8 0x44 MOVXM p4, #509028 + 8096 0x04 0x04 0x22 0x98 LDA.s8 r1, [p4] + 8100 0x00 0x00 NOPX + 8102 0x00 0x00 NOPX + 8104 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 + 8108 0x01 0x29 0x2b 0x98 VLDA.CONV.fp32.bf16 cml2, [p1], m1 + 8112 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8116 0x01 0x2a 0x2b 0x98 VLDA.CONV.fp32.bf16 cml4, [p1], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8120 0x01 0x15 0x70 0xf5 0x00 0x2c VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8126 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8134 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8138 0x25 0x45 0x70 0x04 0x04 0x10 0x3d 0x62 VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8146 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8150 0x25 0x25 0x70 0x04 0x03 0x28 0x3d 0x62 VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8158 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8162 0x25 0x45 0x74 0x3b 0x46 0x00 0x00 0x40 0x1a 0x57 0x04 0x10 0x3d 0x6e VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8176 0x00 0x08 0xab 0x98 VLDA.CONV.fp32.bf16 cml1, [p0], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8180 0x25 0x25 0x70 0x00 0x21 0x0f 0x11 0x8e 0x03 0x28 0x3d 0x66 VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8192 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8208 0x25 0x45 0x70 0x00 0x22 0x1d 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x20 0x81 0xeb VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8224 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8226 0x43 0xc4 0x60 0x02 0x03 0x28 0x3d 0x62 VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8234 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8236 0x43 0xb4 0x60 0x02 0x04 0x10 0x3d 0x62 VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8244 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8246 0x43 0xc4 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr +.delay_slot +.swstall delay_slot + 8252 0x00 0x00 NOPX +.delay_slot + 8254 0x0a 0x1d 0xa3 0x18 VST.CONV.bf16.fp32 cml3, [p2], #64 +.delay_slot +.swstall delay_slot + 8258 0x00 0x00 NOPX +.delay_slot + 8260 0x0a 0x1e 0x23 0x18 VST.CONV.bf16.fp32 cml4, [p2], #64 +.delay_slot +.swstall delay_slot + 8264 0x00 0x00 NOPX +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 8272 +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function_start + 8272 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 8278 0xff 0x87 0xb0 0x02 0x0a 0x60 0x70 0x02 ST lr, [sp, #-4]; MOV r16, p2 + 8286 0x50 0x91 0x60 0x01 0xb4 0x03 0x00 0x02 MOVS p2, p1; ADD.NC p3, r16, #12 + 8294 0x65 0xed 0x58 0x21 0x81 0xd4 LDA.u8 r27, [p3], #2; MOV r16, p0 + 8300 0x73 0xca 0x58 0xab 0xc1 0xd4 LDA.s16 r18, [p3], #-14; MOV r17, sp + 8306 0x18 0x68 0xc0 0x18 ADD.NC p0, r17, #-128 + 8310 0x08 0x07 0x2b 0x18 VST sfh, [p0] + 8314 0x00 0x06 0x57 0x18 ST.s16 r18, [p0] + 8318 0x00 0x00 NOPX + 8320 0x00 0x00 NOPX +.no_stack_arguments + 8322 0x00 0x0f 0x88 0x00 0x01 0x04 JL #7952 +.delay_slot + 8328 0x1c 0x50 0xc0 0xf8 MOV r17, p0 +.delay_slot +.swstall delay_slot + 8332 0x00 0x00 NOPX +.delay_slot + 8334 0x14 0x25 0x12 0x18 SEL.EQZ r18, r16, r17, r27 +.delay_slot + 8338 0x8c 0x20 0x42 0xd2 0x41 0xe4 SEL.EQZ r16, r17, r16, r27; MOV p1, r18 +.delay_slot + 8344 0x00 0x2b 0x60 0x00 0x34 0x10 0x70 0x02 NOPS; MOV p0, r16 +.return_address + 8352 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 8356 0x00 0x00 NOPX + 8358 0x00 0x00 NOPX + 8360 0x00 0x00 NOPX + 8362 0x00 0x00 NOPX + 8364 0x00 0x00 NOPX + 8366 0x00 0x00 NOPX + 8368 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 8372 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.delay_slot +.swstall delay_slot + 8378 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8380 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8382 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8384 0x00 0x00 NOPX +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + +.text_segment PM 8400 +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 8400 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 8406 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 8412 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 8418 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 8426 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 8436 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 8440 0x00 0x00 NOPX + 8442 0x00 0x00 NOPX + 8444 0x80 0x10 0xd0 0x40 0x01 0x84 JNZ r16, #8608 +.delay_slot + 8450 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 8454 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 8458 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 8462 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 8470 0x00 0x07 0xc0 0xca 0x00 0x44 MOVXM p0, #509184 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8476 0x00 0x07 0xc4 0xc8 0xc8 0x44 MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8482 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509024 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8492 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8494 0x00 0x0f 0x68 0x00 0x01 0x04 JL #7888 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8500 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8502 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8504 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 8508 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 8512 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 8528 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 8534 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r16, [p2]; MOVXM p2, #509184 + 8544 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0x80 0x10 0xba LDA r17, [p2]; MOVXM p2, #509184 + 8554 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 + 8564 0x00 0x00 NOPX + 8566 0x00 0x00 NOPX + 8568 0x00 0x10 0xd8 0x00 0x00 0x84 J #8624 +.delay_slot + 8574 0x00 0x07 0xc0 0xc8 0xb8 0x44 MOVXM p0, #509020 +.delay_slot +.swstall delay_slot + 8580 0x00 0x00 NOPX +.delay_slot + 8582 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 8586 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 8592 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 8608 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 8624 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 8632 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 8642 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 8646 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 8650 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 8654 0x00 0x00 NOPX + 8656 0x00 0x00 NOPX + 8658 0x00 0x00 NOPX + 8660 0x00 0x00 NOPX + 8662 0x00 0x00 NOPX + 8664 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 8668 0x0f 0x06 0x11 0x98 ST r16, [p7] + 8672 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 8676 0x00 0x00 NOPX + 8678 0x00 0x00 NOPX + 8680 0x00 0x00 NOPX + 8682 0x14 0x93 0x08 0x18 ACQ r18, r16 + 8686 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 8692 0x00 0x00 NOPX + 8694 0x00 0x00 NOPX + 8696 0x00 0x06 0x36 0x98 LDA r17, [p0] + 8700 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 8706 0x01 0x06 0x76 0x98 LDA r19, [p1] + 8710 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 8714 0x00 0x00 NOPX +.no_stack_arguments + 8716 0x00 0x10 0x28 0x00 0x01 0x04 JL #8272 +.delay_slot +.swstall delay_slot + 8722 0x00 0x00 NOPX +.delay_slot + 8724 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 8728 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 8732 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 8736 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 8752 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 8762 0x10 0x20 0x05 0x18 MOVX r16, #1 + 8766 0x00 0x00 NOPX + 8768 0x00 0x00 NOPX + 8770 0x00 0x00 NOPX + 8772 0x00 0x00 NOPX + 8774 0x00 0x00 NOPX + 8776 0x14 0x51 0x08 0x18 REL r17, r16 + 8780 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x2e 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509020 + 8790 0x06 0x06 0x36 0x98 LDA r17, [p6] + 8794 0x02 0x06 0x56 0x98 LDA r18, [p2] + 8798 0x00 0x00 NOPX + 8800 0x00 0x00 NOPX + 8802 0x00 0x00 NOPX + 8804 0x00 0x00 NOPX + 8806 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 8810 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 8814 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 8818 0x80 0x11 0x48 0x40 0x01 0x84 JNZ r16, #8848 +.delay_slot +.swstall delay_slot + 8824 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8826 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8828 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8830 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8832 0x00 0x00 NOPX + 8834 0x10 0x20 0x01 0x18 MOVX r16, #0 + 8838 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 8848 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 8852 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 8856 0x00 0x00 NOPX + 8858 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8860 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8862 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8866 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8868 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8872 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 8876 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 8882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8886 0x00 0x00 NOPX +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 8896 +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function_start + 8896 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8900 0x00 0x00 NOPX + 8902 0x00 0x00 NOPX + 8904 0x00 0x00 NOPX + 8906 0x00 0x00 NOPX + 8908 0x00 0x00 NOPX + 8910 0x00 0x00 NOPX + 8912 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 8916 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 8920 0x00 0x00 NOPX + 8922 0x00 0x00 NOPX + 8924 0x00 0x00 NOPX + 8926 0x00 0x00 NOPX + 8928 0x00 0x00 NOPX + 8930 0x00 0x00 NOPX + 8932 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 8936 0x01 0x6c 0x2e 0x98 LDA el0, [p1], #24 + 8940 0x01 0x04 0x12 0x98 LDA.s16 r0, [p1] + 8944 0x00 0x00 NOPX + 8946 0x00 0x00 NOPX + 8948 0x00 0x00 NOPX + 8950 0x00 0x00 NOPX + 8952 0x00 0x00 NOPX + 8954 0x08 0x6c 0x29 0x98 ST el0, [p0], #24 + 8958 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] + 8962 0x00 0x00 NOPX + 8964 0x00 0x00 NOPX + 8966 0x00 0x00 NOPX + 8968 0x00 0x00 NOPX + 8970 0x00 0x00 NOPX + 8972 0x00 0x00 NOPX + 8974 0x01 0x24 0x12 0x98 LDA.s16 r0, [p1, #4] + 8978 0x00 0x14 0x17 0x18 ST.s16 r0, [p0, #2] + 8982 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 8986 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8988 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8990 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8992 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 8994 0x00 0x00 NOPX +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + +.text_segment PM 9008 +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function_start + 9008 0xfb 0xc2 0x80 0x3a 0x68 0x00 0x00 0x08 0x79 0xd8 0x10 0xb6 MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #9136 + 9020 0xff 0x51 0x00 0x39 0x68 0x00 0x00 0x09 0xb9 0xf0 0x10 0xb6 MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9184 + 9032 0x18 0x14 0xc0 0xf8 MOV r0, p2 + 9036 0x1a 0x60 0x10 0x18 ADD.NC p2, r0, #32 + 9040 0x02 0x1c 0x52 0x98 LDA.s16 r2, [p2], #2 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9044 0x02 0x00 0x16 0x98 LDA r0, [p2, dj0] +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9048 0x40 0x86 0x50 0x3a 0x68 0x3c LDA.s16 r1, [p2]; VLDB x4, [p0], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9054 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9056 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9058 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9060 0x38 0x1c 0xb4 0x18 VLDB x2, [p0], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9064 0x18 0x09 0x72 0xf8 VBCST.16 x0, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9068 0x00 0x3a 0x68 0x01 0x18 0xed 0x50 0x36 0x78 0x3a VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9078 0x1d 0x78 0xfe 0x98 ADD.NC lc, r17, #-3 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9082 0x18 0x85 0x72 0xf8 VBCST.16 x1, r1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9086 0x19 0xa8 0xac 0xf8 VMIN_GE.bf16 x3, r16, x5, x1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9090 0x00 0x2c 0xf0 0x39 0x68 0x00 0x00 0x31 0x06 0xcf 0x00 0x2b 0x60 0x7e NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9104 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9120 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9136 0x00 0x2c 0xf0 0x3a 0x69 0x1d 0xd3 0x00 0x00 0x00 0xd4 0x56 0x78 0x00 0x00 0xe1 NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9152 0x00 0x2c 0xf0 0x39 0x68 0x01 0x5b 0x00 0x00 0x01 0x88 0x36 0x78 0x00 0x00 0xe1 NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9168 0x00 0x2c 0xf0 0x00 0x21 0x1c 0xd3 0x00 0x00 0x01 0xd8 0x56 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9184 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x01 0x50 0x36 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9200 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0 + 9212 0x23 0x9a 0x60 0x01 0xd8 0x56 0x70 0x02 VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 + 9220 0x05 0x00 0x05 0x40 0xd9 0xe4 RET lr; VMAX_LT.bf16 x5, r16, x4, x0 +.delay_slot + 9226 0x23 0xba 0x60 0x00 0xd4 0x56 0x70 0x02 VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 +.delay_slot + 9234 0x1b 0x10 0x6c 0xf8 VMAX_LT.bf16 x6, r16, x2, x0 +.delay_slot + 9238 0x1b 0xb0 0xac 0xf8 VMIN_GE.bf16 x7, r16, x6, x1 +.delay_slot + 9242 0x09 0x1c 0xd3 0x18 VST x3, [p1], #64 +.delay_slot + 9246 0x09 0x1d 0xd3 0x18 VST x7, [p1], #64 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + +.text_segment PM 9264 +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 9264 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 9270 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 9276 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9282 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 9290 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 9300 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 9304 0x00 0x00 NOPX + 9306 0x00 0x00 NOPX + 9308 0x80 0x12 0x80 0x40 0x01 0x84 JNZ r16, #9472 +.delay_slot + 9314 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 9318 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 9322 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 9326 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 9334 0x00 0x07 0xc0 0xcc 0x00 0x44 MOVXM p0, #509440 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9340 0x00 0x07 0xc4 0xc8 0xc8 0x44 MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9346 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509024 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9356 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9358 0x00 0x11 0x60 0x00 0x01 0x04 JL #8896 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9364 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9366 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9368 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 9372 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 9376 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 9392 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 9398 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x33 0x00 0x10 0xba LDA r16, [p2]; MOVXM p2, #509440 + 9408 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x33 0x00 0x10 0xba LDA r17, [p2]; MOVXM p2, #509440 + 9418 0x48 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 + 9428 0x00 0x00 NOPX + 9430 0x00 0x00 NOPX + 9432 0x00 0x12 0x88 0x00 0x00 0x84 J #9488 +.delay_slot + 9438 0x00 0x07 0xc0 0xc8 0xb8 0x44 MOVXM p0, #509020 +.delay_slot +.swstall delay_slot + 9444 0x00 0x00 NOPX +.delay_slot + 9446 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 9450 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 9456 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 9472 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 9488 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 9496 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 9506 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 9510 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 9514 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 9518 0x00 0x00 NOPX + 9520 0x00 0x00 NOPX + 9522 0x00 0x00 NOPX + 9524 0x00 0x00 NOPX + 9526 0x00 0x00 NOPX + 9528 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 9532 0x0f 0x06 0x11 0x98 ST r16, [p7] + 9536 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 9540 0x00 0x00 NOPX + 9542 0x00 0x00 NOPX + 9544 0x00 0x00 NOPX + 9546 0x14 0x93 0x08 0x18 ACQ r18, r16 + 9550 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 9556 0x00 0x00 NOPX + 9558 0x00 0x00 NOPX + 9560 0x00 0x06 0x36 0x98 LDA r17, [p0] + 9564 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 9570 0x01 0x06 0x76 0x98 LDA r19, [p1] + 9574 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 9578 0x00 0x00 NOPX +.no_stack_arguments + 9580 0x00 0x11 0x98 0x00 0x01 0x04 JL #9008 +.delay_slot +.swstall delay_slot + 9586 0x00 0x00 NOPX +.delay_slot + 9588 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 9592 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 9596 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 9600 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 9616 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 9626 0x10 0x20 0x05 0x18 MOVX r16, #1 + 9630 0x00 0x00 NOPX + 9632 0x00 0x00 NOPX + 9634 0x00 0x00 NOPX + 9636 0x00 0x00 NOPX + 9638 0x00 0x00 NOPX + 9640 0x14 0x51 0x08 0x18 REL r17, r16 + 9644 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x2e 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509020 + 9654 0x06 0x06 0x36 0x98 LDA r17, [p6] + 9658 0x02 0x06 0x56 0x98 LDA r18, [p2] + 9662 0x00 0x00 NOPX + 9664 0x00 0x00 NOPX + 9666 0x00 0x00 NOPX + 9668 0x00 0x00 NOPX + 9670 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 9674 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 9678 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 9682 0x80 0x12 0xf8 0x40 0x01 0x84 JNZ r16, #9712 +.delay_slot +.swstall delay_slot + 9688 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9690 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9692 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9694 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9696 0x00 0x00 NOPX + 9698 0x10 0x20 0x01 0x18 MOVX r16, #0 + 9702 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 9712 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 9716 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 9720 0x00 0x00 NOPX + 9722 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9724 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9726 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9730 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9732 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9736 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 9740 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 9746 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9748 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9750 0x00 0x00 NOPX +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 9760 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function_start + 9760 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 + 9770 0x17 0x80 0x01 0x18 MOVX r0, #-128 + 9774 0x00 0x00 NOPX + 9776 0x00 0x00 NOPX + 9778 0x00 0x00 NOPX + 9780 0x00 0x00 NOPX + 9782 0x00 0x00 NOPX + 9784 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9788 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 9792 0x00 0x00 NOPX + 9794 0x00 0x00 NOPX + 9796 0x00 0x00 NOPX + 9798 0x00 0x00 NOPX + 9800 0x00 0x00 NOPX + 9802 0x00 0x00 NOPX + 9804 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9808 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 9812 0x00 0x00 NOPX + 9814 0x00 0x00 NOPX + 9816 0x00 0x00 NOPX + 9818 0x00 0x00 NOPX + 9820 0x00 0x00 NOPX + 9822 0x00 0x00 NOPX + 9824 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 9828 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 9832 0x00 0x00 NOPX + 9834 0x00 0x00 NOPX + 9836 0x00 0x00 NOPX + 9838 0x00 0x00 NOPX + 9840 0x00 0x00 NOPX + 9842 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9844 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9848 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9852 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9856 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9862 0x10 0xc2 0x14 0x98 AND r1, r3, r1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9866 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9870 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 9874 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 9888 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 9888 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 9894 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 9898 0x00 0x13 0x10 0x00 0x01 0x04 JL #9760 +.delay_slot + 9904 0x0f 0xff 0x9d 0x98 ST p7, [sp, #-4] +.delay_slot + 9908 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.delay_slot +.swstall delay_slot + 9912 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9914 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 9916 0x00 0x01 0x67 0x98 NOPA +.return_address + 9920 0x07 0xf8 0x39 0x18 LDA lr, [sp, #-8] + 9924 0x00 0x00 NOPX + 9926 0x00 0x00 NOPX + 9928 0x00 0x00 NOPX + 9930 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9932 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9934 0x07 0xff 0x99 0x18 LDA p7, [sp, #-4] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9938 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9942 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9944 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9946 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9948 0x10 0x20 0x09 0x18 MOVX r16, #2 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9952 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 9968 +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function_start + 9968 0x18 0x16 0xc0 0xf8 MOV r0, p3 + 9972 0x1b 0x60 0x07 0x18 ADD.NC p3, r0, #14 + 9976 0x03 0x1c 0x52 0x98 LDA.s16 r2, [p3], #2 + 9980 0x03 0x04 0x96 0x98 LDA r4, [p3] + 9984 0x00 0x00 NOPX + 9986 0x00 0x00 NOPX + 9988 0x00 0x00 NOPX + 9990 0x00 0x00 NOPX + 9992 0x10 0x06 0x09 0x18 MOVX r3, #2 + 9996 0x00 0x10 0x00 0x00 0x01 0xc4 PADDXM [sp], #128 + 10002 0x10 0xc6 0x4c 0x98 LTU r3, r3, r4 + 10006 0x00 0x01 0x00 0x06 0x04 0xf6 0x10 0x00 0x60 0xba MOVA r1, #0; JNZ r3, #10160 +.delay_slot + 10016 0x18 0x05 0x72 0xf8 VBCST.16 x0, r1 +.delay_slot + 10020 0x18 0x5e 0xc0 0xf8 MOV r1, p7 +.delay_slot + 10024 0x1f 0x65 0xe0 0xf8 MOV p7, sp +.delay_slot + 10028 0xff 0xf2 0x0a 0xdd 0x81 0xf4 PADDB [p7], #-64; MOV p5, p7 +.delay_slot + 10034 0x0f 0x04 0x13 0x18 VST x0, [p7] + 10038 0x01 0x82 0x84 0x80 0x0b 0x00 0x04 0xb9 0x72 0xba MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 + 10048 0x80 0x01 0x54 0x01 0x01 0x54 LDA.u8 r0, [p4, dj0]; MOV m2, #64 + 10054 0x00 0x00 NOPX + 10056 0x00 0x00 NOPX + 10058 0x00 0x00 NOPX + 10060 0x00 0x00 NOPX + 10062 0x00 0x00 NOPX + 10064 0x00 0x00 NOPX + 10066 0x00 0x13 0xc0 0x40 0x01 0x84 JNZ r0, #10112 +.delay_slot + 10072 0x18 0x00 0x00 0xb8 MOV m0, #0 +.delay_slot + 10076 0x00 0x07 0xc8 0xc8 0xc8 0x44 MOVXM p4, #509028 +.delay_slot +.swstall delay_slot + 10082 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10084 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10086 0x00 0x00 NOPX + 10088 0x00 0x04 0x80 0x00 0x04 0xf2 0x00 0x00 0x20 0xba MOVA m1, #0; J #10128 +.delay_slot +.swstall delay_slot + 10098 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10104 0x00 0x00 NOPX +.delay_slot + 10106 0x00 0x2c 0xf0 0x08 0x26 0x0c NOPA; VST x0, [p0] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 + 10112 0x19 0x00 0x80 0xb8 MOV m1, #64 + 10116 0x00 0x2c 0xf0 0x00 0x21 0x04 0x13 0x01 0x00 0x00 0x50 0xf6 NOPA; NOPB; VST x0, [p1]; MOV m2, #0 +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 10128 0x00 0x14 0x18 0x00 0x00 0x84 J #10288 +.delay_slot + 10134 0x13 0x91 0x60 0x03 0xb0 0x60 0x70 0x02 MOVS p0, p7; MOV p7, p0 +.delay_slot +.swstall delay_slot + 10142 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10144 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10146 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10148 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 + 10160 0x10 0x04 0x0d 0x18 MOVX r2, #3 + 10164 0x10 0x84 0x47 0x98 EQ r2, r2, r4 + 10168 0x10 0x13 0xf0 0x40 0x01 0x84 JNZ r2, #10208 +.delay_slot + 10174 0x3f 0x80 0x00 0x20 0x00 0x44 MOVXM r0, #1065353216 +.delay_slot + 10180 0x00 0x07 0xc8 0xc8 0xc8 0x44 MOVXM p4, #509028 +.delay_slot +.swstall delay_slot + 10186 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10188 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10190 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10192 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x2f 0xe0 0x00 0x08 0x00 0x10 0x00 0x00 0xe1 NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10208 0x80 0x80 0x50 0x02 0xd2 0x00 0x47 0xbe 0x58 0xba LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10218 0x18 0x00 0x80 0xb8 MOV m0, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10222 0x19 0x00 0x00 0xb8 MOV m1, #0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10226 0x1a 0x00 0x80 0xb8 MOV m2, #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10230 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10232 0x18 0x00 0x11 0x78 VINSERT.32 x0, x0, #0, r0 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10236 0xa0 0x02 0xe2 0x01 0x25 0xd4 ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10242 0x10 0x3a 0x80 0x18 MOVX crRnd, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10246 0x08 0x40 0x96 0x18 VCONV.bf16.fp32 wl0, bmll1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10250 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10252 0x18 0x01 0x01 0xb8 VEXTRACT.16 r0, x0, #0, vaddSign0 + 10256 0x00 0x00 NOPX + 10258 0x00 0x00 NOPX + 10260 0x05 0x00 0x12 0x98 LDA.s16 r0, [p5, dj0] + 10264 0x00 0x00 NOPX + 10266 0x00 0x00 NOPX + 10268 0x00 0x00 NOPX + 10270 0x00 0x00 NOPX + 10272 0x00 0x00 NOPX + 10274 0x00 0x00 NOPX + 10276 0x18 0x01 0x72 0xf8 VBCST.16 x0, r0 + 10280 0x00 0x00 NOPX + 10282 0x00 0x2c 0xff 0xf8 0x66 0x0c NOPA; VST x0, [sp, #-64] +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 + 10288 0x78 0x8a 0xde 0x50 0xe8 0x00 0x00 0x08 0x7c 0x50 0x10 0xb6 LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10400 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10300 0xff 0x63 0x02 0x90 0x68 0x00 0x00 0x09 0xbc 0x68 0x10 0xb6 MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10448 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10312 0x01 0x05 0x7e 0x50 0xe8 0x00 0xf1 0x12 VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10320 0x80 0x90 0x52 0x90 0x68 0x3c LDA.s8 r4, [p4]; VLDB x0, [p1], m2 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10326 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10328 0x00 0x08 0x2b 0x98 VLDA.CONV.fp32.bf16 cml0, [p0], m0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10332 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10334 0x10 0x84 0x3d 0x98 LSHL r2, r2, r3 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10338 0x05 0x71 0x7e 0x86 0x01 0x02 0x01 0x62 ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10346 0x29 0x03 0x7e 0x50 0xe8 0x3c VLDA x0, [p1], m2; VLDB x1, [p7], m1 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10352 0x01 0x05 0x70 0x00 0x20 0x01 0x5b 0x09 0xd4 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10368 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10384 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10400 0x29 0x03 0x7e 0x50 0xe8 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10416 0x01 0x05 0x70 0x00 0x22 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10432 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10448 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x08 0x10 0x0b NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10464 0xff 0xf0 0x00 0x00 0x01 0xc4 PADDXM [sp], #-128 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10470 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10474 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10476 0x01 0x02 0x01 0x48 VMAC.f dm1, dm0, x1, x0, r0 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10480 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10482 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10486 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 10490 0x1f 0x60 0xa0 0xf8 MOV p7, r1 +.delay_slot +.swstall delay_slot + 10494 0x00 0x00 NOPX +.delay_slot + 10496 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 10500 0x00 0x00 NOPX +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + +.text_segment PM 10512 +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function_start + 10512 0x01 0x82 0x83 0x88 0x8b 0x00 0x60 0xf0 0x72 0xba MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr + 10522 0x40 0x01 0x54 0xc5 0x81 0xd4 LDA.u8 r0, [p2, dj0]; MOV p2, p1 + 10528 0x00 0x00 NOPX + 10530 0x00 0x00 NOPX + 10532 0x00 0x00 NOPX + 10534 0x00 0x00 NOPX + 10536 0x00 0x00 NOPX + 10538 0x00 0x00 NOPX + 10540 0x00 0x14 0xb8 0x00 0x01 0x84 JZ r0, #10608 +.delay_slot + 10546 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 +.delay_slot + 10552 0x18 0x55 0xe0 0xf8 MOV r1, sp +.delay_slot + 10556 0x19 0x60 0xe0 0x18 ADD.NC p1, r1, #-64 +.delay_slot + 10560 0x09 0x07 0x2b 0x18 VST sfh, [p1] +.delay_slot +.swstall delay_slot + 10564 0x00 0x00 NOPX +.no_stack_arguments + 10566 0x00 0x13 0x78 0x00 0x01 0x04 JL #9968 +.delay_slot +.swstall delay_slot + 10572 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10574 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10576 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10578 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10580 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.return_address + 10592 0x00 0x14 0xc8 0x00 0x00 0x84 J #10640 +.delay_slot +.swstall delay_slot + 10598 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10600 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10602 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10604 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10606 0x00 0x00 NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.no_stack_arguments + 10608 0x00 0x13 0x78 0x00 0x01 0x04 JL #9968 +.delay_slot + 10614 0x10 0x91 0x60 0x00 0xb0 0x60 0x70 0x02 MOVS p0, p1; MOV p1, p0 +.delay_slot +.swstall delay_slot + 10622 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10624 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10626 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10628 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x00 0xad 0x81 0x36 NOPA; NOPB; NOPS; NOPX +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.return_address + 10640 0x1f 0x71 0x80 0xf8 MOV lr, dc0 + 10644 0x10 0x28 0x00 0x18 RET lr +.delay_slot + 10648 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 10654 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10656 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10658 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 10660 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 10672 +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function_start + 10672 0x00 0x07 0xc6 0xc8 0x80 0x44 MOVXM p3, #508992 + 10678 0x60 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p3]; MOV r17, CORE_ID + 10684 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 10690 0xff 0x63 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p6, [sp, #-8]; MOV r0, r15 + 10698 0xff 0x82 0xb0 0x00 0x01 0xf3 0x32 0x28 0x11 0x3a ST r0, [sp, #-4]; MOVXM p6, #509008 + 10708 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 + 10712 0x00 0x00 NOPX + 10714 0x00 0x00 NOPX + 10716 0x80 0x15 0x40 0x40 0x01 0x84 JNZ r16, #10880 +.delay_slot + 10722 0x14 0x62 0x90 0x18 EXTEND.u8 r17, r17 +.delay_slot + 10726 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 10730 0x0f 0xf4 0x3d 0x98 ST lr, [sp, #-12] +.delay_slot + 10734 0xc0 0xc6 0x30 0x03 0x30 0x60 0x70 0x02 ST r17, [p6]; MOV p6, p0 +.delay_slot + 10742 0x00 0x07 0xc0 0xca 0x80 0x44 MOVXM p0, #509248 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10748 0x00 0x07 0xc4 0xc8 0xc8 0x44 MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10754 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509024 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10764 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10766 0x00 0x13 0x50 0x00 0x01 0x04 JL #9888 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10772 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10774 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10776 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 10780 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 10784 0x00 0x2c 0xf0 0x00 0x22 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV +.return_address + 10800 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 + 10806 0x40 0xc2 0xd0 0x00 0x01 0xf1 0x32 0xa0 0x10 0xba LDA r16, [p2]; MOVXM p2, #509248 + 10816 0x40 0xc6 0xd0 0x00 0x01 0xf1 0x32 0xa0 0x10 0xba LDA r17, [p2]; MOVXM p2, #509248 + 10826 0x4a 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2a 0x10 0xba LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 + 10836 0x00 0x00 NOPX + 10838 0x00 0x00 NOPX + 10840 0x00 0x15 0x48 0x00 0x00 0x84 J #10896 +.delay_slot + 10846 0x00 0x07 0xc0 0xc8 0xb8 0x44 MOVXM p0, #509020 +.delay_slot +.swstall delay_slot + 10852 0x00 0x00 NOPX +.delay_slot + 10854 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 10858 0x00 0x2c 0xf0 0x0c 0xa3 0x0c NOPA; ST r18, [p0] +.delay_slot + 10864 0x00 0x2c 0xf0 0x00 0x21 0x06 0x11 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 + 10880 0x00 0x2c 0xf0 0x00 0x22 0x80 0x8b 0x00 0x01 0xf0 0xb2 0x2a 0x10 0x00 0x00 0xe1 NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 + 10896 0x73 0x91 0x60 0x03 0xb3 0xc3 0x00 0x02 MOVS p3, p7; ADD.NC p7, r15, #12 + 10904 0xff 0xee 0xd0 0x00 0x01 0xf0 0x32 0x20 0x10 0xba LDA r27, [p7], #-4; MOVXM p0, #508992 + 10914 0x07 0xfe 0x16 0x98 LDA r16, [p7], #-4 + 10918 0x07 0xfe 0x36 0x98 LDA r17, [p7], #-4 + 10922 0x07 0x46 0x56 0x98 LDA r18, [p7, #16] + 10926 0x00 0x00 NOPX + 10928 0x00 0x00 NOPX + 10930 0x00 0x00 NOPX + 10932 0x00 0x00 NOPX + 10934 0x00 0x00 NOPX + 10936 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 10940 0x0f 0x06 0x11 0x98 ST r16, [p7] + 10944 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 10948 0x00 0x00 NOPX + 10950 0x00 0x00 NOPX + 10952 0x00 0x00 NOPX + 10954 0x14 0x93 0x08 0x18 ACQ r18, r16 + 10958 0x04 0x00 0xa7 0xad 0x81 0xe4 MOVX r16, #1; MOV r15, p3 + 10964 0x00 0x00 NOPX + 10966 0x00 0x00 NOPX + 10968 0x00 0x06 0x36 0x98 LDA r17, [p0] + 10972 0xc0 0xca 0xdc 0xdd 0x81 0xd4 LDA r18, [p6]; MOV p6, p7 + 10978 0x01 0x06 0x76 0x98 LDA r19, [p1] + 10982 0x07 0x5c 0x9e 0x98 LDA p1, [p7], #20 + 10986 0x00 0x00 NOPX +.no_stack_arguments + 10988 0x00 0x14 0x88 0x00 0x01 0x04 JL #10512 +.delay_slot +.swstall delay_slot + 10994 0x00 0x00 NOPX +.delay_slot + 10996 0x14 0x62 0x07 0x18 ADD r17, r17, #1 +.delay_slot + 11000 0x08 0x06 0x31 0x98 ST r17, [p0] +.delay_slot + 11004 0x14 0xe1 0x0d 0x98 LSHL r16, r19, r16 +.delay_slot + 11008 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xa0 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV +.return_address + 11024 0xca 0xc6 0xd0 0x00 0x01 0xf3 0x32 0x20 0x10 0xba LDA r17, [p6, #20]; MOVXM p6, #508992 + 11034 0x10 0x20 0x05 0x18 MOVX r16, #1 + 11038 0x00 0x00 NOPX + 11040 0x00 0x00 NOPX + 11042 0x00 0x00 NOPX + 11044 0x00 0x00 NOPX + 11046 0x00 0x00 NOPX + 11048 0x14 0x51 0x08 0x18 REL r17, r16 + 11052 0xfc 0xce 0xd0 0x00 0x01 0xf1 0x32 0x2e 0x10 0xba LDA r19, [p7, #-8]; MOVXM p2, #509020 + 11062 0x06 0x06 0x36 0x98 LDA r17, [p6] + 11066 0x02 0x06 0x56 0x98 LDA r18, [p2] + 11070 0x00 0x00 NOPX + 11072 0x00 0x00 NOPX + 11074 0x00 0x00 NOPX + 11076 0x00 0x00 NOPX + 11078 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 11082 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 11086 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 11090 0x80 0x15 0xb8 0x40 0x01 0x84 JNZ r16, #11120 +.delay_slot +.swstall delay_slot + 11096 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11098 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11100 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11102 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11104 0x00 0x00 NOPX + 11106 0x10 0x20 0x01 0x18 MOVX r16, #0 + 11110 0x00 0x2c 0xf6 0x06 0x11 0x80 0x00 0x00 0x00 0x7a NOPA; ST r16, [p6]; NOPX +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 + 11120 0x07 0xf4 0x39 0x18 LDA lr, [sp, #-12] + 11124 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 11128 0x00 0x00 NOPX + 11130 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11132 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11134 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11138 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11140 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11144 0x1f 0x67 0xa0 0xf8 MOV p7, r15 +.delay_slot + 11148 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 11154 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11156 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11158 0x00 0x00 NOPX +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + +.text_segment PM 11168 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function_start + 11168 0x23 0x85 0xd0 0x08 0x20 0x0b 0x08 0x00 0x58 0xba LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 + 11178 0x17 0x80 0x01 0x18 MOVX r0, #-128 + 11182 0x00 0x00 NOPX + 11184 0x00 0x00 NOPX + 11186 0x00 0x00 NOPX + 11188 0x00 0x00 NOPX + 11190 0x00 0x00 NOPX + 11192 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11196 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 11200 0x00 0x00 NOPX + 11202 0x00 0x00 NOPX + 11204 0x00 0x00 NOPX + 11206 0x00 0x00 NOPX + 11208 0x00 0x00 NOPX + 11210 0x00 0x00 NOPX + 11212 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11216 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 11220 0x00 0x00 NOPX + 11222 0x00 0x00 NOPX + 11224 0x00 0x00 NOPX + 11226 0x00 0x00 NOPX + 11228 0x00 0x00 NOPX + 11230 0x00 0x00 NOPX + 11232 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 11236 0x01 0x14 0x76 0x98 LDA r3, [p1, #4] + 11240 0x00 0x00 NOPX + 11242 0x00 0x00 NOPX + 11244 0x00 0x00 NOPX + 11246 0x00 0x00 NOPX + 11248 0x00 0x00 NOPX + 11250 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11252 0x08 0x4c 0x71 0x98 ST r3, [p0], #16 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11256 0x00 0x04 0x17 0x18 ST.s16 r0, [p0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11260 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11264 0x00 0x00 0xf0 0xbe 0x00 0x44 MOVXM r1, #65280 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11270 0x10 0xc2 0x14 0x98 AND r1, r3, r1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11274 0x10 0x76 0x27 0x98 EQ r27, r1, r2 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11278 0x10 0x01 0x82 0x18 SEL.EQZ r0, r0, r24, r27 +.delay_slot +.swstall delay_slot + 11282 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 11296 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function_start + 11296 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11302 0x0f 0xf8 0x3d 0x98 ST lr, [sp, #-8] +.no_stack_arguments + 11306 0x00 0x15 0xd0 0x00 0x01 0x04 JL #11168 +.delay_slot + 11312 0x18 0x17 0xa0 0xf8 MOV r0, r15 +.delay_slot + 11316 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] +.delay_slot + 11320 0x1b 0xd0 0xc0 0xf8 MOV r15, p0 +.delay_slot +.swstall delay_slot + 11324 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11326 0x00 0x00 NOPX +.return_address + 11328 0xff 0x07 0x20 0x01 0x00 0x68 0x33 0xc4 0x08 0xba LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 + 11338 0x01 0xe2 0x80 0x01 0x80 0x08 0x07 0xfd 0x58 0xba MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 + 11348 0xff 0xbe 0x20 0x0a 0x11 0x80 0x07 0xa0 0x01 0x7a LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 + 11358 0x00 0x06 0x4a 0x98 LDA.u8 r18, [p0] + 11362 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11364 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11366 0x00 0x02 0x17 0x18 ST.s16 r16, [p0, dj0] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11370 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11374 0x10 0x22 0x05 0x18 MOVX r17, #1 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11378 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11384 0x14 0x77 0x27 0x98 EQ r27, r17, r18 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11388 0x14 0x21 0x82 0x18 SEL.EQZ r16, r16, r24, r27 +.delay_slot +.swstall delay_slot + 11392 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + +.text_segment PM 11408 +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.tail_call +.function_start + 11408 0x00 0x13 0x78 0x00 0x00 0x84 J #9968 +.delay_slot +.swstall delay_slot + 11414 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11416 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11418 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11420 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11422 0x00 0x00 NOPX +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 11424 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992 + 11430 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 11436 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 11442 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 11452 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 11460 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 11464 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 11468 0x00 0x00 NOPX + 11470 0x80 0x16 0xb0 0x40 0x01 0x84 JNZ r16, #11616 +.delay_slot + 11476 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 11480 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 +.delay_slot + 11486 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 11494 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 11498 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0xc0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509312 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11508 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x32 0x11 0x3a MOVS p0, p7; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11518 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509024 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11528 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11530 0x00 0x16 0x10 0x00 0x01 0x04 JL #11296 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11536 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11538 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11540 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 11544 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 11548 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 11552 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x28 0x10 0xba LDA r16, [p7]; MOVXM p1, #509008 + 11562 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb2 0x2a 0x10 0xba LDA r17, [p1]; MOVXM p3, #509012 + 11572 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2c 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #509016 + 11582 0x00 0x00 NOPX + 11584 0x00 0x00 NOPX + 11586 0x00 0x00 NOPX + 11588 0x00 0x16 0xb8 0x00 0x00 0x84 J #11632 +.delay_slot + 11594 0x00 0x07 0xc4 0xc8 0xb8 0x44 MOVXM p2, #509020 +.delay_slot + 11600 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 11604 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 11608 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 11612 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 11616 0x00 0x07 0xc6 0xc8 0xa8 0x44 MOVXM p3, #509012 + 11622 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb2 0x2c 0x10 0xba NOPA; MOVXM p1, #509016 +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 11632 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 11636 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508992 + 11646 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 11650 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 11654 0x02 0x06 0x56 0x98 LDA r18, [p2] + 11658 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 11662 0x00 0x00 NOPX + 11664 0x00 0x00 NOPX + 11666 0x00 0x00 NOPX + 11668 0x00 0x00 NOPX + 11670 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 11674 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 11680 0x0a 0x06 0x11 0x98 ST r16, [p2] + 11684 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 11688 0x00 0x00 NOPX + 11690 0x00 0x00 NOPX + 11692 0x00 0x00 NOPX + 11694 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 11698 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 11702 0x00 0x00 NOPX + 11704 0x00 0x00 NOPX + 11706 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 11710 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 11714 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 11718 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 11722 0x00 0x00 NOPX + 11724 0x00 0x00 NOPX + 11726 0x00 0x00 NOPX + 11728 0x00 0x00 NOPX + 11730 0x00 0x00 NOPX + 11732 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 11736 0x0a 0x06 0x31 0x98 ST r17, [p2] + 11740 0x00 0x00 NOPX + 11742 0x00 0x00 NOPX + 11744 0x00 0x00 NOPX + 11746 0x00 0x00 NOPX + 11748 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 11752 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 11762 0x00 0x00 NOPX + 11764 0x00 0x00 NOPX + 11766 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 11770 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 11776 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11782 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11786 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11790 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11794 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11796 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11800 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11802 0x00 0x16 0x48 0x00 0x01 0x04 JL #11408 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11808 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 11812 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 11816 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 11820 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 11824 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 11840 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 11850 0x00 0x07 0xcc 0xc8 0xb8 0x44 MOVXM p6, #509020 + 11856 0x00 0x00 NOPX + 11858 0x00 0x00 NOPX + 11860 0x00 0x00 NOPX + 11862 0x00 0x00 NOPX + 11864 0x00 0x00 NOPX + 11866 0x14 0x51 0x08 0x18 REL r17, r16 + 11870 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 11874 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 11878 0x00 0x00 NOPX + 11880 0x00 0x00 NOPX + 11882 0x00 0x00 NOPX + 11884 0x00 0x00 NOPX + 11886 0x00 0x00 NOPX + 11888 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 11892 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 11898 0x00 0x00 NOPX + 11900 0x00 0x00 NOPX + 11902 0x00 0x00 NOPX + 11904 0x00 0x00 NOPX + 11906 0x00 0x00 NOPX + 11908 0x00 0x00 NOPX + 11910 0x14 0x51 0x08 0x18 REL r17, r16 + 11914 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb2 0x20 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508992 + 11924 0x06 0x06 0x56 0x98 LDA r18, [p6] + 11928 0x01 0x06 0x36 0x98 LDA r17, [p1] + 11932 0x00 0x00 NOPX + 11934 0x00 0x00 NOPX + 11936 0x00 0x00 NOPX + 11938 0x00 0x00 NOPX + 11940 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 11944 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 11948 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 11952 0x80 0x17 0x68 0x40 0x01 0x84 JNZ r16, #11984 +.delay_slot +.swstall delay_slot + 11958 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11960 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11962 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11964 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 11966 0x00 0x00 NOPX + 11968 0x10 0x20 0x01 0x18 MOVX r16, #0 + 11972 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 11984 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 11988 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 11992 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 11996 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 11998 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12002 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12004 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12006 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12010 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 12014 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 12020 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12024 0x00 0x00 NOPX +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 12032 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function_start + 12032 0x05 0x00 0x00 0x21 0x01 0x64 RET lr; MOV r0, #64 +.delay_slot + 12038 0x18 0x50 0xc0 0xf8 MOV r1, p0 +.delay_slot + 12042 0x18 0x60 0x90 0x18 ADD.NC p0, r1, #32 +.delay_slot + 12046 0x08 0x04 0x11 0x98 ST r0, [p0] +.delay_slot + 12050 0x08 0x14 0x11 0x98 ST r0, [p0, #4] +.delay_slot +.swstall delay_slot + 12054 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 12064 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function_start + 12064 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 12068 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 12074 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] + 12078 0x00 0x00 NOPX + 12080 0x00 0x00 NOPX + 12082 0x00 0x00 NOPX + 12084 0x00 0x00 NOPX + 12086 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 12090 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 12094 0x00 0x00 NOPX + 12096 0x00 0x00 NOPX + 12098 0x00 0x00 NOPX + 12100 0x00 0x00 NOPX + 12102 0x00 0x00 NOPX + 12104 0x00 0x00 NOPX + 12106 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 12110 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 12114 0x00 0x00 NOPX + 12116 0x00 0x00 NOPX + 12118 0x00 0x00 NOPX + 12120 0x00 0x00 NOPX + 12122 0x00 0x00 NOPX + 12124 0x00 0x00 NOPX + 12126 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 12130 0x01 0x14 0x2e 0x98 LDA el0, [p1, #4] + 12134 0x00 0x00 NOPX + 12136 0x00 0x00 NOPX +.no_stack_arguments + 12138 0x00 0x17 0x80 0x00 0x01 0x04 JL #12032 +.delay_slot + 12144 0x0f 0xfb 0x9d 0x98 ST p7, [sp, #-8] +.delay_slot +.swstall delay_slot + 12148 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 12150 0x00 0x00 NOPX +.delay_slot + 12152 0x08 0xdc 0x29 0x98 ST el0, [p0], #-12 +.delay_slot + 12156 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.return_address + 12160 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 12164 0x00 0x00 NOPX + 12166 0x00 0x00 NOPX + 12168 0x00 0x00 NOPX + 12170 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12172 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12174 0x07 0xfb 0x99 0x18 LDA p7, [sp, #-8] +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12178 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12182 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12184 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12186 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12188 0x10 0x20 0x01 0x18 MOVX r16, #0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12192 0xe8 0xc2 0x30 0x3f 0xfe 0x00 0x00 0x00 0x71 0x3a ST r16, [p7, #16]; PADDXM [sp], #-64 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + +.text_segment PM 12208 +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function_start + 12208 0x04 0x00 0x80 0x00 0x00 0x0c 0x78 0x30 0x10 0xba MOVA m0, #32; MOVXM ls, #12384 + 12218 0x61 0x0e 0xd0 0x00 0x00 0x0d 0xb8 0x38 0x10 0xba LDA r3, [p3], m0; MOVXM le, #12400 + 12228 0x60 0x90 0xd0 0x3e 0x17 0x48 0x0b 0x3c 0x58 0xba LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 + 12238 0x62 0x80 0xd0 0x00 0x01 0xf2 0x32 0x32 0x10 0xba LDA m0, [p3, #4]; MOVXM p4, #509028 + 12248 0x04 0x04 0x42 0x98 LDA.s8 r2, [p4] + 12252 0x00 0x00 NOPX + 12254 0x00 0x00 NOPX + 12256 0x00 0x00 NOPX + 12258 0x10 0xc2 0x1d 0x98 LSHL r1, r3, r1 + 12262 0x05 0x0e 0x8a 0xe1 0xf9 0x34 VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12268 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12274 0x21 0x1b 0x70 0x50 0xe8 0xba 0x80 0x12 VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12282 0x21 0x13 0x70 0x50 0x68 0x3c VLDA x2, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12288 0x21 0x1b 0x70 0x50 0xe8 0x3c VLDA x3, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12294 0x01 0x08 0x9b 0x98 VLDA x2, [p1], m0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12298 0x21 0x1b 0x70 0x50 0x68 0x3c VLDA x3, [p1], m0; VLDB x0, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12304 0x21 0x13 0x70 0x50 0xe8 0x3c VLDA x2, [p1], m0; VLDB x1, [p0], m1 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12310 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12320 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12330 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12340 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12350 0x21 0x1b 0x70 0x28 0x34 0x1d 0x00 0xe2 0x41 0x4a VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12360 0x21 0x13 0x70 0x28 0x74 0x1d 0x01 0xe0 0x61 0x4a VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12370 0x21 0x1b 0x70 0x50 0x68 0x00 0x00 0x08 0x70 0x8c 0x00 0xe2 0x41 0x6e VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.loop_nesting 1 +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12384 0x21 0x13 0x70 0x50 0xea 0x1c 0xa3 0x00 0x00 0x00 0x01 0xa5 0x78 0x0f 0x03 0x0b VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12400 0x21 0x1b 0x70 0x50 0x6a 0x1c 0x23 0x00 0x00 0x00 0x01 0xa5 0x78 0x07 0x12 0x0b VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12416 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12424 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12432 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12440 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12448 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12456 0x43 0x84 0x60 0x02 0x00 0xe2 0x41 0x62 VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12464 0x43 0x94 0x60 0x02 0x01 0xe0 0x61 0x62 VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12472 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12476 0x43 0x94 0x60 0x50 0x00 0x5c VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12482 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12486 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot + 12490 0x0a 0x1c 0x23 0x18 VST.CONV.bf16.fp32 cml0, [p2], #64 +.delay_slot + 12494 0x0a 0x1c 0xa3 0x18 VST.CONV.bf16.fp32 cml1, [p2], #64 +.delay_slot +.swstall delay_slot + 12498 0x00 0x00 NOPX +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + +.text_segment PM 12512 +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function_start + 12512 0x00 0x07 0xc8 0xc8 0x80 0x44 MOVXM p4, #508992 + 12518 0x80 0xc2 0xd8 0xb5 0xc1 0xd4 LDA r16, [p4]; MOV r17, CORE_ID + 12524 0x00 0x08 0x00 0x00 0x01 0xc4 PADDXM [sp], #64 + 12530 0xff 0x3a 0xb0 0x23 0x14 0x81 0xca 0x60 0x79 0x3a ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 + 12540 0xfd 0x83 0xb0 0x00 0x0b 0xd0 0x70 0x02 ST p0, [sp, #-20]; MOV r0, r15 + 12548 0x0f 0xfc 0x15 0x98 ST r0, [sp, #-4] + 12552 0x0f 0xf0 0x3d 0x98 ST lr, [sp, #-16] + 12556 0x00 0x00 NOPX + 12558 0x80 0x18 0xd0 0x40 0x01 0x84 JNZ r16, #12704 +.delay_slot + 12564 0x14 0x63 0xfb 0x18 ADD r17, r17, #-2 +.delay_slot + 12568 0x00 0x07 0xc4 0xc8 0xa0 0x44 MOVXM p2, #509008 +.delay_slot + 12574 0x40 0xc6 0x30 0x01 0x37 0x60 0x70 0x02 ST r17, [p2]; MOV p2, p7 +.delay_slot + 12582 0x1b 0xd6 0xc0 0xf8 MOV r15, p3 +.delay_slot + 12586 0xfe 0xa3 0xb0 0x00 0x01 0xf3 0xb2 0xe0 0x11 0x3a ST p2, [sp, #-12]; MOVXM p7, #509376 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12596 0x13 0x91 0x60 0x00 0x01 0xf1 0x32 0x32 0x11 0x3a MOVS p0, p7; MOVXM p2, #509028 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12606 0x40 0xc0 0xe0 0x00 0x01 0xf1 0x32 0x30 0x10 0xba ST.s8 r16, [p2]; MOVXM p2, #509024 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12616 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12618 0x00 0x17 0x90 0x00 0x01 0x04 JL #12064 +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12624 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12626 0x00 0x00 NOPX +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12628 0x10 0x20 0x31 0x18 MOVX r16, #12 +.delay_slot + 12632 0x10 0x20 0x05 0x18 MOVX r16, #1 +.delay_slot + 12636 0x0a 0x06 0x11 0x98 ST r16, [p2] +.return_address + 12640 0xe0 0xc2 0xd0 0x00 0x01 0xf0 0xb2 0x28 0x10 0xba LDA r16, [p7]; MOVXM p1, #509008 + 12650 0x20 0xc6 0xd0 0x00 0x01 0xf1 0xb2 0x2a 0x10 0xba LDA r17, [p1]; MOVXM p3, #509012 + 12660 0xea 0xcb 0x50 0x00 0x01 0xf0 0xb2 0x2c 0x10 0xba LDA.u16 r18, [p7, #10]; MOVXM p1, #509016 + 12670 0x00 0x00 NOPX + 12672 0x00 0x00 NOPX + 12674 0x00 0x00 NOPX + 12676 0x00 0x18 0xd8 0x00 0x00 0x84 J #12720 +.delay_slot + 12682 0x00 0x07 0xc4 0xc8 0xb8 0x44 MOVXM p2, #509020 +.delay_slot + 12688 0x14 0x61 0x0f 0x98 MUL r16, r17, r16 +.delay_slot + 12692 0x0a 0x06 0x51 0x98 ST r18, [p2] +.delay_slot + 12696 0x0b 0x06 0x11 0x98 ST r16, [p3] +.delay_slot + 12700 0x09 0x06 0x11 0x98 ST r16, [p1] +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 + 12704 0x00 0x07 0xc6 0xc8 0xa8 0x44 MOVXM p3, #509012 + 12710 0x00 0x2c 0xf0 0x00 0x01 0xf0 0xb2 0x2c 0x10 0xba NOPA; MOVXM p1, #509016 +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 + 12720 0x18 0x67 0x86 0x18 ADD.NC p0, r15, #12 + 12724 0x1f 0xee 0xd0 0x00 0x01 0xf1 0x32 0x20 0x10 0xba LDA r27, [p0], #-4; MOVXM p2, #508992 + 12734 0x00 0xfe 0x16 0x98 LDA r16, [p0], #-4 + 12738 0x00 0xfe 0x36 0x98 LDA r17, [p0], #-4 + 12742 0x02 0x06 0x56 0x98 LDA r18, [p2] + 12746 0x00 0x46 0x76 0x98 LDA r19, [p0, #16] + 12750 0x00 0x00 NOPX + 12752 0x00 0x00 NOPX + 12754 0x00 0x00 NOPX + 12756 0x00 0x00 NOPX + 12758 0x14 0x61 0x02 0x18 SEL.EQZ r16, r17, r16, r27 + 12762 0x00 0xc2 0x39 0x40 0x0e 0x5c ST r16, [p0]; ADD r16, r18, #1 + 12768 0x0a 0x06 0x11 0x98 ST r16, [p2] + 12772 0x17 0xe0 0xfd 0x18 MOVX r16, #-1 + 12776 0x00 0x00 NOPX + 12778 0x00 0x00 NOPX + 12780 0x00 0x00 NOPX + 12782 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 12786 0x1a 0x67 0x06 0x18 ADD.NC p2, r14, #12 + 12790 0x00 0x00 NOPX + 12792 0x00 0x00 NOPX + 12794 0x02 0xff 0x76 0x98 LDA r27, [p2], #-4 + 12798 0x02 0xfe 0x36 0x98 LDA r17, [p2], #-4 + 12802 0x02 0xfe 0x56 0x98 LDA r18, [p2], #-4 + 12806 0x02 0x56 0x76 0x98 LDA r19, [p2, #20] + 12810 0x00 0x00 NOPX + 12812 0x00 0x00 NOPX + 12814 0x00 0x00 NOPX + 12816 0x00 0x00 NOPX + 12818 0x00 0x00 NOPX + 12820 0x14 0xa3 0x12 0x18 SEL.EQZ r17, r18, r17, r27 + 12824 0x0a 0x06 0x31 0x98 ST r17, [p2] + 12828 0x00 0x00 NOPX + 12830 0x00 0x00 NOPX + 12832 0x00 0x00 NOPX + 12834 0x00 0x00 NOPX + 12836 0x14 0xd3 0x08 0x18 ACQ r19, r16 + 12840 0xd1 0x11 0x60 0x01 0x00 0x29 0xce 0x60 0x79 0x3a MOVS p6, p2; MOVX r16, #1; MOV r14, p6 + 12850 0x00 0x00 NOPX + 12852 0x00 0x00 NOPX + 12854 0x07 0xee 0x19 0x18 LDA p4, [sp, #-20] + 12858 0x60 0xc6 0xdf 0xd8 0x3b 0x0c LDA r17, [p3]; ST p0, [sp, #-20] + 12864 0x20 0xd2 0xd6 0xdd 0x81 0xd4 LDA r20, [p1]; MOV p3, p7 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12870 0x02 0x4e 0x56 0x98 LDA r18, [p2], #16 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12874 0x00 0x5d 0x1e 0x98 LDA p2, [p0], #20 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12878 0x1f 0x60 0xc0 0xf8 MOV p7, p0 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12882 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12884 0x04 0x06 0x76 0x98 LDA r19, [p4] +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12888 0x00 0x00 NOPX +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12890 0x00 0x17 0xd8 0x00 0x01 0x04 JL #12208 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12896 0x1b 0xd4 0xc0 0xf8 MOV r15, p2 +.delay_slot + 12900 0x14 0x63 0x0d 0x98 LSHL r17, r17, r16 +.delay_slot + 12904 0x15 0x21 0x0d 0x98 LSHL r16, r20, r16 +.delay_slot + 12908 0x19 0x69 0x41 0x58 ADD.NC p1, r18, r16 +.delay_slot + 12912 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x34 0xe2 0xa8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV +.return_address + 12928 0xc8 0xc6 0xd0 0x01 0x00 0x28 0xb3 0xd0 0x78 0xba LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 + 12938 0x00 0x07 0xcc 0xc8 0xb8 0x44 MOVXM p6, #509020 + 12944 0x00 0x00 NOPX + 12946 0x00 0x00 NOPX + 12948 0x00 0x00 NOPX + 12950 0x00 0x00 NOPX + 12952 0x00 0x00 NOPX + 12954 0x14 0x51 0x08 0x18 REL r17, r16 + 12958 0x01 0xf6 0x36 0x98 LDA r17, [p1, #-4] + 12962 0x07 0xed 0x19 0x18 LDA p2, [sp, #-20] + 12966 0x00 0x00 NOPX + 12968 0x00 0x00 NOPX + 12970 0x00 0x00 NOPX + 12972 0x00 0x00 NOPX + 12974 0x00 0x00 NOPX + 12976 0x14 0x23 0x11 0x98 SUB r17, r16, r17 + 12980 0x4a 0xc6 0xd3 0xec 0x63 0x0c LDA r17, [p2, #20]; ST r17, [p1, #-4] + 12986 0x00 0x00 NOPX + 12988 0x00 0x00 NOPX + 12990 0x00 0x00 NOPX + 12992 0x00 0x00 NOPX + 12994 0x00 0x00 NOPX + 12996 0x00 0x00 NOPX + 12998 0x14 0x51 0x08 0x18 REL r17, r16 + 13002 0xfc 0xce 0xd0 0x00 0x01 0xf0 0xb2 0x20 0x10 0xba LDA r19, [p7, #-8]; MOVXM p1, #508992 + 13012 0x06 0x06 0x56 0x98 LDA r18, [p6] + 13016 0x01 0x06 0x36 0x98 LDA r17, [p1] + 13020 0x00 0x00 NOPX + 13022 0x00 0x00 NOPX + 13024 0x00 0x00 NOPX + 13026 0x00 0x00 NOPX + 13028 0x14 0x21 0x31 0x98 SUB r16, r16, r19 + 13032 0x0f 0xe6 0x11 0x98 ST r16, [p7, #-8] + 13036 0x14 0x61 0x28 0x98 NE r16, r17, r18 + 13040 0x80 0x19 0x88 0x40 0x01 0x84 JNZ r16, #13072 +.delay_slot +.swstall delay_slot + 13046 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13048 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13050 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13052 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13054 0x00 0x00 NOPX + 13056 0x10 0x20 0x01 0x18 MOVX r16, #0 + 13060 0x00 0x2c 0xf0 0x00 0x20 0x00 0x00 0x00 0x83 0x08 0xc1 0x36 NOPA; NOPB; ST r16, [p1]; NOPX +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 + 13072 0x07 0xf0 0x39 0x18 LDA lr, [sp, #-16] + 13076 0x07 0xfd 0xf1 0x18 LDA r15, [sp, #-4] + 13080 0x07 0xf7 0x99 0x18 LDA p7, [sp, #-12] +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13084 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.noswbrkpt + 13086 0x07 0xf9 0xd1 0x18 LDA r14, [sp, #-8] +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13090 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13092 0x00 0x00 NOPX +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13094 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13098 0x0e 0x8e 0x0b 0x18 MOVS p6, r14 +.delay_slot + 13102 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 13108 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13110 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13112 0x00 0x00 NOPX +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + +.text_segment PM 13120 +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function_start + 13120 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 13124 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 13128 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 13132 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 13136 0x00 0x14 0xd8 0x00 0x00 0x84 J #10672 +.delay_slot +.swstall delay_slot + 13142 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13144 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13146 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13148 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13150 0x00 0x00 NOPX +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj +.function_start + 13152 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 13156 0x00 0x00 NOPX + 13158 0x00 0x00 NOPX + 13160 0x00 0x00 NOPX + 13162 0x00 0x00 NOPX + 13164 0x00 0x00 NOPX + 13166 0x00 0x00 NOPX + 13168 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 13172 0x01 0x1c 0x2e 0x98 LDA el0, [p1], #4 + 13176 0x01 0x1c 0x0e 0x98 LDA eh0, [p1], #4 + 13180 0x00 0x00 NOPX + 13182 0x00 0x00 NOPX + 13184 0x00 0x00 NOPX + 13186 0x00 0x00 NOPX + 13188 0x00 0x00 NOPX + 13190 0x08 0x1c 0x29 0x98 ST el0, [p0], #4 + 13194 0x08 0x1c 0x09 0x98 ST eh0, [p0], #4 + 13198 0x01 0x04 0x2e 0x98 LDA el0, [p1] + 13202 0x01 0x14 0x0e 0x98 LDA eh0, [p1, #4] + 13206 0x00 0x00 NOPX + 13208 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 13212 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13214 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13216 0x00 0x00 NOPX +.delay_slot + 13218 0x08 0x04 0x29 0x98 ST el0, [p0] +.delay_slot + 13222 0x08 0x14 0x09 0x98 ST eh0, [p0, #4] +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj___func_end0 + +.text_segment PM 13232 +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params +.function_start + 13232 0x00 0x4b 0x21 0x29 0x81 0xe4 MOVX r1, #22; MOV r2, p2 + 13238 0x1a 0x61 0x06 0x18 ADD.NC p2, r2, #12 + 13242 0x02 0x14 0x56 0x98 LDA r2, [p2, #4] + 13246 0x02 0x07 0x76 0x98 LDA r27, [p2] + 13250 0x00 0x00 NOPX + 13252 0x00 0x00 NOPX + 13254 0x00 0x00 NOPX + 13256 0x00 0x00 NOPX + 13258 0x00 0x00 NOPX + 13260 0x10 0x1a 0xb8 0x00 0x01 0x84 JZ r2, #13680 +.delay_slot + 13266 0x10 0x00 0x75 0x18 MOVX r0, #29 +.delay_slot + 13270 0x10 0x40 0x02 0x18 SEL.EQZ r0, r1, r0, r27 +.delay_slot +.swstall delay_slot + 13274 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13276 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13278 0x00 0x00 NOPX + 13280 0x10 0x02 0x29 0x18 MOVX r1, #10 + 13284 0x10 0x82 0x1c 0x98 LTU r1, r2, r1 + 13288 0x08 0x1a 0x70 0x40 0x01 0x84 JNZ r1, #13536 +.delay_slot +.swstall delay_slot + 13294 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13296 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13298 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13300 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13302 0x00 0x00 NOPX +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13304 0x00 0x38 0x68 0x00 0x00 0x0c 0x7a 0x40 0x10 0x3a VLDB x0, [p0], #64; MOVXM ls, #13440 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13314 0x00 0x2c 0xf0 0x38 0x68 0x00 0x01 0xb7 0x48 0x02 0x00 0x2b 0x60 0x7e NOPA; VLDB x0, [p0], #64; NOPS; MOVXM le, #13440 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13328 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x02 0xb8 0xbd 0xc8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; ADD.NC lc, r2, #-9; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13344 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13360 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13376 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13392 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13408 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13424 0x00 0x2c 0xf0 0x38 0x68 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_208 +.loop_nesting 1 +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13440 0x00 0x2c 0xf0 0x38 0x69 0x1c 0x06 0x80 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV +.loop_nesting 0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13456 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13464 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13472 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13480 0x23 0x80 0xd0 0x01 0x40 0x00 0x00 0x00 0xe9 0x3a VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13490 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13498 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13506 0x23 0x80 0xd0 0x00 0x00 0x00 0xe0 0x02 VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13514 0x00 0x2c 0xf2 0x38 0x0d 0x0c NOPA; VST bmll0, [p1], #64 +.delay_slot + 13520 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_304 + 13536 0x1d 0x71 0x20 0xf8 MOV lc, r2 + 13540 0x00 0x00 0x31 0xe9 0xe0 0x44 MOVXM ls, #13552 + 13546 0x00 0x00 0x36 0xea 0xc0 0x44 MOVXM le, #13664 +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_320 +.loop_nesting 1 +.begin_of_loop + 13552 0x38 0x1c 0x34 0x18 VLDB x0, [p0], #64 + 13556 0x00 0x00 NOPX + 13558 0x00 0x2c 0xf0 0x00 0x10 0x00 0x01 0xa5 0x7e 0xba NOPA; NOPB; NOPM + 13568 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 13584 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 13600 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 13616 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV + 13632 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x00 0x00 0xe8 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV + 13648 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; NOPM; NOPV +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_432 +.end_of_loop + 13664 0x00 0x2c 0xf0 0x00 0x21 0x1c 0x06 0x80 0x00 0x00 0x01 0xa5 0x78 0x00 0x00 0xe1 NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_448 +.loop_nesting 0 + 13680 0x10 0x28 0x00 0x18 RET lr +.delay_slot +.swstall delay_slot + 13684 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13686 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13688 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13690 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13692 0x00 0x00 NOPX +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params___func_end0 + +.text_segment PM 13696 +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj +.function_start + 13696 0x70 0x91 0x60 0x00 0x02 0x00 0x00 0x00 0x71 0x3a MOVS p3, p1; PADDXM [sp], #64 + 13706 0x0f 0xfc 0x3d 0x98 ST lr, [sp, #-4] +.no_stack_arguments + 13710 0x00 0x19 0xb0 0x00 0x01 0x04 JL #13152 +.delay_slot + 13716 0x50 0x11 0x60 0x00 0xb2 0x60 0x70 0x02 MOVS p2, p0; MOV p1, p2 +.delay_slot + 13724 0xfe 0xf3 0xb0 0x00 0x01 0xf0 0x32 0x60 0x11 0x3a ST p7, [sp, #-12]; MOVXM p0, #509120 +.delay_slot + 13734 0x0f 0xfb 0x1d 0x98 ST p6, [sp, #-8] +.delay_slot + 13738 0x00 0x2c 0xfc 0xcd 0x81 0xd4 NOPA; MOV p6, p3 +.delay_slot + 13744 0x00 0x2c 0xf0 0x00 0x20 0x01 0x5b 0x00 0x00 0x03 0xb2 0x60 0x78 0x00 0x00 0xe1 NOPA; NOPB; NOPS; NOPX; MOV p7, p2; NOPV +.return_address + 13760 0xfe 0xf3 0x21 0x98 0x8b 0x00 0x37 0x60 0x72 0xba LDA p7, [sp, #-12]; MOVS p1, p6; MOV p0, p7 + 13770 0x07 0xfb 0x19 0x18 LDA p6, [sp, #-8] + 13774 0x07 0xfc 0x39 0x18 LDA lr, [sp, #-4] + 13778 0x00 0x04 0x1e 0x98 LDA p0, [p0] + 13782 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 13786 0x00 0x19 0xd8 0x00 0x00 0x84 J #13232 +.delay_slot + 13792 0xff 0xf8 0x00 0x00 0x01 0xc4 PADDXM [sp], #-64 +.delay_slot +.swstall delay_slot + 13798 0x00 0x00 NOPX +.delay_slot + 13800 0x00 0x07 0xc4 0xc9 0x80 0x44 MOVXM p2, #509120 +.delay_slot +.swstall delay_slot + 13806 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13808 0x00 0x00 NOPX +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj___func_end0 + +.text_segment PM 13824 +.label __Z13_b719_wrapperPPv___func_begin0 +.label _Z13_b719_wrapperPPv +.function_start + 13824 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 13828 0x01 0x1c 0x1e 0x98 LDA p0, [p1], #4 + 13832 0x01 0x15 0x1e 0x98 LDA p2, [p1, #4] + 13836 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 13840 0x00 0x1a 0xc0 0x00 0x00 0x84 J #13696 +.delay_slot +.swstall delay_slot + 13846 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13848 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13850 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13852 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13854 0x00 0x00 NOPX +.label _Z13_b719_wrapperPPv__end +.label __Z13_b719_wrapperPPv___func_end0 +.label __Z13_b886_wrapperPPv___func_begin0 +.label _Z13_b886_wrapperPPv +.function_start + 13856 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 13860 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 13864 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 13868 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 13872 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 13876 0x00 0x16 0x50 0x00 0x00 0x84 J #11424 +.delay_slot +.swstall delay_slot + 13882 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13884 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13886 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13888 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13890 0x00 0x00 NOPX +.label _Z13_b886_wrapperPPv__end +.label __Z13_b886_wrapperPPv___func_end0 + +.text_segment PM 13904 +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function_start + 13904 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 13908 0x02 0x3c 0x1e 0x98 LDA p0, [p2], #12 + 13912 0x02 0xec 0x9e 0x98 LDA p1, [p2], #-8 + 13916 0x02 0x15 0x9e 0x98 LDA p3, [p2, #4] + 13920 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 13924 0x00 0x18 0x70 0x00 0x00 0x84 J #12512 +.delay_slot +.swstall delay_slot + 13930 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13932 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13934 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13936 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13938 0x00 0x00 NOPX +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + +.text_segment PM 13952 +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function_start + 13952 0x1a 0x60 0xc0 0xf8 MOV p2, p0 + 13956 0x02 0x1c 0x1e 0x98 LDA p0, [p2], #4 + 13960 0x02 0x2c 0x9e 0x98 LDA p1, [p2], #8 + 13964 0x02 0xf5 0x9e 0x98 LDA p3, [p2, #-4] + 13968 0x02 0x05 0x1e 0x98 LDA p2, [p2] +.tail_call + 13972 0x00 0x0d 0xc0 0x00 0x00 0x84 J #7040 +.delay_slot +.swstall delay_slot + 13978 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13980 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13982 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13984 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 13986 0x00 0x00 NOPX +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + +.text_segment PM 14000 +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function_start + 14000 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 14004 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 14008 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 14012 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 14016 0x00 0x10 0x68 0x00 0x00 0x84 J #8400 +.delay_slot +.swstall delay_slot + 14022 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14024 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14026 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14028 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14030 0x00 0x00 NOPX +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function_start + 14032 0x19 0x60 0xc0 0xf8 MOV p1, p0 + 14036 0x01 0x2c 0x1e 0x98 LDA p0, [p1], #8 + 14040 0x01 0xf5 0x1e 0x98 LDA p2, [p1, #-4] + 14044 0x01 0x04 0x9e 0x98 LDA p1, [p1] +.tail_call + 14048 0x00 0x12 0x18 0x00 0x00 0x84 J #9264 +.delay_slot +.swstall delay_slot + 14054 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14056 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14058 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14060 0x00 0x00 NOPX +.delay_slot +.swstall delay_slot + 14062 0x00 0x00 NOPX +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 + +.bss_segment DMb 508992 28 + +.data_segment DMb 509020 +.label _ZL8num_iter + 0x1 + 0x0 + 0x0 + 0x0 + +.bss_segment DMb 509024 4 + +.bss_segment DMb 509028 1 + +.rodata_segment DMb 509056 +.label _ZL20g_uniformKernelFuncs + 0x40 + 0x33 + 0x0 + 0x0 + 0x0 + 0x36 + 0x0 + 0x0 + 0x20 + 0x36 + 0x0 + 0x0 + 0x50 + 0x36 + 0x0 + 0x0 + 0x80 + 0x36 + 0x0 + 0x0 + 0xb0 + 0x36 + 0x0 + 0x0 + 0xd0 + 0x36 + 0x0 + 0x0 + +.bss_segment DMb 509120 832 + +.stack DM_stack 506560 508928 diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.map b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.map new file mode 100644 index 0000000000000000000000000000000000000000..12e9b9c319aaf9f4d9b1920c64a318de08695a5f --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.map @@ -0,0 +1,295 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:44:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable0 ../Release/0_0_reloadable0.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable0.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1033 -pme + +// Release: ipp V-2024.06-TGT-241219 + +Memory map for memory 'DM_stack': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 2368 + + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + +Memory map for memory 'DMb': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 3265 + + 0x00000000..0x0007babf ( 506560 items) : Reserved + 0x0007bac0..0x0007c3ff ( 2368 items) : Stack + 0x0007c400..0x0007c43f ( 64 items) : Reserved + 0x0007c440..0x0007c443 ( 4 items) : ../Release/0_0_reloadable0.o::_ZL9curr_iter (Data, Local, .bss.DMb.4) + 0x0007c444..0x0007c447 ( 4 items) : ../Release/0_0_reloadable0.o::_ZL14num_depth_iter (Data, Local, .bss.DMb.4) + 0x0007c448..0x0007c44b ( 4 items) : ../Release/0_0_reloadable0.o::_ZL10depth_iter (Data, Local, .bss.DMb.4) + 0x0007c44c..0x0007c44f ( 4 items) : ../Release/0_0_reloadable0.o::_ZL11total_iters (Data, Local, .bss.DMb.4) + 0x0007c450..0x0007c453 ( 4 items) : ../Release/0_0_reloadable0.o::_ZL8core_row (Data, Local, .bss.DMb.4) + 0x0007c454..0x0007c457 ( 4 items) : ../Release/0_0_reloadable0.o::_ZL11ifm1_offset (Data, Local, .bss.DMb.4) + 0x0007c458..0x0007c45b ( 4 items) : ../Release/0_0_reloadable0.o::_ZL11ifm2_offset (Data, Local, .bss.DMb.4) + 0x0007c45c..0x0007c45f ( 4 items) : ../Release/0_0_reloadable0.o::_ZL8num_iter (Data, Local, .data.DMb.4) + 0x0007c460..0x0007c463 ( 4 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_satE (Data, Global, .bss.DMb.4) + 0x0007c464..0x0007c464 ( 1 items) : me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a)::_ZN12me_primitive11control_rndE (Data, Global, .bss.DMb.1) + 0x0007c480..0x0007c49b ( 28 items) : ../Release/0_0_reloadable0.o::_ZL20g_uniformKernelFuncs (Data, Local, .rodata.DMb.64) + + Called functions : _Z13_b881_wrapperPPv + _Z13_b719_wrapperPPv + _Z13_b886_wrapperPPv + _Z13_b891_wrapperPPv + _Z13_b896_wrapperPPv + _Z13_b901_wrapperPPv + _Z13_b906_wrapperPPv + + 0x0007c4c0..0x0007c4ff ( 64 items) : ../Release/0_0_reloadable0.o::_ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_KjE6params (Data, Weak, .bss.DMb.64) + 0x0007c500..0x0007c53f ( 64 items) : ../Release/0_0_reloadable0.o::add1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c540..0x0007c57f ( 64 items) : ../Release/0_0_reloadable0.o::mul1d_attribute_broadcasting_params (Data, Global, .bss.DMb.64) + 0x0007c580..0x0007c5bf ( 64 items) : ../Release/0_0_reloadable0.o::add1d_params (Data, Global, .bss.DMb.64) + 0x0007c5c0..0x0007c5ff ( 64 items) : ../Release/0_0_reloadable0.o::mul1d_params (Data, Global, .bss.DMb.64) + 0x0007c600..0x0007c63f ( 64 items) : ../Release/0_0_reloadable0.o::clip1d_params (Data, Global, .bss.DMb.64) + 0x0007c640..0x0007c7ff ( 448 items) : ../Release/0_0_reloadable0.o::conv2d_params (Data, Global, .bss.DMb.64) + 0x0007ccc0..0x000fffff ( 537408 items) : Reserved + +Memory map for memory 'PM': + + Size = 1048576 + Width = 8 bits + Offset = 0 + Used = 11410 + + 0x00000000..0x0000092f ( 2352 items) : Reserved + 0x00000930..0x00000b51 ( 546 items) : ../Release/0_0_reloadable0.o::_Z13kernelWrapperPPvjjjj (Function, Global, .text) (stack frame size = 64) + + Referenced symbols: _ZL20g_uniformKernelFuncs + + 0x00000b60..0x000010f5 ( 1430 items) : ../Release/0_0_reloadable0.o::_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (Function, Weak, .text) (stack frame size = 64) + 0x00001100..0x0000120d ( 270 items) : ../Release/0_0_reloadable0.o::_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001210..0x00001b79 ( 2410 items) : ../Release/0_0_reloadable0.o::_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (Function, Weak, .text) (stack frame size = 128) + + Called functions : _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00001b80..0x00001db7 ( 568 items) : ../Release/0_0_reloadable0.o::_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + + Referenced symbols: _ZL9curr_iter + conv2d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL14num_depth_iter + _ZL8num_iter + _ZL10depth_iter + _ZL11total_iters + + 0x00001dc0..0x00001dd7 ( 24 items) : ../Release/0_0_reloadable0.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + 0x00001de0..0x00001e81 ( 162 items) : ../Release/0_0_reloadable0.o::_ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E + + 0x00001e90..0x00001ec7 ( 56 items) : ../Release/0_0_reloadable0.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + 0x00001ed0..0x00001f0d ( 62 items) : ../Release/0_0_reloadable0.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E + + 0x00001f10..0x00002049 ( 314 items) : ../Release/0_0_reloadable0.o::_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002050..0x000020c1 ( 114 items) : ../Release/0_0_reloadable0.o::_ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 128) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + 0x000020d0..0x000022b7 ( 488 items) : ../Release/0_0_reloadable0.o::_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x000022c0..0x00002323 ( 100 items) : ../Release/0_0_reloadable0.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002330..0x00002421 ( 242 items) : ../Release/0_0_reloadable0.o::_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + 0x00002430..0x00002617 ( 488 items) : ../Release/0_0_reloadable0.o::_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv + _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + clip1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002620..0x00002693 ( 116 items) : ../Release/0_0_reloadable0.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x000026a0..0x000026e9 ( 74 items) : ../Release/0_0_reloadable0.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv + + 0x000026f0..0x00002905 ( 534 items) : ../Release/0_0_reloadable0.o::_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE (Function, Local, .text) (stack frame size = 128) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x00002910..0x000029a5 ( 150 items) : ../Release/0_0_reloadable0.o::_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + 0x000029b0..0x00002b97 ( 488 items) : ../Release/0_0_reloadable0.o::_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_attribute_broadcasting_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL8num_iter + + 0x00002ba0..0x00002c13 ( 116 items) : ../Release/0_0_reloadable0.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 0) + 0x00002c20..0x00002c81 ( 98 items) : ../Release/0_0_reloadable0.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + + 0x00002c90..0x00002c9f ( 16 items) : ../Release/0_0_reloadable0.o::_ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E (Function, Weak, .text) (stack frame size = 0) + + Called functions : _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + + 0x00002ca0..0x00002ef9 ( 602 items) : ../Release/0_0_reloadable0.o::_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + add1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00002f00..0x00002f17 ( 24 items) : ../Release/0_0_reloadable0.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + 0x00002f20..0x00002fa9 ( 138 items) : ../Release/0_0_reloadable0.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv (Function, Weak, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + + 0x00002fb0..0x000030d3 ( 292 items) : ../Release/0_0_reloadable0.o::_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E (Function, Weak, .text) (stack frame size = 0) + + Referenced symbols: _ZN12me_primitive11control_rndE + + 0x000030e0..0x00003339 ( 602 items) : ../Release/0_0_reloadable0.o::_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE (Function, Global, .text) (stack frame size = 64) + + Called functions : _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + + Referenced symbols: _ZL9curr_iter + _ZL8core_row + mul1d_params + _ZN12me_primitive11control_rndE + _ZN12me_primitive11control_satE + _ZL11ifm1_offset + _ZL11ifm2_offset + _ZL8num_iter + + 0x00003340..0x0000335f ( 32 items) : ../Release/0_0_reloadable0.o::_Z13_b881_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x00003360..0x000033a9 ( 74 items) : ../Release/0_0_reloadable0.o::_Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj (Function, Weak, .text) (stack frame size = 0) + 0x000033b0..0x0000357d ( 462 items) : ../Release/0_0_reloadable0.o::_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params (Function, Weak, .text) (stack frame size = 0) + 0x00003580..0x000035f1 ( 114 items) : ../Release/0_0_reloadable0.o::_ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj (Function, Weak, .text) (stack frame size = 64) + + Called functions : _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj + _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params + + Referenced symbols: _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_KjE6params + + 0x00003600..0x0000361f ( 32 items) : ../Release/0_0_reloadable0.o::_Z13_b719_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj + + 0x00003620..0x00003643 ( 36 items) : ../Release/0_0_reloadable0.o::_Z13_b886_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00003650..0x00003673 ( 36 items) : ../Release/0_0_reloadable0.o::_Z13_b891_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE + + 0x00003680..0x000036a3 ( 36 items) : ../Release/0_0_reloadable0.o::_Z13_b896_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x000036b0..0x000036cf ( 32 items) : ../Release/0_0_reloadable0.o::_Z13_b901_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + 0x000036d0..0x000036ef ( 32 items) : ../Release/0_0_reloadable0.o::_Z13_b906_wrapperPPv (Function, Global, .text) (stack frame size = 0) + + Called functions : _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + + +External symbols: + + __dso_handle = 0x0 + _ctors_end = 0x0 + _ctors_start = 0x0 + _dtors_end = 0x0 + _dtors_start = 0x0 + _pc_end = 0x36f0 + _pc_start = 0x930 + _sp_end_DM_stack = 0x7c400 + _sp_start_DM_stack = 0x7bac0 + +Section summary for memory 'DM_stack': + + .stack File + ---------- ---------- + 2368 + ---------- ---------- + 2368 Total + +Section summary for memory 'DMb': + + .bss .data .rodata File + ---------- ---------- ---------- ---------- + 860 4 28 ../Release/0_0_reloadable0.o + 5 0 0 me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + ---------- ---------- ---------- ---------- + 865 4 28 Total + +Section summary for memory 'PM': + + .text File + ---------- ---------- + 11410 ../Release/0_0_reloadable0.o + ---------- ---------- + 11410 Total + +File summary: + +../Release/0_0_reloadable0.o + DMb 892 + PM 11410 + +me_defs.o(/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release/libme.a) + DMb 5 + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.sdr b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.sdr new file mode 100644 index 0000000000000000000000000000000000000000..090cc5bb2ca634e0bda429d015e7078714693b7c --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.sdr @@ -0,0 +1,129 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:44:06 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable0 ../Release/0_0_reloadable0.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable0.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1033 -pme + +// Release: ipp V-2024.06-TGT-241219 + +// Symbols in memory 'DM_bankA': +// Symbols in memory 'DM_bankAB': +// Symbols in memory 'DM_bankAC': +// Symbols in memory 'DM_bankAD': +// Symbols in memory 'DM_bankB': +// Symbols in memory 'DM_bankBC': +// Symbols in memory 'DM_bankBD': +// Symbols in memory 'DM_bankC': +// Symbols in memory 'DM_bankCD': +// Symbols in memory 'DM_bankD': +// Symbols in memory 'DM_stack': +// Symbols in memory 'DM_test': +// Symbols in memory 'DMb': +_symbol _ZN12me_primitive11control_satE 0x0007c460 +_symbol _ZN12me_primitive11control_rndE 0x0007c464 +_symbol _ZZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_KjE6params 0x0007c4c0 +_symbol add1d_attribute_broadcasting_params 0x0007c500 +_symbol mul1d_attribute_broadcasting_params 0x0007c540 +_symbol add1d_params 0x0007c580 +_symbol mul1d_params 0x0007c5c0 +_symbol clip1d_params 0x0007c600 +_symbol conv2d_params 0x0007c640 +// Symbols in memory 'DMh': +// Symbols in memory 'DMh_bankA': +// Symbols in memory 'DMh_bankAB': +// Symbols in memory 'DMh_bankAC': +// Symbols in memory 'DMh_bankAD': +// Symbols in memory 'DMh_bankB': +// Symbols in memory 'DMh_bankBC': +// Symbols in memory 'DMh_bankBD': +// Symbols in memory 'DMh_bankC': +// Symbols in memory 'DMh_bankCD': +// Symbols in memory 'DMh_bankD': +// Symbols in memory 'DMh_stack': +// Symbols in memory 'DMs': +// Symbols in memory 'DMs_bankA': +// Symbols in memory 'DMs_bankAB': +// Symbols in memory 'DMs_bankAC': +// Symbols in memory 'DMs_bankAD': +// Symbols in memory 'DMs_bankB': +// Symbols in memory 'DMs_bankBC': +// Symbols in memory 'DMs_bankBD': +// Symbols in memory 'DMs_bankC': +// Symbols in memory 'DMs_bankCD': +// Symbols in memory 'DMs_bankD': +// Symbols in memory 'DMs_stack': +// Symbols in memory 'DMv': +// Symbols in memory 'DMv_bankA': +// Symbols in memory 'DMv_bankAB': +// Symbols in memory 'DMv_bankAC': +// Symbols in memory 'DMv_bankAD': +// Symbols in memory 'DMv_bankB': +// Symbols in memory 'DMv_bankBC': +// Symbols in memory 'DMv_bankBD': +// Symbols in memory 'DMv_bankC': +// Symbols in memory 'DMv_bankCD': +// Symbols in memory 'DMv_bankD': +// Symbols in memory 'DMv_stack': +// Symbols in memory 'DMw': +// Symbols in memory 'DMw_bankA': +// Symbols in memory 'DMw_bankAB': +// Symbols in memory 'DMw_bankAC': +// Symbols in memory 'DMw_bankAD': +// Symbols in memory 'DMw_bankB': +// Symbols in memory 'DMw_bankBC': +// Symbols in memory 'DMw_bankBD': +// Symbols in memory 'DMw_bankC': +// Symbols in memory 'DMw_bankCD': +// Symbols in memory 'DMw_bankD': +// Symbols in memory 'DMw_stack': +// Symbols in memory 'DMx': +// Symbols in memory 'DMx_bankA': +// Symbols in memory 'DMx_bankAB': +// Symbols in memory 'DMx_bankAC': +// Symbols in memory 'DMx_bankAD': +// Symbols in memory 'DMx_bankB': +// Symbols in memory 'DMx_bankBC': +// Symbols in memory 'DMx_bankBD': +// Symbols in memory 'DMx_bankC': +// Symbols in memory 'DMx_bankCD': +// Symbols in memory 'DMx_bankD': +// Symbols in memory 'DMx_stack': +// Symbols in memory 'PM': +_symbol _Z13kernelWrapperPPvjjjj 0x00000930 +_symbol _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh 0x00000b60 +_symbol _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams 0x00001100 +_symbol _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params 0x00001210 +_symbol _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00001b80 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E 0x00001dc0 +_symbol _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001de0 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E 0x00001e90 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00001ed0 +_symbol _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00001f10 +_symbol _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E 0x00002050 +_symbol _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x000020d0 +_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv 0x000022c0 +_symbol _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E 0x00002330 +_symbol _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x00002430 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv 0x00002620 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv 0x000026a0 +_symbol _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E 0x00002910 +_symbol _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE 0x000029b0 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv 0x00002ba0 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv 0x00002c20 +_symbol _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E 0x00002c90 +_symbol _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x00002ca0 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E 0x00002f00 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv 0x00002f20 +_symbol _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E 0x00002fb0 +_symbol _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE 0x000030e0 +_symbol _Z13_b881_wrapperPPv 0x00003340 +_symbol _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj 0x00003360 +_symbol _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params 0x000033b0 +_symbol _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj 0x00003580 +_symbol _Z13_b719_wrapperPPv 0x00003600 +_symbol _Z13_b886_wrapperPPv 0x00003620 +_symbol _Z13_b891_wrapperPPv 0x00003650 +_symbol _Z13_b896_wrapperPPv 0x00003680 +_symbol _Z13_b901_wrapperPPv 0x000036b0 +_symbol _Z13_b906_wrapperPPv 0x000036d0 +// Symbols in memory 'PMw': +// Symbols in memory 'TM4': diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.srv b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.srv new file mode 100644 index 0000000000000000000000000000000000000000..5adeecb8c28386f72eb4d0cc18df3c83a0980d91 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.srv @@ -0,0 +1,16705 @@ + +// File generated by darts version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:44:07 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// darts -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -d -h -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -L +Ihex +nanno +u ../Release/0_0_reloadable0 me + +// Release: ipp V-2024.06-TGT-241219 +.label __Z13kernelWrapperPPvjjjj___func_begin0 +.label _Z13kernelWrapperPPvjjjj +.function kernelWrapper _Z13kernelWrapperPPvjjjj +.src_ref 0 "0_0_reloadable0.cc" 92 first +.src_ref 0 "0_0_reloadable0.cc" 94 60 +.src_ref 0 "0_0_reloadable0.cc" 94 110 first +.function_start + 2352 "00101100" // LDA r16, [p0]; NEZ r26, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2353 "11100000" // /* MW 5 */ + 2354 "11101001" // /* MW 4 */ + 2355 "11010000" // /* MW 3 */ + 2356 "11000010" // /* MW 2 */ + 2357 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 92 + 2358 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2359 "00000001" // /* MW 5 */ + 2360 "00000000" // /* MW 4 */ + 2361 "00000000" // /* MW 3 */ + 2362 "00001000" // /* MW 2 */ + 2363 "00000000" // /* MW 1 */ + 2364 "10011000" // ST p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2365 "00011101" // /* MW 3 */ + 2366 "11101111" // /* MW 2 */ + 2367 "00001111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 112 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2368 "00000010" // ST r14, [sp, #-16]; MOV r14, r3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2369 "01110000" // /* MW 7 */ + 2370 "11010000" // /* MW 6 */ + 2371 "11001000" // /* MW 5 */ + 2372 "00000001" // /* MW 4 */ + 2373 "10110000" // /* MW 3 */ + 2374 "00111010" // /* MW 2 */ + 2375 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 110 +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2376 "00000010" // ST r15, [sp, #-8]; MOV r15, r1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2377 "01110000" // /* MW 7 */ + 2378 "01010000" // /* MW 6 */ + 2379 "11101000" // /* MW 5 */ + 2380 "00000001" // /* MW 4 */ + 2381 "10110000" // /* MW 3 */ + 2382 "00111110" // /* MW 2 */ + 2383 "11111111" // /* MW 1 */ + 2384 "10011000" // ST p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2385 "10011101" // /* MW 3 */ + 2386 "11110111" // /* MW 2 */ + 2387 "00001111" // /* MW 1 */ + 2388 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2389 "00111101" // /* MW 3 */ + 2390 "11111100" // /* MW 2 */ + 2391 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2392 "00011000" // ADD.NC p6, r16, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2393 "00000010" // /* MW 3 */ + 2394 "01101000" // /* MW 2 */ + 2395 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2396 "10011000" // LDA r16, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2397 "00010110" // /* MW 3 */ + 2398 "00011110" // /* MW 2 */ + 2399 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2400 "10011000" // LDA r18, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2401 "01010110" // /* MW 3 */ + 2402 "00111110" // /* MW 2 */ + 2403 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2404 "10011000" // LDA r17, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2405 "00110110" // /* MW 3 */ + 2406 "11101110" // /* MW 2 */ + 2407 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2408 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2409 "01110110" // /* MW 3 */ + 2410 "00000111" // /* MW 2 */ + 2411 "00000110" // /* MW 1 */ + 2412 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2413 "00000000" // /* MW 1 */ + 2414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2415 "00000000" // /* MW 1 */ + 2416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2417 "00000000" // /* MW 1 */ + 2418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2419 "00000000" // /* MW 1 */ + 2420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2421 "00000000" // /* MW 1 */ + 2422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2423 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2424 "00011000" // SEL.EQZ r16, r16, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2425 "00100010" // /* MW 3 */ + 2426 "00100001" // /* MW 2 */ + 2427 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2428 "10011000" // ST r16, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2429 "00010001" // /* MW 3 */ + 2430 "11010110" // /* MW 2 */ + 2431 "00001110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 +.src_ref 1 "io_buffer_main.h" 410 8 + 2432 "11100100" // MOVX r16, #-1; MOV el0, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2433 "00111001" // /* MW 5 */ + 2434 "00110101" // /* MW 4 */ + 2435 "10100000" // /* MW 3 */ + 2436 "00011111" // /* MW 2 */ + 2437 "11111100" // /* MW 1 */ + 2438 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2439 "00000000" // /* MW 1 */ + 2440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2441 "00000000" // /* MW 1 */ + 2442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2443 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2444 "00011000" // ACQ.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2445 "00001000" // /* MW 3 */ + 2446 "01010111" // /* MW 2 */ + 2447 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 60 +.src_ref 0 "0_0_reloadable0.cc" 96 110 +.src_ref 0 "0_0_reloadable0.cc" 99 60 +.src_ref 0 "0_0_reloadable0.cc" 102 7 + 2448 "01100100" // MOVX r17, #2; MOV r19, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2449 "00000101" // /* MW 5 */ + 2450 "10100000" // /* MW 4 */ + 2451 "00101001" // /* MW 3 */ + 2452 "01000001" // /* MW 2 */ + 2453 "00000100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 60 +.src_ref 0 "0_0_reloadable0.cc" 96 60 first + 2454 "11100100" // LSHL r20, r26, r17; MOV r18, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2455 "10000001" // /* MW 5 */ + 2456 "00100001" // /* MW 4 */ + 2457 "10111001" // /* MW 3 */ + 2458 "00100011" // /* MW 2 */ + 2459 "11010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 60 +.src_ref 0 "0_0_reloadable0.cc" 96 110 + 2460 "10100100" // LTU r18, r19, r15; ADD.NC p6, r18, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2461 "10100010" // /* MW 5 */ + 2462 "11010010" // /* MW 4 */ + 2463 "10011100" // /* MW 3 */ + 2464 "10011111" // /* MW 2 */ + 2465 "10011100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 96 60 +.src_ref 0 "0_0_reloadable0.cc" 99 60 + 2466 "10111010" // LDA r20, [p6]; ST r20, [sp, #-28]; MOV r19, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2467 "01110010" // /* MW 9 */ + 2468 "01100000" // /* MW 8 */ + 2469 "01101110" // /* MW 7 */ + 2470 "10000010" // /* MW 6 */ + 2471 "10010101" // /* MW 5 */ + 2472 "11100110" // /* MW 4 */ + 2473 "11010111" // /* MW 3 */ + 2474 "11010010" // /* MW 2 */ + 2475 "11000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 + 2476 "00000010" // ST r18, [sp, #-24]; MOV r26, r18 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 2477 "01110000" // /* MW 7 */ + 2478 "10010000" // /* MW 6 */ + 2479 "01001100" // /* MW 5 */ + 2480 "00000011" // /* MW 4 */ + 2481 "10110000" // /* MW 3 */ + 2482 "01001010" // /* MW 2 */ + 2483 "11111101" // /* MW 1 */ + 2484 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2485 "00000000" // /* MW 1 */ + 2486 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2487 "00000000" // /* MW 1 */ + 2488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2489 "00000000" // /* MW 1 */ + 2490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2491 "00000000" // /* MW 1 */ + 2492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2493 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2494 "00011000" // ADD.NC p6, r20, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2495 "00000010" // /* MW 3 */ + 2496 "01101010" // /* MW 2 */ + 2497 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2498 "10011000" // LDA r20, [p6], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2499 "10010110" // /* MW 3 */ + 2500 "00011110" // /* MW 2 */ + 2501 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2502 "10011000" // LDA r22, [p6], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2503 "11010110" // /* MW 3 */ + 2504 "00111110" // /* MW 2 */ + 2505 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 46 + 2506 "10011000" // LDA r21, [p6], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2507 "10110110" // /* MW 3 */ + 2508 "11101110" // /* MW 2 */ + 2509 "00000110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2510 "10011000" // LDA r27, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2511 "01110110" // /* MW 3 */ + 2512 "00000111" // /* MW 2 */ + 2513 "00000110" // /* MW 1 */ + 2514 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2515 "00000000" // /* MW 1 */ + 2516 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2517 "00000000" // /* MW 1 */ + 2518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2519 "00000000" // /* MW 1 */ + 2520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2521 "00000000" // /* MW 1 */ + 2522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2523 "00000000" // /* MW 1 */ + 2524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2525 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2526 "00011000" // SEL.EQZ r20, r20, r22, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2527 "01100010" // /* MW 3 */ + 2528 "00101001" // /* MW 2 */ + 2529 "00010101" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2530 "10011000" // ST r20, [p6, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2531 "10010001" // /* MW 3 */ + 2532 "11010110" // /* MW 2 */ + 2533 "00001110" // /* MW 1 */ + 2534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2535 "00000000" // /* MW 1 */ + 2536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2537 "00000000" // /* MW 1 */ + 2538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2539 "00000000" // /* MW 1 */ + 2540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2541 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2542 "00011000" // ACQ.COND r21, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2543 "00001000" // /* MW 3 */ + 2544 "01010111" // /* MW 2 */ + 2545 "00010101" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 60 first + 2546 "10011000" // LSHL r18, r18, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2547 "00011101" // /* MW 3 */ + 2548 "10100101" // /* MW 2 */ + 2549 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 60 +.src_ref 0 "0_0_reloadable0.cc" 99 60 + 2550 "10100100" // LSHL r18, r2, r17; ADD.NC r19, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2551 "10010010" // /* MW 5 */ + 2552 "10110011" // /* MW 4 */ + 2553 "10111001" // /* MW 3 */ + 2554 "10100011" // /* MW 2 */ + 2555 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 60 +.src_ref 0 "0_0_reloadable0.cc" 99 112 + 2556 "10100100" // NEZ r26, r14; ADD.NC p6, r19, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2557 "10010010" // /* MW 5 */ + 2558 "11010011" // /* MW 4 */ + 2559 "00001100" // /* MW 3 */ + 2560 "10011110" // /* MW 2 */ + 2561 "01110110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 99 60 + 2562 "00001100" // LDA r18, [p6]; ST r26, [sp, #-32] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2563 "10101011" // /* MW 5 */ + 2564 "11000110" // /* MW 4 */ + 2565 "11011111" // /* MW 3 */ + 2566 "11001010" // /* MW 2 */ + 2567 "11000000" // /* MW 1 */ + 2568 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2569 "00000000" // /* MW 1 */ + 2570 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2571 "00000000" // /* MW 1 */ + 2572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2573 "00000000" // /* MW 1 */ + 2574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2575 "00000000" // /* MW 1 */ + 2576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2577 "00000000" // /* MW 1 */ + 2578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2579 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 first + 2580 "00011000" // ADD.NC p7, r18, #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2581 "00000010" // /* MW 3 */ + 2582 "01101001" // /* MW 2 */ + 2583 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 13 + 2584 "10011000" // LDA r19, [p7], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2585 "01110110" // /* MW 3 */ + 2586 "00111110" // /* MW 2 */ + 2587 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 23 + 2588 "10011000" // LDA r18, [p7], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2589 "01010110" // /* MW 3 */ + 2590 "11101110" // /* MW 2 */ + 2591 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 36 + 2592 "10011000" // LDA r20, [p7], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2593 "10010110" // /* MW 3 */ + 2594 "00011110" // /* MW 2 */ + 2595 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 590 60 + 2596 "10011000" // LDA r27, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2597 "01110110" // /* MW 3 */ + 2598 "00000111" // /* MW 2 */ + 2599 "00000111" // /* MW 1 */ + 2600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2601 "00000000" // /* MW 1 */ + 2602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2603 "00000000" // /* MW 1 */ + 2604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2605 "00000000" // /* MW 1 */ + 2606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2607 "00000000" // /* MW 1 */ + 2608 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2609 "00000000" // /* MW 1 */ + 2610 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2611 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 195 30 first +.src_ref 1 "io_buffer_compiler.h" 195 37 first + 2612 "00011000" // SEL.EQZ r19, r19, r20, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2613 "01000010" // /* MW 3 */ + 2614 "11100111" // /* MW 2 */ + 2615 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 194 23 first + 2616 "10011000" // ST r19, [p7, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2617 "01110001" // /* MW 3 */ + 2618 "11010110" // /* MW 2 */ + 2619 "00001111" // /* MW 1 */ + 2620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2621 "00000000" // /* MW 1 */ + 2622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2623 "00000000" // /* MW 1 */ + 2624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2625 "00000000" // /* MW 1 */ + 2626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2627 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 410 8 first + 2628 "00011000" // ACQ.COND r18, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2629 "00001000" // /* MW 3 */ + 2630 "10010111" // /* MW 2 */ + 2631 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 7 first + 2632 "10011000" // LSHL r16, r0, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2633 "00011101" // /* MW 3 */ + 2634 "00100001" // /* MW 2 */ + 2635 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 7 + 2636 "11111000" // MOV dj0, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2637 "00100000" // /* MW 3 */ + 2638 "10001000" // /* MW 2 */ + 2639 "00011000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 7 + 2640 "01000100" // MOVXM p7, #509056 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2641 "00000000" // /* MW 5 */ + 2642 "11001001" // /* MW 4 */ + 2643 "11001110" // /* MW 3 */ + 2644 "00000111" // /* MW 2 */ + 2645 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 7 + 2646 "00001100" // LDA p1, [p7, dj0]; ST el0, [sp, #-36] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2647 "01011011" // /* MW 5 */ + 2648 "10111000" // /* MW 4 */ + 2649 "11011111" // /* MW 3 */ + 2650 "00010011" // /* MW 2 */ + 2651 "11100000" // /* MW 1 */ + 2652 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2653 "00000000" // /* MW 1 */ + 2654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2655 "00000000" // /* MW 1 */ + 2656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2657 "00000000" // /* MW 1 */ + 2658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2659 "00000000" // /* MW 1 */ + 2660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2661 "00000000" // /* MW 1 */ + 2662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2663 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 102 4 +.no_stack_arguments + 2664 "00011000" // JL p1 /* MW 4 */ /* control_operation: words=4 call unconditional cycles_taken=1 indirect absolute delay_slots=5 */ + 2665 "01000000" // /* MW 3 */ + 2666 "00110000" // /* MW 2 */ + 2667 "00010000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 105 60 +.src_ref 0 "0_0_reloadable0.cc" 107 60 +.delay_slot + 2668 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2669 "11000000" // /* MW 3 */ + 2670 "01100000" // /* MW 2 */ + 2671 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2673 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2675 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2677 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 2678 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2679 "01111110" // /* MW 9 */ + 2680 "10100101" // /* MW 8 */ + 2681 "00000001" // /* MW 7 */ + 2682 "00000000" // /* MW 6 */ + 2683 "00010000" // /* MW 5 */ + 2684 "00000000" // /* MW 4 */ + 2685 "11110000" // /* MW 3 */ + 2686 "00101100" // /* MW 2 */ + 2687 "00000000" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 105 60 first +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.src_ref 1 "io_buffer_main.h" 440 8 +.return_address + 2688 "00101100" // LDA r17, [p7]; MOVX r16, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2689 "00001010" // /* MW 5 */ + 2690 "01000000" // /* MW 4 */ + 2691 "11010000" // /* MW 3 */ + 2692 "11000110" // /* MW 2 */ + 2693 "11100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2694 "00011000" // LDA r26, [sp, #-36] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2695 "01010001" // /* MW 3 */ + 2696 "11011111" // /* MW 2 */ + 2697 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 107 60 + 2698 "00011000" // LDA dj0, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2699 "01000001" // /* MW 3 */ + 2700 "11100100" // /* MW 2 */ + 2701 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_main.h" 440 8 + 2702 "00011000" // LDA el0, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2703 "00101001" // /* MW 3 */ + 2704 "11101000" // /* MW 2 */ + 2705 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 + 2706 "00011000" // LDA eh0, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2707 "00001001" // /* MW 3 */ + 2708 "11100000" // /* MW 2 */ + 2709 "00000111" // /* MW 1 */ + 2710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2711 "00000000" // /* MW 1 */ + 2712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2713 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first + 2714 "00011000" // ADD.NC p0, r17, #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2715 "10001000" // /* MW 3 */ + 2716 "01101000" // /* MW 2 */ + 2717 "00011000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 + 2718 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2719 "00110110" // /* MW 3 */ + 2720 "00000110" // /* MW 2 */ + 2721 "00000000" // /* MW 1 */ + 2722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2723 "00000000" // /* MW 1 */ + 2724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2725 "00000000" // /* MW 1 */ + 2726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2727 "00000000" // /* MW 1 */ + 2728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2729 "00000000" // /* MW 1 */ + 2730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2731 "00000000" // /* MW 1 */ + 2732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2733 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2734 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2735 "00001000" // /* MW 3 */ + 2736 "01010101" // /* MW 2 */ + 2737 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2738 "11010100" // LDA r17, [p0, #-4]; MOV r27, r15 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2739 "01000001" // /* MW 5 */ + 2740 "10101111" // /* MW 4 */ + 2741 "11011101" // /* MW 3 */ + 2742 "11000110" // /* MW 2 */ + 2743 "00011110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 107 60 first +.src_ref 1 "io_buffer_main.h" 440 8 + 2744 "11010100" // LDA r18, [p7, dj0]; MOV r26, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2745 "00111001" // /* MW 5 */ + 2746 "01000000" // /* MW 4 */ + 2747 "11011101" // /* MW 3 */ + 2748 "01001010" // /* MW 2 */ + 2749 "11100000" // /* MW 1 */ + 2750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2751 "00000000" // /* MW 1 */ + 2752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2753 "00000000" // /* MW 1 */ + 2754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2755 "00000000" // /* MW 1 */ + 2756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2757 "00000000" // /* MW 1 */ + 2758 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2759 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2760 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2761 "00010001" // /* MW 3 */ + 2762 "00100111" // /* MW 2 */ + 2763 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2764 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p7, r18, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2765 "00010000" // /* MW 5 */ + 2766 "11010010" // /* MW 4 */ + 2767 "01001110" // /* MW 3 */ + 2768 "01100110" // /* MW 2 */ + 2769 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 12 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2770 "00001100" // LDA r17, [p7]; ST r17, [p0, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2771 "01100011" // /* MW 5 */ + 2772 "11101100" // /* MW 4 */ + 2773 "11010001" // /* MW 3 */ + 2774 "11000110" // /* MW 2 */ + 2775 "11100000" // /* MW 1 */ + 2776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2777 "00000000" // /* MW 1 */ + 2778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2779 "00000000" // /* MW 1 */ + 2780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2781 "00000000" // /* MW 1 */ + 2782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2783 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 2784 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2785 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 2786 "11111000" // MOV r26, eh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2787 "00011100" // /* MW 3 */ + 2788 "10100001" // /* MW 2 */ + 2789 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2790 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2791 "00001000" // /* MW 3 */ + 2792 "01010101" // /* MW 2 */ + 2793 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2794 "11010100" // LDA r17, [p7, #-4]; MOV r27, el0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2795 "00111001" // /* MW 5 */ + 2796 "11000000" // /* MW 4 */ + 2797 "11011101" // /* MW 3 */ + 2798 "11000110" // /* MW 2 */ + 2799 "11111110" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 110 60 first + 2800 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2801 "01010110" // /* MW 3 */ + 2802 "00000110" // /* MW 2 */ + 2803 "00000110" // /* MW 1 */ + 2804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2805 "00000000" // /* MW 1 */ + 2806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2807 "00000000" // /* MW 1 */ + 2808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2809 "00000000" // /* MW 1 */ + 2810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2811 "00000000" // /* MW 1 */ + 2812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2813 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2814 "10011000" // SUB r19, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2815 "00010001" // /* MW 3 */ + 2816 "00100111" // /* MW 2 */ + 2817 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 25 first +.src_ref 1 "io_buffer_compiler.h" 606 24 + 2818 "00100100" // SEL.EQZ r17, r17, r19, r27; ADD.NC p0, r18, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2819 "00010100" // /* MW 5 */ + 2820 "11010010" // /* MW 4 */ + 2821 "01000000" // /* MW 3 */ + 2822 "01100110" // /* MW 2 */ + 2823 "10001100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 605 25 +.src_ref 1 "io_buffer_compiler.h" 606 22 first + 2824 "00001100" // LDA r17, [p0]; ST r17, [p7, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2825 "01100011" // /* MW 5 */ + 2826 "11101100" // /* MW 4 */ + 2827 "11011111" // /* MW 3 */ + 2828 "11000110" // /* MW 2 */ + 2829 "00000000" // /* MW 1 */ + 2830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2831 "00000000" // /* MW 1 */ + 2832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2833 "00000000" // /* MW 1 */ + 2834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2835 "00000000" // /* MW 1 */ + 2836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2837 "00000000" // /* MW 1 */ + 2838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2839 "00000000" // /* MW 1 */ + 2840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2841 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 440 8 first + 2842 "00011000" // REL.COND r17, r16, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2843 "00001000" // /* MW 3 */ + 2844 "01010101" // /* MW 2 */ + 2845 "00010100" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 112 + 2846 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2847 "00111001" // /* MW 3 */ + 2848 "11111100" // /* MW 2 */ + 2849 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first + 2850 "10011000" // LDA r17, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2851 "00110110" // /* MW 3 */ + 2852 "11100110" // /* MW 2 */ + 2853 "00000000" // /* MW 1 */ + 2854 "00011000" // LDA p6, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2855 "00011001" // /* MW 3 */ + 2856 "11101111" // /* MW 2 */ + 2857 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 2858 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2859 "10011001" // /* MW 3 */ + 2860 "11110111" // /* MW 2 */ + 2861 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 2862 "00011000" // LDA r14, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2863 "11010001" // /* MW 3 */ + 2864 "11110001" // /* MW 2 */ + 2865 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2866 "00011000" // LDA r15, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2867 "11110001" // /* MW 3 */ + 2868 "11111001" // /* MW 2 */ + 2869 "00000111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 112 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2870 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2871 "00000001" // /* MW 5 */ + 2872 "00000000" // /* MW 4 */ + 2873 "00000000" // /* MW 3 */ + 2874 "11111000" // /* MW 2 */ + 2875 "11111111" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 112 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2876 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 2877 "00000000" // /* MW 3 */ + 2878 "00101000" // /* MW 2 */ + 2879 "00010000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 first +.delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2880 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2881 "00010001" // /* MW 3 */ + 2882 "00100001" // /* MW 2 */ + 2883 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 2884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2885 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 2886 "11111000" // MOV r27, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2887 "00100000" // /* MW 3 */ + 2888 "11010111" // /* MW 2 */ + 2889 "00011110" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 24 +.delay_slot + 2890 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2891 "00000010" // /* MW 3 */ + 2892 "01100001" // /* MW 2 */ + 2893 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_compiler.h" 606 22 +.delay_slot + 2894 "10011000" // ST r16, [p0, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2895 "00010001" // /* MW 3 */ + 2896 "11100110" // /* MW 2 */ +.label _Z13kernelWrapperPPvjjjj__end +.label __Z13kernelWrapperPPvjjjj___func_end0 + 2897 "00001000" // /* MW 1 */ +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_begin0 +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.function setup_conv2d_bf16_params _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 432 first +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.function_start + 2912 "10111010" // LDA el0, [p0], #4; MOVX r4, #4; MOV r2, p1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2913 "01111000" // /* MW 9 */ + 2914 "01100000" // /* MW 8 */ + 2915 "01001001" // /* MW 7 */ + 2916 "10001000" // /* MW 6 */ + 2917 "01000000" // /* MW 5 */ + 2918 "00000000" // /* MW 4 */ + 2919 "11010000" // /* MW 3 */ + 2920 "10000101" // /* MW 2 */ + 2921 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 +.src_ref 2 "conv2d_bf16_params.h" 438 17 first +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2922 "10111010" // LDA eh0, [p0], #4; MOVX r5, #-1; ADD.NC p2, r2, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2923 "01001000" // /* MW 9 */ + 2924 "10000010" // /* MW 8 */ + 2925 "00110000" // /* MW 7 */ + 2926 "11101001" // /* MW 6 */ + 2927 "01010111" // /* MW 5 */ + 2928 "00111110" // /* MW 4 */ + 2929 "11010000" // /* MW 3 */ + 2930 "10000001" // /* MW 2 */ + 2931 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 432 +.src_ref 2 "conv2d_bf16_params.h" 444 52 + 2932 "10111010" // MOVA r1, #-4; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 2933 "01110000" // /* MW 9 */ + 2934 "00000000" // /* MW 8 */ + 2935 "00000000" // /* MW 7 */ + 2936 "00000000" // /* MW 6 */ + 2937 "00000010" // /* MW 5 */ + 2938 "00000000" // /* MW 4 */ + 2939 "00000000" // /* MW 3 */ + 2940 "10000001" // /* MW 2 */ + 2941 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 458 30 +.src_ref 2 "conv2d_bf16_params.h" 458 30 + 2942 "01110110" // MOVA r6, #12; ST r13, [sp, #-4]; MOVX r16, #1; MOV m0, #16 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2943 "01011000" // /* MW 11 */ + 2944 "00010000" // /* MW 10 */ + 2945 "00000000" // /* MW 9 */ + 2946 "00101000" // /* MW 8 */ + 2947 "00000000" // /* MW 7 */ + 2948 "10000001" // /* MW 6 */ + 2949 "10110101" // /* MW 5 */ + 2950 "11111101" // /* MW 4 */ + 2951 "00000111" // /* MW 3 */ + 2952 "10000110" // /* MW 2 */ + 2953 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 2954 "01110110" // MOVA r3, #3; ST r14, [sp, #-8]; MOVX r21, #-3; MOV r20, #15 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 2955 "01011000" // /* MW 11 */ + 2956 "00001111" // /* MW 10 */ + 2957 "10001000" // /* MW 9 */ + 2958 "10101010" // /* MW 8 */ + 2959 "01010111" // /* MW 7 */ + 2960 "10111111" // /* MW 6 */ + 2961 "11010101" // /* MW 5 */ + 2962 "11111001" // /* MW 4 */ + 2963 "00000111" // /* MW 3 */ + 2964 "01100011" // /* MW 2 */ + 2965 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 2966 "01011100" // ST r15, [sp, #-12]; MOVX r24, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 2967 "00000010" // /* MW 5 */ + 2968 "01100000" // /* MW 4 */ + 2969 "10110000" // /* MW 3 */ + 2970 "10111110" // /* MW 2 */ + 2971 "11111110" // /* MW 1 */ + 2972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2973 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2974 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2975 "00101001" // /* MW 3 */ + 2976 "00011100" // /* MW 2 */ + 2977 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 2978 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2979 "00001001" // /* MW 3 */ + 2980 "00011100" // /* MW 2 */ + 2981 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2982 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2983 "00101110" // /* MW 3 */ + 2984 "00011100" // /* MW 2 */ + 2985 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 2986 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 2987 "00001110" // /* MW 3 */ + 2988 "00011100" // /* MW 2 */ + 2989 "00000000" // /* MW 1 */ + 2990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2991 "00000000" // /* MW 1 */ + 2992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2993 "00000000" // /* MW 1 */ + 2994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2995 "00000000" // /* MW 1 */ + 2996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2997 "00000000" // /* MW 1 */ + 2998 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 2999 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3000 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3001 "00101001" // /* MW 3 */ + 3002 "00011100" // /* MW 2 */ + 3003 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3004 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3005 "00001001" // /* MW 3 */ + 3006 "00011100" // /* MW 2 */ + 3007 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 3008 "10011000" // LDA el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3009 "00101110" // /* MW 3 */ + 3010 "00011100" // /* MW 2 */ + 3011 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 3012 "10011000" // LDA eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3013 "00001110" // /* MW 3 */ + 3014 "00011100" // /* MW 2 */ + 3015 "00000000" // /* MW 1 */ + 3016 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3017 "00000000" // /* MW 1 */ + 3018 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3019 "00000000" // /* MW 1 */ + 3020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3021 "00000000" // /* MW 1 */ + 3022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3023 "00000000" // /* MW 1 */ + 3024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3025 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3026 "10011000" // ST el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3027 "00101001" // /* MW 3 */ + 3028 "00011100" // /* MW 2 */ + 3029 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3030 "10011000" // ST eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3031 "00001001" // /* MW 3 */ + 3032 "00011100" // /* MW 2 */ + 3033 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 3034 "10011000" // LDA eh0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3035 "00001110" // /* MW 3 */ + 3036 "00000100" // /* MW 2 */ + 3037 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 17 + 3038 "10011000" // LDA el0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3039 "00101110" // /* MW 3 */ + 3040 "00010100" // /* MW 2 */ + 3041 "00000000" // /* MW 1 */ + 3042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3043 "00000000" // /* MW 1 */ + 3044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3045 "00000000" // /* MW 1 */ + 3046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3047 "00000000" // /* MW 1 */ + 3048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3049 "00000000" // /* MW 1 */ + 3050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3051 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3052 "10011000" // ST eh0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3053 "00001001" // /* MW 3 */ + 3054 "00000100" // /* MW 2 */ + 3055 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 438 15 + 3056 "10011000" // ST el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3057 "00101001" // /* MW 3 */ + 3058 "00010100" // /* MW 2 */ + 3059 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 40 first + 3060 "10011000" // LDA.u8 r13, [p2], #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3061 "10101010" // /* MW 3 */ + 3062 "11011101" // /* MW 2 */ + 3063 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 447 34 first + 3064 "10011000" // LDA.u8 r17, [p2], #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3065 "00101010" // /* MW 3 */ + 3066 "00011110" // /* MW 2 */ + 3067 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 448 34 first + 3068 "10011000" // LDA.u8 r14, [p2], #-5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3069 "11001010" // /* MW 3 */ + 3070 "10111101" // /* MW 2 */ + 3071 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 3072 "10011000" // LDA.u16 r15, [p2], #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3073 "11111010" // /* MW 3 */ + 3074 "11111101" // /* MW 2 */ + 3075 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first + 3076 "10011000" // LDA.u8 r19, [p2], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3077 "01101010" // /* MW 3 */ + 3078 "00001010" // /* MW 2 */ + 3079 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 20 first + 3080 "10011000" // LDA.u8 r7, [p2], #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3081 "11101010" // /* MW 3 */ + 3082 "10101100" // /* MW 2 */ + 3083 "00000010" // /* MW 1 */ + 3084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3085 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first + 3086 "10011000" // LSHL r1, r13, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3087 "00011101" // /* MW 3 */ + 3088 "01000010" // /* MW 2 */ + 3089 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first +.src_ref 2 "conv2d_bf16_params.h" 462 7 first + 3090 "00100100" // EQ r16, r1, r16; ADD.NC r18, r17, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3091 "00000001" // /* MW 5 */ + 3092 "00110001" // /* MW 4 */ + 3093 "11111001" // /* MW 3 */ + 3094 "00100000" // /* MW 2 */ + 3095 "00001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 3096 "10011000" // LSHL r18, r18, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3097 "01011101" // /* MW 3 */ + 3098 "10100100" // /* MW 2 */ + 3099 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 first + 3100 "10011000" // EQ r27, r15, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3101 "01000111" // /* MW 3 */ + 3102 "11110110" // /* MW 2 */ + 3103 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 452 40 +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3104 "11100100" // SEL.EQZ r5, r24, r5, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3105 "00111001" // /* MW 5 */ + 3106 "10110111" // /* MW 4 */ + 3107 "01000000" // /* MW 3 */ + 3108 "01001010" // /* MW 2 */ + 3109 "11000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 + 3110 "00011000" // SEL.EQZ r29, r17, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3111 "00100010" // /* MW 3 */ + 3112 "01111011" // /* MW 2 */ + 3113 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 30 first + 3114 "10011000" // EQ r6, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3115 "01100111" // /* MW 3 */ + 3116 "11001100" // /* MW 2 */ + 3117 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 + 3118 "10011000" // AND r27, r6, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3119 "00000100" // /* MW 3 */ + 3120 "10110111" // /* MW 2 */ + 3121 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 557 34 first + 3122 "11100100" // LSHL r15, r15, r21; MOV r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3123 "01000001" // /* MW 5 */ + 3124 "10111011" // /* MW 4 */ + 3125 "10111100" // /* MW 3 */ + 3126 "11101011" // /* MW 2 */ + 3127 "01111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first + 3128 "01011100" // ST r15, [sp, #-20]; SEL.EQZ r6, r7, r24, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3129 "00000100" // /* MW 5 */ + 3130 "10011011" // /* MW 4 */ + 3131 "10110011" // /* MW 3 */ + 3132 "10111110" // /* MW 2 */ + 3133 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first + 3134 "10000100" // JNZ r25, #3216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3216 delay_slots=5 */ + 3135 "00000001" // /* MW 5 */ + 3136 "01000000" // /* MW 4 */ + 3137 "01001000" // /* MW 3 */ + 3138 "00000110" // /* MW 2 */ + 3139 "11001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 458 36 first +.delay_slot + 3140 "10011000" // EQ r27, r6, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3141 "01000111" // /* MW 3 */ + 3142 "10110110" // /* MW 2 */ + 3143 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 444 52 first +.delay_slot + 3144 "10011000" // AND r24, r13, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3145 "01000100" // /* MW 3 */ + 3146 "01110001" // /* MW 2 */ + 3147 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 462 7 first +.delay_slot + 3148 "10011000" // LSHL r30, r19, r5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3149 "01011101" // /* MW 3 */ + 3150 "11111100" // /* MW 2 */ + 3151 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 first +.delay_slot + 3152 "10011000" // LSHL r20, r27, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3153 "01001101" // /* MW 3 */ + 3154 "11101000" // /* MW 2 */ + 3155 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 470 11 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.delay_slot + 3156 "00011000" // SEL.EQZ r6, r6, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3157 "00110010" // /* MW 3 */ + 3158 "10001100" // /* MW 2 */ + 3159 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 + 3160 "10000100" // JNZ r27, #3216 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3216 delay_slots=5 */ + 3161 "00000001" // /* MW 5 */ + 3162 "01000000" // /* MW 4 */ + 3163 "01001000" // /* MW 3 */ + 3164 "00000110" // /* MW 2 */ + 3165 "11011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3167 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3169 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3171 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3173 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3175 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3176 "10111010" // MOVA r15, #1; J #3264 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=3264 delay_slots=5 */ + 3177 "00100000" // /* MW 9 */ + 3178 "00000000" // /* MW 8 */ + 3179 "00000000" // /* MW 7 */ + 3180 "10011000" // /* MW 6 */ + 3181 "00000001" // /* MW 5 */ + 3182 "00000000" // /* MW 4 */ + 3183 "00000000" // /* MW 3 */ + 3184 "00101111" // /* MW 2 */ + 3185 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3186 "10111010" // MOVA r26, #0; MOVX r5, #-3; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3187 "01011000" // /* MW 9 */ + 3188 "00001100" // /* MW 8 */ + 3189 "10001000" // /* MW 7 */ + 3190 "10101011" // /* MW 6 */ + 3191 "01010111" // /* MW 5 */ + 3192 "00111110" // /* MW 4 */ + 3193 "00000000" // /* MW 3 */ + 3194 "00011010" // /* MW 2 */ + 3195 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3196 "01100100" // MOVX r21, #4; MOV r2, #16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3197 "01000001" // /* MW 5 */ + 3198 "00100000" // /* MW 4 */ + 3199 "00100001" // /* MW 3 */ + 3200 "01000010" // /* MW 2 */ + 3201 "00000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.delay_slot + 3202 "00011000" // MOVX r13, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3203 "00001101" // /* MW 3 */ + 3204 "00011010" // /* MW 2 */ + 3205 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.delay_slot + 3206 "00011000" // MOVX r7, #15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3207 "00111101" // /* MW 3 */ + 3208 "00001110" // /* MW 2 */ + 3209 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.delay_slot + 3210 "00101100" // NOPA; MOVX r4, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3211 "11100010" // /* MW 5 */ + 3212 "10010001" // /* MW 4 */ + 3213 "11111111" // /* MW 3 */ + 3214 "00101100" // /* MW 2 */ + 3215 "00000000" // /* MW 1 */ +.label __ll6__Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 453 40 +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 504 45 +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3216 "01110110" // MOVA dj0, #16; MOVS p1, r2; MOVX r21, #4; MOV r4, #-4 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3217 "01011000" // /* MW 11 */ + 3218 "11111100" // /* MW 10 */ + 3219 "10001111" // /* MW 9 */ + 3220 "10001000" // /* MW 8 */ + 3221 "01010000" // /* MW 7 */ + 3222 "00000001" // /* MW 6 */ + 3223 "00001011" // /* MW 5 */ + 3224 "10000010" // /* MW 4 */ + 3225 "10000001" // /* MW 3 */ + 3226 "00000010" // /* MW 2 */ + 3227 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 453 40 first +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 506 73 +.src_ref 2 "conv2d_bf16_params.h" 507 53 +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3228 "10111010" // ST.s8 r6, [p1, dj0]; MOVX r26, #0; MOV r28, #12 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3229 "01011000" // /* MW 9 */ + 3230 "00001100" // /* MW 8 */ + 3231 "10001000" // /* MW 7 */ + 3232 "00001011" // /* MW 6 */ + 3233 "10100000" // /* MW 5 */ + 3234 "00000001" // /* MW 4 */ + 3235 "11100000" // /* MW 3 */ + 3236 "00011000" // /* MW 2 */ + 3237 "00100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 +.src_ref 2 "conv2d_bf16_params.h" 492 25 +.src_ref 2 "conv2d_bf16_params.h" 495 99 +.src_ref 2 "conv2d_bf16_params.h" 502 57 +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 520 48 +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 621 240 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 3238 "10111010" // MOVA r2, #16; MOVX r5, #-3; MOV r15, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3239 "01011000" // /* MW 9 */ + 3240 "00000001" // /* MW 8 */ + 3241 "11101000" // /* MW 7 */ + 3242 "10101001" // /* MW 6 */ + 3243 "01010111" // /* MW 5 */ + 3244 "00111110" // /* MW 4 */ + 3245 "00000000" // /* MW 3 */ + 3246 "00000010" // /* MW 2 */ + 3247 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 40 +.src_ref 2 "conv2d_bf16_params.h" 529 78 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 578 52 +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 87 + 3248 "11100001" // NOPA; NOPB; NOPS; MOVX r7, #15; MOV r13, #3; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 3249 "00000000" // /* MW 15 */ + 3250 "00000000" // /* MW 14 */ + 3251 "01011000" // /* MW 13 */ + 3252 "00000011" // /* MW 12 */ + 3253 "10101000" // /* MW 11 */ + 3254 "11101001" // /* MW 10 */ + 3255 "01110001" // /* MW 9 */ + 3256 "00000000" // /* MW 8 */ + 3257 "01011011" // /* MW 7 */ + 3258 "00000001" // /* MW 6 */ + 3259 "00100000" // /* MW 5 */ + 3260 "00000000" // /* MW 4 */ + 3261 "11110000" // /* MW 3 */ + 3262 "00101100" // /* MW 2 */ + 3263 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_352 +.src_ref 2 "conv2d_bf16_params.h" 477 40 first +.src_ref 2 "conv2d_bf16_params.h" 495 68 first +.src_ref 2 "conv2d_bf16_params.h" 495 112 +.src_ref 2 "conv2d_bf16_params.h" 682 38 + 3264 "10111010" // LDA.u8 r17, [p2], #-2; EQ r27, r13, r6; MOV m0, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3265 "01011000" // /* MW 9 */ + 3266 "00111100" // /* MW 8 */ + 3267 "00000000" // /* MW 7 */ + 3268 "00111100" // /* MW 6 */ + 3269 "10110011" // /* MW 5 */ + 3270 "00011011" // /* MW 4 */ + 3271 "01010000" // /* MW 3 */ + 3272 "11000101" // /* MW 2 */ + 3273 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 +.src_ref 2 "conv2d_bf16_params.h" 481 24 first +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 495 112 + 3274 "10111010" // LDA.u8 r1, [p2], m0; SEL.EQZ r18, r1, r26, r27; MOV m5, #-51 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3275 "01011000" // /* MW 9 */ + 3276 "11001101" // /* MW 8 */ + 3277 "10000111" // /* MW 7 */ + 3278 "00010010" // /* MW 6 */ + 3279 "00101101" // /* MW 5 */ + 3280 "00000011" // /* MW 4 */ + 3281 "01010000" // /* MW 3 */ + 3282 "00000101" // /* MW 2 */ + 3283 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 477 18 first +.src_ref 2 "conv2d_bf16_params.h" 496 68 +.src_ref 2 "conv2d_bf16_params.h" 504 35 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 578 47 + 3284 "10111010" // MOVA r23, #2; SEL.EQZ r29, r29, r21, r27; MOV m3, #55 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3285 "01011000" // /* MW 9 */ + 3286 "00110111" // /* MW 8 */ + 3287 "10000000" // /* MW 7 */ + 3288 "10010001" // /* MW 6 */ + 3289 "11011010" // /* MW 5 */ + 3290 "00111011" // /* MW 4 */ + 3291 "00000000" // /* MW 3 */ + 3292 "01010111" // /* MW 2 */ + 3293 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.src_ref 2 "conv2d_bf16_params.h" 504 45 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 +.src_ref 2 "conv2d_bf16_params.h" 519 42 +.src_ref 2 "conv2d_bf16_params.h" 700 34 + 3294 "10111010" // MOVA r3, #8; EQ r27, r21, r0; MOV m2, #-68 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3295 "01011000" // /* MW 9 */ + 3296 "10111100" // /* MW 8 */ + 3297 "00000111" // /* MW 7 */ + 3298 "00111101" // /* MW 6 */ + 3299 "10110000" // /* MW 5 */ + 3300 "00101011" // /* MW 4 */ + 3301 "00000000" // /* MW 3 */ + 3302 "00000011" // /* MW 2 */ + 3303 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 492 25 first +.src_ref 2 "conv2d_bf16_params.h" 497 46 +.src_ref 2 "conv2d_bf16_params.h" 509 50 + 3304 "10111010" // MOVA r16, #512; LSHL r22, r15, r24; MOV m1, #112 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3305 "01011000" // /* MW 9 */ + 3306 "01110000" // /* MW 8 */ + 3307 "10000000" // /* MW 7 */ + 3308 "01101100" // /* MW 6 */ + 3309 "01101100" // /* MW 5 */ + 3310 "00011111" // /* MW 4 */ + 3311 "00000000" // /* MW 3 */ + 3312 "00010000" // /* MW 2 */ + 3313 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 520 34 first + 3314 "01100100" // EXTEND.u8 r22, r22; MOV m4, #-105 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3315 "01011101" // /* MW 5 */ + 3316 "00011110" // /* MW 4 */ + 3317 "00001000" // /* MW 3 */ + 3318 "10010010" // /* MW 2 */ + 3319 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 +.src_ref 2 "conv2d_bf16_params.h" 520 48 + 3320 "00111010" // ST r22, [sp, #-16]; LSHL r22, r22, r2; MOV m7, #49 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3321 "01011001" // /* MW 9 */ + 3322 "00110001" // /* MW 8 */ + 3323 "10000000" // /* MW 7 */ + 3324 "01101111" // /* MW 6 */ + 3325 "01100001" // /* MW 5 */ + 3326 "00101101" // /* MW 4 */ + 3327 "10110000" // /* MW 3 */ + 3328 "01011010" // /* MW 2 */ + 3329 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 +.src_ref 2 "conv2d_bf16_params.h" 507 42 first + 3330 "01100100" // SUB r30, r30, r29; MOV m6, #-63 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3331 "00000101" // /* MW 5 */ + 3332 "00011111" // /* MW 4 */ + 3333 "00111100" // /* MW 3 */ + 3334 "10111010" // /* MW 2 */ + 3335 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 99 first + 3336 "10011000" // SUB r1, r15, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3337 "00010001" // /* MW 3 */ + 3338 "11000010" // /* MW 2 */ + 3339 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 495 96 +.src_ref 2 "conv2d_bf16_params.h" 610 64 +.src_ref 2 "conv2d_bf16_params.h" 709 96 + 3340 "01100100" // MUL r31, r17, r1; MOV r1, #7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3341 "00011101" // /* MW 5 */ + 3342 "10100000" // /* MW 4 */ + 3343 "11110000" // /* MW 3 */ + 3344 "11000011" // /* MW 2 */ + 3345 "10001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first + 3346 "10011000" // SUB r17, r26, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3347 "00100001" // /* MW 3 */ + 3348 "10100011" // /* MW 2 */ + 3349 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 96 first + 3350 "10011000" // LSHL r31, r31, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3351 "00011101" // /* MW 3 */ + 3352 "11111110" // /* MW 2 */ + 3353 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 495 53 +.src_ref 2 "conv2d_bf16_params.h" 506 48 +.src_ref 2 "conv2d_bf16_params.h" 519 42 first + 3354 "00111010" // ST r31, [p2], m5; LSHL r31, r29, r3; MOV m5, #87 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3355 "01011001" // /* MW 9 */ + 3356 "01010111" // /* MW 8 */ + 3357 "10000000" // /* MW 7 */ + 3358 "11101110" // /* MW 6 */ + 3359 "11110001" // /* MW 5 */ + 3360 "00111011" // /* MW 4 */ + 3361 "00110000" // /* MW 3 */ + 3362 "01111110" // /* MW 2 */ + 3363 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 68 first +.src_ref 2 "conv2d_bf16_params.h" 504 35 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 + 3364 "10111010" // LDA.u8 r21, [p2], m3; EQ r19, r23, r0; MOV m3, #-78 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3365 "01011000" // /* MW 9 */ + 3366 "10110010" // /* MW 8 */ + 3367 "10000111" // /* MW 7 */ + 3368 "00111101" // /* MW 6 */ + 3369 "00110000" // /* MW 5 */ + 3370 "00101111" // /* MW 4 */ + 3371 "01010000" // /* MW 3 */ + 3372 "01010101" // /* MW 2 */ + 3373 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 3374 "01011100" // ST r19, [sp, #-24]; LSHL r19, r19, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3375 "01111011" // /* MW 5 */ + 3376 "11001100" // /* MW 4 */ + 3377 "10111001" // /* MW 3 */ + 3378 "01001110" // /* MW 2 */ + 3379 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 496 53 first +.src_ref 2 "conv2d_bf16_params.h" 520 19 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 3380 "10111010" // ST.s8 r21, [p2], m2; OR r22, r31, r22; MOV m2, #246 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3381 "01011000" // /* MW 9 */ + 3382 "11110110" // /* MW 8 */ + 3383 "00000000" // /* MW 7 */ + 3384 "00101101" // /* MW 6 */ + 3385 "01101011" // /* MW 5 */ + 3386 "00111111" // /* MW 4 */ + 3387 "11100000" // /* MW 3 */ + 3388 "01010100" // /* MW 2 */ + 3389 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 46 first +.src_ref 2 "conv2d_bf16_params.h" 509 50 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3390 "10111010" // LDA.u16 r16, [p2], m1; SEL.EQZ r19, r19, r16, r27; MOV m1, #-176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3391 "01011000" // /* MW 9 */ + 3392 "01010000" // /* MW 8 */ + 3393 "10000111" // /* MW 7 */ + 3394 "00010000" // /* MW 6 */ + 3395 "00111000" // /* MW 5 */ + 3396 "00100111" // /* MW 4 */ + 3397 "01010000" // /* MW 3 */ + 3398 "01000011" // /* MW 2 */ + 3399 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3400 "10011000" // EQ r31, r23, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3401 "01100111" // /* MW 3 */ + 3402 "11111110" // /* MW 2 */ + 3403 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3404 "10011000" // EQ r16, r3, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3405 "01100111" // /* MW 3 */ + 3406 "11100000" // /* MW 2 */ + 3407 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 499 51 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3408 "10011000" // OR r27, r31, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3409 "00000101" // /* MW 3 */ + 3410 "11110111" // /* MW 2 */ + 3411 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 78 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3412 "10011000" // AND r21, r7, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3413 "01010100" // /* MW 3 */ + 3414 "11101011" // /* MW 2 */ + 3415 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 507 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 3416 "01100100" // ASHL r30, r30, r17; MOV r17, #24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3417 "01100001" // /* MW 5 */ + 3418 "10100000" // /* MW 4 */ + 3419 "11011000" // /* MW 3 */ + 3420 "10100011" // /* MW 2 */ + 3421 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 491 25 first +.src_ref 2 "conv2d_bf16_params.h" 507 34 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3422 "00111010" // ST r16, [sp, #-32]; LSHL r18, r15, r18; ADD.NC r30, r30, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3423 "01001001" // /* MW 9 */ + 3424 "10000000" // /* MW 8 */ + 3425 "11001111" // /* MW 7 */ + 3426 "01101111" // /* MW 6 */ + 3427 "00101001" // /* MW 5 */ + 3428 "00011111" // /* MW 4 */ + 3429 "10110000" // /* MW 3 */ + 3430 "01000010" // /* MW 2 */ + 3431 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 first +.src_ref 2 "conv2d_bf16_params.h" 511 47 first + 3432 "01011100" // ST r26, [p2], #4; LSHL r17, r30, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3433 "00111011" // /* MW 5 */ + 3434 "01000110" // /* MW 4 */ + 3435 "00111111" // /* MW 3 */ + 3436 "11101010" // /* MW 2 */ + 3437 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 500 53 +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 3438 "00000010" // ST r26, [p2], m4; MOV m4, #168 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3439 "01010000" // /* MW 7 */ + 3440 "10101000" // /* MW 6 */ + 3441 "00000000" // /* MW 5 */ + 3442 "00000010" // /* MW 4 */ + 3443 "00110000" // /* MW 3 */ + 3444 "01101010" // /* MW 2 */ + 3445 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 77 first +.src_ref 2 "conv2d_bf16_params.h" 509 19 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 3446 "01110110" // LDA.u8 r18, [p2], m7; ST r31, [sp, #-28]; OR r27, r19, r0; MOV el0, r27 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3447 "01111000" // /* MW 11 */ + 3448 "11001110" // /* MW 10 */ + 3449 "00001101" // /* MW 9 */ + 3450 "00101100" // /* MW 8 */ + 3451 "10110000" // /* MW 7 */ + 3452 "10100111" // /* MW 6 */ + 3453 "11110101" // /* MW 5 */ + 3454 "11100111" // /* MW 4 */ + 3455 "01010111" // /* MW 3 */ + 3456 "01001001" // /* MW 2 */ + 3457 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 19 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3458 "10011000" // OR r17, r27, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3459 "00010101" // /* MW 3 */ + 3460 "11100011" // /* MW 2 */ + 3461 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 3462 "10011000" // SUB r27, r26, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3463 "10000001" // /* MW 3 */ + 3464 "10110111" // /* MW 2 */ + 3465 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 47 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3466 "00011000" // EXTEND.u8 r24, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3467 "10010000" // /* MW 3 */ + 3468 "10110000" // /* MW 2 */ + 3469 "00010100" // /* MW 1 */ + 3470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3471 "00000000" // /* MW 1 */ + 3472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3473 "00000000" // /* MW 1 */ + 3474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3475 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 57 first + 3476 "10011000" // SUB r18, r15, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3477 "00100001" // /* MW 3 */ + 3478 "11100101" // /* MW 2 */ + 3479 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 502 53 + 3480 "10011000" // ST r18, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3481 "01010001" // /* MW 3 */ + 3482 "11001010" // /* MW 2 */ + 3483 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 48 first + 3484 "10011000" // LDA.u8 r18, [p2], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3485 "01001010" // /* MW 3 */ + 3486 "10101010" // /* MW 2 */ + 3487 "00000010" // /* MW 1 */ + 3488 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3489 "00000000" // /* MW 1 */ + 3490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3491 "00000000" // /* MW 1 */ + 3492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3493 "00000000" // /* MW 1 */ + 3494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3495 "00000000" // /* MW 1 */ + 3496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3497 "00000000" // /* MW 1 */ + 3498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3499 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 62 + 3500 "10011000" // SUB r18, r18, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3501 "11100001" // /* MW 3 */ + 3502 "10100100" // /* MW 2 */ + 3503 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 506 73 + 3504 "10011000" // ASHL r18, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3505 "10111110" // /* MW 3 */ + 3506 "10100101" // /* MW 2 */ + 3507 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 first + 3508 "10011000" // LSHL r18, r18, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3509 "00101101" // /* MW 3 */ + 3510 "10100100" // /* MW 2 */ + 3511 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3512 "01000100" // MOVXM r27, #65536 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3513 "00000000" // /* MW 5 */ + 3514 "10100000" // /* MW 4 */ + 3515 "00001101" // /* MW 3 */ + 3516 "00000001" // /* MW 2 */ + 3517 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3518 "10011000" // ADD r18, r27, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3519 "00100000" // /* MW 3 */ + 3520 "11100101" // /* MW 2 */ + 3521 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 + 3522 "01000100" // MOVXM r27, #16711680 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3523 "00000000" // /* MW 5 */ + 3524 "10100000" // /* MW 4 */ + 3525 "00001101" // /* MW 3 */ + 3526 "11111111" // /* MW 2 */ + 3527 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 510 45 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.src_ref 2 "conv2d_bf16_params.h" 642 99 + 3528 "01100100" // AND r27, r27, r18; MOV r18, #-16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3529 "11000001" // /* MW 5 */ + 3530 "00111111" // /* MW 4 */ + 3531 "10011001" // /* MW 3 */ + 3532 "11100100" // /* MW 2 */ + 3533 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 19 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3534 "01100100" // OR r27, r27, r17; MOV r17, #-8 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3535 "11100001" // /* MW 5 */ + 3536 "10111111" // /* MW 4 */ + 3537 "10111000" // /* MW 3 */ + 3538 "11100010" // /* MW 2 */ + 3539 "11011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 512 64 first +.src_ref 2 "conv2d_bf16_params.h" 524 122 first + 3540 "01011100" // ST r27, [p2], #4; LSHL r19, r19, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3541 "00111011" // /* MW 5 */ + 3542 "11001110" // /* MW 4 */ + 3543 "00111001" // /* MW 3 */ + 3544 "11101110" // /* MW 2 */ + 3545 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3546 "10011000" // SUB r26, r26, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3547 "00110001" // /* MW 3 */ + 3548 "10110101" // /* MW 2 */ + 3549 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 122 + 3550 "10011000" // LSHL r20, r20, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3551 "10101101" // /* MW 3 */ + 3552 "00101001" // /* MW 2 */ + 3553 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 520 19 first + 3554 "10011000" // OR r26, r14, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3555 "01100101" // /* MW 3 */ + 3556 "10110101" // /* MW 2 */ + 3557 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 511 36 first +.src_ref 2 "conv2d_bf16_params.h" 522 68 first + 3558 "01011100" // ST r26, [p2], m3; EXTEND.u8 r26, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3559 "00100000" // /* MW 5 */ + 3560 "01101001" // /* MW 4 */ + 3561 "00111111" // /* MW 3 */ + 3562 "01101010" // /* MW 2 */ + 3563 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 524 65 first +.src_ref 2 "conv2d_bf16_params.h" 529 62 first +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3564 "10111010" // LDA.u8 r25, [p2], m2; LSHL r20, r27, r18; ADD.NC r30, r26, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3565 "10101000" // /* MW 9 */ + 3566 "10101000" // /* MW 8 */ + 3567 "11001110" // /* MW 7 */ + 3568 "01101111" // /* MW 6 */ + 3569 "01001001" // /* MW 5 */ + 3570 "00110111" // /* MW 4 */ + 3571 "01010000" // /* MW 3 */ + 3572 "01100101" // /* MW 2 */ + 3573 "01001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 +.src_ref 2 "conv2d_bf16_params.h" 539 14 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 3574 "01100100" // LSHL r22, r22, r17; MOV r17, #254 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3575 "11111001" // /* MW 5 */ + 3576 "10100011" // /* MW 4 */ + 3577 "10111000" // /* MW 3 */ + 3578 "10100011" // /* MW 2 */ + 3579 "10110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 527 45 first +.src_ref 2 "conv2d_bf16_params.h" 533 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 3580 "00101100" // ST.s8 r25, [p2], m1; MUL r26, r26, r24 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3581 "00011111" // /* MW 5 */ + 3582 "01101011" // /* MW 4 */ + 3583 "11101101" // /* MW 3 */ + 3584 "01100100" // /* MW 2 */ + 3585 "01000101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3587 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3588 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3589 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3590 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3591 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 3592 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3593 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 48 first +.src_ref 2 "conv2d_bf16_params.h" 533 46 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3594 "10100100" // LSHL r25, r16, r15; ADD.NC r27, r21, r25 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3595 "11001010" // /* MW 5 */ + 3596 "10110101" // /* MW 4 */ + 3597 "10111101" // /* MW 3 */ + 3598 "01011111" // /* MW 2 */ + 3599 "10000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 first + 3600 "10000100" // JNZ r31, #3728 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3728 delay_slots=5 */ + 3601 "00000001" // /* MW 5 */ + 3602 "01000000" // /* MW 4 */ + 3603 "01001000" // /* MW 3 */ + 3604 "00000111" // /* MW 2 */ + 3605 "11111000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 76 first +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3606 "10100100" // ADD r21, r19, #3; ADD.NC r27, r27, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3607 "11110010" // /* MW 5 */ + 3608 "10111011" // /* MW 4 */ + 3609 "11101101" // /* MW 3 */ + 3610 "01000001" // /* MW 2 */ + 3611 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 529 122 +.delay_slot + 3612 "10011000" // LSHL r21, r27, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3613 "01011101" // /* MW 3 */ + 3614 "11101011" // /* MW 2 */ + 3615 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 93 first +.delay_slot + 3616 "10011000" // AND r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3617 "00010100" // /* MW 3 */ + 3618 "01100011" // /* MW 2 */ + 3619 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 534 44 +.src_ref 2 "conv2d_bf16_params.h" 539 139 first +.src_ref 2 "conv2d_bf16_params.h" 555 59 +.src_ref 2 "conv2d_bf16_params.h" 559 59 +.src_ref 2 "conv2d_bf16_params.h" 700 17 +.delay_slot + 3620 "00111010" // ST r17, [p2], m4; EQ r27, r6, r28; MOV r17, #1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3621 "01011001" // /* MW 9 */ + 3622 "00000001" // /* MW 8 */ + 3623 "00101000" // /* MW 7 */ + 3624 "00111110" // /* MW 6 */ + 3625 "10111110" // /* MW 5 */ + 3626 "00001101" // /* MW 4 */ + 3627 "00110000" // /* MW 3 */ + 3628 "01000110" // /* MW 2 */ + 3629 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.delay_slot + 3630 "11111000" // MOV el1, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3631 "10011100" // /* MW 3 */ + 3632 "10011011" // /* MW 2 */ + 3633 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3634 "00011000" // LDA r28, [sp, #-32] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3635 "10010001" // /* MW 3 */ + 3636 "11100011" // /* MW 2 */ + 3637 "00000111" // /* MW 1 */ + 3638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3639 "00000000" // /* MW 1 */ + 3640 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3641 "00000000" // /* MW 1 */ + 3642 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3643 "00000000" // /* MW 1 */ + 3644 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3645 "00000000" // /* MW 1 */ + 3646 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3647 "00000000" // /* MW 1 */ + 3648 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3649 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 14 + 3650 "10000100" // JNZ r28, #3728 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=3728 delay_slots=5 */ + 3651 "00000001" // /* MW 5 */ + 3652 "01000000" // /* MW 4 */ + 3653 "01001000" // /* MW 3 */ + 3654 "00000111" // /* MW 2 */ + 3655 "11100000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3661 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3663 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 3664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 3665 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 139 +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3666 "10111010" // MOVA r28, #5; MOVX r17, #4; MOV r25, #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3667 "01011000" // /* MW 9 */ + 3668 "01000000" // /* MW 8 */ + 3669 "00101000" // /* MW 7 */ + 3670 "10001011" // /* MW 6 */ + 3671 "00010000" // /* MW 5 */ + 3672 "00000001" // /* MW 4 */ + 3673 "00000000" // /* MW 3 */ + 3674 "10111100" // /* MW 2 */ + 3675 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3676 "00011000" // SEL.EQZ r31, r17, r13, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3677 "11010010" // /* MW 3 */ + 3678 "01111110" // /* MW 2 */ + 3679 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 162 + 3680 "10011000" // EQ r27, r25, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3681 "01100111" // /* MW 3 */ + 3682 "01110110" // /* MW 2 */ + 3683 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 +.src_ref 2 "conv2d_bf16_params.h" 539 139 + 3684 "01100100" // SEL.EQZ r28, r31, r28, r27; MOV r31, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3685 "00000001" // /* MW 5 */ + 3686 "10100000" // /* MW 4 */ + 3687 "01001111" // /* MW 3 */ + 3688 "00111000" // /* MW 2 */ + 3689 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 46 + 3690 "00011000" // EXTEND.s8 r25, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3691 "01010000" // /* MW 3 */ + 3692 "00110010" // /* MW 2 */ + 3693 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 44 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 3694 "10011000" // MUL r30, r25, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3695 "11101111" // /* MW 3 */ + 3696 "01111101" // /* MW 2 */ + 3697 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 115 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 3698 "11100100" // LT r27, r25, r17; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3699 "00111001" // /* MW 5 */ + 3700 "11000100" // /* MW 4 */ + 3701 "01011101" // /* MW 3 */ + 3702 "11100011" // /* MW 2 */ + 3703 "11001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 82 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3704 "00011000" // SEL.EQZ r17, r15, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3705 "10000010" // /* MW 3 */ + 3706 "11100011" // /* MW 2 */ + 3707 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 79 + 3708 "10011000" // MUL r17, r17, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3709 "11101111" // /* MW 3 */ + 3710 "01100011" // /* MW 2 */ + 3711 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3712 "10011000" // SUB r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3713 "11000001" // /* MW 3 */ + 3714 "11111001" // /* MW 2 */ + 3715 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 539 135 + 3716 "10011000" // ASHL r17, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3717 "11001110" // /* MW 3 */ + 3718 "01100011" // /* MW 2 */ + 3719 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 55 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 3720 "00100010" // EXTEND.u8 r17, r17; NOPV /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 3721 "00011100" // /* MW 7 */ + 3722 "00000000" // /* MW 6 */ + 3723 "00000000" // /* MW 5 */ + 3724 "10000001" // /* MW 4 */ + 3725 "00010100" // /* MW 3 */ + 3726 "00100011" // /* MW 2 */ + 3727 "00000000" // /* MW 1 */ +.label TGT_F_Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh_816 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.src_ref 2 "conv2d_bf16_params.h" 669 63 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3728 "10111010" // MOVA r25, #0; MOVX r28, #-1; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3729 "01111000" // /* MW 9 */ + 3730 "00001110" // /* MW 8 */ + 3731 "01110000" // /* MW 7 */ + 3732 "11101011" // /* MW 6 */ + 3733 "11000111" // /* MW 5 */ + 3734 "00111111" // /* MW 4 */ + 3735 "00000000" // /* MW 3 */ + 3736 "00011001" // /* MW 2 */ + 3737 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 63 first +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3738 "00011000" // SEL.EQZ r31, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3739 "11000010" // /* MW 3 */ + 3740 "01111111" // /* MW 2 */ + 3741 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 497 34 first +.src_ref 2 "conv2d_bf16_params.h" 641 32 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 3742 "10111010" // LDA r27, [sp, #-24]; EXTEND.u8 r16, r16; ADD.NC r26, r29, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3743 "10101000" // /* MW 9 */ + 3744 "01110100" // /* MW 8 */ + 3745 "01001111" // /* MW 7 */ + 3746 "10000011" // /* MW 6 */ + 3747 "00000100" // /* MW 5 */ + 3748 "00100001" // /* MW 4 */ + 3749 "00100000" // /* MW 3 */ + 3750 "01101110" // /* MW 2 */ + 3751 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 61 first +.src_ref 2 "conv2d_bf16_params.h" 640 16 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.aggressive_scheduled_block_id 5 +.nohwbrkpt +.noswbrkpt + 3752 "10111010" // MOVA r30, #72; EXTEND.u8 r20, r20; MOV r29, #9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3753 "01011000" // /* MW 9 */ + 3754 "00001001" // /* MW 8 */ + 3755 "10101000" // /* MW 7 */ + 3756 "10000011" // /* MW 6 */ + 3757 "01000100" // /* MW 5 */ + 3758 "00101001" // /* MW 4 */ + 3759 "00000000" // /* MW 3 */ + 3760 "00011110" // /* MW 2 */ + 3761 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3762 "00011000" // SEL.EQZ r25, r29, r30, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3763 "11100010" // /* MW 3 */ + 3764 "01110011" // /* MW 2 */ + 3765 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 47 first + 3766 "10011000" // NE r28, r23, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3767 "10001000" // /* MW 3 */ + 3768 "11111001" // /* MW 2 */ + 3769 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 640 16 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 3770 "10011000" // LSHL r29, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3771 "00111101" // /* MW 3 */ + 3772 "01111011" // /* MW 2 */ + 3773 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.noswbrkpt + 3774 "10111010" // LDA r23, [sp, #-20]; MOVXM r24, #1032 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3775 "00010000" // /* MW 9 */ + 3776 "00000100" // /* MW 8 */ + 3777 "00001010" // /* MW 7 */ + 3778 "00000011" // /* MW 6 */ + 3779 "00000000" // /* MW 5 */ + 3780 "00000000" // /* MW 4 */ + 3781 "00100000" // /* MW 3 */ + 3782 "11011110" // /* MW 2 */ + 3783 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 641 44 first +.src_ref 2 "conv2d_bf16_params.h" 642 45 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3784 "00100100" // LSHL r19, r25, r19; ADD.NC r30, r26, #-1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3785 "11111111" // /* MW 5 */ + 3786 "00111010" // /* MW 4 */ + 3787 "10111111" // /* MW 3 */ + 3788 "11100111" // /* MW 2 */ + 3789 "11001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 20 +.src_ref 2 "conv2d_bf16_params.h" 642 87 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3790 "00011000" // MAC r7, r7, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3791 "11100110" // /* MW 3 */ + 3792 "11001111" // /* MW 2 */ + 3793 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 55 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 3794 "01100100" // EXTEND.u8 r19, r22; MOV r23, #522 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3795 "00101001" // /* MW 5 */ + 3796 "10101000" // /* MW 4 */ + 3797 "00001011" // /* MW 3 */ + 3798 "11010010" // /* MW 2 */ + 3799 "10110100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3800 "01100100" // SEL.EQZ r22, r23, r24, r27; MOV r26, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3801 "00000001" // /* MW 5 */ + 3802 "00100001" // /* MW 4 */ + 3803 "01001101" // /* MW 3 */ + 3804 "10110000" // /* MW 2 */ + 3805 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 38 first +.src_ref 2 "conv2d_bf16_params.h" 557 34 + 3806 "11100100" // NE r6, r6, r26; MOV r27, eh0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3807 "00111001" // /* MW 5 */ + 3808 "11000010" // /* MW 4 */ + 3809 "00011101" // /* MW 3 */ + 3810 "10110101" // /* MW 2 */ + 3811 "00110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 99 first + 3812 "10011000" // AND r7, r7, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3813 "00100100" // /* MW 3 */ + 3814 "11001111" // /* MW 2 */ + 3815 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 557 34 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 + 3816 "11100100" // SEL.EQZ r23, r23, r15, r27; MOV r27, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3817 "01000001" // /* MW 5 */ + 3818 "10100110" // /* MW 4 */ + 3819 "01001101" // /* MW 3 */ + 3820 "11011110" // /* MW 2 */ + 3821 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3822 "01100100" // SEL.EQZ r4, r5, r4, r27; MOV r18, #31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3823 "01111101" // /* MW 5 */ + 3824 "00100000" // /* MW 4 */ + 3825 "01001001" // /* MW 3 */ + 3826 "00001000" // /* MW 2 */ + 3827 "00101001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 119 first + 3828 "10011000" // AND r23, r23, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3829 "00100100" // /* MW 3 */ + 3830 "11101111" // /* MW 2 */ + 3831 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 540 15 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3832 "10111010" // MOVA r30, #-288; LSHL r4, r16, r4; MOV r18, #-144 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3833 "01011000" // /* MW 9 */ + 3834 "01110000" // /* MW 8 */ + 3835 "01001111" // /* MW 7 */ + 3836 "01101110" // /* MW 6 */ + 3837 "01000010" // /* MW 5 */ + 3838 "00100000" // /* MW 4 */ + 3839 "00000000" // /* MW 3 */ + 3840 "00011110" // /* MW 2 */ + 3841 "11011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3842 "00011000" // SEL.EQZ r30, r30, r18, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3843 "00100010" // /* MW 3 */ + 3844 "10111101" // /* MW 2 */ + 3845 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 85 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 + 3846 "10111010" // MOVA r5, #144; MUL r26, r23, r19; MOV r16, #288 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3847 "01011000" // /* MW 9 */ + 3848 "00100000" // /* MW 8 */ + 3849 "00001001" // /* MW 7 */ + 3850 "11111110" // /* MW 6 */ + 3851 "10101001" // /* MW 5 */ + 3852 "00101111" // /* MW 4 */ + 3853 "00000000" // /* MW 3 */ + 3854 "00000101" // /* MW 2 */ + 3855 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first + 3856 "00011000" // SEL.EQZ r16, r16, r5, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3857 "01010010" // /* MW 3 */ + 3858 "00100000" // /* MW 2 */ + 3859 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 559 59 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 3860 "10100100" // MUL r24, r17, r4; ADD.NC r27, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3861 "11110010" // /* MW 5 */ + 3862 "10111101" // /* MW 4 */ + 3863 "11111101" // /* MW 3 */ + 3864 "00001001" // /* MW 2 */ + 3865 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 669 41 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 7 +.noswbrkpt + 3866 "11100100" // LSHL r16, r16, r31; MOV r27, el1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3867 "00111001" // /* MW 5 */ + 3868 "11000100" // /* MW 4 */ + 3869 "10111101" // /* MW 3 */ + 3870 "00111111" // /* MW 2 */ + 3871 "10000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 117 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 3872 "01011100" // ST r27, [sp, #-36]; MUL r26, r14, r26 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3873 "01011111" // /* MW 5 */ + 3874 "01101011" // /* MW 4 */ + 3875 "10110111" // /* MW 3 */ + 3876 "11101110" // /* MW 2 */ + 3877 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 700 34 first + 3878 "00011000" // SEL.EQZ r2, r2, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3879 "00110010" // /* MW 3 */ + 3880 "10000100" // /* MW 2 */ + 3881 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 52 first + 3882 "10011000" // LTU r31, r13, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3883 "00001100" // /* MW 3 */ + 3884 "01111110" // /* MW 2 */ + 3885 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 92 first + 3886 "10011000" // MUL r24, r20, r24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3887 "10001111" // /* MW 3 */ + 3888 "00110001" // /* MW 2 */ + 3889 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 578 36 first + 3890 "10011000" // OR r27, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 3891 "11000101" // /* MW 3 */ + 3892 "11110111" // /* MW 2 */ + 3893 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 64 first +.src_ref 2 "conv2d_bf16_params.h" 611 47 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 629 82 + 3894 "01110110" // MOVA r3, #128; ST r20, [sp, #-20]; LSHL r28, r27, r1; MOV r20, #256 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3895 "01011000" // /* MW 11 */ + 3896 "00000000" // /* MW 10 */ + 3897 "10001001" // /* MW 9 */ + 3898 "11101110" // /* MW 8 */ + 3899 "11000000" // /* MW 7 */ + 3900 "10110111" // /* MW 6 */ + 3901 "10010101" // /* MW 5 */ + 3902 "11101110" // /* MW 4 */ + 3903 "00000111" // /* MW 3 */ + 3904 "00000011" // /* MW 2 */ + 3905 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 +.src_ref 2 "conv2d_bf16_params.h" 649 41 + 3906 "11100100" // SEL.EQZ r20, r3, r20, r27; MOV eh0, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3907 "00111001" // /* MW 5 */ + 3908 "10110111" // /* MW 4 */ + 3909 "01000000" // /* MW 3 */ + 3910 "00101000" // /* MW 2 */ + 3911 "00011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3912 "01000100" // MOVXM r31, #1542 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3913 "00001100" // /* MW 5 */ + 3914 "10101100" // /* MW 4 */ + 3915 "00001111" // /* MW 3 */ + 3916 "00000000" // /* MW 2 */ + 3917 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 60 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first + 3918 "00111010" // ST r4, [sp, #-24]; EQ r27, r15, r0; ADD.NC r4, r4, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3919 "11001001" // /* MW 9 */ + 3920 "00111111" // /* MW 8 */ + 3921 "10001001" // /* MW 7 */ + 3922 "00111100" // /* MW 6 */ + 3923 "10110000" // /* MW 5 */ + 3924 "00011111" // /* MW 4 */ + 3925 "10110000" // /* MW 3 */ + 3926 "00010010" // /* MW 2 */ + 3927 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 554 53 +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 555 59 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3928 "01110110" // MOVA m3, #-148; ST r4, [p2], #4; SEL.EQZ r31, r22, r31, r27; ADD.NC r22, r17, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3929 "11001000" // /* MW 11 */ + 3930 "01111111" // /* MW 10 */ + 3931 "11001100" // /* MW 9 */ + 3932 "10010010" // /* MW 8 */ + 3933 "11111111" // /* MW 7 */ + 3934 "10101101" // /* MW 6 */ + 3935 "10010001" // /* MW 5 */ + 3936 "00011100" // /* MW 4 */ + 3937 "10000010" // /* MW 3 */ + 3938 "10001100" // /* MW 2 */ + 3939 "11101101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 555 53 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 621 240 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3940 "00111010" // ST r22, [p2], m3; LSHL r21, r21, r15; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3941 "01111001" // /* MW 9 */ + 3942 "10001110" // /* MW 8 */ + 3943 "01110000" // /* MW 7 */ + 3944 "11101111" // /* MW 6 */ + 3945 "01010111" // /* MW 5 */ + 3946 "00101011" // /* MW 4 */ + 3947 "00110000" // /* MW 3 */ + 3948 "01011010" // /* MW 2 */ + 3949 "01001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 558 53 first +.src_ref 2 "conv2d_bf16_params.h" 559 53 +.src_ref 2 "conv2d_bf16_params.h" 621 140 +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 645 41 + 3950 "01110110" // MOVA r25, #22; ST r26, [p2], #4; SUB r20, r20, r28; MOV m4, #88 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3951 "01011000" // /* MW 11 */ + 3952 "01011000" // /* MW 10 */ + 3953 "00000000" // /* MW 9 */ + 3954 "00001110" // /* MW 8 */ + 3955 "01001110" // /* MW 7 */ + 3956 "10101001" // /* MW 6 */ + 3957 "01010001" // /* MW 5 */ + 3958 "00011111" // /* MW 4 */ + 3959 "00000010" // /* MW 3 */ + 3960 "11011001" // /* MW 2 */ + 3961 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 559 53 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 645 41 first +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id first + 3962 "01011100" // ST r24, [p2], m4; SEL.EQZ r24, r31, r25, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 3963 "00100100" // /* MW 5 */ + 3964 "11100011" // /* MW 4 */ + 3965 "00111111" // /* MW 3 */ + 3966 "01100010" // /* MW 2 */ + 3967 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 610 47 first +.src_ref 2 "conv2d_bf16_params.h" 621 222 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 8 +.noswbrkpt + 3968 "01110110" // LDA r27, [sp, #-32]; ST r28, [p2], #-8; SUB r28, r21, r28; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 3969 "01111000" // /* MW 11 */ + 3970 "10010000" // /* MW 10 */ + 3971 "01101001" // /* MW 9 */ + 3972 "00001111" // /* MW 8 */ + 3973 "11001110" // /* MW 7 */ + 3974 "10101011" // /* MW 6 */ + 3975 "10010001" // /* MW 5 */ + 3976 "11101111" // /* MW 4 */ + 3977 "00100010" // /* MW 3 */ + 3978 "01101110" // /* MW 2 */ + 3979 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 661 61 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3980 "10111010" // MOVA r19, #279; SEL.EQZ r28, r20, r28, r27; ADD.NC r20, r19, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3981 "11001000" // /* MW 9 */ + 3982 "11111111" // /* MW 8 */ + 3983 "10001100" // /* MW 7 */ + 3984 "00010010" // /* MW 6 */ + 3985 "11001110" // /* MW 5 */ + 3986 "00101001" // /* MW 4 */ + 3987 "00000000" // /* MW 3 */ + 3988 "11110011" // /* MW 2 */ + 3989 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 621 156 +.src_ref 2 "conv2d_bf16_params.h" 649 41 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 710 60 +.src_ref 2 "conv2d_bf16_params.h" 710 65 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 3990 "10111010" // MOVA r29, #-72; MSC r30, r30, r29, r20; MOV r27, eh0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 3991 "01111000" // /* MW 9 */ + 3992 "10001110" // /* MW 8 */ + 3993 "01110000" // /* MW 7 */ + 3994 "01110011" // /* MW 6 */ + 3995 "11101010" // /* MW 5 */ + 3996 "00111011" // /* MW 4 */ + 3997 "00000000" // /* MW 3 */ + 3998 "00011101" // /* MW 2 */ + 3999 "11110111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4000 "00101100" // LDA r27, [sp, #-28]; SEL.EQZ r18, r29, r18, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4001 "01000100" // /* MW 5 */ + 4002 "11001010" // /* MW 4 */ + 4003 "00101110" // /* MW 3 */ + 4004 "11101110" // /* MW 2 */ + 4005 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 156 first +.src_ref 2 "conv2d_bf16_params.h" 649 41 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.src_ref 2 "conv2d_bf16_params.h" 700 34 +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4006 "10111010" // MOVA r31, #32; SEL.EQZ r19, r31, r19, r27; MOV r27, r6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4007 "01111000" // /* MW 9 */ + 4008 "10010000" // /* MW 8 */ + 4009 "01101001" // /* MW 7 */ + 4010 "10010011" // /* MW 6 */ + 4011 "00111001" // /* MW 5 */ + 4012 "00111111" // /* MW 4 */ + 4013 "00000000" // /* MW 3 */ + 4014 "00011111" // /* MW 2 */ + 4015 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 700 34 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4016 "00011000" // SEL.EQZ r2, r31, r2, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4017 "00100010" // /* MW 3 */ + 4018 "11000100" // /* MW 2 */ + 4019 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 82 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4020 "10011000" // SUB r21, r3, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4021 "01010001" // /* MW 3 */ + 4022 "11101011" // /* MW 2 */ + 4023 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 611 47 first +.aggressive_scheduled_block_id 8 +.nohwbrkpt +.noswbrkpt + 4024 "00111010" // ST r3, [p2], #12; SEL.EQZ r2, r2, r15, r27; MOV r3, #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4025 "01011001" // /* MW 9 */ + 4026 "11000000" // /* MW 8 */ + 4027 "01101111" // /* MW 7 */ + 4028 "10010000" // /* MW 6 */ + 4029 "00100111" // /* MW 5 */ + 4030 "00000100" // /* MW 4 */ + 4031 "00110000" // /* MW 3 */ + 4032 "10001110" // /* MW 2 */ + 4033 "01000111" // /* MW 1 */ +.aggressive_scheduled_block_id 8 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4034 "00011000" // SEL.EQZ r28, r28, r3, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4035 "00110010" // /* MW 3 */ + 4036 "00111000" // /* MW 2 */ + 4037 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 643 22 first +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id first + 4038 "10011000" // MUL r31, r23, r7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4039 "01111111" // /* MW 3 */ + 4040 "11111110" // /* MW 2 */ + 4041 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.noswbrkpt + 4042 "00101100" // LDA r17, [sp, #-36]; SEL.EQZ r3, r28, r3, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4043 "01100100" // /* MW 5 */ + 4044 "00001100" // /* MW 4 */ + 4045 "00101110" // /* MW 3 */ + 4046 "11000110" // /* MW 2 */ + 4047 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 621 47 first +.src_ref 2 "conv2d_bf16_params.h" 629 45 +.src_ref 2 "conv2d_bf16_params.h" 684 30 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4048 "00111010" // ST r3, [p2], #-8; MUL r18, r26, r18; MOV m1, #40 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4049 "01011001" // /* MW 9 */ + 4050 "00101000" // /* MW 8 */ + 4051 "10000000" // /* MW 7 */ + 4052 "01111100" // /* MW 6 */ + 4053 "00101001" // /* MW 5 */ + 4054 "00110101" // /* MW 4 */ + 4055 "00110000" // /* MW 3 */ + 4056 "10001110" // /* MW 2 */ + 4057 "01011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 629 45 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4058 "01011100" // ST r21, [p2], m1; SEL.EQZ r3, r2, r15, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4059 "11100100" // /* MW 5 */ + 4060 "00001101" // /* MW 4 */ + 4061 "00110001" // /* MW 3 */ + 4062 "01010110" // /* MW 2 */ + 4063 "01000101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 644 22 +.src_ref 2 "conv2d_bf16_params.h" 700 17 first +.src_ref 2 "conv2d_bf16_params.h" 705 50 +.src_ref 2 "conv2d_bf16_params.h" 705 61 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4064 "10111010" // LDA r0, [sp, #-16]; MUL r3, r3, r17; ADD.NC r21, r7, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4065 "10101000" // /* MW 9 */ + 4066 "11111100" // /* MW 8 */ + 4067 "10101001" // /* MW 7 */ + 4068 "11111110" // /* MW 6 */ + 4069 "00111000" // /* MW 5 */ + 4070 "00000110" // /* MW 4 */ + 4071 "00100000" // /* MW 3 */ + 4072 "00000010" // /* MW 2 */ + 4073 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 645 38 first +.src_ref 2 "conv2d_bf16_params.h" 700 111 +.src_ref 2 "conv2d_bf16_params.h" 700 149 +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4074 "01111010" // LDA r17, [sp, #-20]; ST r24, [p2], #4; MAC r3, r3, r20, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4075 "00000110" // /* MW 9 */ + 4076 "00000110" // /* MW 8 */ + 4077 "00000101" // /* MW 7 */ + 4078 "10000000" // /* MW 6 */ + 4079 "00010001" // /* MW 5 */ + 4080 "00011111" // /* MW 4 */ + 4081 "00100010" // /* MW 3 */ + 4082 "11000110" // /* MW 2 */ + 4083 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 +.src_ref 2 "conv2d_bf16_params.h" 649 38 first +.src_ref 2 "conv2d_bf16_params.h" 674 24 +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4084 "00111010" // ST r19, [p2], #28; MOVXM r19, #65520 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4085 "00010001" // /* MW 9 */ + 4086 "11111000" // /* MW 8 */ + 4087 "01101111" // /* MW 7 */ + 4088 "00111110" // /* MW 6 */ + 4089 "00000000" // /* MW 5 */ + 4090 "00000000" // /* MW 4 */ + 4091 "00110000" // /* MW 3 */ + 4092 "11001110" // /* MW 2 */ + 4093 "01001111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 14 first +.src_ref 2 "conv2d_bf16_params.h" 662 61 +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.aggressive_scheduled_block_id 9 +.nohwbrkpt +.noswbrkpt + 4094 "00111010" // ST r20, [p2], #4; AND r20, r31, r19; ADD.NC r2, r14, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4095 "11001001" // /* MW 9 */ + 4096 "10111111" // /* MW 8 */ + 4097 "01001011" // /* MW 7 */ + 4098 "10100100" // /* MW 6 */ + 4099 "01001001" // /* MW 5 */ + 4100 "00111111" // /* MW 4 */ + 4101 "00110000" // /* MW 3 */ + 4102 "11010010" // /* MW 2 */ + 4103 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 663 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 9 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4104 "01011100" // ST r17, [p2], #4; MSC r21, r21, r2, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4105 "10011100" // /* MW 5 */ + 4106 "01010110" // /* MW 4 */ + 4107 "00110001" // /* MW 3 */ + 4108 "11000110" // /* MW 2 */ + 4109 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first + 4110 "01011100" // ST r2, [p2], #4; ADD r30, r30, r20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4111 "10000001" // /* MW 5 */ + 4112 "01111010" // /* MW 4 */ + 4113 "00111111" // /* MW 3 */ + 4114 "10001010" // /* MW 2 */ + 4115 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id first + 4116 "01011100" // ST r30, [p2], #4; SUB r28, r16, r31 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4117 "11100011" // /* MW 5 */ + 4118 "01110011" // /* MW 4 */ + 4119 "00111000" // /* MW 3 */ + 4120 "11111010" // /* MW 2 */ + 4121 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.aggressive_scheduled_block_id 10 +.noswbrkpt + 4122 "00111010" // ST r21, [p2], #4; MAC r31, r31, r22, r16; MOV dc0, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4123 "01011001" // /* MW 9 */ + 4124 "00000000" // /* MW 8 */ + 4125 "01100000" // /* MW 7 */ + 4126 "00110000" // /* MW 6 */ + 4127 "11111000" // /* MW 5 */ + 4128 "00101101" // /* MW 4 */ + 4129 "00110000" // /* MW 3 */ + 4130 "11010110" // /* MW 2 */ + 4131 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 644 22 first +.src_ref 2 "conv2d_bf16_params.h" 664 38 first +.src_ref 2 "conv2d_bf16_params.h" 705 45 +.aggressive_scheduled_block_id 10 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4132 "00111010" // ST dc0, [p2], #4; MUL r2, r31, r0; ADD.NC r17, r17, #-1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4133 "11001001" // /* MW 9 */ + 4134 "01111111" // /* MW 8 */ + 4135 "00101100" // /* MW 7 */ + 4136 "01111110" // /* MW 6 */ + 4137 "00100000" // /* MW 5 */ + 4138 "00111110" // /* MW 4 */ + 4139 "00110000" // /* MW 3 */ + 4140 "10001100" // /* MW 2 */ + 4141 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 664 38 +.src_ref 2 "conv2d_bf16_params.h" 705 50 first +.src_ref 2 "conv2d_bf16_params.h" 705 61 first + 4142 "01011100" // ST dc0, [p2], #4; MAC r14, r14, r17, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4143 "00001100" // /* MW 5 */ + 4144 "10111000" // /* MW 4 */ + 4145 "00111000" // /* MW 3 */ + 4146 "10001100" // /* MW 2 */ + 4147 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 +.src_ref 2 "conv2d_bf16_params.h" 674 24 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 682 38 +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.src_ref 2 "conv2d_bf16_params.h" 720 50 + 4148 "00111010" // ST r22, [p2], #4; AND r16, r19, r2; MOV r2, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4149 "01011001" // /* MW 9 */ + 4150 "00000000" // /* MW 8 */ + 4151 "01001000" // /* MW 7 */ + 4152 "00100100" // /* MW 6 */ + 4153 "00000001" // /* MW 5 */ + 4154 "00100111" // /* MW 4 */ + 4155 "00110000" // /* MW 3 */ + 4156 "11011010" // /* MW 2 */ + 4157 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4158 "00111010" // ST r28, [p2], #4; SUB r17, r2, r31; MOV r27, el0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4159 "01111001" // /* MW 9 */ + 4160 "00001110" // /* MW 8 */ + 4161 "01110000" // /* MW 7 */ + 4162 "10001111" // /* MW 6 */ + 4163 "00011111" // /* MW 5 */ + 4164 "00000101" // /* MW 4 */ + 4165 "00110000" // /* MW 3 */ + 4166 "11110010" // /* MW 2 */ + 4167 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 707 61 first + 4168 "01011100" // ST r4, [p2], #4; MUL r14, r23, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4169 "11011111" // /* MW 5 */ + 4170 "10111001" // /* MW 4 */ + 4171 "00111011" // /* MW 3 */ + 4172 "10010010" // /* MW 2 */ + 4173 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 +.src_ref 2 "conv2d_bf16_params.h" 674 22 first +.src_ref 2 "conv2d_bf16_params.h" 675 38 + 4174 "00111010" // ST r17, [p2], #4; SUB r16, r16, r31; MOV r0, #6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4175 "01011001" // /* MW 9 */ + 4176 "00000110" // /* MW 8 */ + 4177 "00001000" // /* MW 7 */ + 4178 "10001100" // /* MW 6 */ + 4179 "00001111" // /* MW 5 */ + 4180 "00100001" // /* MW 4 */ + 4181 "00110000" // /* MW 3 */ + 4182 "11000110" // /* MW 2 */ + 4183 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 642 25 first +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 655 23 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.src_ref 2 "conv2d_bf16_params.h" 679 23 +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id first + 4184 "01110110" // MOVA r0, #72; ST r16, [p2], #4; SEL.EQZ r16, r13, r0, r27; MOV r27, r6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4185 "01111000" // /* MW 11 */ + 4186 "10010000" // /* MW 10 */ + 4187 "01101001" // /* MW 9 */ + 4188 "00010011" // /* MW 8 */ + 4189 "00000000" // /* MW 7 */ + 4190 "10011011" // /* MW 6 */ + 4191 "00010001" // /* MW 5 */ + 4192 "00011110" // /* MW 4 */ + 4193 "00000010" // /* MW 3 */ + 4194 "00000000" // /* MW 2 */ + 4195 "00001001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 655 23 first +.src_ref 2 "conv2d_bf16_params.h" 679 23 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 +.aggressive_scheduled_block_id 11 +.noswbrkpt + 4196 "00101100" // LDA r5, [sp, #-24]; SEL.EQZ r5, r0, r5, r27 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4197 "10100100" // /* MW 5 */ + 4198 "00010100" // /* MW 4 */ + 4199 "00100000" // /* MW 3 */ + 4200 "00010110" // /* MW 2 */ + 4201 "11111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 691 56 first +.aggressive_scheduled_block_id 11 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4202 "10011000" // MUL r17, r5, r26 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4203 "10101111" // /* MW 3 */ + 4204 "01100011" // /* MW 2 */ + 4205 "00010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 675 38 first +.src_ref 2 "conv2d_bf16_params.h" 709 71 first + 4206 "00111010" // ST dc0, [p2], #4; LSHL r16, r3, r16; MOV m2, #-56 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4207 "01011001" // /* MW 9 */ + 4208 "11001000" // /* MW 8 */ + 4209 "00000111" // /* MW 7 */ + 4210 "01101101" // /* MW 6 */ + 4211 "00001000" // /* MW 5 */ + 4212 "00000111" // /* MW 4 */ + 4213 "00110000" // /* MW 3 */ + 4214 "10001100" // /* MW 2 */ + 4215 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 675 38 +.src_ref 2 "conv2d_bf16_params.h" 706 23 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 709 76 + 4216 "01110110" // MOVA r3, #-29; ST dc0, [p2], m2; LSHL r15, r16, r15; ADD.NC r13, r3, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4217 "11001000" // /* MW 11 */ + 4218 "11000001" // /* MW 10 */ + 4219 "10101000" // /* MW 9 */ + 4220 "11101101" // /* MW 8 */ + 4221 "11110111" // /* MW 7 */ + 4222 "10100000" // /* MW 6 */ + 4223 "01100001" // /* MW 5 */ + 4224 "01001000" // /* MW 4 */ + 4225 "00000010" // /* MW 3 */ + 4226 "01100011" // /* MW 2 */ + 4227 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 682 38 first +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4228 "01011100" // ST r2, [p2], m0; LSHL r16, r13, r3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4229 "01111011" // /* MW 5 */ + 4230 "11000000" // /* MW 4 */ + 4231 "00110110" // /* MW 3 */ + 4232 "00001010" // /* MW 2 */ + 4233 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 126 21 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 first + 4234 "01011100" // ST r22, [p2], #4; ADD r3, r17, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4235 "01000001" // /* MW 5 */ + 4236 "10001110" // /* MW 4 */ + 4237 "00111000" // /* MW 3 */ + 4238 "11011010" // /* MW 2 */ + 4239 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 127 10 first +.src_ref 2 "conv2d_bf16_params.h" 127 19 first +.src_ref 2 "conv2d_bf16_params.h" 696 37 + 4240 "01011100" // ST r18, [p2], #4; MSC r18, r18, r17, r4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4241 "10011100" // /* MW 5 */ + 4242 "11001000" // /* MW 4 */ + 4243 "00111000" // /* MW 3 */ + 4244 "11001010" // /* MW 2 */ + 4245 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4246 "01011100" // ST r4, [p2], #4; LSHL r5, r5, r6 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4247 "11011011" // /* MW 5 */ + 4248 "10010100" // /* MW 4 */ + 4249 "00110010" // /* MW 3 */ + 4250 "10010010" // /* MW 2 */ + 4251 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 706 28 +.src_ref 2 "conv2d_bf16_params.h" 706 28 first + 4252 "00111010" // ST r3, [p2], #4; ADD r3, r13, r16; MOV r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4253 "01011001" // /* MW 9 */ + 4254 "11111101" // /* MW 8 */ + 4255 "00001111" // /* MW 7 */ + 4256 "00000100" // /* MW 6 */ + 4257 "00111000" // /* MW 5 */ + 4258 "00011010" // /* MW 4 */ + 4259 "00110000" // /* MW 3 */ + 4260 "10001110" // /* MW 2 */ + 4261 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 706 28 + 4262 "10011000" // ASHL r0, r3, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4263 "00001110" // /* MW 3 */ + 4264 "11000000" // /* MW 2 */ + 4265 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 707 66 first + 4266 "01011100" // ST r18, [p2], #4; MUL r4, r14, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4267 "00011111" // /* MW 5 */ + 4268 "00010000" // /* MW 4 */ + 4269 "00110111" // /* MW 3 */ + 4270 "11001010" // /* MW 2 */ + 4271 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 +.src_ref 2 "conv2d_bf16_params.h" 709 96 first + 4272 "01011100" // ST dc0, [p2], #4; LSHL r3, r0, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4273 "00111011" // /* MW 5 */ + 4274 "00001100" // /* MW 4 */ + 4275 "00110000" // /* MW 3 */ + 4276 "10001100" // /* MW 2 */ + 4277 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 696 37 first +.src_ref 2 "conv2d_bf16_params.h" 709 90 + 4278 "11111010" // LDA r13, [sp, #-4]; ST dc0, [p2], #4; SUB r3, r15, r3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4279 "00110001" // /* MW 9 */ + 4280 "11000110" // /* MW 8 */ + 4281 "00000011" // /* MW 7 */ + 4282 "10000000" // /* MW 6 */ + 4283 "01100001" // /* MW 5 */ + 4284 "00011100" // /* MW 4 */ + 4285 "00100010" // /* MW 3 */ + 4286 "10110110" // /* MW 2 */ + 4287 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 707 50 first +.src_ref 2 "conv2d_bf16_params.h" 708 59 +.src_ref 2 "conv2d_bf16_params.h" 710 60 first +.src_ref 2 "conv2d_bf16_params.h" 710 65 first + 4288 "01110110" // LDA r14, [sp, #-8]; ST r4, [p2], #4; MAC r7, r7, r29, r0; ADD.NC r1, r0, #-1 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4289 "11001000" // /* MW 11 */ + 4290 "00111111" // /* MW 10 */ + 4291 "00101000" // /* MW 9 */ + 4292 "00110000" // /* MW 8 */ + 4293 "01110000" // /* MW 7 */ + 4294 "10111010" // /* MW 6 */ + 4295 "10010001" // /* MW 5 */ + 4296 "00011100" // /* MW 4 */ + 4297 "00100010" // /* MW 3 */ + 4298 "00111010" // /* MW 2 */ + 4299 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 708 48 first +.src_ref 2 "conv2d_bf16_params.h" 713 12 first + 4300 "11111010" // LDA r15, [sp, #-12]; ST r1, [p2], #4; MUL r0, r5, r26 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4301 "10101111" // /* MW 9 */ + 4302 "01000001" // /* MW 8 */ + 4303 "00000001" // /* MW 7 */ + 4304 "10000000" // /* MW 6 */ + 4305 "00110001" // /* MW 5 */ + 4306 "00011100" // /* MW 4 */ + 4307 "00100010" // /* MW 3 */ + 4308 "10111110" // /* MW 2 */ + 4309 "11111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 709 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first + 4310 "01011100" // ST r3, [p2], #4; RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4311 "00000000" // /* MW 5 */ + 4312 "01010000" // /* MW 4 */ + 4313 "00110000" // /* MW 3 */ + 4314 "10001110" // /* MW 2 */ + 4315 "01000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 710 50 first +.delay_slot + 4316 "10011000" // ST r7, [p2], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4317 "11110001" // /* MW 3 */ + 4318 "01011100" // /* MW 2 */ + 4319 "00001010" // /* MW 1 */ +.delay_slot + 4320 "10011000" // ST r0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4321 "00010001" // /* MW 3 */ + 4322 "00011100" // /* MW 2 */ + 4323 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 first +.delay_slot + 4324 "10011000" // ST r2, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4325 "01010001" // /* MW 3 */ + 4326 "00011100" // /* MW 2 */ + 4327 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 718 48 +.delay_slot + 4328 "10011000" // ST r2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4329 "01010001" // /* MW 3 */ + 4330 "00000100" // /* MW 2 */ + 4331 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 720 50 first +.src_ref 2 "conv2d_bf16_params.h" 800 first +.delay_slot + 4332 "00111010" // ST r2, [p2, #4]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4333 "01110001" // /* MW 9 */ + 4334 "00000000" // /* MW 8 */ + 4335 "00000000" // /* MW 7 */ + 4336 "00000000" // /* MW 6 */ + 4337 "11111110" // /* MW 5 */ + 4338 "00111111" // /* MW 4 */ + 4339 "00110000" // /* MW 3 */ + 4340 "10001010" // /* MW 2 */ +.label _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh__end +.label __Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh___func_end0 + 4341 "01000010" // /* MW 1 */ +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_begin0 +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.function convert_bf16_to_bfp16 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 689 first +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 704 12 +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.function_start + 4352 "01110110" // MOVA dc0, #0; MOVS p2, p1; MOVX r24, #0; MOV r0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4353 "01111000" // /* MW 11 */ + 4354 "01100000" // /* MW 10 */ + 4355 "00001010" // /* MW 9 */ + 4356 "00001000" // /* MW 8 */ + 4357 "10000000" // /* MW 7 */ + 4358 "00000001" // /* MW 6 */ + 4359 "10001011" // /* MW 5 */ + 4360 "10000100" // /* MW 4 */ + 4361 "10000010" // /* MW 3 */ + 4362 "00000011" // /* MW 2 */ + 4363 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 +.src_ref 3 "utils.h" 526 11 +.src_ref 2 "conv2d_bf16.h" 698 28 first +.src_ref 2 "conv2d_bf16.h" 704 12 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 + 4364 "01111110" // MOVA dj1, #0; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc1, dc0; MOVX r26, #0; ADD.NC p3, r0, #4 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4365 "01100000" // /* MW 13 */ + 4366 "00001001" // /* MW 12 */ + 4367 "00100000" // /* MW 11 */ + 4368 "00100001" // /* MW 10 */ + 4369 "00000000" // /* MW 9 */ + 4370 "00110110" // /* MW 8 */ + 4371 "00000001" // /* MW 7 */ + 4372 "00110100" // /* MW 6 */ + 4373 "00101000" // /* MW 5 */ + 4374 "00101000" // /* MW 4 */ + 4375 "10001000" // /* MW 3 */ + 4376 "00000110" // /* MW 2 */ + 4377 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 28 +.src_ref 2 "conv2d_bf16.h" 702 37 + 4378 "10111010" // LDA dn1, [p3], #4; MOVXM p4, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4379 "00010000" // /* MW 9 */ + 4380 "00110010" // /* MW 8 */ + 4381 "00110010" // /* MW 7 */ + 4382 "11110010" // /* MW 6 */ + 4383 "00000001" // /* MW 5 */ + 4384 "00000000" // /* MW 4 */ + 4385 "11010000" // /* MW 3 */ + 4386 "10010100" // /* MW 2 */ + 4387 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 698 43 +.src_ref 2 "conv2d_bf16.h" 702 4 first + 4388 "10111010" // LDA m1, [p3], #4; MOVXM ls, #4496 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4389 "00010000" // /* MW 9 */ + 4390 "11001000" // /* MW 8 */ + 4391 "01111000" // /* MW 7 */ + 4392 "00000100" // /* MW 6 */ + 4393 "00000000" // /* MW 5 */ + 4394 "00000000" // /* MW 4 */ + 4395 "11010000" // /* MW 3 */ + 4396 "10010000" // /* MW 2 */ + 4397 "01100011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 699 43 first +.src_ref 2 "conv2d_bf16.h" 702 4 + 4398 "10111010" // LDA m0, [p3]; MOVXM le, #4544 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4399 "00010000" // /* MW 9 */ + 4400 "11100000" // /* MW 8 */ + 4401 "10111000" // /* MW 7 */ + 4402 "00000101" // /* MW 6 */ + 4403 "00000000" // /* MW 5 */ + 4404 "00000000" // /* MW 4 */ + 4405 "11010000" // /* MW 3 */ + 4406 "10000000" // /* MW 2 */ + 4407 "01100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 702 37 first + 4408 "01010100" // LDA r0, [p3, #-12]; MOV dj0, #0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4409 "00000001" // /* MW 5 */ + 4410 "00000000" // /* MW 4 */ + 4411 "11010001" // /* MW 3 */ + 4412 "10000010" // /* MW 2 */ + 4413 "01111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 37 + 4414 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4415 "00100010" // /* MW 3 */ + 4416 "00000100" // /* MW 2 */ + 4417 "00000100" // /* MW 1 */ + 4418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4419 "00000000" // /* MW 1 */ + 4420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4421 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 +.src_ref 2 "conv2d_bf16.h" 705 66 first + 4422 "11110100" // VLDB.POP.512 x1, [p0, lf0, r24]; MOV dn0, dn1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4423 "00000001" // /* MW 5 */ + 4424 "10000101" // /* MW 4 */ + 4425 "10000000" // /* MW 3 */ + 4426 "00001010" // /* MW 2 */ + 4427 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first + 4428 "00011000" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4429 "00010100" // /* MW 3 */ + 4430 "00110000" // /* MW 2 */ + 4431 "00111110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4432 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4433 "00010100" // /* MW 3 */ + 4434 "00010100" // /* MW 2 */ + 4435 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 702 4 first +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4436 "00110100" // VLDB.POP.512 x1, [p0, lf0, r24]; ADD.NC lc, r0, #-3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4437 "11111101" // /* MW 5 */ + 4438 "11100000" // /* MW 4 */ + 4439 "10001010" // /* MW 3 */ + 4440 "00001010" // /* MW 2 */ + 4441 "00000000" // /* MW 1 */ +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 707 12 +.src_ref 2 "conv2d_bf16.h" 707 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4442 "00011100" // VLDB.POP.512.2D x0, [p0, lf0, r24, d1]; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4443 "00000000" // /* MW 5 */ + 4444 "11110101" // /* MW 4 */ + 4445 "10000000" // /* MW 3 */ + 4446 "00000010" // /* MW 2 */ + 4447 "11000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 704 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4448 "00011000" // VLDB.FILL.512 [p0, lf0, r24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4449 "00010100" // /* MW 3 */ + 4450 "00010100" // /* MW 2 */ + 4451 "00111100" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4453 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4454 "10111010" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24]; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4455 "01111110" // /* MW 9 */ + 4456 "10100101" // /* MW 8 */ + 4457 "00000001" // /* MW 7 */ + 4458 "00000000" // /* MW 6 */ + 4459 "01010100" // /* MW 5 */ + 4460 "00000000" // /* MW 4 */ + 4461 "11110000" // /* MW 3 */ + 4462 "00101100" // /* MW 2 */ + 4463 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4464 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];NOPS; NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4465 "00000000" // /* MW 15 */ + 4466 "00000000" // /* MW 14 */ + 4467 "01111000" // /* MW 13 */ + 4468 "11000101" // /* MW 12 */ + 4469 "00000001" // /* MW 11 */ + 4470 "00000000" // /* MW 10 */ + 4471 "00000000" // /* MW 9 */ + 4472 "00000000" // /* MW 8 */ + 4473 "01011011" // /* MW 7 */ + 4474 "00000001" // /* MW 6 */ + 4475 "00101000" // /* MW 5 */ + 4476 "01100000" // /* MW 4 */ + 4477 "11111100" // /* MW 3 */ + 4478 "00101100" // /* MW 2 */ + 4479 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4480 "11100001" // NOPA; NOPB; NOPS; NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4481 "00000000" // /* MW 15 */ + 4482 "00000000" // /* MW 14 */ + 4483 "01111000" // /* MW 13 */ + 4484 "11000101" // /* MW 12 */ + 4485 "01000000" // /* MW 11 */ + 4486 "00000000" // /* MW 10 */ + 4487 "00000000" // /* MW 9 */ + 4488 "00000000" // /* MW 8 */ + 4489 "01011011" // /* MW 7 */ + 4490 "00000001" // /* MW 6 */ + 4491 "00100000" // /* MW 5 */ + 4492 "00000000" // /* MW 4 */ + 4493 "11110000" // /* MW 3 */ + 4494 "00101100" // /* MW 2 */ + 4495 "00000000" // /* MW 1 */ +.label ZLS_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_144 +.src_ref 2 "conv2d_bf16.h" 704 12 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 4496 "11100001" // NOPA; VLDB.FILL.512 [p0, lf0, r24]; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4497 "00000000" // /* MW 15 */ + 4498 "00000000" // /* MW 14 */ + 4499 "01111000" // /* MW 13 */ + 4500 "10100101" // /* MW 12 */ + 4501 "00000001" // /* MW 11 */ + 4502 "00000000" // /* MW 10 */ + 4503 "00000000" // /* MW 9 */ + 4504 "00000000" // /* MW 8 */ + 4505 "01011011" // /* MW 7 */ + 4506 "00000001" // /* MW 6 */ + 4507 "00101000" // /* MW 5 */ + 4508 "00101000" // /* MW 4 */ + 4509 "11111000" // /* MW 3 */ + 4510 "00101100" // /* MW 2 */ + 4511 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 705 66 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4512 "11100001" // NOPA; VLDB.POP.512 x1, [p0, lf0, r24];VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4513 "00000000" // /* MW 15 */ + 4514 "00000000" // /* MW 14 */ + 4515 "01111000" // /* MW 13 */ + 4516 "10100101" // /* MW 12 */ + 4517 "00000001" // /* MW 11 */ + 4518 "00000000" // /* MW 10 */ + 4519 "00000000" // /* MW 9 */ + 4520 "00000000" // /* MW 8 */ + 4521 "00000011" // /* MW 7 */ + 4522 "10000000" // /* MW 6 */ + 4523 "10101101" // /* MW 5 */ + 4524 "00000000" // /* MW 4 */ + 4525 "11110000" // /* MW 3 */ + 4526 "00101100" // /* MW 2 */ + 4527 "00000000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 3 "utils.h" 526 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4528 "11100001" // NOPA; VLDB.POP.512.2D x0, [p0, lf0, r24, d1];VST.FLUSH.512.CONV [p2, sf, r26];NOPX; VCONV.fp32.bf16 cml0, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4529 "00000000" // /* MW 15 */ + 4530 "00000000" // /* MW 14 */ + 4531 "01111000" // /* MW 13 */ + 4532 "11000101" // /* MW 12 */ + 4533 "00000001" // /* MW 11 */ + 4534 "00000000" // /* MW 10 */ + 4535 "00000000" // /* MW 9 */ + 4536 "00000000" // /* MW 8 */ + 4537 "00000011" // /* MW 7 */ + 4538 "00000000" // /* MW 6 */ + 4539 "00101001" // /* MW 5 */ + 4540 "01100000" // /* MW 4 */ + 4541 "11111100" // /* MW 3 */ + 4542 "00101100" // /* MW 2 */ + 4543 "00000000" // /* MW 1 */ +.label ZLE_F_Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams_192 +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 3 "utils.h" 531 4 first +.src_ref 2 "conv2d_bf16.h" 706 18 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4544 "11100001" // NOPA; NOPB; VST.FLUSH.512.CONV.2D [p2, sf, r26, d0];NOPX; VCONV.fp32.bf16 cmh0, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 4545 "00000000" // /* MW 15 */ + 4546 "00000000" // /* MW 14 */ + 4547 "01111000" // /* MW 13 */ + 4548 "11000101" // /* MW 12 */ + 4549 "01000000" // /* MW 11 */ + 4550 "00000000" // /* MW 10 */ + 4551 "00000000" // /* MW 9 */ + 4552 "00000000" // /* MW 8 */ + 4553 "00000011" // /* MW 7 */ + 4554 "00000000" // /* MW 6 */ + 4555 "00100011" // /* MW 5 */ + 4556 "00000000" // /* MW 4 */ + 4557 "11110000" // /* MW 3 */ + 4558 "00101100" // /* MW 2 */ + 4559 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 4560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4561 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4562 "00011000" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4563 "00000011" // /* MW 3 */ + 4564 "10000000" // /* MW 2 */ + 4565 "00001101" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 708 12 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4566 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4567 "01110000" // /* MW 7 */ + 4568 "11000101" // /* MW 6 */ + 4569 "00000001" // /* MW 5 */ + 4570 "00000000" // /* MW 4 */ + 4571 "01100000" // /* MW 3 */ + 4572 "00000000" // /* MW 2 */ + 4573 "00100000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4574 "11111000" // VCONV.fp32.bf16 cmh0, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4575 "10001010" // /* MW 3 */ + 4576 "10000001" // /* MW 2 */ + 4577 "00011000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4578 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4579 "00000011" // /* MW 3 */ + 4580 "00000000" // /* MW 2 */ + 4581 "00001011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 first +.src_ref 2 "conv2d_bf16.h" 705 30 first +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first + 4582 "00000010" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26]; VCONV.fp32.bf16 cml0, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4583 "01110000" // /* MW 7 */ + 4584 "11000101" // /* MW 6 */ + 4585 "00000001" // /* MW 5 */ + 4586 "00000000" // /* MW 4 */ + 4587 "01100000" // /* MW 3 */ + 4588 "00000000" // /* MW 2 */ + 4589 "10110000" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 350 11 +.src_ref 2 "conv2d_bf16.h" 706 18 first +.src_ref 2 "conv2d_bf16.h" 708 12 first + 4590 "00000010" // VST.FLUSH.512.CONV [p2, sf, r26]; VCONV.fp32.bf16 cmh0, x0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4591 "01110000" // /* MW 7 */ + 4592 "11000101" // /* MW 6 */ + 4593 "01000000" // /* MW 5 */ + 4594 "00000000" // /* MW 4 */ + 4595 "01100000" // /* MW 3 */ + 4596 "00000000" // /* MW 2 */ + 4597 "00100000" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first + 4598 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4599 "00000011" // /* MW 3 */ + 4600 "00000000" // /* MW 2 */ + 4601 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 707 12 first +.src_ref 2 "conv2d_bf16.h" 707 30 first +.src_ref 2 "conv2d_bf16.h" 723 first + 4602 "01011100" // VST.PUSH.576.CONV.bfp16ebs8.fp32 dm0, [p2, sf, r26];RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 4603 "00000000" // /* MW 5 */ + 4604 "01010000" // /* MW 4 */ + 4605 "01100000" // /* MW 3 */ + 4606 "00000000" // /* MW 2 */ + 4607 "10110000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 708 12 first +.delay_slot + 4608 "00011000" // VST.FLUSH.512.CONV [p2, sf, r26] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4609 "00000011" // /* MW 3 */ + 4610 "00000000" // /* MW 2 */ + 4611 "00001001" // /* MW 1 */ +.src_ref 3 "utils.h" 531 4 first +.delay_slot + 4612 "00011000" // VST.FLUSH.512.CONV.2D [p2, sf, r26, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4613 "00000011" // /* MW 3 */ + 4614 "00000000" // /* MW 2 */ + 4615 "00001011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4617 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4618 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4619 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4620 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams__end +.label __Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams___func_end0 + 4621 "00000000" // /* MW 1 */ +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_begin0 +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.function conv2d_bf16<(unsigned char)'\x01', (act_t)0, bfloat16, bfloat16, bfloat16, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::async, adf::addressing::linear, adf::margin<0U> >, false, false, true, false> _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16.h" 1836 first +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.function_start + 4624 "01111110" // MOVA m0, #-81; PADDB [p3], #64; MOVS p4, p2; PADDXM [sp], #128 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4625 "01100000" // /* MW 13 */ + 4626 "00010001" // /* MW 12 */ + 4627 "10010001" // /* MW 11 */ + 4628 "00001110" // /* MW 10 */ + 4629 "00000000" // /* MW 9 */ + 4630 "00000000" // /* MW 8 */ + 4631 "10000000" // /* MW 7 */ + 4632 "00000000" // /* MW 6 */ + 4633 "00100000" // /* MW 5 */ + 4634 "00111111" // /* MW 4 */ + 4635 "10000110" // /* MW 3 */ + 4636 "11100000" // /* MW 2 */ + 4637 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 241 95 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4638 "01110110" // MOVA r19, #3; ST r12, [sp, #-16]; MOVX r28, #-24; MOV r17, p3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4639 "01111000" // /* MW 11 */ + 4640 "01100000" // /* MW 10 */ + 4641 "00101011" // /* MW 9 */ + 4642 "00001010" // /* MW 8 */ + 4643 "11000101" // /* MW 7 */ + 4644 "10111111" // /* MW 6 */ + 4645 "10010101" // /* MW 5 */ + 4646 "11110001" // /* MW 4 */ + 4647 "00000111" // /* MW 3 */ + 4648 "01110011" // /* MW 2 */ + 4649 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1836 +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 94 +.src_ref 2 "conv2d_bf16_params.h" 242 100 +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 245 28 +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4650 "01110110" // MOVA r25, #0; ST r17, [sp, #-40]; MOVX r17, #1; ADD.NC p2, r17, #28 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4651 "00001000" // /* MW 11 */ + 4652 "01000111" // /* MW 10 */ + 4653 "00110100" // /* MW 9 */ + 4654 "00101001" // /* MW 8 */ + 4655 "00010000" // /* MW 7 */ + 4656 "10000001" // /* MW 6 */ + 4657 "00110101" // /* MW 5 */ + 4658 "11011010" // /* MW 4 */ + 4659 "00000111" // /* MW 3 */ + 4660 "00011001" // /* MW 2 */ + 4661 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 240 68 first + 4662 "01110110" // LDA r18, [p2]; ST r9, [sp, #-12]; MOVXM r29, #16777216 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4663 "00010000" // /* MW 11 */ + 4664 "00000000" // /* MW 10 */ + 4665 "10101000" // /* MW 9 */ + 4666 "00000011" // /* MW 8 */ + 4667 "01000000" // /* MW 7 */ + 4668 "10000000" // /* MW 6 */ + 4669 "00110101" // /* MW 5 */ + 4670 "11110101" // /* MW 4 */ + 4671 "11010111" // /* MW 3 */ + 4672 "11001010" // /* MW 2 */ + 4673 "01000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.src_ref 2 "conv2d_bf16_params.h" 245 20 + 4674 "01110110" // MOVA m6, #88; ST r14, [sp, #-4]; MOVXM r31, #33554431 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4675 "10010000" // /* MW 11 */ + 4676 "11111111" // /* MW 10 */ + 4677 "11101111" // /* MW 9 */ + 4678 "11111111" // /* MW 8 */ + 4679 "01111111" // /* MW 7 */ + 4680 "10000000" // /* MW 6 */ + 4681 "11010101" // /* MW 5 */ + 4682 "11111101" // /* MW 4 */ + 4683 "10000111" // /* MW 3 */ + 4684 "00011000" // /* MW 2 */ + 4685 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4686 "01110110" // MOVA r20, #5; ST r13, [sp, #-32]; MOVX r22, #8; MOV m4, #-20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4687 "01011000" // /* MW 11 */ + 4688 "11101100" // /* MW 10 */ + 4689 "00000111" // /* MW 9 */ + 4690 "00001010" // /* MW 8 */ + 4691 "01100001" // /* MW 7 */ + 4692 "10000001" // /* MW 6 */ + 4693 "10110101" // /* MW 5 */ + 4694 "11100001" // /* MW 4 */ + 4695 "00000111" // /* MW 3 */ + 4696 "10110100" // /* MW 2 */ + 4697 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 39 +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.src_ref 2 "conv2d_bf16_params.h" 250 71 + 4698 "01110110" // MOVA r21, #12; ST r15, [sp, #-20]; MOVX r23, #254; MOV m5, #-60 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4699 "01011000" // /* MW 11 */ + 4700 "11000100" // /* MW 10 */ + 4701 "10000111" // /* MW 9 */ + 4702 "11001010" // /* MW 8 */ + 4703 "01110111" // /* MW 7 */ + 4704 "10000111" // /* MW 6 */ + 4705 "11110101" // /* MW 5 */ + 4706 "11101101" // /* MW 4 */ + 4707 "00000111" // /* MW 3 */ + 4708 "10010101" // /* MW 2 */ + 4709 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 + 4710 "00000010" // ST p7, [sp, #-8]; MOV m7, #64 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 4711 "01010000" // /* MW 7 */ + 4712 "01000000" // /* MW 6 */ + 4713 "10000000" // /* MW 5 */ + 4714 "00000011" // /* MW 4 */ + 4715 "10110000" // /* MW 3 */ + 4716 "01110011" // /* MW 2 */ + 4717 "11111111" // /* MW 1 */ + 4718 "10011000" // ST lr, [sp, #-28] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4719 "00111101" // /* MW 3 */ + 4720 "11100100" // /* MW 2 */ + 4721 "00001111" // /* MW 1 */ + 4722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4723 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 + 4724 "10011000" // ADD r12, r29, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4725 "00100000" // /* MW 3 */ + 4726 "01011001" // /* MW 2 */ + 4727 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 240 68 +.src_ref 2 "conv2d_bf16_params.h" 241 95 first + 4728 "01011100" // ST r12, [p2], m0; LSHL r29, r12, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4729 "10011011" // /* MW 5 */ + 4730 "01110111" // /* MW 4 */ + 4731 "00110110" // /* MW 3 */ + 4732 "00110010" // /* MW 2 */ + 4733 "01000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 first +.src_ref 2 "conv2d_bf16_params.h" 242 94 first + 4734 "00101100" // LDA.u8 r30, [p2], #-3; EQ r28, r29, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4735 "00101111" // /* MW 5 */ + 4736 "11110010" // /* MW 4 */ + 4737 "01011110" // /* MW 3 */ + 4738 "11111001" // /* MW 2 */ + 4739 "01011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 20 first + 4740 "10011000" // LDA.u8 r9, [p2], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4741 "00101010" // /* MW 3 */ + 4742 "11001001" // /* MW 2 */ + 4743 "00000010" // /* MW 1 */ + 4744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4745 "00000000" // /* MW 1 */ + 4746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4747 "00000000" // /* MW 1 */ + 4748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4749 "00000000" // /* MW 1 */ + 4750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4751 "00000000" // /* MW 1 */ + 4752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4753 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 174 first + 4754 "10011000" // LTU r27, r29, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4755 "11101100" // /* MW 3 */ + 4756 "01110111" // /* MW 2 */ + 4757 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 + 4758 "00011000" // SEL.EQZ r14, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4759 "00110010" // /* MW 3 */ + 4760 "01011101" // /* MW 2 */ + 4761 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 171 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 4762 "10011000" // LTU r27, r31, r12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4763 "11001100" // /* MW 3 */ + 4764 "11110110" // /* MW 2 */ + 4765 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 241 95 first +.src_ref 2 "conv2d_bf16_params.h" 242 39 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 4766 "00101100" // ST.s8 r28, [p2], m4; EQ r13, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4767 "11001111" // /* MW 5 */ + 4768 "10110111" // /* MW 4 */ + 4769 "11101110" // /* MW 3 */ + 4770 "01110000" // /* MW 2 */ + 4771 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 100 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4772 "10011000" // LSHL r31, r13, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4773 "00011101" // /* MW 3 */ + 4774 "01111111" // /* MW 2 */ + 4775 "00010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 153 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4776 "00011000" // SEL.EQZ r12, r25, r14, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4777 "11100010" // /* MW 3 */ + 4778 "01011000" // /* MW 2 */ + 4779 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 98 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4780 "10011000" // OR r28, r31, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4781 "11000101" // /* MW 3 */ + 4782 "11111001" // /* MW 2 */ + 4783 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 54 +.src_ref 2 "conv2d_bf16_params.h" 242 151 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 4784 "10100100" // LTU r27, r17, r30; ADD.NC r28, r28, r12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4785 "01100010" // /* MW 5 */ + 4786 "00111100" // /* MW 4 */ + 4787 "10011110" // /* MW 3 */ + 4788 "11111101" // /* MW 2 */ + 4789 "10001110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 41 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 4790 "00011000" // SEL.EQZ r28, r25, r28, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4791 "11000010" // /* MW 3 */ + 4792 "01111001" // /* MW 2 */ + 4793 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 243 80 first + 4794 "10011000" // LTU r31, r17, r28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4795 "11001100" // /* MW 3 */ + 4796 "01111111" // /* MW 2 */ + 4797 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 242 117 first +.src_ref 2 "conv2d_bf16_params.h" 243 39 + 4798 "01011100" // ST r31, [p2], m5; NE r29, r29, r30 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4799 "11010001" // /* MW 5 */ + 4800 "11110111" // /* MW 4 */ + 4801 "00111110" // /* MW 3 */ + 4802 "01111110" // /* MW 2 */ + 4803 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 44 first +.src_ref 2 "conv2d_bf16_params.h" 245 28 first + 4804 "00101100" // LDA.u8 r30, [p2], m7; NE r12, r9, r17 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4805 "00110001" // /* MW 5 */ + 4806 "10110010" // /* MW 4 */ + 4807 "01010100" // /* MW 3 */ + 4808 "01111001" // /* MW 2 */ + 4809 "01011101" // /* MW 1 */ + 4810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4811 "00000000" // /* MW 1 */ + 4812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4813 "00000000" // /* MW 1 */ + 4814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4815 "00000000" // /* MW 1 */ + 4816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4817 "00000000" // /* MW 1 */ + 4818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4819 "00000000" // /* MW 1 */ + 4820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4821 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 82 +.src_ref 2 "conv2d_bf16_params.h" 244 87 + 4822 "00100100" // NE r22, r30, r22; ADD.NC r31, r30, #-4 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4823 "11111100" // /* MW 5 */ + 4824 "10111110" // /* MW 4 */ + 4825 "00011111" // /* MW 3 */ + 4826 "10101101" // /* MW 2 */ + 4827 "11110101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4828 "10000100" // JNZ r12, #4896 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4896 delay_slots=5 */ + 4829 "00000001" // /* MW 5 */ + 4830 "01000000" // /* MW 4 */ + 4831 "10010000" // /* MW 3 */ + 4832 "00001001" // /* MW 2 */ + 4833 "01100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 156 +.delay_slot + 4834 "10011000" // NE r9, r30, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4835 "01001000" // /* MW 3 */ + 4836 "10010011" // /* MW 2 */ + 4837 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4838 "00011000" // EXTEND.u8 r31, r31 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4839 "10010000" // /* MW 3 */ + 4840 "11111110" // /* MW 2 */ + 4841 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4842 "10011000" // AND r22, r9, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4843 "01100100" // /* MW 3 */ + 4844 "01101101" // /* MW 2 */ + 4845 "00010010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 87 +.delay_slot + 4846 "10011000" // LTU r23, r31, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4847 "01111100" // /* MW 3 */ + 4848 "11101111" // /* MW 2 */ + 4849 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 244 132 +.delay_slot + 4850 "10011000" // AND r16, r23, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4851 "01100100" // /* MW 3 */ + 4852 "11100001" // /* MW 2 */ + 4853 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 245 33 + 4854 "10000100" // JNZ r29, #4896 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4896 delay_slots=5 */ + 4855 "00000001" // /* MW 5 */ + 4856 "01000000" // /* MW 4 */ + 4857 "10010000" // /* MW 3 */ + 4858 "00001001" // /* MW 2 */ + 4859 "11101000" // /* MW 1 */ +.delay_slot + 4860 "10011000" // ST p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4861 "00011101" // /* MW 3 */ + 4862 "11101011" // /* MW 2 */ + 4863 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4865 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4867 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4868 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4869 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4870 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4871 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 876 51 + 4872 "10111010" // MOVA r27, #1; J #4944 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=4944 delay_slots=5 */ + 4873 "00100000" // /* MW 9 */ + 4874 "00000000" // /* MW 8 */ + 4875 "00000000" // /* MW 7 */ + 4876 "01101010" // /* MW 6 */ + 4877 "00000010" // /* MW 5 */ + 4878 "00000000" // /* MW 4 */ + 4879 "00000000" // /* MW 3 */ + 4880 "00111011" // /* MW 2 */ + 4881 "00000000" // /* MW 1 */ +.delay_slot + 4882 "11111000" // MOV el0, r25 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4883 "10011100" // /* MW 3 */ + 4884 "00011001" // /* MW 2 */ + 4885 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1849 12 +.delay_slot + 4886 "00011000" // MOVX r19, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4887 "00000101" // /* MW 3 */ + 4888 "00100110" // /* MW 2 */ + 4889 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4891 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4892 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4893 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4894 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4895 "00000000" // /* MW 1 */ +.label __ll6__Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params +.src_ref 2 "conv2d_bf16_params.h" 250 71 first +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4896 "01110110" // MOVA r21, #4; ST p6, [sp, #-24]; EQ r27, r21, r30; MOV el0, r25 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 4897 "01111000" // /* MW 11 */ + 4898 "11001110" // /* MW 10 */ + 4899 "00001100" // /* MW 9 */ + 4900 "00111100" // /* MW 8 */ + 4901 "10111111" // /* MW 7 */ + 4902 "10101011" // /* MW 6 */ + 4903 "00011101" // /* MW 5 */ + 4904 "11101011" // /* MW 4 */ + 4905 "00000111" // /* MW 3 */ + 4906 "10010101" // /* MW 2 */ + 4907 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 101 + 4908 "10011000" // LSHL r21, r30, r21 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4909 "01011101" // /* MW 3 */ + 4910 "10101011" // /* MW 2 */ + 4911 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 + 4912 "00011000" // SEL.EQZ r21, r21, r25, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4913 "10010010" // /* MW 3 */ + 4914 "01101011" // /* MW 2 */ + 4915 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 129 + 4916 "10011000" // EQ r27, r19, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4917 "11100111" // /* MW 3 */ + 4918 "11110111" // /* MW 2 */ + 4919 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 106 +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4920 "11100100" // SEL.EQZ r19, r21, r25, r27; MOV r27, r16 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4921 "01000001" // /* MW 5 */ + 4922 "10110000" // /* MW 4 */ + 4923 "01001101" // /* MW 3 */ + 4924 "11110010" // /* MW 2 */ + 4925 "10101100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 133 + 4926 "00011000" // SEL.EQZ r19, r25, r19, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4927 "00110010" // /* MW 3 */ + 4928 "01100111" // /* MW 2 */ + 4929 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 first + 4930 "10011000" // AND r20, r28, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4931 "01000100" // /* MW 3 */ + 4932 "00101001" // /* MW 2 */ + 4933 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 249 87 + 4934 "00011000" // NEZ r27, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4935 "11110000" // /* MW 3 */ + 4936 "00110110" // /* MW 2 */ + 4937 "00010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 250 152 first + 4938 "00101100" // NOPA; OR r19, r19, r28 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4939 "10001011" // /* MW 5 */ + 4940 "11001111" // /* MW 4 */ + 4941 "11111001" // /* MW 3 */ + 4942 "00101100" // /* MW 2 */ + 4943 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_320 +.src_ref 2 "conv2d_bf16_params.h" 258 8 first + 4944 "01110110" // MOVA m4, #12; ST r27, [p2], #24; JNZ r29, #4992 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=4992 delay_slots=5 */ + 4945 "01100000" // /* MW 11 */ + 4946 "00000000" // /* MW 10 */ + 4947 "00010000" // /* MW 9 */ + 4948 "01110000" // /* MW 8 */ + 4949 "00000010" // /* MW 7 */ + 4950 "10111010" // /* MW 6 */ + 4951 "01110001" // /* MW 5 */ + 4952 "01101111" // /* MW 4 */ + 4953 "10000010" // /* MW 3 */ + 4954 "10010000" // /* MW 2 */ + 4955 "00000001" // /* MW 1 */ +.delay_slot + 4956 "00011000" // ST.s8 r19, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4957 "01100111" // /* MW 3 */ + 4958 "10001010" // /* MW 2 */ + 4959 "00000010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4961 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4963 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4965 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 4966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 4967 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 + 4968 "01000100" // MOVXM r20, #16777215 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 4969 "11111110" // /* MW 5 */ + 4970 "00111111" // /* MW 4 */ + 4971 "11111010" // /* MW 3 */ + 4972 "11111111" // /* MW 2 */ + 4973 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 first + 4974 "10011000" // AND r18, r18, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 4975 "01000100" // /* MW 3 */ + 4976 "10100101" // /* MW 2 */ + 4977 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16_params.h" 259 71 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 4978 "00101110" // NOPA; ST r18, [p3, #28]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 4979 "00011100" // /* MW 13 */ + 4980 "00000000" // /* MW 12 */ + 4981 "00000000" // /* MW 11 */ + 4982 "01010111" // /* MW 10 */ + 4983 "00011010" // /* MW 9 */ + 4984 "01000000" // /* MW 8 */ + 4985 "00000000" // /* MW 7 */ + 4986 "00000000" // /* MW 6 */ + 4987 "10100011" // /* MW 5 */ + 4988 "11101100" // /* MW 4 */ + 4989 "11110110" // /* MW 3 */ + 4990 "00101100" // /* MW 2 */ + 4991 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_368 +.src_ref 2 "conv2d_bf16.h" 1841 65 first +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16.h" 1849 12 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 4992 "10111010" // LDA r20, [p2], #-32; EXTEND.u8 r20, r19; MOV r22, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 4993 "01011000" // /* MW 9 */ + 4994 "11111101" // /* MW 8 */ + 4995 "11001111" // /* MW 7 */ + 4996 "10000010" // /* MW 6 */ + 4997 "01000100" // /* MW 5 */ + 4998 "00100111" // /* MW 4 */ + 4999 "11010000" // /* MW 3 */ + 5000 "11010010" // /* MW 2 */ + 5001 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 1841 34 +.src_ref 2 "conv2d_bf16.h" 1842 36 +.src_ref 2 "conv2d_bf16.h" 1842 67 +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 5002 "10111010" // MOVA r18, #2; ADD r21, r20, #-1; MOV m4, #36 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5003 "01011000" // /* MW 9 */ + 5004 "00100100" // /* MW 8 */ + 5005 "00000000" // /* MW 7 */ + 5006 "11111010" // /* MW 6 */ + 5007 "01011111" // /* MW 5 */ + 5008 "00101001" // /* MW 4 */ + 5009 "00000000" // /* MW 3 */ + 5010 "01010010" // /* MW 2 */ + 5011 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 67 first +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1849 4 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 5012 "01110110" // LDA r22, [p2], m4; ST el0, [sp, #-48]; AND r22, r21, r22; MOV m4, #-52 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5013 "01011000" // /* MW 11 */ + 5014 "11001100" // /* MW 10 */ + 5015 "00000111" // /* MW 9 */ + 5016 "00100110" // /* MW 8 */ + 5017 "01101011" // /* MW 7 */ + 5018 "10101011" // /* MW 6 */ + 5019 "00101101" // /* MW 5 */ + 5020 "11010000" // /* MW 4 */ + 5021 "11010111" // /* MW 3 */ + 5022 "01011010" // /* MW 2 */ + 5023 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 +.src_ref 2 "conv2d_bf16.h" 1842 106 +.src_ref 2 "conv2d_bf16.h" 1845 80 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5024 "01110110" // LDA r23, [p2], m4; ST r22, [sp, #-36]; MOVX r19, #-1; MOV m4, #196 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5025 "01011000" // /* MW 11 */ + 5026 "11000100" // /* MW 10 */ + 5027 "00000000" // /* MW 9 */ + 5028 "11101010" // /* MW 8 */ + 5029 "00110111" // /* MW 7 */ + 5030 "10111111" // /* MW 6 */ + 5031 "11010101" // /* MW 5 */ + 5032 "11011110" // /* MW 4 */ + 5033 "11010111" // /* MW 3 */ + 5034 "01011110" // /* MW 2 */ + 5035 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 63 first + 5036 "10011000" // LDA r29, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5037 "10110110" // /* MW 3 */ + 5038 "11111111" // /* MW 2 */ + 5039 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 862 52 first + 5040 "10011000" // LDA r31, [p2], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5041 "11110110" // /* MW 3 */ + 5042 "10001011" // /* MW 2 */ + 5043 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 5044 "10011000" // LDA r21, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5045 "10110110" // /* MW 3 */ + 5046 "00000110" // /* MW 2 */ + 5047 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 2 "conv2d_bf16.h" 1841 34 first + 5048 "00101100" // LDA r20, [p0]; LSHL r9, r20, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5049 "01011011" // /* MW 5 */ + 5050 "00100110" // /* MW 4 */ + 5051 "11011010" // /* MW 3 */ + 5052 "11010010" // /* MW 2 */ + 5053 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 5054 "10011000" // LDA r30, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5055 "11010110" // /* MW 3 */ + 5056 "00000111" // /* MW 2 */ + 5057 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first + 5058 "10011000" // LSHL r22, r22, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5059 "00101101" // /* MW 3 */ + 5060 "10101101" // /* MW 2 */ + 5061 "00010101" // /* MW 1 */ + 5062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5063 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 80 first + 5064 "10011000" // ASHL r19, r29, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5065 "00111110" // /* MW 3 */ + 5066 "01100111" // /* MW 2 */ + 5067 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 21 first + 5068 "10011000" // NE r17, r31, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5069 "00011000" // /* MW 3 */ + 5070 "11100011" // /* MW 2 */ + 5071 "00010111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 866 12 + 5072 "10000100" // JNZ r17, #5184 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=5184 delay_slots=5 */ + 5073 "00000001" // /* MW 5 */ + 5074 "01000000" // /* MW 4 */ + 5075 "00100000" // /* MW 3 */ + 5076 "00001010" // /* MW 2 */ + 5077 "10001000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 36 first +.src_ref 2 "conv2d_bf16.h" 1842 75 first +.delay_slot + 5078 "10100100" // LSHL r22, r23, r18; ADD.NC r21, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5079 "10110010" // /* MW 5 */ + 5080 "10110101" // /* MW 4 */ + 5081 "10111010" // /* MW 3 */ + 5082 "10100101" // /* MW 2 */ + 5083 "10111101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1842 75 +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 5084 "10100100" // LSHL r21, r19, r18; ADD.NC dn0, r21, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5085 "10110010" // /* MW 5 */ + 5086 "10010101" // /* MW 4 */ + 5087 "10110000" // /* MW 3 */ + 5088 "01100101" // /* MW 2 */ + 5089 "10011101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1841 34 first +.delay_slot + 5090 "00000010" // ST dn0, [sp, #-44]; ADD.NC r14, r9, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5091 "10100000" // /* MW 7 */ + 5092 "01101000" // /* MW 6 */ + 5093 "11001010" // /* MW 5 */ + 5094 "00000001" // /* MW 4 */ + 5095 "10110000" // /* MW 3 */ + 5096 "10000100" // /* MW 2 */ + 5097 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5098 "11111000" // MOV r15, dn0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5099 "10000000" // /* MW 3 */ + 5100 "11010000" // /* MW 2 */ + 5101 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1845 31 first +.delay_slot + 5102 "01011000" // ADD.NC p6, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5103 "11111001" // /* MW 3 */ + 5104 "01101010" // /* MW 2 */ + 5105 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5106 "01000100" // MOVXM p7, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5107 "11001000" // /* MW 5 */ + 5108 "11001000" // /* MW 4 */ + 5109 "11001110" // /* MW 3 */ + 5110 "00000111" // /* MW 2 */ + 5111 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 first + 5112 "00101100" // LDA.s8 r17, [p7]; MOVX vaddSign0, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5113 "10000000" // /* MW 5 */ + 5114 "10110100" // /* MW 4 */ + 5115 "01010000" // /* MW 3 */ + 5116 "11000100" // /* MW 2 */ + 5117 "11100000" // /* MW 1 */ + 5118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5119 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5120 "01000100" // MOVXM r20, #-8454144 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5121 "00000000" // /* MW 5 */ + 5122 "00100000" // /* MW 4 */ + 5123 "00001010" // /* MW 3 */ + 5124 "01111111" // /* MW 2 */ + 5125 "11111111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5126 "01111000" // VINSERT.32 x0, x0, #0, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5127 "10010001" // /* MW 3 */ + 5128 "00000010" // /* MW 2 */ + 5129 "00011000" // /* MW 1 */ + 5130 "11111000" // MOV r20, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5131 "11100000" // /* MW 3 */ + 5132 "00010101" // /* MW 2 */ + 5133 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 5134 "00011000" // ADD.NC p7, r20, #-66 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5135 "01011111" // /* MW 3 */ + 5136 "01101010" // /* MW 2 */ + 5137 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.noswbrkpt + 5138 "11010100" // ST.s16 r17, [p7]; VMOV bmll0, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5139 "00100101" // /* MW 5 */ + 5140 "00000001" // /* MW 4 */ + 5141 "11100000" // /* MW 3 */ + 5142 "11000110" // /* MW 2 */ + 5143 "11100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5144 "00011000" // MOVX crRnd, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5145 "10000000" // /* MW 3 */ + 5146 "01111010" // /* MW 2 */ + 5147 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5148 "00011000" // VCONV.bf16.fp32 wl0, bmll0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5149 "00010110" // /* MW 3 */ + 5150 "01000000" // /* MW 2 */ + 5151 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 5152 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5153 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5154 "10111000" // VEXTRACT.16 r17, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5155 "00000001" // /* MW 3 */ + 5156 "01000001" // /* MW 2 */ + 5157 "00011100" // /* MW 1 */ + 5158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5159 "00000000" // /* MW 1 */ + 5160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5161 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 867 18 + 5162 "10011000" // LDA.s16 r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5163 "00110010" // /* MW 3 */ + 5164 "00000110" // /* MW 2 */ + 5165 "00000111" // /* MW 1 */ + 5166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5167 "00000000" // /* MW 1 */ + 5168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5169 "00000000" // /* MW 1 */ + 5170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5171 "00000000" // /* MW 1 */ + 5172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5173 "00000000" // /* MW 1 */ + 5174 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5175 "00000000" // /* MW 1 */ + 5176 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5177 "00000000" // /* MW 1 */ + 5178 "00001100" // NOPA; ST r17, [sp, #-48] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5179 "01101011" // /* MW 5 */ + 5180 "10100100" // /* MW 4 */ + 5181 "11111111" // /* MW 3 */ + 5182 "00101100" // /* MW 2 */ + 5183 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_560 +.src_ref 2 "conv2d_bf16.h" 881 76 +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id first + 5184 "01110110" // MOVA m4, #92; MOVS p1, r14; MOVXM p3, #509028 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5185 "00010000" // /* MW 11 */ + 5186 "00110010" // /* MW 10 */ + 5187 "10110010" // /* MW 9 */ + 5188 "11110001" // /* MW 8 */ + 5189 "00000001" // /* MW 7 */ + 5190 "00000000" // /* MW 6 */ + 5191 "00001011" // /* MW 5 */ + 5192 "10001110" // /* MW 4 */ + 5193 "10000001" // /* MW 3 */ + 5194 "10010000" // /* MW 2 */ + 5195 "00001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.src_ref 2 "conv2d_bf16.h" 876 51 first +.src_ref 2 "conv2d_bf16.h" 881 76 first +.src_ref 2 "conv2d_bf16.h" 883 4 +.src_ref 2 "conv2d_bf16.h" 884 4 +.aggressive_scheduled_block_id 4 +.noswbrkpt + 5196 "01110110" // LDA.u8 r17, [p2], m4; MOVS p0, p1; SEL.EQZ r17, r25, r19, r27; MOV r19, #11 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5197 "01011000" // /* MW 11 */ + 5198 "00001011" // /* MW 10 */ + 5199 "01101000" // /* MW 9 */ + 5200 "10010010" // /* MW 8 */ + 5201 "00011001" // /* MW 7 */ + 5202 "00110011" // /* MW 6 */ + 5203 "10001011" // /* MW 5 */ + 5204 "10000100" // /* MW 4 */ + 5205 "01010000" // /* MW 3 */ + 5206 "01000101" // /* MW 2 */ + 5207 "01010001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5208 "10111010" // MOVA r22, #780; LTU r27, r28, r18; MOV r13, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5209 "01111000" // /* MW 9 */ + 5210 "01100000" // /* MW 8 */ + 5211 "10101010" // /* MW 7 */ + 5212 "01100101" // /* MW 6 */ + 5213 "10111001" // /* MW 5 */ + 5214 "00111001" // /* MW 4 */ + 5215 "00000000" // /* MW 3 */ + 5216 "10010110" // /* MW 2 */ + 5217 "01100001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 883 4 first +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5218 "00011000" // ST.s8 r19, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5219 "01100111" // /* MW 3 */ + 5220 "00000110" // /* MW 2 */ + 5221 "00000011" // /* MW 1 */ +.aggressive_scheduled_block_id 4 +.nohwbrkpt +.noswbrkpt + 5222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5223 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 884 4 first +.aggressive_scheduled_block_id 4 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 5224 "00000100" // JL #4352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4352 delay_slots=5 */ + 5225 "00000001" // /* MW 5 */ + 5226 "00000000" // /* MW 4 */ + 5227 "10000000" // /* MW 3 */ + 5228 "00001000" // /* MW 2 */ + 5229 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 first +.delay_slot +.aggressive_scheduled_block_id 4 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5230 "10011000" // LSHL r21, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5231 "00101101" // /* MW 3 */ + 5232 "01101011" // /* MW 2 */ + 5233 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 876 12 +.delay_slot + 5234 "01011000" // ADD.NC p7, r21, r30 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5235 "11111001" // /* MW 3 */ + 5236 "01101010" // /* MW 2 */ + 5237 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 first +.delay_slot + 5238 "10011000" // SUB r17, r25, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5239 "00010001" // /* MW 3 */ + 5240 "01100011" // /* MW 2 */ + 5241 "00010110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 +.delay_slot + 5242 "01100100" // LSHL r17, r17, r18; MOV r20, #781 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5243 "00110101" // /* MW 5 */ + 5244 "00101100" // /* MW 4 */ + 5245 "10111010" // /* MW 3 */ + 5246 "01100101" // /* MW 2 */ + 5247 "10001100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 881 45 +.src_ref 2 "conv2d_bf16_params.h" 243 80 first +.delay_slot + 5248 "11100001" // NOPA; NOPB; NOPS; SEL.EQZ r9, r22, r20, r27; ADD.NC r12, r15, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5249 "00000000" // /* MW 15 */ + 5250 "00000000" // /* MW 14 */ + 5251 "10101000" // /* MW 13 */ + 5252 "11100010" // /* MW 12 */ + 5253 "10001011" // /* MW 11 */ + 5254 "00010001" // /* MW 10 */ + 5255 "10011010" // /* MW 9 */ + 5256 "00101100" // /* MW 8 */ + 5257 "01011011" // /* MW 7 */ + 5258 "00000001" // /* MW 6 */ + 5259 "00100000" // /* MW 5 */ + 5260 "00000000" // /* MW 4 */ + 5261 "11110000" // /* MW 3 */ + 5262 "00101100" // /* MW 2 */ + 5263 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.return_address + 5264 "00011000" // LDA p1, [sp, #-44] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5265 "10011001" // /* MW 3 */ + 5266 "11010100" // /* MW 2 */ + 5267 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 first +.no_stack_arguments + 5268 "00000100" // JL #4352 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4352 delay_slots=5 */ + 5269 "00000001" // /* MW 5 */ + 5270 "00000000" // /* MW 4 */ + 5271 "10000000" // /* MW 3 */ + 5272 "00001000" // /* MW 2 */ + 5273 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5275 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5277 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 44 +.delay_slot + 5278 "00011000" // ADD.NC r13, r13, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5279 "10010000" // /* MW 3 */ + 5280 "01010110" // /* MW 2 */ + 5281 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5282 "11111000" // MOV p2, r13 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5283 "10100000" // /* MW 3 */ + 5284 "01100110" // /* MW 2 */ + 5285 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 885 4 +.delay_slot + 5286 "01111010" // NOPA; MOVS p0, r15; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5287 "00000000" // /* MW 9 */ + 5288 "00000000" // /* MW 8 */ + 5289 "00000000" // /* MW 7 */ + 5290 "00000000" // /* MW 6 */ + 5291 "00001011" // /* MW 5 */ + 5292 "10001111" // /* MW 4 */ + 5293 "11110000" // /* MW 3 */ + 5294 "00101100" // /* MW 2 */ + 5295 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.src_ref 2 "conv2d_bf16.h" 1115 26 +.return_address + 5296 "10111010" // MOVA dj6, #-332; MOVX r19, #63; ADD.NC p4, r13, #-116 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5297 "00001000" // /* MW 9 */ + 5298 "01100011" // /* MW 8 */ + 5299 "00110011" // /* MW 7 */ + 5300 "11101010" // /* MW 6 */ + 5301 "00110111" // /* MW 5 */ + 5302 "00000001" // /* MW 4 */ + 5303 "10000000" // /* MW 3 */ + 5304 "10011010" // /* MW 2 */ + 5305 "11010110" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 886 4 +.src_ref 2 "conv2d_bf16.h" 896 23 first +.src_ref 2 "conv2d_bf16.h" 1123 71 + 5306 "00101100" // LDA dn0, [p4], #4; MOVX r13, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5307 "01100010" // /* MW 5 */ + 5308 "00110100" // /* MW 4 */ + 5309 "11010000" // /* MW 3 */ + 5310 "10000100" // /* MW 2 */ + 5311 "10000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5312 "10011000" // LDA dj0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5313 "01000110" // /* MW 3 */ + 5314 "00011100" // /* MW 2 */ + 5315 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5316 "10011000" // LDA dn4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5317 "00100110" // /* MW 3 */ + 5318 "00011110" // /* MW 2 */ + 5319 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5320 "10011000" // LDA dj4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5321 "01000110" // /* MW 3 */ + 5322 "00011110" // /* MW 2 */ + 5323 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5324 "10011000" // LDA m0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5325 "00000110" // /* MW 3 */ + 5326 "00011100" // /* MW 2 */ + 5327 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5328 "10011000" // LDA dc0, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5329 "01100110" // /* MW 3 */ + 5330 "00011100" // /* MW 2 */ + 5331 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 896 23 + 5332 "10011000" // LDA dc4, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5333 "01100110" // /* MW 3 */ + 5334 "00011110" // /* MW 2 */ + 5335 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 first + 5336 "10011000" // LDA r22, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5337 "11010110" // /* MW 3 */ + 5338 "00011110" // /* MW 2 */ + 5339 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5340 "10011000" // LDA r17, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5341 "00110110" // /* MW 3 */ + 5342 "00011110" // /* MW 2 */ + 5343 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5344 "10011000" // LDA r28, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5345 "10010110" // /* MW 3 */ + 5346 "00011111" // /* MW 2 */ + 5347 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5348 "10011000" // LDA r21, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5349 "10110110" // /* MW 3 */ + 5350 "00011110" // /* MW 2 */ + 5351 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5352 "10011000" // LDA r23, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5353 "11110110" // /* MW 3 */ + 5354 "00011110" // /* MW 2 */ + 5355 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5356 "10011000" // LDA p3, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5357 "10011110" // /* MW 3 */ + 5358 "00011101" // /* MW 2 */ + 5359 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 897 23 + 5360 "10011000" // LDA dn2, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5361 "00100110" // /* MW 3 */ + 5362 "00011101" // /* MW 2 */ + 5363 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5364 "10011000" // LDA dn1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5365 "10100110" // /* MW 3 */ + 5366 "00011100" // /* MW 2 */ + 5367 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5368 "10011000" // LDA dj1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5369 "11000110" // /* MW 3 */ + 5370 "00011100" // /* MW 2 */ + 5371 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5372 "10011000" // LDA dn5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5373 "10100110" // /* MW 3 */ + 5374 "00011110" // /* MW 2 */ + 5375 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5376 "10011000" // LDA r30, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5377 "11010110" // /* MW 3 */ + 5378 "00011111" // /* MW 2 */ + 5379 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5380 "10011000" // LDA r29, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5381 "10110110" // /* MW 3 */ + 5382 "00011111" // /* MW 2 */ + 5383 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 + 5384 "10011000" // LDA dc1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5385 "11100110" // /* MW 3 */ + 5386 "00011100" // /* MW 2 */ + 5387 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5388 "10011000" // LDA.u8 r18, [p4, dj6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5389 "01001010" // /* MW 3 */ + 5390 "11000010" // /* MW 2 */ + 5391 "00000100" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 + 5392 "00011000" // LDA r20, [sp, #-48] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5393 "10010001" // /* MW 3 */ + 5394 "11010010" // /* MW 2 */ + 5395 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 898 22 first + 5396 "10011000" // LDA r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5397 "01010110" // /* MW 3 */ + 5398 "00000100" // /* MW 2 */ + 5399 "00000100" // /* MW 1 */ + 5400 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5401 "00000000" // /* MW 1 */ + 5402 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5403 "00000000" // /* MW 1 */ + 5404 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5405 "00000000" // /* MW 1 */ + 5406 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5407 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 26 first + 5408 "10011000" // LTU r19, r19, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5409 "00101100" // /* MW 3 */ + 5410 "11100111" // /* MW 2 */ + 5411 "00010100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1115 12 + 5412 "10000100" // JNZ r19, #6336 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6336 delay_slots=5 */ + 5413 "00000001" // /* MW 5 */ + 5414 "01000000" // /* MW 4 */ + 5415 "01100000" // /* MW 3 */ + 5416 "00001100" // /* MW 2 */ + 5417 "10011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 +.delay_slot + 5418 "01000100" // MOVXM p2, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5419 "11001000" // /* MW 5 */ + 5420 "11001000" // /* MW 4 */ + 5421 "11000100" // /* MW 3 */ + 5422 "00000111" // /* MW 2 */ + 5423 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 886 4 first +.delay_slot + 5424 "00011000" // ST.s8 r13, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5425 "10100111" // /* MW 3 */ + 5426 "00000101" // /* MW 2 */ + 5427 "00000010" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 5428 "11111000" // VBCST.16 x9, r20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5429 "01110010" // /* MW 3 */ + 5430 "11010001" // /* MW 2 */ + 5431 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5433 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 5434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 5435 "00000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1123 71 first + 5436 "10111010" // LDA p4, [sp, #-40]; EQ r27, r13, r18; MOV m7, #132 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5437 "01011000" // /* MW 9 */ + 5438 "10000100" // /* MW 8 */ + 5439 "10000000" // /* MW 7 */ + 5440 "00111111" // /* MW 6 */ + 5441 "10111001" // /* MW 5 */ + 5442 "00011011" // /* MW 4 */ + 5443 "00100000" // /* MW 3 */ + 5444 "01000011" // /* MW 2 */ + 5445 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 +.src_ref 2 "conv2d_bf16.h" 1154 80 + 5446 "10111010" // MOVA r19, #0; MOVX r18, #-128; MOV m4, #60 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5447 "01011000" // /* MW 9 */ + 5448 "00111100" // /* MW 8 */ + 5449 "00000000" // /* MW 7 */ + 5450 "00001010" // /* MW 6 */ + 5451 "00100000" // /* MW 5 */ + 5452 "00111101" // /* MW 4 */ + 5453 "00000000" // /* MW 3 */ + 5454 "00010011" // /* MW 2 */ + 5455 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5456 "10111010" // MOVA m5, #-64; MOVX r26, #0; MOV dc7, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5457 "01111000" // /* MW 9 */ + 5458 "11010000" // /* MW 8 */ + 5459 "11100100" // /* MW 7 */ + 5460 "00001011" // /* MW 6 */ + 5461 "10100000" // /* MW 5 */ + 5462 "00000001" // /* MW 4 */ + 5463 "10000000" // /* MW 3 */ + 5464 "00010100" // /* MW 2 */ + 5465 "11111000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 746 83 + 5466 "01110110" // MOVA m6, #-132; MOVS dc2, dc7; MOVX crRnd, r13; MOV dn3, dc7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5467 "01111000" // /* MW 11 */ + 5468 "11000000" // /* MW 10 */ + 5469 "10100111" // /* MW 9 */ + 5470 "00000001" // /* MW 8 */ + 5471 "11010100" // /* MW 7 */ + 5472 "00011011" // /* MW 6 */ + 5473 "01001011" // /* MW 5 */ + 5474 "00011100" // /* MW 4 */ + 5475 "10000010" // /* MW 3 */ + 5476 "10011000" // /* MW 2 */ + 5477 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.src_ref 2 "conv2d_bf16.h" 1199 26 +.src_ref 2 "conv2d_bf16.h" 1200 26 +.src_ref 2 "conv2d_bf16.h" 1201 26 +.src_ref 2 "conv2d_bf16.h" 1202 26 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id first + 5478 "01110110" // LDA r5, [sp, #-44]; MOVS dc6, dc7; MOVX r31, #60; MOV r15, #7 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5479 "01011000" // /* MW 11 */ + 5480 "00000111" // /* MW 10 */ + 5481 "11101000" // /* MW 9 */ + 5482 "10001001" // /* MW 8 */ + 5483 "11110111" // /* MW 7 */ + 5484 "00000001" // /* MW 6 */ + 5485 "01001011" // /* MW 5 */ + 5486 "00011100" // /* MW 4 */ + 5487 "00100110" // /* MW 3 */ + 5488 "10010110" // /* MW 2 */ + 5489 "11111010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 12 +.src_ref 2 "conv2d_bf16.h" 1218 20 +.aggressive_scheduled_block_id 5 +.noswbrkpt + 5490 "10111010" // LDA r18, [sp, #-36]; MOVXM p2, #5600 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5491 "00010000" // /* MW 9 */ + 5492 "11110000" // /* MW 8 */ + 5493 "00110010" // /* MW 7 */ + 5494 "00000101" // /* MW 6 */ + 5495 "00000000" // /* MW 5 */ + 5496 "00000000" // /* MW 4 */ + 5497 "00100000" // /* MW 3 */ + 5498 "11001010" // /* MW 2 */ + 5499 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 749 26 +.src_ref 2 "conv2d_bf16.h" 750 26 +.src_ref 2 "conv2d_bf16.h" 751 26 +.src_ref 2 "conv2d_bf16.h" 752 26 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.aggressive_scheduled_block_id 5 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 5500 "10111010" // LDA r13, [sp, #-32]; SEL.EQZ r6, r26, r18, r27; MOV r20, #780 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5501 "01011000" // /* MW 9 */ + 5502 "00001100" // /* MW 8 */ + 5503 "10001011" // /* MW 7 */ + 5504 "00010010" // /* MW 6 */ + 5505 "01101001" // /* MW 5 */ + 5506 "00110100" // /* MW 4 */ + 5507 "00100000" // /* MW 3 */ + 5508 "00110110" // /* MW 2 */ + 5509 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1123 47 +.src_ref 2 "conv2d_bf16.h" 1873 + 5510 "10110110" // LDA lr, [sp, #-28]; PADDB [p4], m7; MOVX r25, #0; MOV r24, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5511 "01011000" // /* MW 11 */ + 5512 "00000000" // /* MW 10 */ + 5513 "00001000" // /* MW 9 */ + 5514 "00001011" // /* MW 8 */ + 5515 "10010000" // /* MW 7 */ + 5516 "00000001" // /* MW 6 */ + 5517 "00100000" // /* MW 5 */ + 5518 "11010111" // /* MW 4 */ + 5519 "00101001" // /* MW 3 */ + 5520 "10000111" // /* MW 2 */ + 5521 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5522 "10011000" // LDA r0, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5523 "00010110" // /* MW 3 */ + 5524 "10001000" // /* MW 2 */ + 5525 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5526 "10011000" // LDA dn6, [p4], m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5527 "00100110" // /* MW 3 */ + 5528 "10101011" // /* MW 2 */ + 5529 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 + 5530 "10011000" // LDA r27, [p4], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5531 "01110110" // /* MW 3 */ + 5532 "00101111" // /* MW 2 */ + 5533 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 80 first + 5534 "10011000" // LDA m5, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5535 "10000110" // /* MW 3 */ + 5536 "00011110" // /* MW 2 */ + 5537 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 80 first + 5538 "10011000" // LDA dj5, [p4], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5539 "11000110" // /* MW 3 */ + 5540 "10001010" // /* MW 2 */ + 5541 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 87 first + 5542 "10011000" // LDA m4, [p4], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5543 "00000110" // /* MW 3 */ + 5544 "10011110" // /* MW 2 */ + 5545 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 83 first + 5546 "10011000" // LDA r1, [p4], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5547 "00110110" // /* MW 3 */ + 5548 "00011100" // /* MW 2 */ + 5549 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 83 first +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5550 "10010100" // LDA r0, [p4], m6; ADD.NC dj6, r6, r0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5551 "00000010" // /* MW 5 */ + 5552 "00000110" // /* MW 4 */ + 5553 "11011101" // /* MW 3 */ + 5554 "00000010" // /* MW 2 */ + 5555 "10011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1143 66 first + 5556 "10011000" // LDA r3, [p4, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5557 "01110110" // /* MW 3 */ + 5558 "00010100" // /* MW 2 */ + 5559 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1206 63 first + 5560 "10011000" // LDA r4, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5561 "10010110" // /* MW 3 */ + 5562 "00000100" // /* MW 2 */ + 5563 "00000100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 + 5564 "11111000" // MOV r7, m5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5565 "00000000" // /* MW 3 */ + 5566 "11011010" // /* MW 2 */ + 5567 "00011001" // /* MW 1 */ + 5568 "01011000" // ADD.NC dj2, r7, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5569 "10011001" // /* MW 3 */ + 5570 "10000011" // /* MW 2 */ + 5571 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 + 5572 "11111000" // MOV r16, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5573 "00000000" // /* MW 3 */ + 5574 "00011011" // /* MW 2 */ + 5575 "00011100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1125 16 first + 5576 "01011000" // ADD.NC m2, r27, r6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5577 "10011001" // /* MW 3 */ + 5578 "00001101" // /* MW 2 */ + 5579 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1149 89 first + 5580 "00011000" // ADD.NC m6, r7, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5581 "11100000" // /* MW 3 */ + 5582 "00000011" // /* MW 2 */ + 5583 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1154 89 first + 5584 "00100100" // ADD r3, r3, #-1; ADD.NC m7, r16, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5585 "11000000" // /* MW 5 */ + 5586 "00010000" // /* MW 4 */ + 5587 "11101110" // /* MW 3 */ + 5588 "11111111" // /* MW 2 */ + 5589 "00011000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1287 37 + 5590 "10111010" // NOPA; NOPB; MOV m1, dj2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5591 "01111110" // /* MW 9 */ + 5592 "10000000" // /* MW 8 */ + 5593 "10000010" // /* MW 7 */ + 5594 "00000000" // /* MW 6 */ + 5595 "00010000" // /* MW 5 */ + 5596 "00000000" // /* MW 4 */ + 5597 "11110000" // /* MW 3 */ + 5598 "00101100" // /* MW 2 */ + 5599 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_976 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 738 8 +.src_ref 2 "conv2d_bf16.h" 1147 31 first +.src_ref 2 "conv2d_bf16.h" 1187 40 first +.loop_nesting 1 + 5600 "01110110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64; MOVS p1, r5; LSHL r14, r2, r15; MOV p0, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5601 "01111000" // /* MW 11 */ + 5602 "10010000" // /* MW 10 */ + 5603 "00110011" // /* MW 9 */ + 5604 "11101100" // /* MW 8 */ + 5605 "11100111" // /* MW 7 */ + 5606 "00000100" // /* MW 6 */ + 5607 "00001011" // /* MW 5 */ + 5608 "10000101" // /* MW 4 */ + 5609 "01110001" // /* MW 3 */ + 5610 "10000101" // /* MW 2 */ + 5611 "11000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1188 50 first + 5612 "11110110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24]; MOVS dc3, dn3; ADD.NC p4, r14, r12 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5613 "10100000" // /* MW 11 */ + 5614 "10011000" // /* MW 10 */ + 5615 "00110011" // /* MW 9 */ + 5616 "00000010" // /* MW 8 */ + 5617 "01001011" // /* MW 7 */ + 5618 "00001110" // /* MW 6 */ + 5619 "00101011" // /* MW 5 */ + 5620 "00101000" // /* MW 4 */ + 5621 "01111000" // /* MW 3 */ + 5622 "10000001" // /* MW 2 */ + 5623 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first + 5624 "11110110" // VLDA.POP.576 ex7, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];MOVS dn7, dn6; MOV dj7, dj6 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5625 "01110000" // /* MW 11 */ + 5626 "10000000" // /* MW 10 */ + 5627 "11000110" // /* MW 9 */ + 5628 "00000011" // /* MW 8 */ + 5629 "01001011" // /* MW 7 */ + 5630 "01011010" // /* MW 6 */ + 5631 "00101111" // /* MW 5 */ + 5632 "00101000" // /* MW 4 */ + 5633 "01111000" // /* MW 3 */ + 5634 "00111001" // /* MW 2 */ + 5635 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1149 31 first + 5636 "11110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m6;VLDB.POP.576 ex6, [p0, lf0, r24];MOVS dn3, r19; MOV m3, m2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5637 "01110000" // /* MW 11 */ + 5638 "00000000" // /* MW 10 */ + 5639 "10000010" // /* MW 9 */ + 5640 "00000001" // /* MW 8 */ + 5641 "00001011" // /* MW 7 */ + 5642 "01010011" // /* MW 6 */ + 5643 "00101011" // /* MW 5 */ + 5644 "00000011" // /* MW 4 */ + 5645 "01110100" // /* MW 3 */ + 5646 "00001101" // /* MW 2 */ + 5647 "11011001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 743 30 first + 5648 "10111010" // VLDA.POP.576 ex8, [p1, lf1, r25, m4];VLDB.POP.576.3D ex11, [p0, lf0, r24, d0]; MOV dj3, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5649 "01011110" // /* MW 9 */ + 5650 "00000000" // /* MW 8 */ + 5651 "11000000" // /* MW 7 */ + 5652 "00000001" // /* MW 6 */ + 5653 "11010100" // /* MW 5 */ + 5654 "00010010" // /* MW 4 */ + 5655 "01110100" // /* MW 3 */ + 5656 "01000001" // /* MW 2 */ + 5657 "01110001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1152 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first + 5658 "10110110" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.FILL.512 [p0, lf0, r24]; MOVXM le, #5920 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5659 "00010000" // /* MW 11 */ + 5660 "10010000" // /* MW 10 */ + 5661 "10111011" // /* MW 9 */ + 5662 "00000101" // /* MW 8 */ + 5663 "00000000" // /* MW 7 */ + 5664 "00000000" // /* MW 6 */ + 5665 "00101000" // /* MW 5 */ + 5666 "00101000" // /* MW 4 */ + 5667 "01111000" // /* MW 3 */ + 5668 "10010101" // /* MW 2 */ + 5669 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 1154 31 first +.src_ref 2 "conv2d_bf16.h" 1206 8 + 5670 "10110110" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.FILL.512 [p0, lf0, r24]; MOVXM ls, #5872 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5671 "00010000" // /* MW 11 */ + 5672 "01111000" // /* MW 10 */ + 5673 "01111011" // /* MW 9 */ + 5674 "00000100" // /* MW 8 */ + 5675 "00000000" // /* MW 7 */ + 5676 "00000000" // /* MW 6 */ + 5677 "00101000" // /* MW 5 */ + 5678 "00101000" // /* MW 4 */ + 5679 "01111000" // /* MW 3 */ + 5680 "00011101" // /* MW 2 */ + 5681 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first + 5682 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p4];VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5683 "00101000" // /* MW 5 */ + 5684 "00000001" // /* MW 4 */ + 5685 "01110100" // /* MW 3 */ + 5686 "10110101" // /* MW 2 */ + 5687 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1157 31 first + 5688 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5689 "00101000" // /* MW 5 */ + 5690 "00100010" // /* MW 4 */ + 5691 "01111000" // /* MW 3 */ + 5692 "10100101" // /* MW 2 */ + 5693 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 1159 31 first + 5694 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m6;VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5695 "00101000" // /* MW 5 */ + 5696 "00101000" // /* MW 4 */ + 5697 "01111000" // /* MW 3 */ + 5698 "00101101" // /* MW 2 */ + 5699 "11011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 738 8 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id first + 5700 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.FILL.512 [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5701 "00101000" // /* MW 5 */ + 5702 "00101000" // /* MW 4 */ + 5703 "01111000" // /* MW 3 */ + 5704 "10000001" // /* MW 2 */ + 5705 "00100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 1192 29 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5706 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p4], #64;VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5707 "00101000" // /* MW 5 */ + 5708 "00000001" // /* MW 4 */ + 5709 "01110100" // /* MW 3 */ + 5710 "10111101" // /* MW 2 */ + 5711 "10000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5712 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p4];VLDB.POP.576.3D ex4, [p0, lf0, r24, d0]; VSHUFFLE ex10, ex6, ex11, r1 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5713 "11101110" // /* MW 9 */ + 5714 "11000011" // /* MW 8 */ + 5715 "10011010" // /* MW 7 */ + 5716 "00000010" // /* MW 6 */ + 5717 "00010100" // /* MW 5 */ + 5718 "00010001" // /* MW 4 */ + 5719 "01110100" // /* MW 3 */ + 5720 "11001101" // /* MW 2 */ + 5721 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1162 81 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5722 "11110110" // VLDA.CONV.fp32.bf16 cml4, [p4];VLDB.FILL.512 [p0, lf0, r24];MOVS p4, p6; VSHUFFLE ex6, ex6, ex11, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5723 "11100000" // /* MW 11 */ + 5724 "11000001" // /* MW 10 */ + 5725 "10011010" // /* MW 9 */ + 5726 "00000001" // /* MW 8 */ + 5727 "10001011" // /* MW 7 */ + 5728 "10011000" // /* MW 6 */ + 5729 "00101100" // /* MW 5 */ + 5730 "00101000" // /* MW 4 */ + 5731 "01111000" // /* MW 3 */ + 5732 "11000101" // /* MW 2 */ + 5733 "10000000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5734 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex1, [p1, lf1, r25]; VMAC.f dm0, dm0, ex10, ex7, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5735 "11101001" // /* MW 9 */ + 5736 "00010100" // /* MW 8 */ + 5737 "01001000" // /* MW 7 */ + 5738 "00011101" // /* MW 6 */ + 5739 "01010100" // /* MW 5 */ + 5740 "00000000" // /* MW 4 */ + 5741 "01110011" // /* MW 3 */ + 5742 "10000001" // /* MW 2 */ + 5743 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5744 "01101110" // VLDA.3D.CONV.fp32.bf16 cml3, [p6], d3; MOVS dn3, dn2; MOV dj3, dj5; VMAC.f dm1, dm1, ex6, ex7, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5745 "11101001" // /* MW 13 */ + 5746 "00101100" // /* MW 12 */ + 5747 "01001001" // /* MW 11 */ + 5748 "00000111" // /* MW 10 */ + 5749 "01011000" // /* MW 9 */ + 5750 "01011100" // /* MW 8 */ + 5751 "00000000" // /* MW 7 */ + 5752 "00000000" // /* MW 6 */ + 5753 "10010110" // /* MW 5 */ + 5754 "10010100" // /* MW 4 */ + 5755 "01110110" // /* MW 3 */ + 5756 "00110101" // /* MW 2 */ + 5757 "11001111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1162 81 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5758 "01101110" // VLDA.CONV.fp32.bf16 cmh3, [p4, #64]; MOVS dc5, dc7; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm2, dm2, ex10, ex8, r9 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5759 "00001001" // /* MW 13 */ + 5760 "01010101" // /* MW 12 */ + 5761 "01001010" // /* MW 11 */ + 5762 "00111110" // /* MW 10 */ + 5763 "10010000" // /* MW 9 */ + 5764 "01001100" // /* MW 8 */ + 5765 "00000000" // /* MW 7 */ + 5766 "00000000" // /* MW 6 */ + 5767 "10010110" // /* MW 5 */ + 5768 "00111000" // /* MW 4 */ + 5769 "01111010" // /* MW 3 */ + 5770 "10111101" // /* MW 2 */ + 5771 "10000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 1199 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5772 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dn2, dc3; VSHUFFLE ex5, ex2, ex4, r0; VADD.f dm0, dm3, dm0, r31 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5773 "00111101" // /* MW 13 */ + 5774 "01100000" // /* MW 12 */ + 5775 "11111000" // /* MW 11 */ + 5776 "00011110" // /* MW 10 */ + 5777 "10010000" // /* MW 9 */ + 5778 "01010100" // /* MW 8 */ + 5779 "00000000" // /* MW 7 */ + 5780 "00000000" // /* MW 6 */ + 5781 "10010110" // /* MW 5 */ + 5782 "10011000" // /* MW 4 */ + 5783 "01110100" // /* MW 3 */ + 5784 "00000001" // /* MW 2 */ + 5785 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 1200 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5786 "01100010" // VLDA.FILL.512 [p1, lf1, r25]; VADD.f dm1, dm3, dm1, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5787 "00111101" // /* MW 7 */ + 5788 "01100100" // /* MW 6 */ + 5789 "11111001" // /* MW 5 */ + 5790 "00000100" // /* MW 4 */ + 5791 "01110000" // /* MW 3 */ + 5792 "10000001" // /* MW 2 */ + 5793 "00100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1201 26 first +.aggressive_scheduled_block_id 6 +.noswbrkpt + 5794 "01100010" // VLDA.POP.576 ex1, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r31 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5795 "00111101" // /* MW 7 */ + 5796 "10001000" // /* MW 6 */ + 5797 "11111010" // /* MW 5 */ + 5798 "00000100" // /* MW 4 */ + 5799 "01110000" // /* MW 3 */ + 5800 "00001001" // /* MW 2 */ + 5801 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5802 "01100010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; VMAC.f dm3, dm3, ex6, ex8, r9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 5803 "00001001" // /* MW 7 */ + 5804 "01101101" // /* MW 6 */ + 5805 "01001011" // /* MW 5 */ + 5806 "00000100" // /* MW 4 */ + 5807 "01110000" // /* MW 3 */ + 5808 "00000001" // /* MW 2 */ + 5809 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5810 "00111100" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 5811 "00101000" // /* MW 5 */ + 5812 "00000001" // /* MW 4 */ + 5813 "01110100" // /* MW 3 */ + 5814 "10000001" // /* MW 2 */ + 5815 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5816 "00011000" // VLDB.POP.576.3D ex4, [p0, lf0, r24, d0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 5817 "00010100" // /* MW 3 */ + 5818 "00010001" // /* MW 2 */ + 5819 "00111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 1202 26 first +.src_ref 2 "conv2d_bf16.h" 1206 8 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5820 "01100110" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24]; ADD.NC lc, r4, #-5; VADD.f dm3, dm4, dm3, r31 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5821 "00111101" // /* MW 11 */ + 5822 "10001100" // /* MW 10 */ + 5823 "11111011" // /* MW 9 */ + 5824 "10000010" // /* MW 8 */ + 5825 "01111101" // /* MW 7 */ + 5826 "01110010" // /* MW 6 */ + 5827 "00101101" // /* MW 5 */ + 5828 "00101000" // /* MW 4 */ + 5829 "01111000" // /* MW 3 */ + 5830 "00001001" // /* MW 2 */ + 5831 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5832 "01001010" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24]; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5833 "00101001" // /* MW 9 */ + 5834 "00000110" // /* MW 8 */ + 5835 "10100000" // /* MW 7 */ + 5836 "00011101" // /* MW 6 */ + 5837 "00010100" // /* MW 5 */ + 5838 "00010100" // /* MW 4 */ + 5839 "01110100" // /* MW 3 */ + 5840 "00000001" // /* MW 2 */ + 5841 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5842 "01001110" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24]; NOPX; MOV dj5, r21; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5843 "00001001" // /* MW 13 */ + 5844 "01000110" // /* MW 12 */ + 5845 "10100010" // /* MW 11 */ + 5846 "00001111" // /* MW 10 */ + 5847 "10101010" // /* MW 9 */ + 5848 "01011000" // /* MW 8 */ + 5849 "00000000" // /* MW 7 */ + 5850 "00000000" // /* MW 6 */ + 5851 "00101000" // /* MW 5 */ + 5852 "00000001" // /* MW 4 */ + 5853 "01110100" // /* MW 3 */ + 5854 "10000001" // /* MW 2 */ + 5855 "00100010" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5856 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5857 "01010001" // /* MW 15 */ + 5858 "00001001" // /* MW 14 */ + 5859 "11101101" // /* MW 13 */ + 5860 "00000011" // /* MW 12 */ + 5861 "11001001" // /* MW 11 */ + 5862 "00000000" // /* MW 10 */ + 5863 "00000000" // /* MW 9 */ + 5864 "00000000" // /* MW 8 */ + 5865 "01011011" // /* MW 7 */ + 5866 "00000001" // /* MW 6 */ + 5867 "00101000" // /* MW 5 */ + 5868 "00100010" // /* MW 4 */ + 5869 "11111000" // /* MW 3 */ + 5870 "00101100" // /* MW 2 */ + 5871 "00000000" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1248 +.src_ref 2 "conv2d_bf16.h" 736 8 first +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.begin_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 5872 "01001011" // VLDA.POP.576 ex1, [p1, lf1, r25];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5873 "01010000" // /* MW 15 */ + 5874 "00011011" // /* MW 14 */ + 5875 "11101101" // /* MW 13 */ + 5876 "00000001" // /* MW 12 */ + 5877 "01001001" // /* MW 11 */ + 5878 "00000001" // /* MW 10 */ + 5879 "00000000" // /* MW 9 */ + 5880 "00000000" // /* MW 8 */ + 5881 "01011011" // /* MW 7 */ + 5882 "00000001" // /* MW 6 */ + 5883 "00101000" // /* MW 5 */ + 5884 "00101000" // /* MW 4 */ + 5885 "01111000" // /* MW 3 */ + 5886 "00001001" // /* MW 2 */ + 5887 "10100000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 737 8 first +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5888 "01001011" // VLDA.POP.576 ex0, [p1, lf1, r25, m4];VLDB.FILL.512 [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5889 "00110001" // /* MW 15 */ + 5890 "00000000" // /* MW 14 */ + 5891 "01111101" // /* MW 13 */ + 5892 "10100101" // /* MW 12 */ + 5893 "00000001" // /* MW 11 */ + 5894 "00000000" // /* MW 10 */ + 5895 "00000000" // /* MW 9 */ + 5896 "00000000" // /* MW 8 */ + 5897 "01011011" // /* MW 7 */ + 5898 "00000001" // /* MW 6 */ + 5899 "00101000" // /* MW 5 */ + 5900 "00101000" // /* MW 4 */ + 5901 "01111000" // /* MW 3 */ + 5902 "00000001" // /* MW 2 */ + 5903 "01110001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 738 8 first +.src_ref 2 "conv2d_bf16.h" 740 30 first +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5904 "01001011" // VLDA.FILL.512 [p1, lf1, r25]; VLDB.POP.576 ex2, [p0, lf0, r24];NOPS; NOPX; NOPM; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5905 "00110000" // /* MW 15 */ + 5906 "00010010" // /* MW 14 */ + 5907 "01111101" // /* MW 13 */ + 5908 "10100101" // /* MW 12 */ + 5909 "00000001" // /* MW 11 */ + 5910 "00000000" // /* MW 10 */ + 5911 "00000000" // /* MW 9 */ + 5912 "00000000" // /* MW 8 */ + 5913 "01011011" // /* MW 7 */ + 5914 "00000001" // /* MW 6 */ + 5915 "00101000" // /* MW 5 */ + 5916 "00000001" // /* MW 4 */ + 5917 "01110100" // /* MW 3 */ + 5918 "10000001" // /* MW 2 */ + 5919 "00100010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1296 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.end_of_loop +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5920 "01001011" // NOPA; VLDB.POP.576.3D ex4, [p0, lf0, r24, d0];NOPS; NOPX; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 5921 "01010001" // /* MW 15 */ + 5922 "00001001" // /* MW 14 */ + 5923 "11101101" // /* MW 13 */ + 5924 "00000011" // /* MW 12 */ + 5925 "11001001" // /* MW 11 */ + 5926 "00000000" // /* MW 10 */ + 5927 "00000000" // /* MW 9 */ + 5928 "00000000" // /* MW 8 */ + 5929 "01011011" // /* MW 7 */ + 5930 "00000001" // /* MW 6 */ + 5931 "00101000" // /* MW 5 */ + 5932 "00100010" // /* MW 4 */ + 5933 "11111000" // /* MW 3 */ + 5934 "00101100" // /* MW 2 */ + 5935 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 742 30 first +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 5936 "01101110" // VLDA.POP.576 ex1, [p1, lf1, r25]; MOVS dn6, dn7; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5937 "00001001" // /* MW 13 */ + 5938 "01101010" // /* MW 12 */ + 5939 "10100011" // /* MW 11 */ + 5940 "00011110" // /* MW 10 */ + 5941 "10010000" // /* MW 9 */ + 5942 "01010100" // /* MW 8 */ + 5943 "00000000" // /* MW 7 */ + 5944 "00000000" // /* MW 6 */ + 5945 "10010110" // /* MW 5 */ + 5946 "10111100" // /* MW 4 */ + 5947 "01111100" // /* MW 3 */ + 5948 "00001001" // /* MW 2 */ + 5949 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 743 30 first +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5950 "01101110" // VLDA.POP.576 ex0, [p1, lf1, r25, m4]; MOVS dc7, dn3; MOV dj7, dj3; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 5951 "00101001" // /* MW 13 */ + 5952 "00000110" // /* MW 12 */ + 5953 "10100000" // /* MW 11 */ + 5954 "00000111" // /* MW 10 */ + 5955 "00111000" // /* MW 9 */ + 5956 "01111100" // /* MW 8 */ + 5957 "00000000" // /* MW 7 */ + 5958 "00000000" // /* MW 6 */ + 5959 "10010110" // /* MW 5 */ + 5960 "00011100" // /* MW 4 */ + 5961 "01111110" // /* MW 3 */ + 5962 "00000001" // /* MW 2 */ + 5963 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5964 "01001010" // MOVS dc3, p3; MOV r5, dj2; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5965 "00001001" // /* MW 9 */ + 5966 "01000110" // /* MW 8 */ + 5967 "10100010" // /* MW 7 */ + 5968 "11100100" // /* MW 6 */ + 5969 "00000000" // /* MW 5 */ + 5970 "01010101" // /* MW 4 */ + 5971 "01100001" // /* MW 3 */ + 5972 "10010001" // /* MW 2 */ + 5973 "01100001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5974 "01001010" // MOVS dn3, r22; VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5975 "00101001" // /* MW 9 */ + 5976 "00101010" // /* MW 8 */ + 5977 "10100001" // /* MW 7 */ + 5978 "11000100" // /* MW 6 */ + 5979 "00000111" // /* MW 5 */ + 5980 "10010010" // /* MW 4 */ + 5981 "01100001" // /* MW 3 */ + 5982 "11000001" // /* MW 2 */ + 5983 "01101010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5984 "01001010" // MOVS dn7, r28; VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 5985 "00001001" // /* MW 9 */ + 5986 "01101010" // /* MW 8 */ + 5987 "10100011" // /* MW 7 */ + 5988 "11000100" // /* MW 6 */ + 5989 "00000011" // /* MW 5 */ + 5990 "10010010" // /* MW 4 */ + 5991 "01100010" // /* MW 3 */ + 5992 "10000001" // /* MW 2 */ + 5993 "11101011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 749 26 first +.src_ref 2 "conv2d_bf16.h" 1285 32 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 5994 "01100110" // PADDB [p7], m5; MOVS p5, p7; MOV dj2, dj7; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 5995 "00101001" // /* MW 11 */ + 5996 "00000110" // /* MW 10 */ + 5997 "10100000" // /* MW 9 */ + 5998 "11100110" // /* MW 8 */ + 5999 "00000000" // /* MW 7 */ + 6000 "10001111" // /* MW 6 */ + 6001 "00100010" // /* MW 5 */ + 6002 "01010111" // /* MW 4 */ + 6003 "01101111" // /* MW 3 */ + 6004 "10010001" // /* MW 2 */ + 6005 "10110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 6006 "01001010" // MOVS p4, p7; MOV m2, m3; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6007 "00001001" // /* MW 9 */ + 6008 "01000110" // /* MW 8 */ + 6009 "10100010" // /* MW 7 */ + 6010 "11100100" // /* MW 6 */ + 6011 "00000000" // /* MW 5 */ + 6012 "00000110" // /* MW 4 */ + 6013 "01100010" // /* MW 3 */ + 6014 "10010001" // /* MW 2 */ + 6015 "10010011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 746 30 first +.src_ref 2 "conv2d_bf16.h" 750 26 first +.aggressive_scheduled_block_id 6 +.nohwbrkpt +.noswbrkpt + 6016 "01100010" // VSHUFFLE ex5, ex2, ex4, r0; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6017 "00101001" // /* MW 7 */ + 6018 "00101010" // /* MW 6 */ + 6019 "10100001" // /* MW 5 */ + 6020 "11000110" // /* MW 4 */ + 6021 "00000011" // /* MW 3 */ + 6022 "10010010" // /* MW 2 */ + 6023 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 745 30 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.aggressive_scheduled_block_id 6 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6024 "01100010" // VSHUFFLE ex3, ex2, ex4, r1; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6025 "00001001" // /* MW 7 */ + 6026 "01101010" // /* MW 6 */ + 6027 "10100011" // /* MW 5 */ + 6028 "11000110" // /* MW 4 */ + 6029 "00000111" // /* MW 3 */ + 6030 "10010010" // /* MW 2 */ + 6031 "00000001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 + 6032 "11111000" // MOV dj7, dj5 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6033 "00000000" // /* MW 3 */ + 6034 "10001011" // /* MW 2 */ + 6035 "00011111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 750 26 first + 6036 "01100010" // MOV m3, r23; VMAC.f dm1, dm1, ex5, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6037 "00101001" // /* MW 7 */ + 6038 "00101010" // /* MW 6 */ + 6039 "10100001" // /* MW 5 */ + 6040 "11100110" // /* MW 4 */ + 6041 "10100000" // /* MW 3 */ + 6042 "00001011" // /* MW 2 */ + 6043 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 749 26 first + 6044 "01100010" // MOV dj3, r17; VMAC.f dm0, dm0, ex3, ex1, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6045 "00101001" // /* MW 7 */ + 6046 "00000110" // /* MW 6 */ + 6047 "10100000" // /* MW 5 */ + 6048 "11100110" // /* MW 4 */ + 6049 "10100000" // /* MW 3 */ + 6050 "10001000" // /* MW 2 */ + 6051 "00000011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 752 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 + 6052 "01001010" // PADDB.3D [p0], d3; MOV m3, dj2; VMAC.f dm3, dm3, ex5, ex0, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6053 "00001001" // /* MW 9 */ + 6054 "01101010" // /* MW 8 */ + 6055 "10100011" // /* MW 7 */ + 6056 "11100110" // /* MW 6 */ + 6057 "00000000" // /* MW 5 */ + 6058 "00000101" // /* MW 4 */ + 6059 "00100011" // /* MW 3 */ + 6060 "11110111" // /* MW 2 */ + 6061 "00000000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 751 26 first +.src_ref 2 "conv2d_bf16.h" 1286 32 first + 6062 "01100110" // PADDB [p7], m3; MOVS p3, dc3; MOV dj5, r5; VMAC.f dm2, dm2, ex3, ex0, r20 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6063 "00001001" // /* MW 11 */ + 6064 "01000110" // /* MW 10 */ + 6065 "10100010" // /* MW 9 */ + 6066 "11100110" // /* MW 8 */ + 6067 "10100000" // /* MW 7 */ + 6068 "10000010" // /* MW 6 */ + 6069 "00100101" // /* MW 5 */ + 6070 "11010111" // /* MW 4 */ + 6071 "01101110" // /* MW 3 */ + 6072 "10001001" // /* MW 2 */ + 6073 "01110001" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 + 6074 "00000010" // MOVS dc3, dc5; MOV dj7, dj5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6075 "01110000" // /* MW 7 */ + 6076 "10000000" // /* MW 6 */ + 6077 "11000101" // /* MW 5 */ + 6078 "00000011" // /* MW 4 */ + 6079 "01100000" // /* MW 3 */ + 6080 "10001001" // /* MW 2 */ + 6081 "01100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 6082 "00000010" // MOVS dc5, r2; MOV m3, m1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6083 "01110000" // /* MW 7 */ + 6084 "00000000" // /* MW 6 */ + 6085 "10000001" // /* MW 5 */ + 6086 "00000001" // /* MW 4 */ + 6087 "01100000" // /* MW 3 */ + 6088 "01000001" // /* MW 2 */ + 6089 "10100000" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6090 "00000010" // VCONV.bf16.fp32 x11, cml1; MOV m1, r29 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6091 "01110000" // /* MW 7 */ + 6092 "01010000" // /* MW 6 */ + 6093 "10000111" // /* MW 5 */ + 6094 "00000000" // /* MW 4 */ + 6095 "11000000" // /* MW 3 */ + 6096 "00010010" // /* MW 2 */ + 6097 "10110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6098 "00000010" // VCONV.bf16.fp32 x10, cml0; MOV dj5, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6099 "01110000" // /* MW 7 */ + 6100 "10010000" // /* MW 6 */ + 6101 "11000111" // /* MW 5 */ + 6102 "00000010" // /* MW 4 */ + 6103 "11000000" // /* MW 3 */ + 6104 "00000010" // /* MW 2 */ + 6105 "10100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 736 8 +.src_ref 2 "conv2d_bf16.h" 1287 37 + 6106 "10111010" // PADDB.3D [p1], d1; MOVS p0, p7; MOV r14, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6107 "01110110" // /* MW 9 */ + 6108 "01100000" // /* MW 8 */ + 6109 "11001000" // /* MW 7 */ + 6110 "00000001" // /* MW 6 */ + 6111 "10010000" // /* MW 5 */ + 6112 "00111011" // /* MW 4 */ + 6113 "01100001" // /* MW 3 */ + 6114 "10010001" // /* MW 2 */ + 6115 "00010011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1287 37 + 6116 "00000010" // VCONV.bf16.fp32 x6, cmh0; MOV m1, m3 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6117 "01110000" // /* MW 7 */ + 6118 "00000000" // /* MW 6 */ + 6119 "10000011" // /* MW 5 */ + 6120 "00000000" // /* MW 4 */ + 6121 "11000000" // /* MW 3 */ + 6122 "00001010" // /* MW 2 */ + 6123 "01100010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1218 20 first +.src_ref 2 "conv2d_bf16.h" 1287 37 first + 6124 "00110110" // PADDB [p0], m1; VCONV.bf16.fp32 x5, cml2; JZ r18, #6256 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6256 delay_slots=5 */ + 6125 "01100000" // /* MW 11 */ + 6126 "00000000" // /* MW 10 */ + 6127 "00000000" // /* MW 9 */ + 6128 "00001110" // /* MW 8 */ + 6129 "00000011" // /* MW 7 */ + 6130 "00100100" // /* MW 6 */ + 6131 "00100000" // /* MW 5 */ + 6132 "01010111" // /* MW 4 */ + 6133 "11000000" // /* MW 3 */ + 6134 "00100010" // /* MW 2 */ + 6135 "01010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 738 8 +.delay_slot + 6136 "00000010" // VCONV.bf16.fp32 x7, cmh1; MOV r5, p1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6137 "01110000" // /* MW 7 */ + 6138 "01100000" // /* MW 6 */ + 6139 "10101001" // /* MW 5 */ + 6140 "00000000" // /* MW 4 */ + 6141 "11000000" // /* MW 3 */ + 6142 "00011010" // /* MW 2 */ + 6143 "01110010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6144 "00000010" // VCONV.bf16.fp32 x8, cml3; MOV dn7, dc7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6145 "01110000" // /* MW 7 */ + 6146 "11000000" // /* MW 6 */ + 6147 "10100111" // /* MW 5 */ + 6148 "00000011" // /* MW 4 */ + 6149 "11000000" // /* MW 3 */ + 6150 "00110010" // /* MW 2 */ + 6151 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6152 "10111010" // PADDB [p5], m1; VCONV.bf16.fp32 x1, cmh3; MOV p1, p5 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6153 "01110110" // /* MW 9 */ + 6154 "01100000" // /* MW 8 */ + 6155 "10110101" // /* MW 7 */ + 6156 "00000000" // /* MW 6 */ + 6157 "10010000" // /* MW 5 */ + 6158 "00101011" // /* MW 4 */ + 6159 "11000101" // /* MW 3 */ + 6160 "00111010" // /* MW 2 */ + 6161 "00010010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 2 "conv2d_bf16.h" 1286 32 +.delay_slot + 6162 "00000010" // VCONV.bf16.fp32 x2, cmh2; MOV dj5, dj2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6163 "01110000" // /* MW 7 */ + 6164 "10000000" // /* MW 6 */ + 6165 "11000010" // /* MW 5 */ + 6166 "00000010" // /* MW 4 */ + 6167 "11000000" // /* MW 3 */ + 6168 "00101010" // /* MW 2 */ + 6169 "00100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 1187 40 +.delay_slot + 6170 "00000010" // MOVS dc7, dc3; MOV r2, dc5 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6171 "01110000" // /* MW 7 */ + 6172 "11000000" // /* MW 6 */ + 6173 "01001101" // /* MW 5 */ + 6174 "00000000" // /* MW 4 */ + 6175 "01100000" // /* MW 3 */ + 6176 "10001001" // /* MW 2 */ + 6177 "11100001" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6178 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6179 "11101100" // /* MW 3 */ + 6180 "11011100" // /* MW 2 */ + 6181 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6182 "11111000" // VMAX_LT.bf16 x7, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6183 "11101100" // /* MW 3 */ + 6184 "10111100" // /* MW 2 */ + 6185 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6186 "00000010" // VST x11, [p1, dj7]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6187 "01110000" // /* MW 7 */ + 6188 "01110110" // /* MW 6 */ + 6189 "10101010" // /* MW 5 */ + 6190 "00000010" // /* MW 4 */ + 6191 "01100000" // /* MW 3 */ + 6192 "01011010" // /* MW 2 */ + 6193 "00111100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6194 "00000010" // VST x7, [p5, #64]; VMAX_LT.bf16 x7, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6195 "01110000" // /* MW 7 */ + 6196 "01110110" // /* MW 6 */ + 6197 "11011010" // /* MW 5 */ + 6198 "00000001" // /* MW 4 */ + 6199 "01100000" // /* MW 3 */ + 6200 "10111010" // /* MW 2 */ + 6201 "10100010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6202 "00111010" // VST x10, [p1]; J #6288 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6288 delay_slots=5 */ + 6203 "00100001" // /* MW 9 */ + 6204 "00000000" // /* MW 8 */ + 6205 "00000000" // /* MW 7 */ + 6206 "00010010" // /* MW 6 */ + 6207 "00000011" // /* MW 5 */ + 6208 "00000000" // /* MW 4 */ + 6209 "01100000" // /* MW 3 */ + 6210 "11010010" // /* MW 2 */ + 6211 "00100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6212 "00000010" // VST x7, [p1, #64]; VMAX_LT.bf16 x10, r16, x8, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6213 "01110000" // /* MW 7 */ + 6214 "01110110" // /* MW 6 */ + 6215 "10100010" // /* MW 5 */ + 6216 "00000010" // /* MW 4 */ + 6217 "01100000" // /* MW 3 */ + 6218 "10111010" // /* MW 2 */ + 6219 "00100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6220 "11111000" // VMAX_LT.bf16 x7, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6221 "11101100" // /* MW 3 */ + 6222 "10001100" // /* MW 2 */ + 6223 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6224 "00000010" // VST x10, [p0]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6225 "01110000" // /* MW 7 */ + 6226 "01110110" // /* MW 6 */ + 6227 "10010110" // /* MW 5 */ + 6228 "00000010" // /* MW 4 */ + 6229 "01100000" // /* MW 3 */ + 6230 "11010010" // /* MW 2 */ + 6231 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6232 "00000010" // VST x7, [p0, #64]; VMAX_LT.bf16 x2, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6233 "01110000" // /* MW 7 */ + 6234 "01110110" // /* MW 6 */ + 6235 "10001010" // /* MW 5 */ + 6236 "00000000" // /* MW 4 */ + 6237 "01100000" // /* MW 3 */ + 6238 "10111010" // /* MW 2 */ + 6239 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6240 "11100001" // NOPA; NOPB; VST x10, [p4, dj5]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6241 "00000000" // /* MW 15 */ + 6242 "00000000" // /* MW 14 */ + 6243 "01111000" // /* MW 13 */ + 6244 "10100101" // /* MW 12 */ + 6245 "00000001" // /* MW 11 */ + 6246 "00000000" // /* MW 10 */ + 6247 "00000000" // /* MW 9 */ + 6248 "00000000" // /* MW 8 */ + 6249 "10010011" // /* MW 7 */ + 6250 "10100010" // /* MW 6 */ + 6251 "00100100" // /* MW 5 */ + 6252 "00000000" // /* MW 4 */ + 6253 "11110000" // /* MW 3 */ + 6254 "00101100" // /* MW 2 */ + 6255 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1632 +.src_ref 4 "vector.hpp" 1152 43 + 6256 "00011000" // VST.CONV.bf16.fp32 cml1, [p1, dj7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6257 "10100011" // /* MW 3 */ + 6258 "11100000" // /* MW 2 */ + 6259 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6260 "00011000" // VST.CONV.bf16.fp32 cmh1, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6261 "11100011" // /* MW 3 */ + 6262 "00010100" // /* MW 2 */ + 6263 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6264 "00011000" // VST.CONV.bf16.fp32 cml0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6265 "00100011" // /* MW 3 */ + 6266 "00000100" // /* MW 2 */ + 6267 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6268 "00011000" // VST.CONV.bf16.fp32 cmh0, [p1, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6269 "01100011" // /* MW 3 */ + 6270 "00010100" // /* MW 2 */ + 6271 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6272 "00011000" // VST x8, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6273 "00010011" // /* MW 3 */ + 6274 "00000110" // /* MW 2 */ + 6275 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6276 "00011000" // VST.CONV.bf16.fp32 cmh3, [p0, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6277 "11100011" // /* MW 3 */ + 6278 "00010101" // /* MW 2 */ + 6279 "00001000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6280 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj5]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6281 "01110000" // /* MW 7 */ + 6282 "10100101" // /* MW 6 */ + 6283 "00000001" // /* MW 5 */ + 6284 "00000000" // /* MW 4 */ + 6285 "01100000" // /* MW 3 */ + 6286 "00100100" // /* MW 2 */ + 6287 "10010100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1664 +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1143 12 first + 6288 "00110110" // PADDB [p7], m5; VST x2, [p7, #64]; JNZD r3, r3, p2; MOV dj2, #0 /* MW 12 */ /* control_operation: words=12 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6289 "01011000" // /* MW 11 */ + 6290 "00000000" // /* MW 10 */ + 6291 "01000000" // /* MW 9 */ + 6292 "00000001" // /* MW 8 */ + 6293 "00110101" // /* MW 7 */ + 6294 "00000110" // /* MW 6 */ + 6295 "00100000" // /* MW 5 */ + 6296 "01010111" // /* MW 4 */ + 6297 "01101111" // /* MW 3 */ + 6298 "10010010" // /* MW 2 */ + 6299 "11100010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.delay_slot + 6300 "11111000" // MOV dn3, dn2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6301 "10000000" // /* MW 3 */ + 6302 "01000100" // /* MW 2 */ + 6303 "00011011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 +.delay_slot + 6304 "11111000" // MOV dn2, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6305 "10100000" // /* MW 3 */ + 6306 "01001001" // /* MW 2 */ + 6307 "00011010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 100 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.delay_slot + 6308 "11110100" // PADDB.3D [p7], d2; MOV dj2, dj7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6309 "00000001" // /* MW 5 */ + 6310 "00011110" // /* MW 4 */ + 6311 "00000101" // /* MW 3 */ + 6312 "01110010" // /* MW 2 */ + 6313 "11101011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.delay_slot + 6314 "11111000" // MOV dn2, dn7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6315 "10000000" // /* MW 3 */ + 6316 "01001110" // /* MW 2 */ + 6317 "00011010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6319 "00000000" // /* MW 1 */ +.loop_nesting 0 + 6320 "10000100" // J #6992 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=6992 delay_slots=5 */ + 6321 "00000000" // /* MW 5 */ + 6322 "00000000" // /* MW 4 */ + 6323 "10101000" // /* MW 3 */ + 6324 "00001101" // /* MW 2 */ + 6325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6327 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6328 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6329 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6330 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6331 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6333 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6335 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1712 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 1364 80 +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6336 "01110110" // LDA r31, [sp, #-40]; MOVS dc2, p3; MOVX r14, #136; MOV p1, r14 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6337 "01111000" // /* MW 11 */ + 6338 "10010000" // /* MW 10 */ + 6339 "10110011" // /* MW 9 */ + 6340 "00001000" // /* MW 8 */ + 6341 "11100001" // /* MW 7 */ + 6342 "00000100" // /* MW 6 */ + 6343 "10001011" // /* MW 5 */ + 6344 "00001100" // /* MW 4 */ + 6345 "00100010" // /* MW 3 */ + 6346 "01111110" // /* MW 2 */ + 6347 "11111011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1369 80 + 6348 "01110110" // MOVA m4, #60; MOVS dn2, r22; MOVX crRnd, r13; MOV dc6, dn2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6349 "01111000" // /* MW 11 */ + 6350 "01000000" // /* MW 10 */ + 6351 "01100010" // /* MW 9 */ + 6352 "00000011" // /* MW 8 */ + 6353 "11010100" // /* MW 7 */ + 6354 "00011011" // /* MW 6 */ + 6355 "00001011" // /* MW 5 */ + 6356 "01010110" // /* MW 4 */ + 6357 "10000010" // /* MW 3 */ + 6358 "10010000" // /* MW 2 */ + 6359 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 807 26 +.src_ref 2 "conv2d_bf16.h" 808 26 +.src_ref 2 "conv2d_bf16.h" 809 26 +.src_ref 2 "conv2d_bf16.h" 810 26 +.src_ref 2 "conv2d_bf16.h" 1436 26 +.src_ref 2 "conv2d_bf16.h" 1437 26 +.src_ref 2 "conv2d_bf16.h" 1438 26 +.src_ref 2 "conv2d_bf16.h" 1439 26 + 6360 "10111010" // MOVA r20, #60; MOVX r19, #780; MOV m2, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6361 "01111000" // /* MW 9 */ + 6362 "11010000" // /* MW 8 */ + 6363 "00000101" // /* MW 7 */ + 6364 "10001001" // /* MW 6 */ + 6365 "00110001" // /* MW 5 */ + 6366 "00011001" // /* MW 4 */ + 6367 "00000000" // /* MW 3 */ + 6368 "10010100" // /* MW 2 */ + 6369 "00000111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 802 83 +.src_ref 2 "conv2d_bf16.h" 1428 39 + 6370 "01110110" // MOVA m6, #-132; MOVS dn6, r28; MOVX r18, #6; MOV dj5, r30 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6371 "01111000" // /* MW 11 */ + 6372 "10010000" // /* MW 10 */ + 6373 "11000111" // /* MW 9 */ + 6374 "11001010" // /* MW 8 */ + 6375 "00100000" // /* MW 7 */ + 6376 "00000001" // /* MW 6 */ + 6377 "00001011" // /* MW 5 */ + 6378 "01011100" // /* MW 4 */ + 6379 "10000110" // /* MW 3 */ + 6380 "10011000" // /* MW 2 */ + 6381 "11101111" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 792 8 +.src_ref 2 "conv2d_bf16.h" 794 8 + 6382 "01110110" // LDA p0, [sp, #-44]; MOVS dc5, r2; MOVX r25, #0; MOV m1, r29 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6383 "01111000" // /* MW 11 */ + 6384 "01010000" // /* MW 10 */ + 6385 "10000111" // /* MW 9 */ + 6386 "00001000" // /* MW 8 */ + 6387 "10010000" // /* MW 7 */ + 6388 "00000001" // /* MW 6 */ + 6389 "00001011" // /* MW 5 */ + 6390 "00000010" // /* MW 4 */ + 6391 "00100101" // /* MW 3 */ + 6392 "10000011" // /* MW 2 */ + 6393 "11111010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 794 8 +.src_ref 2 "conv2d_bf16.h" 1455 20 + 6394 "10111010" // LDA r21, [sp, #-36]; MOVX r24, #0; MOV dj6, r21 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6395 "01111000" // /* MW 9 */ + 6396 "01010000" // /* MW 8 */ + 6397 "01000101" // /* MW 7 */ + 6398 "00001011" // /* MW 6 */ + 6399 "10000000" // /* MW 5 */ + 6400 "00000001" // /* MW 4 */ + 6401 "00100000" // /* MW 3 */ + 6402 "11010110" // /* MW 2 */ + 6403 "11111011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 12 + 6404 "10111010" // LDA r13, [sp, #-32]; MOVXM p2, #6480 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6405 "00010000" // /* MW 9 */ + 6406 "10101000" // /* MW 8 */ + 6407 "00110100" // /* MW 7 */ + 6408 "00000101" // /* MW 6 */ + 6409 "00000000" // /* MW 5 */ + 6410 "00000000" // /* MW 4 */ + 6411 "00100000" // /* MW 3 */ + 6412 "00110110" // /* MW 2 */ + 6413 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 first +.src_ref 2 "conv2d_bf16.h" 1873 + 6414 "10010100" // LDA lr, [sp, #-28]; ADD.NC p3, r31, r14 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6415 "01110010" // /* MW 5 */ + 6416 "11011111" // /* MW 4 */ + 6417 "00100110" // /* MW 3 */ + 6418 "10000111" // /* MW 2 */ + 6419 "11111100" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 80 + 6420 "10011000" // LDA dj3, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6421 "11000110" // /* MW 3 */ + 6422 "00011101" // /* MW 2 */ + 6423 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 80 first + 6424 "10011000" // LDA m4, [p3], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6425 "00000110" // /* MW 3 */ + 6426 "10001010" // /* MW 2 */ + 6427 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 799 87 first + 6428 "10011000" // LDA m5, [p3], #-28 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6429 "10000110" // /* MW 3 */ + 6430 "10011110" // /* MW 2 */ + 6431 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 83 first + 6432 "10011000" // LDA r22, [p3], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6433 "11010110" // /* MW 3 */ + 6434 "00011110" // /* MW 2 */ + 6435 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 83 first + 6436 "10011000" // LDA r23, [p3], m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6437 "11110110" // /* MW 3 */ + 6438 "11001010" // /* MW 2 */ + 6439 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1337 66 first + 6440 "10011000" // LDA r29, [p3, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6441 "10110110" // /* MW 3 */ + 6442 "00010111" // /* MW 2 */ + 6443 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1443 71 first + 6444 "10011000" // LDA r28, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6445 "10010110" // /* MW 3 */ + 6446 "00000111" // /* MW 2 */ + 6447 "00000011" // /* MW 1 */ + 6448 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6449 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1369 89 + 6450 "11111000" // MOV r30, m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6451 "00000000" // /* MW 3 */ + 6452 "10011000" // /* MW 2 */ + 6453 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 +.src_ref 2 "conv2d_bf16.h" 1518 37 + 6454 "11111000" // MOV m6, dj3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6455 "00000000" // /* MW 3 */ + 6456 "00000111" // /* MW 2 */ + 6457 "00011110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 + 6458 "11111000" // MOV r31, m6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6459 "00000000" // /* MW 3 */ + 6460 "11011100" // /* MW 2 */ + 6461 "00011111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1364 89 first + 6462 "00011000" // ADD.NC m3, r31, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6463 "11100000" // /* MW 3 */ + 6464 "00001111" // /* MW 2 */ + 6465 "00011011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1369 89 first + 6466 "00100100" // ADD r29, r29, #-1; ADD.NC m7, r30, #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6467 "11000000" // /* MW 5 */ + 6468 "00011110" // /* MW 4 */ + 6469 "11101110" // /* MW 3 */ + 6470 "01111111" // /* MW 2 */ + 6471 "11101111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 + 6472 "00000010" // NOPS; MOV dj7, r30 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6473 "01110000" // /* MW 7 */ + 6474 "10010000" // /* MW 6 */ + 6475 "11000111" // /* MW 5 */ + 6476 "00000011" // /* MW 4 */ + 6477 "01100000" // /* MW 3 */ + 6478 "00101011" // /* MW 2 */ + 6479 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_1856 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1362 31 first +.src_ref 2 "conv2d_bf16.h" 1429 50 +.src_ref 2 "conv2d_bf16.h" 1443 16 first +.loop_nesting 1 + 6480 "01111110" // VLDA.CONV.fp32.bf16 cml0, [p6], #64;VLDB.FILL.512 [p1, lf1, r25];MOVS p3, r12; MOVXM ls, #6656 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6481 "01100000" // /* MW 13 */ + 6482 "10000001" // /* MW 12 */ + 6483 "01110001" // /* MW 11 */ + 6484 "00000010" // /* MW 10 */ + 6485 "10100000" // /* MW 9 */ + 6486 "10001111" // /* MW 8 */ + 6487 "00000000" // /* MW 7 */ + 6488 "00000000" // /* MW 6 */ + 6489 "00101000" // /* MW 5 */ + 6490 "00101000" // /* MW 4 */ + 6491 "01111010" // /* MW 3 */ + 6492 "10000101" // /* MW 2 */ + 6493 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1364 31 first +.src_ref 2 "conv2d_bf16.h" 1443 16 + 6494 "10110110" // VLDA.CONV.fp32.bf16 cmh0, [p6], m3;VLDB.FILL.512 [p1, lf1, r25]; MOVXM le, #6704 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6495 "00010000" // /* MW 11 */ + 6496 "00011000" // /* MW 10 */ + 6497 "10111101" // /* MW 9 */ + 6498 "00000101" // /* MW 8 */ + 6499 "00000000" // /* MW 7 */ + 6500 "00000000" // /* MW 6 */ + 6501 "00101000" // /* MW 5 */ + 6502 "00101000" // /* MW 4 */ + 6503 "01111010" // /* MW 3 */ + 6504 "00001101" // /* MW 2 */ + 6505 "11001101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1428 39 first +.src_ref 2 "conv2d_bf16.h" 1443 16 first + 6506 "10110110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex10, [p1, lf1, r25]; LSHL r30, r2, r18; ADD.NC lc, r28, #-3 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6507 "01001000" // /* MW 11 */ + 6508 "00111111" // /* MW 10 */ + 6509 "10111111" // /* MW 9 */ + 6510 "01101110" // /* MW 8 */ + 6511 "11101001" // /* MW 7 */ + 6512 "00000101" // /* MW 6 */ + 6513 "00101000" // /* MW 5 */ + 6514 "00000101" // /* MW 4 */ + 6515 "01110110" // /* MW 3 */ + 6516 "10000001" // /* MW 2 */ + 6517 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6518 "10111010" // VLDA.POP.576 ex11, [p0, lf0, r24, m5];VLDB.POP.576 ex4, [p1, lf1, r25]; MOV dj2, r30 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6519 "01111110" // /* MW 9 */ + 6520 "10010000" // /* MW 8 */ + 6521 "01000111" // /* MW 7 */ + 6522 "00000001" // /* MW 6 */ + 6523 "00010100" // /* MW 5 */ + 6524 "00000001" // /* MW 4 */ + 6525 "01110011" // /* MW 3 */ + 6526 "01011001" // /* MW 2 */ + 6527 "01010101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1367 31 first + 6528 "00111100" // VLDA.CONV.fp32.bf16 cml1, [p6], #64;VLDB.POP.576 ex2, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6529 "00101000" // /* MW 5 */ + 6530 "00000001" // /* MW 4 */ + 6531 "01110110" // /* MW 3 */ + 6532 "10010101" // /* MW 2 */ + 6533 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 1369 31 first + 6534 "00111100" // VLDA.CONV.fp32.bf16 cmh1, [p6], m7;VLDB.POP.576.3D ex3, [p1, lf1, r25, d0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6535 "10101000" // /* MW 5 */ + 6536 "00100001" // /* MW 4 */ + 6537 "01111010" // /* MW 3 */ + 6538 "00011101" // /* MW 2 */ + 6539 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 1372 31 first + 6540 "00111100" // VLDA.CONV.fp32.bf16 cml2, [p6], #64;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6541 "00101000" // /* MW 5 */ + 6542 "00101000" // /* MW 4 */ + 6543 "01111010" // /* MW 3 */ + 6544 "10100101" // /* MW 2 */ + 6545 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 1374 31 first + 6546 "00111100" // VLDA.CONV.fp32.bf16 cmh2, [p6], m3;VLDB.FILL.512 [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6547 "00101000" // /* MW 5 */ + 6548 "00101000" // /* MW 4 */ + 6549 "01111010" // /* MW 3 */ + 6550 "00101101" // /* MW 2 */ + 6551 "11001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 1377 31 first + 6552 "00111100" // VLDA.CONV.fp32.bf16 cml3, [p6], #64;VLDB.POP.576 ex1, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6553 "10101000" // /* MW 5 */ + 6554 "00000000" // /* MW 4 */ + 6555 "01110110" // /* MW 3 */ + 6556 "10110101" // /* MW 2 */ + 6557 "11000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1379 31 first + 6558 "00111100" // VLDA.CONV.fp32.bf16 cmh3, [p6], m7;VLDB.POP.576 ex6, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6559 "00101000" // /* MW 5 */ + 6560 "00000011" // /* MW 4 */ + 6561 "01110110" // /* MW 3 */ + 6562 "00111101" // /* MW 2 */ + 6563 "11011101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 578 27 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 first + 6564 "00111100" // VLDA.CONV.fp32.bf16 cml4, [p3, dj2];VLDB.POP.576 ex7, [p1, lf1, r25] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6565 "10101000" // /* MW 5 */ + 6566 "00000011" // /* MW 4 */ + 6567 "01110110" // /* MW 3 */ + 6568 "01000101" // /* MW 2 */ + 6569 "01101000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 578 27 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.src_ref 2 "conv2d_bf16.h" 1429 50 + 6570 "10111010" // VLDA.CONV.fp32.bf16 cmh4, [p3, dj2];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VSHUFFLE ex5, ex10, ex4, r22 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6571 "11101110" // /* MW 9 */ + 6572 "00101101" // /* MW 8 */ + 6573 "01101001" // /* MW 7 */ + 6574 "00000001" // /* MW 6 */ + 6575 "00010100" // /* MW 5 */ + 6576 "00010010" // /* MW 4 */ + 6577 "01110101" // /* MW 3 */ + 6578 "01001101" // /* MW 2 */ + 6579 "01101000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6580 "10111010" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex10, ex10, ex4, r23 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6581 "11101110" // /* MW 9 */ + 6582 "00101111" // /* MW 8 */ + 6583 "10101001" // /* MW 7 */ + 6584 "00000010" // /* MW 6 */ + 6585 "00010100" // /* MW 5 */ + 6586 "00010100" // /* MW 4 */ + 6587 "01110101" // /* MW 3 */ + 6588 "10000001" // /* MW 2 */ + 6589 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6590 "01100110" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex2, ex3, r22; VMAC.f dm0, dm0, ex5, ex11, r9 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6591 "01101001" // /* MW 11 */ + 6592 "00001011" // /* MW 10 */ + 6593 "01001000" // /* MW 9 */ + 6594 "11000010" // /* MW 8 */ + 6595 "11011011" // /* MW 7 */ + 6596 "00010001" // /* MW 6 */ + 6597 "00101010" // /* MW 5 */ + 6598 "00101000" // /* MW 4 */ + 6599 "01111010" // /* MW 3 */ + 6600 "00000001" // /* MW 2 */ + 6601 "01010101" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6602 "01001010" // VLDA.FILL.512 [p0, lf0, r24]; VSHUFFLE ex10, ex2, ex3, r23; VMAC.f dm1, dm1, ex10, ex11, r9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6603 "01101001" // /* MW 9 */ + 6604 "00110101" // /* MW 8 */ + 6605 "01001001" // /* MW 7 */ + 6606 "11000010" // /* MW 6 */ + 6607 "11011111" // /* MW 5 */ + 6608 "00010001" // /* MW 4 */ + 6609 "01110101" // /* MW 3 */ + 6610 "10000001" // /* MW 2 */ + 6611 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6612 "01001000" // VMAC.f dm2, dm2, ex4, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6613 "01101001" // /* MW 3 */ + 6614 "01001001" // /* MW 2 */ + 6615 "01001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id first + 6616 "01001000" // VMAC.f dm3, dm3, ex10, ex11, r9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6617 "01101001" // /* MW 3 */ + 6618 "01110101" // /* MW 2 */ + 6619 "01001011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.src_ref 2 "conv2d_bf16.h" 1437 26 first +.aggressive_scheduled_block_id 7 +.noswbrkpt + 6620 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex5, ex1, ex6, r23; VADD.f dm1, dm4, dm1, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6621 "00111101" // /* MW 9 */ + 6622 "10000100" // /* MW 8 */ + 6623 "10100001" // /* MW 7 */ + 6624 "11000110" // /* MW 6 */ + 6625 "01011111" // /* MW 5 */ + 6626 "10001011" // /* MW 4 */ + 6627 "10101010" // /* MW 3 */ + 6628 "00000000" // /* MW 2 */ + 6629 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 1436 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6630 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VADD.f dm0, dm4, dm0, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6631 "00111101" // /* MW 7 */ + 6632 "10000000" // /* MW 6 */ + 6633 "10100000" // /* MW 5 */ + 6634 "00000000" // /* MW 4 */ + 6635 "10010100" // /* MW 3 */ + 6636 "00000001" // /* MW 2 */ + 6637 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 1438 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6638 "01100010" // VLDB.POP.576 ex7, [p1, lf1, r25]; VADD.f dm2, dm4, dm2, r20 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6639 "00111101" // /* MW 7 */ + 6640 "10001000" // /* MW 6 */ + 6641 "10100010" // /* MW 5 */ + 6642 "00000000" // /* MW 4 */ + 6643 "11010100" // /* MW 3 */ + 6644 "00000001" // /* MW 2 */ + 6645 "00000011" // /* MW 1 */ +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 1439 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6646 "01001010" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0]; VADD.f dm3, dm4, dm3, r20 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6647 "00111101" // /* MW 9 */ + 6648 "10001100" // /* MW 8 */ + 6649 "10100011" // /* MW 7 */ + 6650 "00011101" // /* MW 6 */ + 6651 "00010100" // /* MW 5 */ + 6652 "00010010" // /* MW 4 */ + 6653 "01110101" // /* MW 3 */ + 6654 "00000001" // /* MW 2 */ + 6655 "01010101" // /* MW 1 */ +.label ZLS_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2032 +.src_ref 2 "conv2d_bf16.h" 792 8 first +.src_ref 2 "conv2d_bf16.h" 801 30 first +.begin_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 2 + 6656 "10110100" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex2, ex1, ex6, r22 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 6657 "10110111" // /* MW 5 */ + 6658 "00010110" // /* MW 4 */ + 6659 "10000010" // /* MW 3 */ + 6660 "10000010" // /* MW 2 */ + 6661 "10100010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 793 8 first +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6662 "01001010" // VLDB.FILL.512 [p1, lf1, r25]; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6663 "00001001" // /* MW 9 */ + 6664 "00101010" // /* MW 8 */ + 6665 "10011001" // /* MW 7 */ + 6666 "11000110" // /* MW 6 */ + 6667 "01011111" // /* MW 5 */ + 6668 "00111100" // /* MW 4 */ + 6669 "00101010" // /* MW 3 */ + 6670 "00101000" // /* MW 2 */ + 6671 "00001010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 795 30 first +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6672 "01001010" // VLDB.POP.576 ex1, [p1, lf1, r25]; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6673 "00001001" // /* MW 9 */ + 6674 "00000100" // /* MW 8 */ + 6675 "10011000" // /* MW 7 */ + 6676 "11000110" // /* MW 6 */ + 6677 "01011011" // /* MW 5 */ + 6678 "10111100" // /* MW 4 */ + 6679 "10101001" // /* MW 3 */ + 6680 "00000000" // /* MW 2 */ + 6681 "00000110" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 796 30 first +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6682 "01100010" // VLDB.POP.576 ex6, [p1, lf1, r25]; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6683 "00001001" // /* MW 7 */ + 6684 "01101000" // /* MW 6 */ + 6685 "10011011" // /* MW 5 */ + 6686 "00000000" // /* MW 4 */ + 6687 "10010100" // /* MW 3 */ + 6688 "00000001" // /* MW 2 */ + 6689 "00000011" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 794 8 first +.src_ref 2 "conv2d_bf16.h" 797 30 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6690 "01101110" // VLDA.FILL.512 [p0, lf0, r24]; VLDB.POP.576 ex7, [p1, lf1, r25];NOPS; NOPX; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 6691 "00001001" // /* MW 13 */ + 6692 "01000110" // /* MW 12 */ + 6693 "10011010" // /* MW 11 */ + 6694 "01101100" // /* MW 10 */ + 6695 "00000101" // /* MW 9 */ + 6696 "00000000" // /* MW 8 */ + 6697 "00000000" // /* MW 7 */ + 6698 "00000000" // /* MW 6 */ + 6699 "10101000" // /* MW 5 */ + 6700 "00000011" // /* MW 4 */ + 6701 "01110110" // /* MW 3 */ + 6702 "10000001" // /* MW 2 */ + 6703 "00000010" // /* MW 1 */ +.label ZLE_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2080 +.src_ref 3 "kernel_helpers.h" 978 11 first +.src_ref 2 "conv2d_bf16.h" 799 30 first +.src_ref 2 "conv2d_bf16.h" 802 30 first +.end_of_loop +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6704 "11100001" // VLDA.POP.576 ex0, [p0, lf0, r24, m5];VLDB.POP.576.3D ex8, [p1, lf1, r25, d0];NOPS; NOPX; VSHUFFLE ex5, ex1, ex6, r23; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6705 "00000000" // /* MW 15 */ + 6706 "00000000" // /* MW 14 */ + 6707 "11101000" // /* MW 13 */ + 6708 "10101111" // /* MW 12 */ + 6709 "01000101" // /* MW 11 */ + 6710 "00000001" // /* MW 10 */ + 6711 "00000000" // /* MW 9 */ + 6712 "00000000" // /* MW 8 */ + 6713 "01011011" // /* MW 7 */ + 6714 "00000001" // /* MW 6 */ + 6715 "00101000" // /* MW 5 */ + 6716 "00100100" // /* MW 4 */ + 6717 "01111010" // /* MW 3 */ + 6718 "00000001" // /* MW 2 */ + 6719 "01010101" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 801 30 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 6720 "11110110" // PADDA.3D [p0], d1; PADDB [p7], m6; MOVS p5, p7; VSHUFFLE ex2, ex1, ex6, r22 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6721 "11100000" // /* MW 11 */ + 6722 "10101101" // /* MW 10 */ + 6723 "10000101" // /* MW 9 */ + 6724 "00000000" // /* MW 8 */ + 6725 "10001011" // /* MW 7 */ + 6726 "10011100" // /* MW 6 */ + 6727 "00100101" // /* MW 5 */ + 6728 "10010111" // /* MW 4 */ + 6729 "11111111" // /* MW 3 */ + 6730 "00001100" // /* MW 2 */ + 6731 "00000111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first +.src_ref 2 "conv2d_bf16.h" 1517 32 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6732 "01100110" // PADDB [p7], m4; MOVS p4, p7; VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6733 "00001001" // /* MW 11 */ + 6734 "00101010" // /* MW 10 */ + 6735 "10011001" // /* MW 9 */ + 6736 "11000110" // /* MW 8 */ + 6737 "01011111" // /* MW 7 */ + 6738 "00111100" // /* MW 6 */ + 6739 "00100010" // /* MW 5 */ + 6740 "00010111" // /* MW 4 */ + 6741 "01101111" // /* MW 3 */ + 6742 "10010001" // /* MW 2 */ + 6743 "10010011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first +.src_ref 2 "conv2d_bf16.h" 1518 37 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6744 "01100110" // PADDB [p7], m6; MOVS p3, p7; VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 6745 "00001001" // /* MW 11 */ + 6746 "00000100" // /* MW 10 */ + 6747 "10011000" // /* MW 9 */ + 6748 "11000110" // /* MW 8 */ + 6749 "01011011" // /* MW 7 */ + 6750 "10111100" // /* MW 6 */ + 6751 "00100001" // /* MW 5 */ + 6752 "10010111" // /* MW 4 */ + 6753 "01101111" // /* MW 3 */ + 6754 "10010001" // /* MW 2 */ + 6755 "01110011" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 +.src_ref 2 "conv2d_bf16.h" 810 26 first +.aggressive_scheduled_block_id 7 +.nohwbrkpt +.noswbrkpt + 6756 "01100010" // MOV dj2, r17; VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6757 "00001001" // /* MW 7 */ + 6758 "01101000" // /* MW 6 */ + 6759 "10011011" // /* MW 5 */ + 6760 "11100110" // /* MW 4 */ + 6761 "10100000" // /* MW 3 */ + 6762 "10001000" // /* MW 2 */ + 6763 "00000010" // /* MW 1 */ +.src_ref 6 "aie_core.h" 143 15 first +.src_ref 2 "conv2d_bf16.h" 809 26 first +.src_ref 2 "conv2d_bf16.h" 1428 39 +.aggressive_scheduled_block_id 7 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 6764 "01001010" // PADDB.3D [p1], d2; MOV r2, dc5; VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6765 "00001001" // /* MW 9 */ + 6766 "01000110" // /* MW 8 */ + 6767 "10011010" // /* MW 7 */ + 6768 "11100110" // /* MW 6 */ + 6769 "10000000" // /* MW 5 */ + 6770 "10011011" // /* MW 4 */ + 6771 "00100000" // /* MW 3 */ + 6772 "10110111" // /* MW 2 */ + 6773 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 801 30 first + 6774 "11011000" // VSHUFFLE ex2, ex1, ex6, r22 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6775 "01011011" // /* MW 3 */ + 6776 "00001011" // /* MW 2 */ + 6777 "00011001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 802 30 first + 6778 "11011000" // VSHUFFLE ex5, ex1, ex6, r23 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6779 "01011111" // /* MW 3 */ + 6780 "10001011" // /* MW 2 */ + 6781 "00011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 803 30 first +.src_ref 2 "conv2d_bf16.h" 807 26 first + 6782 "01100010" // VSHUFFLE ex3, ex7, ex8, r22; VMAC.f dm0, dm0, ex2, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6783 "00001001" // /* MW 7 */ + 6784 "00000100" // /* MW 6 */ + 6785 "10011000" // /* MW 5 */ + 6786 "11000110" // /* MW 4 */ + 6787 "01011011" // /* MW 3 */ + 6788 "10111100" // /* MW 2 */ + 6789 "00000001" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 804 30 first +.src_ref 2 "conv2d_bf16.h" 808 26 first + 6790 "01100010" // VSHUFFLE ex4, ex7, ex8, r23; VMAC.f dm1, dm1, ex5, ex0, r19 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6791 "00001001" // /* MW 7 */ + 6792 "00101010" // /* MW 6 */ + 6793 "10011001" // /* MW 5 */ + 6794 "11000110" // /* MW 4 */ + 6795 "01011111" // /* MW 3 */ + 6796 "00111100" // /* MW 2 */ + 6797 "00000010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 809 26 first + 6798 "01001000" // VMAC.f dm2, dm2, ex3, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6799 "00001001" // /* MW 3 */ + 6800 "01000110" // /* MW 2 */ + 6801 "10011010" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 810 26 first + 6802 "01001000" // VMAC.f dm3, dm3, ex4, ex0, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6803 "00001001" // /* MW 3 */ + 6804 "01101000" // /* MW 2 */ + 6805 "10011011" // /* MW 1 */ + 6806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6807 "00000000" // /* MW 1 */ + 6808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6809 "00000000" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first + 6810 "00011000" // VCONV.bf16.fp32 x10, cml0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6811 "00010110" // /* MW 3 */ + 6812 "00010000" // /* MW 2 */ + 6813 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 + 6814 "00011000" // VCONV.bf16.fp32 x11, cml1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6815 "10010110" // /* MW 3 */ + 6816 "10010000" // /* MW 2 */ + 6817 "00001101" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 2 "conv2d_bf16.h" 1455 20 first + 6818 "00111010" // VCONV.bf16.fp32 x1, cmh1; JZ r21, #6928 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=6928 delay_slots=5 */ + 6819 "01100001" // /* MW 9 */ + 6820 "00000000" // /* MW 8 */ + 6821 "00000000" // /* MW 7 */ + 6822 "01100010" // /* MW 6 */ + 6823 "00000011" // /* MW 5 */ + 6824 "00101010" // /* MW 4 */ + 6825 "11000000" // /* MW 3 */ + 6826 "00011010" // /* MW 2 */ + 6827 "00010010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 first +.src_ref 5 "accum.hpp" 1110 102 first +.delay_slot + 6828 "00011000" // VCONV.bf16.fp32 x6, cmh0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6829 "01010110" // /* MW 3 */ + 6830 "00010000" // /* MW 2 */ + 6831 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6832 "00011000" // VCONV.bf16.fp32 x2, cml3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6833 "10010110" // /* MW 3 */ + 6834 "00010001" // /* MW 2 */ + 6835 "00001001" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6836 "00011000" // VCONV.bf16.fp32 x7, cmh3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6837 "11010110" // /* MW 3 */ + 6838 "10010001" // /* MW 2 */ + 6839 "00001011" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6840 "00011000" // VCONV.bf16.fp32 x5, cml2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6841 "00010110" // /* MW 3 */ + 6842 "10010001" // /* MW 2 */ + 6843 "00001010" // /* MW 1 */ +.src_ref 5 "accum.hpp" 149 115 +.src_ref 5 "accum.hpp" 1110 102 +.delay_slot + 6844 "00011000" // VCONV.bf16.fp32 x8, cmh2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6845 "01010110" // /* MW 3 */ + 6846 "00010001" // /* MW 2 */ + 6847 "00001100" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first + 6848 "11111000" // VMAX_LT.bf16 x11, r16, x11, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6849 "11101100" // /* MW 3 */ + 6850 "11011100" // /* MW 2 */ + 6851 "00011101" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 + 6852 "11111000" // VMAX_LT.bf16 x1, r16, x1, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6853 "11101100" // /* MW 3 */ + 6854 "10001100" // /* MW 2 */ + 6855 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 + 6856 "00000010" // VST x11, [p5, dj3]; VMAX_LT.bf16 x10, r16, x10, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6857 "01110000" // /* MW 7 */ + 6858 "01110110" // /* MW 6 */ + 6859 "10101010" // /* MW 5 */ + 6860 "00000010" // /* MW 4 */ + 6861 "01100000" // /* MW 3 */ + 6862 "01011010" // /* MW 2 */ + 6863 "10101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first + 6864 "00000010" // VST x1, [p4, #64]; VMAX_LT.bf16 x1, r16, x6, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6865 "01110000" // /* MW 7 */ + 6866 "01110110" // /* MW 6 */ + 6867 "01011010" // /* MW 5 */ + 6868 "00000000" // /* MW 4 */ + 6869 "01100000" // /* MW 3 */ + 6870 "10001010" // /* MW 2 */ + 6871 "10000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first + 6872 "00111010" // VST x10, [p5]; J #6960 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=6960 delay_slots=5 */ + 6873 "00100001" // /* MW 9 */ + 6874 "00000000" // /* MW 8 */ + 6875 "00000000" // /* MW 7 */ + 6876 "01100110" // /* MW 6 */ + 6877 "00000011" // /* MW 5 */ + 6878 "00000000" // /* MW 4 */ + 6879 "01100000" // /* MW 3 */ + 6880 "11010010" // /* MW 2 */ + 6881 "10100000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6882 "00000010" // VST x1, [p5, #64]; VMAX_LT.bf16 x10, r16, x2, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6883 "01110000" // /* MW 7 */ + 6884 "01110110" // /* MW 6 */ + 6885 "10001010" // /* MW 5 */ + 6886 "00000010" // /* MW 4 */ + 6887 "01100000" // /* MW 3 */ + 6888 "10001010" // /* MW 2 */ + 6889 "10100010" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6890 "11111000" // VMAX_LT.bf16 x1, r16, x7, x9 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6891 "11101100" // /* MW 3 */ + 6892 "10111100" // /* MW 2 */ + 6893 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.src_ref 4 "max_min.hpp" 20 104 +.delay_slot + 6894 "00000010" // VST x10, [p3, dj3]; VMAX_LT.bf16 x10, r16, x5, x9 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6895 "01110000" // /* MW 7 */ + 6896 "01110110" // /* MW 6 */ + 6897 "10010110" // /* MW 5 */ + 6898 "00000010" // /* MW 4 */ + 6899 "01100000" // /* MW 3 */ + 6900 "01010010" // /* MW 2 */ + 6901 "01101100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 6902 "10111010" // NOPA; VST x1, [p7, #64]; VMAX_LT.bf16 x8, r16, x8, x9 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 6903 "01110010" // /* MW 9 */ + 6904 "01110110" // /* MW 8 */ + 6905 "00100010" // /* MW 7 */ + 6906 "00000010" // /* MW 6 */ + 6907 "01010011" // /* MW 5 */ + 6908 "00010100" // /* MW 4 */ + 6909 "11110111" // /* MW 3 */ + 6910 "00101100" // /* MW 2 */ + 6911 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 first +.delay_slot + 6912 "11100001" // NOPA; NOPB; VST x10, [p4, dj7]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6913 "00000000" // /* MW 15 */ + 6914 "00000000" // /* MW 14 */ + 6915 "01111000" // /* MW 13 */ + 6916 "10100101" // /* MW 12 */ + 6917 "00000001" // /* MW 11 */ + 6918 "00000000" // /* MW 10 */ + 6919 "00000000" // /* MW 9 */ + 6920 "00000000" // /* MW 8 */ + 6921 "10010011" // /* MW 7 */ + 6922 "11100010" // /* MW 6 */ + 6923 "00100100" // /* MW 5 */ + 6924 "00000000" // /* MW 4 */ + 6925 "11110000" // /* MW 3 */ + 6926 "00101100" // /* MW 2 */ + 6927 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2304 +.src_ref 4 "vector.hpp" 1152 43 + 6928 "00011000" // VST.CONV.bf16.fp32 cml1, [p5, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6929 "10100011" // /* MW 3 */ + 6930 "01100000" // /* MW 2 */ + 6931 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6932 "00011000" // VST.CONV.bf16.fp32 cmh1, [p4, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6933 "11100011" // /* MW 3 */ + 6934 "00010100" // /* MW 2 */ + 6935 "00001100" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6936 "00011000" // VST.CONV.bf16.fp32 cml0, [p5] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6937 "00100011" // /* MW 3 */ + 6938 "00000100" // /* MW 2 */ + 6939 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6940 "00011000" // VST.CONV.bf16.fp32 cmh0, [p5, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6941 "01100011" // /* MW 3 */ + 6942 "00010100" // /* MW 2 */ + 6943 "00001101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6944 "00011000" // VST.CONV.bf16.fp32 cml3, [p3, dj3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6945 "10100011" // /* MW 3 */ + 6946 "01100001" // /* MW 2 */ + 6947 "00001011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6948 "00011000" // VST.CONV.bf16.fp32 cmh3, [p7, #64] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6949 "11100011" // /* MW 3 */ + 6950 "00010101" // /* MW 2 */ + 6951 "00001111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1152 43 + 6952 "00000010" // VST.CONV.bf16.fp32 cml2, [p4, dj7]; NOPM /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 6953 "01110000" // /* MW 7 */ + 6954 "10100101" // /* MW 6 */ + 6955 "00000001" // /* MW 5 */ + 6956 "00000000" // /* MW 4 */ + 6957 "01100000" // /* MW 3 */ + 6958 "00100100" // /* MW 2 */ + 6959 "10011100" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2336 +.src_ref 4 "vector.hpp" 1152 43 +.src_ref 2 "conv2d_bf16.h" 1337 12 first + 6960 "01011100" // VST x8, [p3, #64]; JNZD r29, r29, p2 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 indirect absolute delay_slots=5 */ + 6961 "01000000" // /* MW 5 */ + 6962 "11110101" // /* MW 4 */ + 6963 "01101110" // /* MW 3 */ + 6964 "11000010" // /* MW 2 */ + 6965 "01100010" // /* MW 1 */ +.delay_slot + 6966 "00011000" // PADDB [p7], m4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6967 "10010000" // /* MW 3 */ + 6968 "10001011" // /* MW 2 */ + 6969 "00111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6971 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6973 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 6975 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 6976 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 6977 "00000000" // /* MW 15 */ + 6978 "00000000" // /* MW 14 */ + 6979 "01111000" // /* MW 13 */ + 6980 "10100101" // /* MW 12 */ + 6981 "00000001" // /* MW 11 */ + 6982 "00000000" // /* MW 10 */ + 6983 "00000000" // /* MW 9 */ + 6984 "00000000" // /* MW 8 */ + 6985 "01011011" // /* MW 7 */ + 6986 "00000001" // /* MW 6 */ + 6987 "00100000" // /* MW 5 */ + 6988 "00000000" // /* MW 4 */ + 6989 "11110000" // /* MW 3 */ + 6990 "00101100" // /* MW 2 */ + 6991 "00000000" // /* MW 1 */ +.label TGT_F_Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params_2368 +.loop_nesting 0 + 6992 "00011000" // LDA r15, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6993 "11110001" // /* MW 3 */ + 6994 "11101101" // /* MW 2 */ + 6995 "00000111" // /* MW 1 */ + 6996 "00011000" // LDA r12, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 6997 "10010001" // /* MW 3 */ + 6998 "11110001" // /* MW 2 */ + 6999 "00000111" // /* MW 1 */ + 7000 "00011000" // LDA r9, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7001 "00110001" // /* MW 3 */ + 7002 "11110101" // /* MW 2 */ + 7003 "00000111" // /* MW 1 */ + 7004 "00011000" // LDA p6, [sp, #-24] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7005 "00011001" // /* MW 3 */ + 7006 "11101011" // /* MW 2 */ + 7007 "00000111" // /* MW 1 */ + 7008 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7009 "10011001" // /* MW 3 */ + 7010 "11111011" // /* MW 2 */ + 7011 "00000111" // /* MW 1 */ + 7012 "00011000" // LDA r14, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7013 "11010001" // /* MW 3 */ + 7014 "11111101" // /* MW 2 */ + 7015 "00000111" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 first + 7016 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7017 "00000000" // /* MW 3 */ + 7018 "00101000" // /* MW 2 */ + 7019 "00010000" // /* MW 1 */ +.src_ref 2 "conv2d_bf16.h" 1873 +.delay_slot + 7020 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7021 "00000001" // /* MW 5 */ + 7022 "00000000" // /* MW 4 */ + 7023 "00000000" // /* MW 3 */ + 7024 "11110000" // /* MW 2 */ + 7025 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7031 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7032 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params__end +.label __Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params___func_end0 + 7033 "00000000" // /* MW 1 */ +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function conv2d_maxpool _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 74 first +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 81 4 +.function_start + 7040 "10111010" // MOVA r0, #1; MOVXM p4, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7041 "00010000" // /* MW 9 */ + 7042 "00100000" // /* MW 8 */ + 7043 "00110010" // /* MW 7 */ + 7044 "11110010" // /* MW 6 */ + 7045 "00000001" // /* MW 5 */ + 7046 "00000000" // /* MW 4 */ + 7047 "00000000" // /* MW 3 */ + 7048 "00100000" // /* MW 2 */ + 7049 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 first +.src_ref 7 "superkernels.cpp" 81 4 + 7050 "10111010" // LDA r16, [p4]; MOVX r1, #0; MOV r2, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7051 "01111000" // /* MW 9 */ + 7052 "11010000" // /* MW 8 */ + 7053 "01001011" // /* MW 7 */ + 7054 "00001000" // /* MW 6 */ + 7055 "00010000" // /* MW 5 */ + 7056 "00000000" // /* MW 4 */ + 7057 "11010000" // /* MW 3 */ + 7058 "11000010" // /* MW 2 */ + 7059 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 74 + 7060 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7061 "00000001" // /* MW 5 */ + 7062 "00000000" // /* MW 4 */ + 7063 "00000000" // /* MW 3 */ + 7064 "00001000" // /* MW 2 */ + 7065 "00000000" // /* MW 1 */ + 7066 "10011000" // ST r2, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7067 "01010101" // /* MW 3 */ + 7068 "11110000" // /* MW 2 */ + 7069 "00001111" // /* MW 1 */ + 7070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7071 "00000000" // /* MW 1 */ + 7072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7073 "00000000" // /* MW 1 */ + 7074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7075 "00000000" // /* MW 1 */ + 7076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7077 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 79 6 +.src_ref 7 "superkernels.cpp" 79 16 + 7078 "10000100" // JNZ r16, #7248 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7248 delay_slots=5 */ + 7079 "00000001" // /* MW 5 */ + 7080 "01000000" // /* MW 4 */ + 7081 "00101000" // /* MW 3 */ + 7082 "00001110" // /* MW 2 */ + 7083 "10000000" // /* MW 1 */ +.delay_slot + 7084 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7085 "10011101" // /* MW 3 */ + 7086 "11111011" // /* MW 2 */ + 7087 "00001111" // /* MW 1 */ +.delay_slot + 7088 "10011000" // ST p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7089 "00011101" // /* MW 3 */ + 7090 "11111111" // /* MW 2 */ + 7091 "00001111" // /* MW 1 */ +.delay_slot + 7092 "10011000" // ST p3, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7093 "10011101" // /* MW 3 */ + 7094 "11101101" // /* MW 2 */ + 7095 "00001111" // /* MW 1 */ +.delay_slot + 7096 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7097 "00111101" // /* MW 3 */ + 7098 "11110100" // /* MW 2 */ + 7099 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 7100 "01000100" // MOVXM r15, #509504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7101 "10000000" // /* MW 5 */ + 7102 "10101100" // /* MW 4 */ + 7103 "11000111" // /* MW 3 */ + 7104 "00000111" // /* MW 2 */ + 7105 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7106 "00111010" // MOVS p6, p1; MOVXM p7, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7107 "00010001" // /* MW 9 */ + 7108 "00110010" // /* MW 8 */ + 7109 "10110010" // /* MW 7 */ + 7110 "11110011" // /* MW 6 */ + 7111 "00000001" // /* MW 5 */ + 7112 "00000000" // /* MW 4 */ + 7113 "01100000" // /* MW 3 */ + 7114 "10010001" // /* MW 2 */ + 7115 "11010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7116 "01110110" // ST.s8 r16, [p7]; MOVS p1, r15; MOVXM p7, #509024 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7117 "00010000" // /* MW 11 */ + 7118 "00110000" // /* MW 10 */ + 7119 "10110010" // /* MW 9 */ + 7120 "11110011" // /* MW 8 */ + 7121 "00000001" // /* MW 7 */ + 7122 "00000000" // /* MW 6 */ + 7123 "00001011" // /* MW 5 */ + 7124 "10001111" // /* MW 4 */ + 7125 "11100001" // /* MW 3 */ + 7126 "11000000" // /* MW 2 */ + 7127 "11100000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7129 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7131 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 7132 "00000100" // JL #2912 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=2912 delay_slots=5 */ + 7133 "00000001" // /* MW 5 */ + 7134 "00000000" // /* MW 4 */ + 7135 "10110000" // /* MW 3 */ + 7136 "00000101" // /* MW 2 */ + 7137 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7139 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7140 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7141 "00110001" // /* MW 3 */ + 7142 "00100000" // /* MW 2 */ + 7143 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 7144 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7145 "00000101" // /* MW 3 */ + 7146 "00100000" // /* MW 2 */ + 7147 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 7148 "00000010" // ST r16, [p7]; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7149 "01110000" // /* MW 7 */ + 7150 "01100000" // /* MW 6 */ + 7151 "10110000" // /* MW 5 */ + 7152 "00000011" // /* MW 4 */ + 7153 "00110000" // /* MW 3 */ + 7154 "11000010" // /* MW 2 */ + 7155 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.delay_slot + 7156 "11110110" // NOPA; NOPB; NOPS; MOV p0, p2 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7157 "01110000" // /* MW 11 */ + 7158 "01100000" // /* MW 10 */ + 7159 "00110010" // /* MW 9 */ + 7160 "00000000" // /* MW 8 */ + 7161 "01011011" // /* MW 7 */ + 7162 "00000001" // /* MW 6 */ + 7163 "00100000" // /* MW 5 */ + 7164 "00000000" // /* MW 4 */ + 7165 "11110000" // /* MW 3 */ + 7166 "00101100" // /* MW 2 */ + 7167 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 81 4 +.return_address + 7168 "10011000" // ADD.NC p2, r15, #11 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7169 "10000101" // /* MW 3 */ + 7170 "01100111" // /* MW 2 */ + 7171 "00011010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 +.src_ref 7 "superkernels.cpp" 87 35 first + 7172 "10111010" // LDA.u8 r16, [p2], #7; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7173 "00010000" // /* MW 9 */ + 7174 "00100010" // /* MW 8 */ + 7175 "10110010" // /* MW 7 */ + 7176 "11110000" // /* MW 6 */ + 7177 "00000001" // /* MW 5 */ + 7178 "00000000" // /* MW 4 */ + 7179 "01010000" // /* MW 3 */ + 7180 "11000001" // /* MW 2 */ + 7181 "01001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 37 first +.src_ref 7 "superkernels.cpp" 89 13 + 7182 "10111010" // LDA.u16 r19, [p2], #2; MOVXM p0, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7183 "00010000" // /* MW 9 */ + 7184 "00101110" // /* MW 8 */ + 7185 "00110010" // /* MW 7 */ + 7186 "11110000" // /* MW 6 */ + 7187 "00000001" // /* MW 5 */ + 7188 "00000000" // /* MW 4 */ + 7189 "01010000" // /* MW 3 */ + 7190 "11001111" // /* MW 2 */ + 7191 "01000011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 73 + 7192 "10011000" // LDA.u16 r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7193 "00111010" // /* MW 3 */ + 7194 "00000110" // /* MW 2 */ + 7195 "00000010" // /* MW 1 */ + 7196 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7197 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 110 + 7198 "10011000" // LDA.u16 r18, [p2, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7199 "01011010" // /* MW 3 */ + 7200 "00010110" // /* MW 2 */ + 7201 "00000010" // /* MW 1 */ + 7202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7203 "00000000" // /* MW 1 */ + 7204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7205 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 87 19 first +.src_ref 7 "superkernels.cpp" 113 2 + 7206 "00000010" // ST r16, [p1]; MOV p1, p6 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 7207 "01110000" // /* MW 7 */ + 7208 "01100000" // /* MW 6 */ + 7209 "10110110" // /* MW 5 */ + 7210 "00000000" // /* MW 4 */ + 7211 "00110000" // /* MW 3 */ + 7212 "11000010" // /* MW 2 */ + 7213 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 57 first + 7214 "10011000" // MUL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7215 "00001111" // /* MW 3 */ + 7216 "11100001" // /* MW 2 */ + 7217 "00010100" // /* MW 1 */ + 7218 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7219 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 88 94 + 7220 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7221 "00001111" // /* MW 3 */ + 7222 "01100001" // /* MW 2 */ + 7223 "00010100" // /* MW 1 */ + 7224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7225 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 28 first + 7226 "10011000" // MUL r16, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7227 "00001111" // /* MW 3 */ + 7228 "10100001" // /* MW 2 */ + 7229 "00010100" // /* MW 1 */ + 7230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7231 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 89 13 +.src_ref 7 "superkernels.cpp" 113 2 + 7232 "11100001" // NOPA; NOPB; ST r16, [p0]; NOPX; MOV p0, p7; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 7233 "00000000" // /* MW 15 */ + 7234 "00000000" // /* MW 14 */ + 7235 "01111000" // /* MW 13 */ + 7236 "01100000" // /* MW 12 */ + 7237 "00110111" // /* MW 11 */ + 7238 "00000000" // /* MW 10 */ + 7239 "00000000" // /* MW 9 */ + 7240 "10000000" // /* MW 8 */ + 7241 "00010001" // /* MW 7 */ + 7242 "00000110" // /* MW 6 */ + 7243 "00100000" // /* MW 5 */ + 7244 "00000000" // /* MW 4 */ + 7245 "11110000" // /* MW 3 */ + 7246 "00101100" // /* MW 2 */ + 7247 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 106 12 +.src_ref 7 "superkernels.cpp" 113 2 +.src_ref 7 "superkernels.cpp" 117 6 +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 1 "io_buffer_main.h" 218 49 +.src_ref 1 "io_buffer_main.h" 324 51 + 7248 "10111010" // LDA r15, [sp, #-20]; MOVXM p6, #509000 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7249 "00010000" // /* MW 9 */ + 7250 "00100100" // /* MW 8 */ + 7251 "00110010" // /* MW 7 */ + 7252 "11110011" // /* MW 6 */ + 7253 "00000001" // /* MW 5 */ + 7254 "00000000" // /* MW 4 */ + 7255 "00100000" // /* MW 3 */ + 7256 "10111110" // /* MW 2 */ + 7257 "11111101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.src_ref 7 "superkernels.cpp" 108 13 + 7258 "10111010" // LDA r16, [p6]; MOVXM p2, #509004 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7259 "00010000" // /* MW 9 */ + 7260 "00100110" // /* MW 8 */ + 7261 "00110010" // /* MW 7 */ + 7262 "11110001" // /* MW 6 */ + 7263 "00000001" // /* MW 5 */ + 7264 "00000000" // /* MW 4 */ + 7265 "11010000" // /* MW 3 */ + 7266 "11000010" // /* MW 2 */ + 7267 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 +.src_ref 7 "superkernels.cpp" 108 13 first +.src_ref 7 "superkernels.cpp" 139 6 +.src_ref 7 "superkernels.cpp" 140 14 + 7268 "10111010" // LDA r17, [p2]; MOVXM p7, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7269 "00010000" // /* MW 9 */ + 7270 "00100000" // /* MW 8 */ + 7271 "10110010" // /* MW 7 */ + 7272 "11110011" // /* MW 6 */ + 7273 "00000001" // /* MW 5 */ + 7274 "00000000" // /* MW 4 */ + 7275 "11010000" // /* MW 3 */ + 7276 "11000110" // /* MW 2 */ + 7277 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first + 7278 "10011000" // LDA r18, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7279 "01010110" // /* MW 3 */ + 7280 "00000110" // /* MW 2 */ + 7281 "00000111" // /* MW 1 */ + 7282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7283 "00000000" // /* MW 1 */ + 7284 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7285 "00000000" // /* MW 1 */ + 7286 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7287 "00000000" // /* MW 1 */ + 7288 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7289 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 110 6 first +.src_ref 7 "superkernels.cpp" 110 17 first + 7290 "10000100" // JNZ r16, #7376 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7376 delay_slots=5 */ + 7291 "00000001" // /* MW 5 */ + 7292 "01000000" // /* MW 4 */ + 7293 "01101000" // /* MW 3 */ + 7294 "00001110" // /* MW 2 */ + 7295 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 108 13 first +.delay_slot + 7296 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7297 "00000111" // /* MW 3 */ + 7298 "01100010" // /* MW 2 */ + 7299 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.src_ref 7 "superkernels.cpp" 108 13 +.delay_slot + 7300 "01011100" // ST r17, [p2]; ADD r17, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7301 "00001110" // /* MW 5 */ + 7302 "01000100" // /* MW 4 */ + 7303 "00111001" // /* MW 3 */ + 7304 "11000110" // /* MW 2 */ + 7305 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 first +.delay_slot + 7306 "00011000" // ADD r19, r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7307 "00000111" // /* MW 3 */ + 7308 "00100110" // /* MW 2 */ + 7309 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 106 12 +.delay_slot + 7310 "10011000" // ST r19, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7311 "01110001" // /* MW 3 */ + 7312 "00000110" // /* MW 2 */ + 7313 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 107 11 first +.delay_slot + 7314 "10011000" // ST r17, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7315 "00110001" // /* MW 3 */ + 7316 "00000110" // /* MW 2 */ + 7317 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 7318 "00011000" // ADD.NC p2, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7319 "10000110" // /* MW 3 */ + 7320 "01100111" // /* MW 2 */ + 7321 "00011010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 7322 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7323 "01110110" // /* MW 3 */ + 7324 "11111111" // /* MW 2 */ + 7325 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 7326 "10011000" // LDA r16, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7327 "00010110" // /* MW 3 */ + 7328 "11111110" // /* MW 2 */ + 7329 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 7330 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7331 "00110110" // /* MW 3 */ + 7332 "11111110" // /* MW 2 */ + 7333 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 7334 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7335 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 7336 "10011000" // LDA r16, [p2, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7337 "00010110" // /* MW 3 */ + 7338 "01000110" // /* MW 2 */ + 7339 "00000010" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7340 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7341 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7342 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7343 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7344 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7345 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7346 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7347 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 7348 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7349 "00000010" // /* MW 3 */ + 7350 "01100001" // /* MW 2 */ + 7351 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7352 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7353 "00010001" // /* MW 3 */ + 7354 "00000110" // /* MW 2 */ + 7355 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 7356 "00011000" // MOVX r17, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7357 "11111101" // /* MW 3 */ + 7358 "11100010" // /* MW 2 */ + 7359 "00010111" // /* MW 1 */ + 7360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7361 "00000000" // /* MW 1 */ + 7362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7363 "00000000" // /* MW 1 */ + 7364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7365 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 7366 "01111010" // NOPA; NOPS; ACQ r16, r17 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7367 "00011000" // /* MW 9 */ + 7368 "00010011" // /* MW 8 */ + 7369 "00000100" // /* MW 7 */ + 7370 "00000000" // /* MW 6 */ + 7371 "01011011" // /* MW 5 */ + 7372 "00000001" // /* MW 4 */ + 7373 "11110000" // /* MW 3 */ + 7374 "00101100" // /* MW 2 */ + 7375 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_336 +.src_ref 7 "superkernels.cpp" 113 2 first +.no_stack_arguments + 7376 "00000100" // JL #4624 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=4624 delay_slots=5 */ + 7377 "00000001" // /* MW 5 */ + 7378 "00000000" // /* MW 4 */ + 7379 "00001000" // /* MW 3 */ + 7380 "00001001" // /* MW 2 */ + 7381 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7382 "01000100" // MOVXM p3, #509504 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7383 "10000000" // /* MW 5 */ + 7384 "11001100" // /* MW 4 */ + 7385 "11000110" // /* MW 3 */ + 7386 "00000111" // /* MW 2 */ + 7387 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7388 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7389 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7390 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7391 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7393 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 113 2 +.delay_slot + 7394 "00101110" // NOPA; NOPS; MOV p2, r15; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7395 "00011100" // /* MW 13 */ + 7396 "00000000" // /* MW 12 */ + 7397 "00000000" // /* MW 11 */ + 7398 "00000111" // /* MW 10 */ + 7399 "00111101" // /* MW 9 */ + 7400 "01010011" // /* MW 8 */ + 7401 "00000000" // /* MW 7 */ + 7402 "00000000" // /* MW 6 */ + 7403 "10110110" // /* MW 5 */ + 7404 "00000010" // /* MW 4 */ + 7405 "11110000" // /* MW 3 */ + 7406 "00101100" // /* MW 2 */ + 7407 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 first +.src_ref 7 "superkernels.cpp" 117 20 +.return_address + 7408 "10111010" // LDA r16, [p6]; MOVXM p1, #508996 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7409 "00010000" // /* MW 9 */ + 7410 "00100010" // /* MW 8 */ + 7411 "10110010" // /* MW 7 */ + 7412 "11110000" // /* MW 6 */ + 7413 "00000001" // /* MW 5 */ + 7414 "00000000" // /* MW 4 */ + 7415 "11010000" // /* MW 3 */ + 7416 "11000010" // /* MW 2 */ + 7417 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 20 + 7418 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7419 "00110110" // /* MW 3 */ + 7420 "00000110" // /* MW 2 */ + 7421 "00000001" // /* MW 1 */ + 7422 "00011000" // LDA r0, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7423 "00010001" // /* MW 3 */ + 7424 "11110000" // /* MW 2 */ + 7425 "00000111" // /* MW 1 */ + 7426 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7427 "00000000" // /* MW 1 */ + 7428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7429 "00000000" // /* MW 1 */ + 7430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7431 "00000000" // /* MW 1 */ + 7432 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7433 "00000000" // /* MW 1 */ + 7434 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7435 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 17 + 7436 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7437 "00001000" // /* MW 3 */ + 7438 "01100001" // /* MW 2 */ + 7439 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 117 6 + 7440 "10000100" // JNZ r16, #7520 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7520 delay_slots=5 */ + 7441 "00000001" // /* MW 5 */ + 7442 "01000000" // /* MW 4 */ + 7443 "10110000" // /* MW 3 */ + 7444 "00001110" // /* MW 2 */ + 7445 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 +.src_ref 7 "superkernels.cpp" 140 14 +.delay_slot + 7446 "00011000" // MOVX r24, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7447 "00000001" // /* MW 3 */ + 7448 "00110000" // /* MW 2 */ + 7449 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7450 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7451 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7452 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7453 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7454 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7455 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7456 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7457 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 7458 "00100100" // MOVX r16, #1; ADD.NC p1, r15, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7459 "00010100" // /* MW 5 */ + 7460 "11001111" // /* MW 4 */ + 7461 "10100010" // /* MW 3 */ + 7462 "00000000" // /* MW 2 */ + 7463 "00000100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 7464 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7465 "00110110" // /* MW 3 */ + 7466 "00000110" // /* MW 2 */ + 7467 "00000001" // /* MW 1 */ + 7468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7469 "00000000" // /* MW 1 */ + 7470 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7471 "00000000" // /* MW 1 */ + 7472 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7473 "00000000" // /* MW 1 */ + 7474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7475 "00000000" // /* MW 1 */ + 7476 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7477 "00000000" // /* MW 1 */ + 7478 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7479 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 7480 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7481 "00001000" // /* MW 3 */ + 7482 "01010001" // /* MW 2 */ + 7483 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 136 15 first +.src_ref 1 "io_buffer_main.h" 327 40 first + 7484 "00001100" // LDA r17, [p1, #-8]; ST r24, [p6] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7485 "00100011" // /* MW 5 */ + 7486 "00001110" // /* MW 4 */ + 7487 "11011100" // /* MW 3 */ + 7488 "11000110" // /* MW 2 */ + 7489 "00111100" // /* MW 1 */ + 7490 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7491 "00000000" // /* MW 1 */ + 7492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7493 "00000000" // /* MW 1 */ + 7494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7495 "00000000" // /* MW 1 */ + 7496 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7497 "00000000" // /* MW 1 */ + 7498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7499 "00000000" // /* MW 1 */ + 7500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7501 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 7502 "10011000" // SUB r16, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7503 "00010001" // /* MW 3 */ + 7504 "00100001" // /* MW 2 */ + 7505 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 7506 "00101110" // NOPA; ST r16, [p1, #-8]; NOPM; NOPV /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 7507 "00011100" // /* MW 13 */ + 7508 "00000000" // /* MW 12 */ + 7509 "00000000" // /* MW 11 */ + 7510 "01010111" // /* MW 10 */ + 7511 "00011010" // /* MW 9 */ + 7512 "01000000" // /* MW 8 */ + 7513 "00000000" // /* MW 7 */ + 7514 "00000000" // /* MW 6 */ + 7515 "00100011" // /* MW 5 */ + 7516 "11001100" // /* MW 4 */ + 7517 "11110011" // /* MW 3 */ + 7518 "00101100" // /* MW 2 */ + 7519 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_480 +.src_ref 7 "superkernels.cpp" 139 6 first +.src_ref 7 "superkernels.cpp" 139 19 + 7520 "10111010" // LDA r16, [p7]; MOVXM p6, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7521 "00010000" // /* MW 9 */ + 7522 "00101110" // /* MW 8 */ + 7523 "00110010" // /* MW 7 */ + 7524 "11110011" // /* MW 6 */ + 7525 "00000001" // /* MW 5 */ + 7526 "00000000" // /* MW 4 */ + 7527 "11010000" // /* MW 3 */ + 7528 "11000010" // /* MW 2 */ + 7529 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 19 + 7530 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7531 "00110110" // /* MW 3 */ + 7532 "00000110" // /* MW 2 */ + 7533 "00000110" // /* MW 1 */ + 7534 "00011000" // LDA p1, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7535 "10011001" // /* MW 3 */ + 7536 "11111000" // /* MW 2 */ + 7537 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 + 7538 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7539 "00111001" // /* MW 3 */ + 7540 "11110100" // /* MW 2 */ + 7541 "00000111" // /* MW 1 */ + 7542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7543 "00000000" // /* MW 1 */ + 7544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7545 "00000000" // /* MW 1 */ + 7546 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7547 "00000000" // /* MW 1 */ + 7548 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7549 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 16 + 7550 "10011000" // NE r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7551 "00001000" // /* MW 3 */ + 7552 "01100001" // /* MW 2 */ + 7553 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 139 6 + 7554 "10000100" // JNZ r16, #7584 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=7584 delay_slots=5 */ + 7555 "00000001" // /* MW 5 */ + 7556 "01000000" // /* MW 4 */ + 7557 "11010000" // /* MW 3 */ + 7558 "00001110" // /* MW 2 */ + 7559 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7560 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7561 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7562 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7563 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7565 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7567 "00000000" // /* MW 1 */ +.delay_slot + 7568 "11111000" // MOV r15, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7569 "00100000" // /* MW 3 */ + 7570 "11010000" // /* MW 2 */ + 7571 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 140 14 first + 7572 "00110110" // NOPA; NOPB; ST r24, [p7]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 7573 "11000001" // /* MW 11 */ + 7574 "10001000" // /* MW 10 */ + 7575 "10000011" // /* MW 9 */ + 7576 "00000011" // /* MW 8 */ + 7577 "00000000" // /* MW 7 */ + 7578 "00000000" // /* MW 6 */ + 7579 "00100000" // /* MW 5 */ + 7580 "00000000" // /* MW 4 */ + 7581 "11110000" // /* MW 3 */ + 7582 "00101100" // /* MW 2 */ + 7583 "00000000" // /* MW 1 */ +.label TGT_F_Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_544 + 7584 "00011000" // LDA p6, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7585 "00011001" // /* MW 3 */ + 7586 "11111111" // /* MW 2 */ + 7587 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 first + 7588 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7589 "00000000" // /* MW 3 */ + 7590 "00101000" // /* MW 2 */ + 7591 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 142 +.delay_slot + 7592 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7593 "00000001" // /* MW 5 */ + 7594 "00000000" // /* MW 4 */ + 7595 "00000000" // /* MW 3 */ + 7596 "11111000" // /* MW 2 */ + 7597 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7603 "00000000" // /* MW 1 */ +.delay_slot + 7604 "00011000" // MOVS p7, p1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7605 "10001011" // /* MW 3 */ + 7606 "10000100" // /* MW 2 */ +.label _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 7607 "00001111" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 7616 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 7617 "00000001" // /* MW 5 */ + 7618 "00100001" // /* MW 4 */ + 7619 "00000000" // /* MW 3 */ + 7620 "00000000" // /* MW 2 */ + 7621 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7622 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7623 "11000000" // /* MW 3 */ + 7624 "01010000" // /* MW 2 */ + 7625 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 7626 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7627 "10010000" // /* MW 3 */ + 7628 "01100000" // /* MW 2 */ + 7629 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 7630 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7631 "00010001" // /* MW 3 */ + 7632 "00000100" // /* MW 2 */ + 7633 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 7634 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7635 "00010001" // /* MW 3 */ + 7636 "00010100" // /* MW 2 */ + 7637 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7639 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 7648 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7649 "00101110" // /* MW 3 */ + 7650 "00011100" // /* MW 2 */ + 7651 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 7652 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7653 "00000001" // /* MW 5 */ + 7654 "00000000" // /* MW 4 */ + 7655 "00000000" // /* MW 3 */ + 7656 "00001000" // /* MW 2 */ + 7657 "00000000" // /* MW 1 */ + 7658 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7659 "00111101" // /* MW 3 */ + 7660 "11111000" // /* MW 2 */ + 7661 "00001111" // /* MW 1 */ + 7662 "10011000" // ST r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7663 "11110101" // /* MW 3 */ + 7664 "11111101" // /* MW 2 */ + 7665 "00001111" // /* MW 1 */ + 7666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7667 "00000000" // /* MW 1 */ + 7668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7669 "00000000" // /* MW 1 */ + 7670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7671 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 7672 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7673 "00101001" // /* MW 3 */ + 7674 "00011100" // /* MW 2 */ + 7675 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 7676 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7677 "00101110" // /* MW 3 */ + 7678 "00011100" // /* MW 2 */ + 7679 "00000001" // /* MW 1 */ + 7680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7681 "00000000" // /* MW 1 */ + 7682 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7683 "00000000" // /* MW 1 */ + 7684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7685 "00000000" // /* MW 1 */ + 7686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7687 "00000000" // /* MW 1 */ + 7688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7689 "00000000" // /* MW 1 */ + 7690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7691 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 7692 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7693 "00101001" // /* MW 3 */ + 7694 "00011100" // /* MW 2 */ + 7695 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 7696 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7697 "00101110" // /* MW 3 */ + 7698 "00000100" // /* MW 2 */ + 7699 "00000001" // /* MW 1 */ + 7700 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7701 "00000000" // /* MW 1 */ + 7702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7703 "00000000" // /* MW 1 */ + 7704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7705 "00000000" // /* MW 1 */ + 7706 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7707 "00000000" // /* MW 1 */ + 7708 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7709 "00000000" // /* MW 1 */ + 7710 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7711 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 7712 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7713 "00101001" // /* MW 3 */ + 7714 "00011100" // /* MW 2 */ + 7715 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 7716 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7717 "00101110" // /* MW 3 */ + 7718 "00010100" // /* MW 2 */ + 7719 "00000001" // /* MW 1 */ + 7720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7721 "00000000" // /* MW 1 */ + 7722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7723 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 7724 "00000100" // JL #7616 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7616 delay_slots=5 */ + 7725 "00000001" // /* MW 5 */ + 7726 "00000000" // /* MW 4 */ + 7727 "11100000" // /* MW 3 */ + 7728 "00001110" // /* MW 2 */ + 7729 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7731 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7732 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7733 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7734 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7735 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 7736 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7737 "00101001" // /* MW 3 */ + 7738 "11011100" // /* MW 2 */ + 7739 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.delay_slot + 7740 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7741 "11000000" // /* MW 3 */ + 7742 "11010000" // /* MW 2 */ + 7743 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 7744 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7745 "00001000" // /* MW 9 */ + 7746 "11000100" // /* MW 8 */ + 7747 "00110011" // /* MW 7 */ + 7748 "01101000" // /* MW 6 */ + 7749 "00000000" // /* MW 5 */ + 7750 "00000001" // /* MW 4 */ + 7751 "00100000" // /* MW 3 */ + 7752 "00000111" // /* MW 2 */ + 7753 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 7754 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7755 "01011000" // /* MW 9 */ + 7756 "11111101" // /* MW 8 */ + 7757 "00000111" // /* MW 7 */ + 7758 "00001000" // /* MW 6 */ + 7759 "10000000" // /* MW 5 */ + 7760 "00000001" // /* MW 4 */ + 7761 "10000000" // /* MW 3 */ + 7762 "11100010" // /* MW 2 */ + 7763 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 7764 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7765 "00000001" // /* MW 9 */ + 7766 "10100000" // /* MW 8 */ + 7767 "00000111" // /* MW 7 */ + 7768 "10000000" // /* MW 6 */ + 7769 "00010001" // /* MW 5 */ + 7770 "00001010" // /* MW 4 */ + 7771 "00100000" // /* MW 3 */ + 7772 "10111110" // /* MW 2 */ + 7773 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 7774 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7775 "01001010" // /* MW 3 */ + 7776 "00000110" // /* MW 2 */ + 7777 "00000000" // /* MW 1 */ + 7778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7779 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7781 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7782 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7783 "00010111" // /* MW 3 */ + 7784 "00000010" // /* MW 2 */ + 7785 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7786 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7787 "00000000" // /* MW 3 */ + 7788 "00101000" // /* MW 2 */ + 7789 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7790 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7791 "00000101" // /* MW 3 */ + 7792 "00100010" // /* MW 2 */ + 7793 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7794 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7795 "00000001" // /* MW 5 */ + 7796 "00000000" // /* MW 4 */ + 7797 "00000000" // /* MW 3 */ + 7798 "11111000" // /* MW 2 */ + 7799 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7800 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7801 "00100111" // /* MW 3 */ + 7802 "01110111" // /* MW 2 */ + 7803 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7804 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7805 "10000010" // /* MW 3 */ + 7806 "00100001" // /* MW 2 */ + 7807 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7809 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 81 first +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 +.function_start + 7824 "10111010" // MOVA m0, #20; MOVX r1, #6; MOV r0, p0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 7825 "01111000" // /* MW 9 */ + 7826 "01100000" // /* MW 8 */ + 7827 "00001000" // /* MW 7 */ + 7828 "11001000" // /* MW 6 */ + 7829 "00010000" // /* MW 5 */ + 7830 "00000000" // /* MW 4 */ + 7831 "10000000" // /* MW 3 */ + 7832 "10000000" // /* MW 2 */ + 7833 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 81 +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 + 7834 "00100100" // MOVX r0, #1; ADD.NC p0, r0, #12 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7835 "00001100" // /* MW 5 */ + 7836 "11000000" // /* MW 4 */ + 7837 "10100000" // /* MW 3 */ + 7838 "00000000" // /* MW 2 */ + 7839 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first + 7840 "10011000" // LDA.u8 r2, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7841 "01001010" // /* MW 3 */ + 7842 "00001000" // /* MW 2 */ + 7843 "00000000" // /* MW 1 */ + 7844 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7845 "00000000" // /* MW 1 */ + 7846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7847 "00000000" // /* MW 1 */ + 7848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7849 "00000000" // /* MW 1 */ + 7850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7851 "00000000" // /* MW 1 */ + 7852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7853 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 84 4 first + 7854 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 7855 "00000000" // /* MW 3 */ + 7856 "00101000" // /* MW 2 */ + 7857 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.delay_slot + 7858 "10011000" // NE r0, r2, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7859 "00001000" // /* MW 3 */ + 7860 "10000000" // /* MW 2 */ + 7861 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 83 25 first +.delay_slot + 7862 "10011000" // LSHL r0, r0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7863 "00011101" // /* MW 3 */ + 7864 "00000000" // /* MW 2 */ + 7865 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 33 first +.src_ref 3 "elementwise_binary_broadcasting.h" 83 23 +.delay_slot + 7866 "01011100" // ST r0, [p0, #4]; NEZ r3, r2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7867 "11100000" // /* MW 5 */ + 7868 "00001101" // /* MW 4 */ + 7869 "00110001" // /* MW 3 */ + 7870 "10000010" // /* MW 2 */ + 7871 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 25 +.delay_slot + 7872 "10011000" // LSHL r2, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7873 "00011101" // /* MW 3 */ + 7874 "11000100" // /* MW 2 */ + 7875 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 82 23 +.delay_slot + 7876 "10011000" // ST r2, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7877 "01010001" // /* MW 3 */ + 7878 "00000100" // /* MW 2 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_E___func_end0 + 7879 "00001000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_broadcasting.h" 76 +.src_ref 3 "elementwise_binary_broadcasting.h" 76 first +.function_start + 7888 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7889 "00000001" // /* MW 5 */ + 7890 "00000000" // /* MW 4 */ + 7891 "00000000" // /* MW 3 */ + 7892 "00001000" // /* MW 2 */ + 7893 "00000000" // /* MW 1 */ + 7894 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7895 "00111101" // /* MW 3 */ + 7896 "11111100" // /* MW 2 */ + 7897 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 77 8 first +.no_stack_arguments + 7898 "00000100" // JL #7648 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7648 delay_slots=5 */ + 7899 "00000001" // /* MW 5 */ + 7900 "00000000" // /* MW 4 */ + 7901 "11110000" // /* MW 3 */ + 7902 "00001110" // /* MW 2 */ + 7903 "00000000" // /* MW 1 */ +.delay_slot + 7904 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7905 "10011101" // /* MW 3 */ + 7906 "11111011" // /* MW 2 */ + 7907 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot + 7908 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7909 "11000000" // /* MW 3 */ + 7910 "01100000" // /* MW 2 */ + 7911 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7913 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 7916 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7917 "01100111" // /* MW 3 */ + 7918 "00000001" // /* MW 2 */ + 7919 "00000000" // /* MW 1 */ +.return_address +.aggressive_scheduled_block_id 1 +.noswbrkpt + 7920 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7921 "10011001" // /* MW 3 */ + 7922 "11111011" // /* MW 2 */ + 7923 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7924 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7925 "00111001" // /* MW 3 */ + 7926 "11111100" // /* MW 2 */ + 7927 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 first +.tail_call +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 7928 "10000100" // J #7824 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7824 delay_slots=5 */ + 7929 "00000000" // /* MW 5 */ + 7930 "00000000" // /* MW 4 */ + 7931 "01001000" // /* MW 3 */ + 7932 "00001111" // /* MW 2 */ + 7933 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 78 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 7934 "11111000" // MOV p0, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7935 "11000000" // /* MW 3 */ + 7936 "01101110" // /* MW 2 */ + 7937 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 79 4 first +.delay_slot + 7938 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7939 "00000001" // /* MW 5 */ + 7940 "00000000" // /* MW 4 */ + 7941 "00000000" // /* MW 3 */ + 7942 "11111000" // /* MW 2 */ + 7943 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7947 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 7949 "00000000" // /* MW 1 */ +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_broadcasting.h" 89 first +.src_ref 3 "elementwise_binary_broadcasting.h" 96 37 first +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 +.function_start + 7952 "01010100" // LDA r0, [p3], #12; MOV m0, #20 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7953 "01010001" // /* MW 5 */ + 7954 "00000000" // /* MW 4 */ + 7955 "11010000" // /* MW 3 */ + 7956 "10000010" // /* MW 2 */ + 7957 "01100111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 19 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 7958 "11010100" // LDA.u8 r1, [p3], m0; MOV p4, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 7959 "10000001" // /* MW 5 */ + 7960 "11001101" // /* MW 4 */ + 7961 "01011000" // /* MW 3 */ + 7962 "00000101" // /* MW 2 */ + 7963 "01100001" // /* MW 1 */ + 7964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7965 "00000000" // /* MW 1 */ + 7966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7967 "00000000" // /* MW 1 */ + 7968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7969 "00000000" // /* MW 1 */ + 7970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7971 "00000000" // /* MW 1 */ + 7972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7973 "00000000" // /* MW 1 */ + 7974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7975 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 102 12 +.src_ref 3 "elementwise_binary_broadcasting.h" 102 35 + 7976 "10000100" // JNZ r1, #8032 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8032 delay_slots=5 */ + 7977 "00000001" // /* MW 5 */ + 7978 "01000000" // /* MW 4 */ + 7979 "10110000" // /* MW 3 */ + 7980 "00001111" // /* MW 2 */ + 7981 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 +.delay_slot + 7982 "00011000" // MOVX r2, #-6 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7983 "11101001" // /* MW 3 */ + 7984 "11000100" // /* MW 2 */ + 7985 "00010111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 96 78 first +.delay_slot + 7986 "10011000" // LSHL r0, r0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7987 "00101101" // /* MW 3 */ + 7988 "00000000" // /* MW 2 */ + 7989 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7991 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7993 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 7994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 7995 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 103 28 first + 7996 "10011000" // LDA.s16 r1, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 7997 "00110010" // /* MW 3 */ + 7998 "00000100" // /* MW 2 */ + 7999 "00000000" // /* MW 1 */ + 8000 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8001 "00000000" // /* MW 1 */ + 8002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8003 "00000000" // /* MW 1 */ + 8004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8005 "00000000" // /* MW 1 */ + 8006 "10000100" // J #8064 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8064 delay_slots=5 */ + 8007 "00000000" // /* MW 5 */ + 8008 "00000000" // /* MW 4 */ + 8009 "11000000" // /* MW 3 */ + 8010 "00001111" // /* MW 2 */ + 8011 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8012 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8013 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8014 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8015 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.delay_slot + 8016 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8017 "01110010" // /* MW 3 */ + 8018 "00000101" // /* MW 2 */ + 8019 "00011000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8021 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 8022 "01111010" // NOPA; VST x0, [p0]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8023 "00000000" // /* MW 9 */ + 8024 "00000000" // /* MW 8 */ + 8025 "00000000" // /* MW 7 */ + 8026 "00000000" // /* MW 6 */ + 8027 "00010011" // /* MW 5 */ + 8028 "00000100" // /* MW 4 */ + 8029 "11110000" // /* MW 3 */ + 8030 "00101100" // /* MW 2 */ + 8031 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_80 +.src_ref 3 "elementwise_binary_broadcasting.h" 106 28 first + 8032 "10011000" // LDA.s16 r1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8033 "00110010" // /* MW 3 */ + 8034 "00000100" // /* MW 2 */ + 8035 "00000001" // /* MW 1 */ + 8036 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8037 "00000000" // /* MW 1 */ + 8038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8039 "00000000" // /* MW 1 */ + 8040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8041 "00000000" // /* MW 1 */ + 8042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8043 "00000000" // /* MW 1 */ + 8044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8045 "00000000" // /* MW 1 */ + 8046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8047 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 8048 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8049 "01110010" // /* MW 3 */ + 8050 "00000101" // /* MW 2 */ + 8051 "00011000" // /* MW 1 */ + 8052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8053 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 8054 "01111010" // NOPA; VST x0, [p1]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8055 "00000000" // /* MW 9 */ + 8056 "00000000" // /* MW 8 */ + 8057 "00000000" // /* MW 7 */ + 8058 "00000000" // /* MW 6 */ + 8059 "00010011" // /* MW 5 */ + 8060 "00000100" // /* MW 4 */ + 8061 "11110001" // /* MW 3 */ + 8062 "00101100" // /* MW 2 */ + 8063 "00000000" // /* MW 1 */ +.label TGT_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_112 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 first + 8064 "10111010" // LDA m0, [p4, #20]; MOVX r0, #60; ADD.NC lc, r0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8065 "01001000" // /* MW 9 */ + 8066 "00111111" // /* MW 8 */ + 8067 "10111000" // /* MW 7 */ + 8068 "10001010" // /* MW 6 */ + 8069 "00000111" // /* MW 5 */ + 8070 "00000000" // /* MW 4 */ + 8071 "11010000" // /* MW 3 */ + 8072 "10000000" // /* MW 2 */ + 8073 "10001010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 8074 "10111010" // LDA m1, [p3, #4]; MOVXM ls, #8176 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8075 "00010000" // /* MW 9 */ + 8076 "11111000" // /* MW 8 */ + 8077 "01111111" // /* MW 7 */ + 8078 "00000100" // /* MW 6 */ + 8079 "00000000" // /* MW 5 */ + 8080 "00000000" // /* MW 4 */ + 8081 "11010000" // /* MW 3 */ + 8082 "10010000" // /* MW 2 */ + 8083 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 8084 "01000100" // MOVXM le, #8208 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8085 "00100000" // /* MW 5 */ + 8086 "11100000" // /* MW 4 */ + 8087 "00100110" // /* MW 3 */ + 8088 "00000000" // /* MW 2 */ + 8089 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 8090 "01000100" // MOVXM p4, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8091 "11001000" // /* MW 5 */ + 8092 "11001000" // /* MW 4 */ + 8093 "11001000" // /* MW 3 */ + 8094 "00000111" // /* MW 2 */ + 8095 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_broadcasting.h" 117 8 + 8096 "10011000" // LDA.s8 r1, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8097 "00100010" // /* MW 3 */ + 8098 "00000100" // /* MW 2 */ + 8099 "00000100" // /* MW 1 */ + 8100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8101 "00000000" // /* MW 1 */ + 8102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8103 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first + 8104 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8105 "10101011" // /* MW 3 */ + 8106 "00001000" // /* MW 2 */ + 8107 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 189 20 first + 8108 "10011000" // VLDA.CONV.fp32.bf16 cml2, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8109 "00101011" // /* MW 3 */ + 8110 "00101001" // /* MW 2 */ + 8111 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first + 8112 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8113 "00101011" // /* MW 3 */ + 8114 "00001000" // /* MW 2 */ + 8115 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8116 "10011000" // VLDA.CONV.fp32.bf16 cml4, [p1], m1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8117 "00101011" // /* MW 3 */ + 8118 "00101010" // /* MW 2 */ + 8119 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8120 "00101100" // VLDA.CONV.fp32.bf16 cml1, [p0], m0; MOVX crRnd, r1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8121 "00000000" // /* MW 5 */ + 8122 "11110101" // /* MW 4 */ + 8123 "01110000" // /* MW 3 */ + 8124 "00010101" // /* MW 2 */ + 8125 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8126 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8127 "00111101" // /* MW 7 */ + 8128 "00101000" // /* MW 6 */ + 8129 "00000011" // /* MW 5 */ + 8130 "00000100" // /* MW 4 */ + 8131 "01110000" // /* MW 3 */ + 8132 "00100101" // /* MW 2 */ + 8133 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8134 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8135 "00101011" // /* MW 3 */ + 8136 "00001000" // /* MW 2 */ + 8137 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8138 "01100010" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8139 "00111101" // /* MW 7 */ + 8140 "00010000" // /* MW 6 */ + 8141 "00000100" // /* MW 5 */ + 8142 "00000100" // /* MW 4 */ + 8143 "01110000" // /* MW 3 */ + 8144 "01000101" // /* MW 2 */ + 8145 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8146 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8147 "10101011" // /* MW 3 */ + 8148 "00001000" // /* MW 2 */ + 8149 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8150 "01100010" // VLDA.CONV.fp32.bf16 cml2, [p1], m1; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8151 "00111101" // /* MW 7 */ + 8152 "00101000" // /* MW 6 */ + 8153 "00000011" // /* MW 5 */ + 8154 "00000100" // /* MW 4 */ + 8155 "01110000" // /* MW 3 */ + 8156 "00100101" // /* MW 2 */ + 8157 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8158 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8159 "00101011" // /* MW 3 */ + 8160 "00001000" // /* MW 2 */ + 8161 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8162 "01101110" // VLDA.CONV.fp32.bf16 cml4, [p1], m1; VST.CONV.bf16.fp32 cml3, [p2], #64; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 8163 "00111101" // /* MW 13 */ + 8164 "00010000" // /* MW 12 */ + 8165 "00000100" // /* MW 11 */ + 8166 "01010111" // /* MW 10 */ + 8167 "00011010" // /* MW 9 */ + 8168 "01000000" // /* MW 8 */ + 8169 "00000000" // /* MW 7 */ + 8170 "00000000" // /* MW 6 */ + 8171 "01000110" // /* MW 5 */ + 8172 "00111011" // /* MW 4 */ + 8173 "01110100" // /* MW 3 */ + 8174 "01000101" // /* MW 2 */ + 8175 "00100101" // /* MW 1 */ +.label ZLS_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_224 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 8176 "10011000" // VLDA.CONV.fp32.bf16 cml1, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8177 "10101011" // /* MW 3 */ + 8178 "00001000" // /* MW 2 */ + 8179 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8180 "01100110" // VLDA.CONV.fp32.bf16 cml2, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 8181 "00111101" // /* MW 11 */ + 8182 "00101000" // /* MW 10 */ + 8183 "00000011" // /* MW 9 */ + 8184 "10001110" // /* MW 8 */ + 8185 "00010001" // /* MW 7 */ + 8186 "00001111" // /* MW 6 */ + 8187 "00100001" // /* MW 5 */ + 8188 "00000000" // /* MW 4 */ + 8189 "01110000" // /* MW 3 */ + 8190 "00100101" // /* MW 2 */ + 8191 "00100101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8192 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8193 "00000000" // /* MW 15 */ + 8194 "00000000" // /* MW 14 */ + 8195 "01111000" // /* MW 13 */ + 8196 "10100101" // /* MW 12 */ + 8197 "00000001" // /* MW 11 */ + 8198 "00000000" // /* MW 10 */ + 8199 "00000000" // /* MW 9 */ + 8200 "00000000" // /* MW 8 */ + 8201 "01011011" // /* MW 7 */ + 8202 "00000001" // /* MW 6 */ + 8203 "00100000" // /* MW 5 */ + 8204 "00000000" // /* MW 4 */ + 8205 "01110000" // /* MW 3 */ + 8206 "00000101" // /* MW 2 */ + 8207 "00000001" // /* MW 1 */ +.label ZLE_F_ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E_256 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8208 "11101011" // VLDA.CONV.fp32.bf16 cml4, [p1], m1;NOPB; VST.CONV.bf16.fp32 cml3, [p2], #64;NOPX; NOPM; VADD.f dm4, dm0, dm4, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8209 "10000001" // /* MW 15 */ + 8210 "00100000" // /* MW 14 */ + 8211 "01111000" // /* MW 13 */ + 8212 "10100101" // /* MW 12 */ + 8213 "00000001" // /* MW 11 */ + 8214 "00000000" // /* MW 10 */ + 8215 "00000000" // /* MW 9 */ + 8216 "00000000" // /* MW 8 */ + 8217 "10100011" // /* MW 7 */ + 8218 "00011101" // /* MW 6 */ + 8219 "00100010" // /* MW 5 */ + 8220 "00000000" // /* MW 4 */ + 8221 "01110000" // /* MW 3 */ + 8222 "01000101" // /* MW 2 */ + 8223 "00100101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 8224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8225 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "add_accum.hpp" 19 92 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8226 "01100010" // VST.CONV.bf16.fp32 cml4, [p2], #64; VADD.f dm3, dm1, dm2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8227 "00111101" // /* MW 7 */ + 8228 "00101000" // /* MW 6 */ + 8229 "00000011" // /* MW 5 */ + 8230 "00000010" // /* MW 4 */ + 8231 "01100000" // /* MW 3 */ + 8232 "11000100" // /* MW 2 */ + 8233 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8234 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8235 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "add_accum.hpp" 19 92 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8236 "01100010" // VST.CONV.bf16.fp32 cml3, [p2], #64; VADD.f dm4, dm0, dm4, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8237 "00111101" // /* MW 7 */ + 8238 "00010000" // /* MW 6 */ + 8239 "00000100" // /* MW 5 */ + 8240 "00000010" // /* MW 4 */ + 8241 "01100000" // /* MW 3 */ + 8242 "10110100" // /* MW 2 */ + 8243 "01000011" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8245 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.src_ref 3 "elementwise_binary_broadcasting.h" 121 4 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8246 "01011100" // VST.CONV.bf16.fp32 cml4, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 8247 "00000000" // /* MW 5 */ + 8248 "01010000" // /* MW 4 */ + 8249 "01100000" // /* MW 3 */ + 8250 "11000100" // /* MW 2 */ + 8251 "01000011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8253 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 8254 "00011000" // VST.CONV.bf16.fp32 cml3, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8255 "10100011" // /* MW 3 */ + 8256 "00011101" // /* MW 2 */ + 8257 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8259 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 8260 "00011000" // VST.CONV.bf16.fp32 cml4, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8261 "00100011" // /* MW 3 */ + 8262 "00011110" // /* MW 2 */ + 8263 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN31elementwise_binary_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8265 "00000000" // /* MW 1 */ +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 82 first +.function_start + 8272 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8273 "00000001" // /* MW 5 */ + 8274 "00000000" // /* MW 4 */ + 8275 "00000000" // /* MW 3 */ + 8276 "00010000" // /* MW 2 */ + 8277 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 + 8278 "00000010" // ST lr, [sp, #-4]; MOV r16, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8279 "01110000" // /* MW 7 */ + 8280 "01100000" // /* MW 6 */ + 8281 "00001010" // /* MW 5 */ + 8282 "00000010" // /* MW 4 */ + 8283 "10110000" // /* MW 3 */ + 8284 "10000111" // /* MW 2 */ + 8285 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 90 24 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 + 8286 "00000010" // MOVS p2, p1; ADD.NC p3, r16, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8287 "00000000" // /* MW 7 */ + 8288 "00000011" // /* MW 6 */ + 8289 "10110100" // /* MW 5 */ + 8290 "00000001" // /* MW 4 */ + 8291 "01100000" // /* MW 3 */ + 8292 "10010001" // /* MW 2 */ + 8293 "01010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 19 first +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 + 8294 "11010100" // LDA.u8 r27, [p3], #2; MOV r16, p0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8295 "10000001" // /* MW 5 */ + 8296 "00100001" // /* MW 4 */ + 8297 "01011000" // /* MW 3 */ + 8298 "11101101" // /* MW 2 */ + 8299 "01100101" // /* MW 1 */ + 8300 "11010100" // LDA.s16 r18, [p3], #-14; MOV r17, sp /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8301 "11000001" // /* MW 5 */ + 8302 "10101011" // /* MW 4 */ + 8303 "01011000" // /* MW 3 */ + 8304 "11001010" // /* MW 2 */ + 8305 "01110011" // /* MW 1 */ + 8306 "00011000" // ADD.NC p0, r17, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8307 "11000000" // /* MW 3 */ + 8308 "01101000" // /* MW 2 */ + 8309 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 4 "vector_native_types.hpp" 374 137 first + 8310 "00011000" // VST sfh, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8311 "00101011" // /* MW 3 */ + 8312 "00000111" // /* MW 2 */ + 8313 "00001000" // /* MW 1 */ + 8314 "00011000" // ST.s16 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8315 "01010111" // /* MW 3 */ + 8316 "00000110" // /* MW 2 */ + 8317 "00000000" // /* MW 1 */ + 8318 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8319 "00000000" // /* MW 1 */ + 8320 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8321 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 first +.no_stack_arguments + 8322 "00000100" // JL #7952 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7952 delay_slots=5 */ + 8323 "00000001" // /* MW 5 */ + 8324 "00000000" // /* MW 4 */ + 8325 "10001000" // /* MW 3 */ + 8326 "00001111" // /* MW 2 */ + 8327 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.delay_slot + 8328 "11111000" // MOV r17, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8329 "11000000" // /* MW 3 */ + 8330 "01010000" // /* MW 2 */ + 8331 "00011100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8333 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 first +.delay_slot + 8334 "00011000" // SEL.EQZ r18, r16, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8335 "00010010" // /* MW 3 */ + 8336 "00100101" // /* MW 2 */ + 8337 "00010100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 92 35 +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8338 "11100100" // SEL.EQZ r16, r17, r16, r27; MOV p1, r18 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8339 "01000001" // /* MW 5 */ + 8340 "11010010" // /* MW 4 */ + 8341 "01000010" // /* MW 3 */ + 8342 "00100000" // /* MW 2 */ + 8343 "10001100" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 117 8 +.delay_slot + 8344 "00000010" // NOPS; MOV p0, r16 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8345 "01110000" // /* MW 7 */ + 8346 "00010000" // /* MW 6 */ + 8347 "00110100" // /* MW 5 */ + 8348 "00000000" // /* MW 4 */ + 8349 "01100000" // /* MW 3 */ + 8350 "00101011" // /* MW 2 */ + 8351 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.return_address + 8352 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8353 "00111001" // /* MW 3 */ + 8354 "11111100" // /* MW 2 */ + 8355 "00000111" // /* MW 1 */ + 8356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8357 "00000000" // /* MW 1 */ + 8358 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8359 "00000000" // /* MW 1 */ + 8360 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8361 "00000000" // /* MW 1 */ + 8362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8363 "00000000" // /* MW 1 */ + 8364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8365 "00000000" // /* MW 1 */ + 8366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8367 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 first + 8368 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8369 "00000000" // /* MW 3 */ + 8370 "00101000" // /* MW 2 */ + 8371 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_attribute_broadcasting.h" 118 4 +.delay_slot + 8372 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8373 "00000001" // /* MW 5 */ + 8374 "00000000" // /* MW 4 */ + 8375 "00000000" // /* MW 3 */ + 8376 "11110000" // /* MW 2 */ + 8377 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8378 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8379 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8380 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8381 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8382 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8383 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8384 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN41elementwise_binary_attribute_broadcastingI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_EE3runEPS0_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 8385 "00000000" // /* MW 1 */ +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_add1d_attribute_broadcasting _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 147 first +.src_ref 7 "superkernels.cpp" 152 6 +.function_start + 8400 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8401 "10000000" // /* MW 5 */ + 8402 "11001000" // /* MW 4 */ + 8403 "11000110" // /* MW 3 */ + 8404 "00000111" // /* MW 2 */ + 8405 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 first + 8406 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8407 "11000001" // /* MW 5 */ + 8408 "10110101" // /* MW 4 */ + 8409 "11011000" // /* MW 3 */ + 8410 "11000010" // /* MW 2 */ + 8411 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 147 + 8412 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8413 "00000001" // /* MW 5 */ + 8414 "00000000" // /* MW 4 */ + 8415 "00000000" // /* MW 3 */ + 8416 "00001000" // /* MW 2 */ + 8417 "00000000" // /* MW 1 */ + 8418 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8419 "01110000" // /* MW 7 */ + 8420 "11010000" // /* MW 6 */ + 8421 "00001011" // /* MW 5 */ + 8422 "00000000" // /* MW 4 */ + 8423 "10110000" // /* MW 3 */ + 8424 "01100011" // /* MW 2 */ + 8425 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 + 8426 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8427 "00010001" // /* MW 9 */ + 8428 "00101000" // /* MW 8 */ + 8429 "00110010" // /* MW 7 */ + 8430 "11110011" // /* MW 6 */ + 8431 "00000001" // /* MW 5 */ + 8432 "00000000" // /* MW 4 */ + 8433 "10110000" // /* MW 3 */ + 8434 "10000010" // /* MW 2 */ + 8435 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 8436 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8437 "11000000" // /* MW 3 */ + 8438 "11010100" // /* MW 2 */ + 8439 "00011011" // /* MW 1 */ + 8440 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8441 "00000000" // /* MW 1 */ + 8442 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8443 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 152 6 +.src_ref 7 "superkernels.cpp" 152 16 + 8444 "10000100" // JNZ r16, #8608 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8608 delay_slots=5 */ + 8445 "00000001" // /* MW 5 */ + 8446 "01000000" // /* MW 4 */ + 8447 "11010000" // /* MW 3 */ + 8448 "00010000" // /* MW 2 */ + 8449 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 22 first +.delay_slot + 8450 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8451 "10010000" // /* MW 3 */ + 8452 "01100010" // /* MW 2 */ + 8453 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 30 +.delay_slot + 8454 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8455 "11111011" // /* MW 3 */ + 8456 "01100011" // /* MW 2 */ + 8457 "00010100" // /* MW 1 */ +.delay_slot + 8458 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8459 "00111101" // /* MW 3 */ + 8460 "11110100" // /* MW 2 */ + 8461 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 149 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 8462 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8463 "01110000" // /* MW 7 */ + 8464 "01100000" // /* MW 6 */ + 8465 "00110000" // /* MW 5 */ + 8466 "00000011" // /* MW 4 */ + 8467 "00110000" // /* MW 3 */ + 8468 "11000110" // /* MW 2 */ + 8469 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 +.src_ref 7 "superkernels.cpp" 166 2 +.delay_slot + 8470 "01000100" // MOVXM p0, #509184 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8471 "00000000" // /* MW 5 */ + 8472 "11001010" // /* MW 4 */ + 8473 "11000000" // /* MW 3 */ + 8474 "00000111" // /* MW 2 */ + 8475 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 8476 "01000100" // MOVXM p2, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8477 "11001000" // /* MW 5 */ + 8478 "11001000" // /* MW 4 */ + 8479 "11000100" // /* MW 3 */ + 8480 "00000111" // /* MW 2 */ + 8481 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 8482 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8483 "00010000" // /* MW 9 */ + 8484 "00110000" // /* MW 8 */ + 8485 "00110010" // /* MW 7 */ + 8486 "11110001" // /* MW 6 */ + 8487 "00000001" // /* MW 5 */ + 8488 "00000000" // /* MW 4 */ + 8489 "11100000" // /* MW 3 */ + 8490 "11000000" // /* MW 2 */ + 8491 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8492 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8493 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 155 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 8494 "00000100" // JL #7888 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=7888 delay_slots=5 */ + 8495 "00000001" // /* MW 5 */ + 8496 "00000000" // /* MW 4 */ + 8497 "01101000" // /* MW 3 */ + 8498 "00001111" // /* MW 2 */ + 8499 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8501 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 8502 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8503 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8504 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8505 "00110001" // /* MW 3 */ + 8506 "00100000" // /* MW 2 */ + 8507 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 8508 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8509 "00000101" // /* MW 3 */ + 8510 "00100000" // /* MW 2 */ + 8511 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 8512 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8513 "00000000" // /* MW 15 */ + 8514 "00000000" // /* MW 14 */ + 8515 "01111000" // /* MW 13 */ + 8516 "10100101" // /* MW 12 */ + 8517 "00000001" // /* MW 11 */ + 8518 "00000000" // /* MW 10 */ + 8519 "00000000" // /* MW 9 */ + 8520 "10000000" // /* MW 8 */ + 8521 "00010001" // /* MW 7 */ + 8522 "00000110" // /* MW 6 */ + 8523 "00100010" // /* MW 5 */ + 8524 "00000000" // /* MW 4 */ + 8525 "11110000" // /* MW 3 */ + 8526 "00101100" // /* MW 2 */ + 8527 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 +.return_address + 8528 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8529 "10100000" // /* MW 5 */ + 8530 "11001000" // /* MW 4 */ + 8531 "11000100" // /* MW 3 */ + 8532 "00000111" // /* MW 2 */ + 8533 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 18 first +.src_ref 7 "superkernels.cpp" 159 65 + 8534 "10111010" // LDA r16, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8535 "00010000" // /* MW 9 */ + 8536 "10000000" // /* MW 8 */ + 8537 "00110010" // /* MW 7 */ + 8538 "11110001" // /* MW 6 */ + 8539 "00000001" // /* MW 5 */ + 8540 "00000000" // /* MW 4 */ + 8541 "11010000" // /* MW 3 */ + 8542 "11000010" // /* MW 2 */ + 8543 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 +.src_ref 7 "superkernels.cpp" 159 65 +.src_ref 7 "superkernels.cpp" 166 2 + 8544 "10111010" // LDA r17, [p2]; MOVXM p2, #509184 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8545 "00010000" // /* MW 9 */ + 8546 "10000000" // /* MW 8 */ + 8547 "00110010" // /* MW 7 */ + 8548 "11110001" // /* MW 6 */ + 8549 "00000001" // /* MW 5 */ + 8550 "00000000" // /* MW 4 */ + 8551 "11010000" // /* MW 3 */ + 8552 "11000110" // /* MW 2 */ + 8553 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 51 first +.src_ref 7 "superkernels.cpp" 159 16 +.src_ref 7 "superkernels.cpp" 164 47 + 8554 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8555 "00010000" // /* MW 9 */ + 8556 "00101010" // /* MW 8 */ + 8557 "10110010" // /* MW 7 */ + 8558 "11110000" // /* MW 6 */ + 8559 "00000001" // /* MW 5 */ + 8560 "00000000" // /* MW 4 */ + 8561 "01010000" // /* MW 3 */ + 8562 "11001011" // /* MW 2 */ + 8563 "01001010" // /* MW 1 */ + 8564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8565 "00000000" // /* MW 1 */ + 8566 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8567 "00000000" // /* MW 1 */ + 8568 "10000100" // J #8624 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=8624 delay_slots=5 */ + 8569 "00000000" // /* MW 5 */ + 8570 "00000000" // /* MW 4 */ + 8571 "11011000" // /* MW 3 */ + 8572 "00010000" // /* MW 2 */ + 8573 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 +.delay_slot + 8574 "01000100" // MOVXM p0, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8575 "10111000" // /* MW 5 */ + 8576 "11001000" // /* MW 4 */ + 8577 "11000000" // /* MW 3 */ + 8578 "00000111" // /* MW 2 */ + 8579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8580 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8581 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 27 first +.delay_slot + 8582 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8583 "00001111" // /* MW 3 */ + 8584 "01100001" // /* MW 2 */ + 8585 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 157 13 first +.delay_slot + 8586 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8587 "10100011" // /* MW 5 */ + 8588 "00001100" // /* MW 4 */ + 8589 "11110000" // /* MW 3 */ + 8590 "00101100" // /* MW 2 */ + 8591 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 159 16 first +.delay_slot + 8592 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8593 "00000000" // /* MW 15 */ + 8594 "00000000" // /* MW 14 */ + 8595 "01111000" // /* MW 13 */ + 8596 "10100101" // /* MW 12 */ + 8597 "00000001" // /* MW 11 */ + 8598 "00000000" // /* MW 10 */ + 8599 "00000000" // /* MW 9 */ + 8600 "10000000" // /* MW 8 */ + 8601 "00010001" // /* MW 7 */ + 8602 "00000110" // /* MW 6 */ + 8603 "00100001" // /* MW 5 */ + 8604 "00000000" // /* MW 4 */ + 8605 "11110000" // /* MW 3 */ + 8606 "00101100" // /* MW 2 */ + 8607 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 164 47 +.src_ref 7 "superkernels.cpp" 166 2 + 8608 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8609 "00000000" // /* MW 15 */ + 8610 "00000000" // /* MW 14 */ + 8611 "00010000" // /* MW 13 */ + 8612 "00101010" // /* MW 12 */ + 8613 "10110010" // /* MW 11 */ + 8614 "11110000" // /* MW 10 */ + 8615 "00000001" // /* MW 9 */ + 8616 "00000000" // /* MW 8 */ + 8617 "10001011" // /* MW 7 */ + 8618 "10000000" // /* MW 6 */ + 8619 "00100010" // /* MW 5 */ + 8620 "00000000" // /* MW 4 */ + 8621 "11110000" // /* MW 3 */ + 8622 "00101100" // /* MW 2 */ + 8623 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 8624 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 8625 "00000000" // /* MW 7 */ + 8626 "11000011" // /* MW 6 */ + 8627 "10110011" // /* MW 5 */ + 8628 "00000011" // /* MW 4 */ + 8629 "01100000" // /* MW 3 */ + 8630 "10010001" // /* MW 2 */ + 8631 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 8632 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8633 "00010000" // /* MW 9 */ + 8634 "00100000" // /* MW 8 */ + 8635 "00110010" // /* MW 7 */ + 8636 "11110000" // /* MW 6 */ + 8637 "00000001" // /* MW 5 */ + 8638 "00000000" // /* MW 4 */ + 8639 "11010000" // /* MW 3 */ + 8640 "11101110" // /* MW 2 */ + 8641 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 8642 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8643 "00010110" // /* MW 3 */ + 8644 "11111110" // /* MW 2 */ + 8645 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 8646 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8647 "00110110" // /* MW 3 */ + 8648 "11111110" // /* MW 2 */ + 8649 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 8650 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8651 "01010110" // /* MW 3 */ + 8652 "01000110" // /* MW 2 */ + 8653 "00000111" // /* MW 1 */ + 8654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8655 "00000000" // /* MW 1 */ + 8656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8657 "00000000" // /* MW 1 */ + 8658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8659 "00000000" // /* MW 1 */ + 8660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8661 "00000000" // /* MW 1 */ + 8662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8663 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 8664 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8665 "00000010" // /* MW 3 */ + 8666 "01100001" // /* MW 2 */ + 8667 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 8668 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8669 "00010001" // /* MW 3 */ + 8670 "00000110" // /* MW 2 */ + 8671 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 8672 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8673 "11111101" // /* MW 3 */ + 8674 "11100000" // /* MW 2 */ + 8675 "00010111" // /* MW 1 */ + 8676 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8677 "00000000" // /* MW 1 */ + 8678 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8679 "00000000" // /* MW 1 */ + 8680 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8681 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 8682 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8683 "00001000" // /* MW 3 */ + 8684 "10010011" // /* MW 2 */ + 8685 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 + 8686 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8687 "10000001" // /* MW 5 */ + 8688 "10101101" // /* MW 4 */ + 8689 "10100111" // /* MW 3 */ + 8690 "00000000" // /* MW 2 */ + 8691 "00000100" // /* MW 1 */ + 8692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8693 "00000000" // /* MW 1 */ + 8694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8695 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first + 8696 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8697 "00110110" // /* MW 3 */ + 8698 "00000110" // /* MW 2 */ + 8699 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 8700 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8701 "10000001" // /* MW 5 */ + 8702 "11011101" // /* MW 4 */ + 8703 "11011100" // /* MW 3 */ + 8704 "11001010" // /* MW 2 */ + 8705 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 47 first + 8706 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8707 "01110110" // /* MW 3 */ + 8708 "00000110" // /* MW 2 */ + 8709 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 8710 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8711 "10011110" // /* MW 3 */ + 8712 "01011100" // /* MW 2 */ + 8713 "00000111" // /* MW 1 */ + 8714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8715 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 166 2 first +.no_stack_arguments + 8716 "00000100" // JL #8272 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8272 delay_slots=5 */ + 8717 "00000001" // /* MW 5 */ + 8718 "00000000" // /* MW 4 */ + 8719 "00101000" // /* MW 3 */ + 8720 "00010000" // /* MW 2 */ + 8721 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8723 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 first +.delay_slot + 8724 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8725 "00000111" // /* MW 3 */ + 8726 "01100010" // /* MW 2 */ + 8727 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 163 2 +.delay_slot + 8728 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8729 "00110001" // /* MW 3 */ + 8730 "00000110" // /* MW 2 */ + 8731 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 first +.delay_slot + 8732 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8733 "00001101" // /* MW 3 */ + 8734 "11100001" // /* MW 2 */ + 8735 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 164 45 +.delay_slot + 8736 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 8737 "00000000" // /* MW 15 */ + 8738 "00000000" // /* MW 14 */ + 8739 "10101000" // /* MW 13 */ + 8740 "10100000" // /* MW 12 */ + 8741 "00110100" // /* MW 11 */ + 8742 "00000000" // /* MW 10 */ + 8743 "00000000" // /* MW 9 */ + 8744 "00000000" // /* MW 8 */ + 8745 "01011011" // /* MW 7 */ + 8746 "00000001" // /* MW 6 */ + 8747 "00100000" // /* MW 5 */ + 8748 "00000000" // /* MW 4 */ + 8749 "11110000" // /* MW 3 */ + 8750 "00101100" // /* MW 2 */ + 8751 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 +.src_ref 7 "superkernels.cpp" 169 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 8752 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8753 "00010000" // /* MW 9 */ + 8754 "00100000" // /* MW 8 */ + 8755 "00110010" // /* MW 7 */ + 8756 "11110011" // /* MW 6 */ + 8757 "00000001" // /* MW 5 */ + 8758 "00000000" // /* MW 4 */ + 8759 "11010000" // /* MW 3 */ + 8760 "11000110" // /* MW 2 */ + 8761 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 8762 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8763 "00000101" // /* MW 3 */ + 8764 "00100000" // /* MW 2 */ + 8765 "00010000" // /* MW 1 */ + 8766 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8767 "00000000" // /* MW 1 */ + 8768 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8769 "00000000" // /* MW 1 */ + 8770 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8771 "00000000" // /* MW 1 */ + 8772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8773 "00000000" // /* MW 1 */ + 8774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8775 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 8776 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8777 "00001000" // /* MW 3 */ + 8778 "01010001" // /* MW 2 */ + 8779 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 8780 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8781 "00010000" // /* MW 9 */ + 8782 "00101110" // /* MW 8 */ + 8783 "00110010" // /* MW 7 */ + 8784 "11110001" // /* MW 6 */ + 8785 "00000001" // /* MW 5 */ + 8786 "00000000" // /* MW 4 */ + 8787 "11010000" // /* MW 3 */ + 8788 "11001110" // /* MW 2 */ + 8789 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 first + 8790 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8791 "00110110" // /* MW 3 */ + 8792 "00000110" // /* MW 2 */ + 8793 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 19 + 8794 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8795 "01010110" // /* MW 3 */ + 8796 "00000110" // /* MW 2 */ + 8797 "00000010" // /* MW 1 */ + 8798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8799 "00000000" // /* MW 1 */ + 8800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8801 "00000000" // /* MW 1 */ + 8802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8803 "00000000" // /* MW 1 */ + 8804 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8805 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 8806 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8807 "00110001" // /* MW 3 */ + 8808 "00100001" // /* MW 2 */ + 8809 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 8810 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8811 "00010001" // /* MW 3 */ + 8812 "11100110" // /* MW 2 */ + 8813 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 16 first + 8814 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8815 "00101000" // /* MW 3 */ + 8816 "01100001" // /* MW 2 */ + 8817 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 168 6 + 8818 "10000100" // JNZ r16, #8848 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=8848 delay_slots=5 */ + 8819 "00000001" // /* MW 5 */ + 8820 "01000000" // /* MW 4 */ + 8821 "01001000" // /* MW 3 */ + 8822 "00010001" // /* MW 2 */ + 8823 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8824 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8825 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8826 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8827 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8829 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8831 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8833 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 + 8834 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8835 "00000001" // /* MW 3 */ + 8836 "00100000" // /* MW 2 */ + 8837 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 169 14 first + 8838 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 8839 "00000000" // /* MW 9 */ + 8840 "00000000" // /* MW 8 */ + 8841 "00000000" // /* MW 7 */ + 8842 "10000000" // /* MW 6 */ + 8843 "00010001" // /* MW 5 */ + 8844 "00000110" // /* MW 4 */ + 8845 "11110110" // /* MW 3 */ + 8846 "00101100" // /* MW 2 */ + 8847 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 171 + 8848 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8849 "00111001" // /* MW 3 */ + 8850 "11110100" // /* MW 2 */ + 8851 "00000111" // /* MW 1 */ + 8852 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8853 "00011001" // /* MW 3 */ + 8854 "11111011" // /* MW 2 */ + 8855 "00000111" // /* MW 1 */ + 8856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8857 "00000000" // /* MW 1 */ + 8858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8859 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 8860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8861 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 8862 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8863 "11110001" // /* MW 3 */ + 8864 "11111101" // /* MW 2 */ + 8865 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8866 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8867 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 8868 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8869 "00000000" // /* MW 3 */ + 8870 "00101000" // /* MW 2 */ + 8871 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 8872 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8873 "10100000" // /* MW 3 */ + 8874 "01100111" // /* MW 2 */ + 8875 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 171 +.delay_slot + 8876 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 8877 "00000001" // /* MW 5 */ + 8878 "00000000" // /* MW 4 */ + 8879 "00000000" // /* MW 3 */ + 8880 "11111000" // /* MW 2 */ + 8881 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_add1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 8887 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.function setup _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv +.src_ref 3 "elementwise_unary.h" 124 first +.src_ref 3 "elementwise_unary.h" 126 24 first +.function_start + 8896 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8897 "00101110" // /* MW 3 */ + 8898 "00011100" // /* MW 2 */ + 8899 "00000001" // /* MW 1 */ + 8900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8901 "00000000" // /* MW 1 */ + 8902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8903 "00000000" // /* MW 1 */ + 8904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8905 "00000000" // /* MW 1 */ + 8906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8907 "00000000" // /* MW 1 */ + 8908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8909 "00000000" // /* MW 1 */ + 8910 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8911 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 126 22 first + 8912 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8913 "00101001" // /* MW 3 */ + 8914 "00011100" // /* MW 2 */ + 8915 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 24 first + 8916 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8917 "00101110" // /* MW 3 */ + 8918 "00011100" // /* MW 2 */ + 8919 "00000001" // /* MW 1 */ + 8920 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8921 "00000000" // /* MW 1 */ + 8922 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8923 "00000000" // /* MW 1 */ + 8924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8925 "00000000" // /* MW 1 */ + 8926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8927 "00000000" // /* MW 1 */ + 8928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8929 "00000000" // /* MW 1 */ + 8930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8931 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 127 22 + 8932 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8933 "00101001" // /* MW 3 */ + 8934 "00011100" // /* MW 2 */ + 8935 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 24 first + 8936 "10011000" // LDA el0, [p1], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8937 "00101110" // /* MW 3 */ + 8938 "01101100" // /* MW 2 */ + 8939 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8940 "10011000" // LDA.s16 r0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8941 "00010010" // /* MW 3 */ + 8942 "00000100" // /* MW 2 */ + 8943 "00000001" // /* MW 1 */ + 8944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8945 "00000000" // /* MW 1 */ + 8946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8947 "00000000" // /* MW 1 */ + 8948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8949 "00000000" // /* MW 1 */ + 8950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8951 "00000000" // /* MW 1 */ + 8952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8953 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 128 22 first + 8954 "10011000" // ST el0, [p0], #24 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8955 "00101001" // /* MW 3 */ + 8956 "01101100" // /* MW 2 */ + 8957 "00001000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 113 33 first + 8958 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8959 "00010111" // /* MW 3 */ + 8960 "00000100" // /* MW 2 */ + 8961 "00000000" // /* MW 1 */ + 8962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8963 "00000000" // /* MW 1 */ + 8964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8965 "00000000" // /* MW 1 */ + 8966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8967 "00000000" // /* MW 1 */ + 8968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8969 "00000000" // /* MW 1 */ + 8970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8971 "00000000" // /* MW 1 */ + 8972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8973 "00000000" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 first + 8974 "10011000" // LDA.s16 r0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8975 "00010010" // /* MW 3 */ + 8976 "00100100" // /* MW 2 */ + 8977 "00000001" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 114 33 + 8978 "00011000" // ST.s16 r0, [p0, #2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 8979 "00010111" // /* MW 3 */ + 8980 "00010100" // /* MW 2 */ + 8981 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 130 4 first + 8982 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 8983 "00000000" // /* MW 3 */ + 8984 "00101000" // /* MW 2 */ + 8985 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8987 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8989 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8991 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 8993 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 8994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE5setupER26elementwise_unary_params_tIS4_EPKv___func_end0 + 8995 "00000000" // /* MW 1 */ +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_begin0 +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.function run _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 136 first +.src_ref 3 "elementwise_unary.h" 142 37 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 171 19 +.function_start + 9008 "10110110" // MOVA dj0, #-34; VLDB x4, [p0], #64; MOVXM ls, #9136 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9009 "00010000" // /* MW 11 */ + 9010 "11011000" // /* MW 10 */ + 9011 "01111001" // /* MW 9 */ + 9012 "00001000" // /* MW 8 */ + 9013 "00000000" // /* MW 7 */ + 9014 "00000000" // /* MW 6 */ + 9015 "01101000" // /* MW 5 */ + 9016 "00111010" // /* MW 4 */ + 9017 "10000000" // /* MW 3 */ + 9018 "11000010" // /* MW 2 */ + 9019 "11111011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 142 78 +.src_ref 3 "elementwise_unary.h" 154 8 first +.src_ref 3 "elementwise_unary.h" 190 19 first + 9020 "10110110" // MOVA r17, #-6; VLDB x2, [p0], #64; MOVXM le, #9184 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 9021 "00010000" // /* MW 11 */ + 9022 "11110000" // /* MW 10 */ + 9023 "10111001" // /* MW 9 */ + 9024 "00001001" // /* MW 8 */ + 9025 "00000000" // /* MW 7 */ + 9026 "00000000" // /* MW 6 */ + 9027 "01101000" // /* MW 5 */ + 9028 "00111001" // /* MW 4 */ + 9029 "00000000" // /* MW 3 */ + 9030 "01010001" // /* MW 2 */ + 9031 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 + 9032 "11111000" // MOV r0, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9033 "11000000" // /* MW 3 */ + 9034 "00010100" // /* MW 2 */ + 9035 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 136 first + 9036 "00011000" // ADD.NC p2, r0, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9037 "00010000" // /* MW 3 */ + 9038 "01100000" // /* MW 2 */ + 9039 "00011010" // /* MW 1 */ +.src_ref 8 "clip_impl.h" 103 16 first + 9040 "10011000" // LDA.s16 r2, [p2], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9041 "01010010" // /* MW 3 */ + 9042 "00011100" // /* MW 2 */ + 9043 "00000010" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 142 37 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9044 "10011000" // LDA r0, [p2, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9045 "00010110" // /* MW 3 */ + 9046 "00000000" // /* MW 2 */ + 9047 "00000010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 8 "clip_impl.h" 104 16 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9048 "00111100" // LDA.s16 r1, [p2]; VLDB x4, [p0], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9049 "01101000" // /* MW 5 */ + 9050 "00111010" // /* MW 4 */ + 9051 "01010000" // /* MW 3 */ + 9052 "10000110" // /* MW 2 */ + 9053 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9055 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9057 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9059 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9060 "00011000" // VLDB x2, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9061 "10110100" // /* MW 3 */ + 9062 "00011100" // /* MW 2 */ + 9063 "00111000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9064 "11111000" // VBCST.16 x0, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9065 "01110010" // /* MW 3 */ + 9066 "00001001" // /* MW 2 */ + 9067 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 142 78 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9068 "00111010" // VLDB x4, [p0], #64; LSHL r17, r0, r17; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9069 "01111000" // /* MW 9 */ + 9070 "00110110" // /* MW 8 */ + 9071 "01010000" // /* MW 7 */ + 9072 "11101101" // /* MW 6 */ + 9073 "00011000" // /* MW 5 */ + 9074 "00000001" // /* MW 4 */ + 9075 "01101000" // /* MW 3 */ + 9076 "00111010" // /* MW 2 */ + 9077 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_unary.h" 154 8 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9078 "10011000" // ADD.NC lc, r17, #-3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9079 "11111110" // /* MW 3 */ + 9080 "01111000" // /* MW 2 */ + 9081 "00011101" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9082 "11111000" // VBCST.16 x1, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9083 "01110010" // /* MW 3 */ + 9084 "10000101" // /* MW 2 */ + 9085 "00011000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9086 "11111000" // VMIN_GE.bf16 x3, r16, x5, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9087 "10101100" // /* MW 3 */ + 9088 "10101000" // /* MW 2 */ + 9089 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9090 "01111110" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 9091 "01100000" // /* MW 13 */ + 9092 "00101011" // /* MW 12 */ + 9093 "00000000" // /* MW 11 */ + 9094 "11001111" // /* MW 10 */ + 9095 "00000110" // /* MW 9 */ + 9096 "00110001" // /* MW 8 */ + 9097 "00000000" // /* MW 7 */ + 9098 "00000000" // /* MW 6 */ + 9099 "01101000" // /* MW 5 */ + 9100 "00111001" // /* MW 4 */ + 9101 "11110000" // /* MW 3 */ + 9102 "00101100" // /* MW 2 */ + 9103 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9104 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9105 "00000000" // /* MW 15 */ + 9106 "00000000" // /* MW 14 */ + 9107 "01111000" // /* MW 13 */ + 9108 "01010110" // /* MW 12 */ + 9109 "11011000" // /* MW 11 */ + 9110 "00000001" // /* MW 10 */ + 9111 "00000000" // /* MW 9 */ + 9112 "00000000" // /* MW 8 */ + 9113 "11010011" // /* MW 7 */ + 9114 "00011100" // /* MW 6 */ + 9115 "00100001" // /* MW 5 */ + 9116 "00000000" // /* MW 4 */ + 9117 "11110000" // /* MW 3 */ + 9118 "00101100" // /* MW 2 */ + 9119 "00000000" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9120 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9121 "00000000" // /* MW 15 */ + 9122 "00000000" // /* MW 14 */ + 9123 "01111000" // /* MW 13 */ + 9124 "00110110" // /* MW 12 */ + 9125 "01010000" // /* MW 11 */ + 9126 "00000001" // /* MW 10 */ + 9127 "00000000" // /* MW 9 */ + 9128 "00000000" // /* MW 8 */ + 9129 "01011011" // /* MW 7 */ + 9130 "00000001" // /* MW 6 */ + 9131 "00100000" // /* MW 5 */ + 9132 "00000000" // /* MW 4 */ + 9133 "11110000" // /* MW 3 */ + 9134 "00101100" // /* MW 2 */ + 9135 "00000000" // /* MW 1 */ +.label ZLS_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_128 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 171 19 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 9136 "11100001" // NOPA; VLDB x4, [p0], #64; VST x7, [p1], #64; NOPX; VMIN_GE.bf16 x3, r16, x5, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9137 "00000000" // /* MW 15 */ + 9138 "00000000" // /* MW 14 */ + 9139 "01111000" // /* MW 13 */ + 9140 "01010110" // /* MW 12 */ + 9141 "11010100" // /* MW 11 */ + 9142 "00000000" // /* MW 10 */ + 9143 "00000000" // /* MW 9 */ + 9144 "00000000" // /* MW 8 */ + 9145 "11010011" // /* MW 7 */ + 9146 "00011101" // /* MW 6 */ + 9147 "01101001" // /* MW 5 */ + 9148 "00111010" // /* MW 4 */ + 9149 "11110000" // /* MW 3 */ + 9150 "00101100" // /* MW 2 */ + 9151 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 190 19 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9152 "11100001" // NOPA; VLDB x2, [p0], #64; NOPS; NOPX; VMAX_LT.bf16 x6, r16, x2, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9153 "00000000" // /* MW 15 */ + 9154 "00000000" // /* MW 14 */ + 9155 "01111000" // /* MW 13 */ + 9156 "00110110" // /* MW 12 */ + 9157 "10001000" // /* MW 11 */ + 9158 "00000001" // /* MW 10 */ + 9159 "00000000" // /* MW 9 */ + 9160 "00000000" // /* MW 8 */ + 9161 "01011011" // /* MW 7 */ + 9162 "00000001" // /* MW 6 */ + 9163 "01101000" // /* MW 5 */ + 9164 "00111001" // /* MW 4 */ + 9165 "11110000" // /* MW 3 */ + 9166 "00101100" // /* MW 2 */ + 9167 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9168 "11100001" // NOPA; NOPB; VST x3, [p1], #64; NOPX; VMIN_GE.bf16 x7, r16, x6, x1; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9169 "00000000" // /* MW 15 */ + 9170 "00000000" // /* MW 14 */ + 9171 "01111000" // /* MW 13 */ + 9172 "01010110" // /* MW 12 */ + 9173 "11011000" // /* MW 11 */ + 9174 "00000001" // /* MW 10 */ + 9175 "00000000" // /* MW 9 */ + 9176 "00000000" // /* MW 8 */ + 9177 "11010011" // /* MW 7 */ + 9178 "00011100" // /* MW 6 */ + 9179 "00100001" // /* MW 5 */ + 9180 "00000000" // /* MW 4 */ + 9181 "11110000" // /* MW 3 */ + 9182 "00101100" // /* MW 2 */ + 9183 "00000000" // /* MW 1 */ +.label ZLE_F_ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E_176 +.src_ref 4 "max_min.hpp" 20 104 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9184 "11100001" // NOPA; NOPB; NOPS; NOPX; VMAX_LT.bf16 x5, r16, x4, x0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9185 "00000000" // /* MW 15 */ + 9186 "00000000" // /* MW 14 */ + 9187 "01111000" // /* MW 13 */ + 9188 "00110110" // /* MW 12 */ + 9189 "01010000" // /* MW 11 */ + 9190 "00000001" // /* MW 10 */ + 9191 "00000000" // /* MW 9 */ + 9192 "00000000" // /* MW 8 */ + 9193 "01011011" // /* MW 7 */ + 9194 "00000001" // /* MW 6 */ + 9195 "00100000" // /* MW 5 */ + 9196 "00000000" // /* MW 4 */ + 9197 "11110000" // /* MW 3 */ + 9198 "00101100" // /* MW 2 */ + 9199 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 9200 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9201 "01110000" // /* MW 7 */ + 9202 "01010110" // /* MW 6 */ + 9203 "11010100" // /* MW 5 */ + 9204 "00000000" // /* MW 4 */ + 9205 "01100000" // /* MW 3 */ + 9206 "10111010" // /* MW 2 */ + 9207 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9208 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9209 "01101100" // /* MW 3 */ + 9210 "00010000" // /* MW 2 */ + 9211 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 195 20 first + 9212 "00000010" // VST x3, [p1], #64; VMIN_GE.bf16 x7, r16, x6, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9213 "01110000" // /* MW 7 */ + 9214 "01010110" // /* MW 6 */ + 9215 "11011000" // /* MW 5 */ + 9216 "00000001" // /* MW 4 */ + 9217 "01100000" // /* MW 3 */ + 9218 "10011010" // /* MW 2 */ + 9219 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.src_ref 3 "elementwise_unary.h" 158 4 first + 9220 "11100100" // RET lr; VMAX_LT.bf16 x5, r16, x4, x0 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 9221 "11011001" // /* MW 5 */ + 9222 "01000000" // /* MW 4 */ + 9223 "00000101" // /* MW 3 */ + 9224 "00000000" // /* MW 2 */ + 9225 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 4 "max_min.hpp" 21 104 first +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9226 "00000010" // VST x7, [p1], #64; VMIN_GE.bf16 x3, r16, x5, x1 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9227 "01110000" // /* MW 7 */ + 9228 "01010110" // /* MW 6 */ + 9229 "11010100" // /* MW 5 */ + 9230 "00000000" // /* MW 4 */ + 9231 "01100000" // /* MW 3 */ + 9232 "10111010" // /* MW 2 */ + 9233 "00100011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 20 104 first +.delay_slot + 9234 "11111000" // VMAX_LT.bf16 x6, r16, x2, x0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9235 "01101100" // /* MW 3 */ + 9236 "00010000" // /* MW 2 */ + 9237 "00011011" // /* MW 1 */ +.src_ref 4 "max_min.hpp" 21 104 first +.delay_slot + 9238 "11111000" // VMIN_GE.bf16 x7, r16, x6, x1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9239 "10101100" // /* MW 3 */ + 9240 "10110000" // /* MW 2 */ + 9241 "00011011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 3 "elementwise_unary.h" 195 20 first +.delay_slot + 9242 "00011000" // VST x3, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9243 "11010011" // /* MW 3 */ + 9244 "00011100" // /* MW 2 */ + 9245 "00001001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_unary.h" 176 20 first +.delay_slot + 9246 "00011000" // VST x7, [p1], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9247 "11010011" // /* MW 3 */ + 9248 "00011101" // /* MW 2 */ +.label _ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E__end +.label __ZN17elementwise_unaryI8bfloat1616elementwise_clipIS0_E20clip_internal_paramsIS0_EE3runEPS0_S6_R26elementwise_unary_params_tIS4_E___func_end0 + 9249 "00001001" // /* MW 1 */ +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_clip1d _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 176 first +.src_ref 7 "superkernels.cpp" 181 6 +.function_start + 9264 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9265 "10000000" // /* MW 5 */ + 9266 "11001000" // /* MW 4 */ + 9267 "11000110" // /* MW 3 */ + 9268 "00000111" // /* MW 2 */ + 9269 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 first + 9270 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9271 "11000001" // /* MW 5 */ + 9272 "10110101" // /* MW 4 */ + 9273 "11011000" // /* MW 3 */ + 9274 "11000010" // /* MW 2 */ + 9275 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 176 + 9276 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9277 "00000001" // /* MW 5 */ + 9278 "00000000" // /* MW 4 */ + 9279 "00000000" // /* MW 3 */ + 9280 "00001000" // /* MW 2 */ + 9281 "00000000" // /* MW 1 */ + 9282 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9283 "01110000" // /* MW 7 */ + 9284 "11010000" // /* MW 6 */ + 9285 "00001011" // /* MW 5 */ + 9286 "00000000" // /* MW 4 */ + 9287 "10110000" // /* MW 3 */ + 9288 "01100011" // /* MW 2 */ + 9289 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 + 9290 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9291 "00010001" // /* MW 9 */ + 9292 "00101000" // /* MW 8 */ + 9293 "00110010" // /* MW 7 */ + 9294 "11110011" // /* MW 6 */ + 9295 "00000001" // /* MW 5 */ + 9296 "00000000" // /* MW 4 */ + 9297 "10110000" // /* MW 3 */ + 9298 "10000010" // /* MW 2 */ + 9299 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 9300 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9301 "11000000" // /* MW 3 */ + 9302 "11010100" // /* MW 2 */ + 9303 "00011011" // /* MW 1 */ + 9304 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9305 "00000000" // /* MW 1 */ + 9306 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9307 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 181 6 +.src_ref 7 "superkernels.cpp" 181 16 + 9308 "10000100" // JNZ r16, #9472 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9472 delay_slots=5 */ + 9309 "00000001" // /* MW 5 */ + 9310 "01000000" // /* MW 4 */ + 9311 "10000000" // /* MW 3 */ + 9312 "00010010" // /* MW 2 */ + 9313 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 22 first +.delay_slot + 9314 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9315 "10010000" // /* MW 3 */ + 9316 "01100010" // /* MW 2 */ + 9317 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 30 +.delay_slot + 9318 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9319 "11111011" // /* MW 3 */ + 9320 "01100011" // /* MW 2 */ + 9321 "00010100" // /* MW 1 */ +.delay_slot + 9322 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9323 "00111101" // /* MW 3 */ + 9324 "11110100" // /* MW 2 */ + 9325 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 178 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 9326 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9327 "01110000" // /* MW 7 */ + 9328 "01100000" // /* MW 6 */ + 9329 "00110000" // /* MW 5 */ + 9330 "00000011" // /* MW 4 */ + 9331 "00110000" // /* MW 3 */ + 9332 "11000110" // /* MW 2 */ + 9333 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 +.src_ref 7 "superkernels.cpp" 195 2 +.delay_slot + 9334 "01000100" // MOVXM p0, #509440 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9335 "00000000" // /* MW 5 */ + 9336 "11001100" // /* MW 4 */ + 9337 "11000000" // /* MW 3 */ + 9338 "00000111" // /* MW 2 */ + 9339 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9340 "01000100" // MOVXM p2, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9341 "11001000" // /* MW 5 */ + 9342 "11001000" // /* MW 4 */ + 9343 "11000100" // /* MW 3 */ + 9344 "00000111" // /* MW 2 */ + 9345 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9346 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9347 "00010000" // /* MW 9 */ + 9348 "00110000" // /* MW 8 */ + 9349 "00110010" // /* MW 7 */ + 9350 "11110001" // /* MW 6 */ + 9351 "00000001" // /* MW 5 */ + 9352 "00000000" // /* MW 4 */ + 9353 "11100000" // /* MW 3 */ + 9354 "11000000" // /* MW 2 */ + 9355 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9356 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9357 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 184 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 9358 "00000100" // JL #8896 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=8896 delay_slots=5 */ + 9359 "00000001" // /* MW 5 */ + 9360 "00000000" // /* MW 4 */ + 9361 "01100000" // /* MW 3 */ + 9362 "00010001" // /* MW 2 */ + 9363 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9365 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9366 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9367 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9368 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9369 "00110001" // /* MW 3 */ + 9370 "00100000" // /* MW 2 */ + 9371 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 9372 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9373 "00000101" // /* MW 3 */ + 9374 "00100000" // /* MW 2 */ + 9375 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 9376 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9377 "00000000" // /* MW 15 */ + 9378 "00000000" // /* MW 14 */ + 9379 "01111000" // /* MW 13 */ + 9380 "10100101" // /* MW 12 */ + 9381 "00000001" // /* MW 11 */ + 9382 "00000000" // /* MW 10 */ + 9383 "00000000" // /* MW 9 */ + 9384 "10000000" // /* MW 8 */ + 9385 "00010001" // /* MW 7 */ + 9386 "00000110" // /* MW 6 */ + 9387 "00100010" // /* MW 5 */ + 9388 "00000000" // /* MW 4 */ + 9389 "11110000" // /* MW 3 */ + 9390 "00101100" // /* MW 2 */ + 9391 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 +.return_address + 9392 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9393 "10100000" // /* MW 5 */ + 9394 "11001000" // /* MW 4 */ + 9395 "11000100" // /* MW 3 */ + 9396 "00000111" // /* MW 2 */ + 9397 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 18 first +.src_ref 7 "superkernels.cpp" 188 43 + 9398 "10111010" // LDA r16, [p2]; MOVXM p2, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9399 "00010000" // /* MW 9 */ + 9400 "00000000" // /* MW 8 */ + 9401 "00110011" // /* MW 7 */ + 9402 "11110001" // /* MW 6 */ + 9403 "00000001" // /* MW 5 */ + 9404 "00000000" // /* MW 4 */ + 9405 "11010000" // /* MW 3 */ + 9406 "11000010" // /* MW 2 */ + 9407 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 +.src_ref 7 "superkernels.cpp" 188 43 +.src_ref 7 "superkernels.cpp" 195 2 + 9408 "10111010" // LDA r17, [p2]; MOVXM p2, #509440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9409 "00010000" // /* MW 9 */ + 9410 "00000000" // /* MW 8 */ + 9411 "00110011" // /* MW 7 */ + 9412 "11110001" // /* MW 6 */ + 9413 "00000001" // /* MW 5 */ + 9414 "00000000" // /* MW 4 */ + 9415 "11010000" // /* MW 3 */ + 9416 "11000110" // /* MW 2 */ + 9417 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 29 first +.src_ref 7 "superkernels.cpp" 188 16 +.src_ref 7 "superkernels.cpp" 193 47 + 9418 "10111010" // LDA.u16 r18, [p2, #8]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9419 "00010000" // /* MW 9 */ + 9420 "00101010" // /* MW 8 */ + 9421 "10110010" // /* MW 7 */ + 9422 "11110000" // /* MW 6 */ + 9423 "00000001" // /* MW 5 */ + 9424 "00000000" // /* MW 4 */ + 9425 "01010000" // /* MW 3 */ + 9426 "11001011" // /* MW 2 */ + 9427 "01001000" // /* MW 1 */ + 9428 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9429 "00000000" // /* MW 1 */ + 9430 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9431 "00000000" // /* MW 1 */ + 9432 "10000100" // J #9488 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=9488 delay_slots=5 */ + 9433 "00000000" // /* MW 5 */ + 9434 "00000000" // /* MW 4 */ + 9435 "10001000" // /* MW 3 */ + 9436 "00010010" // /* MW 2 */ + 9437 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 +.delay_slot + 9438 "01000100" // MOVXM p0, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9439 "10111000" // /* MW 5 */ + 9440 "11001000" // /* MW 4 */ + 9441 "11000000" // /* MW 3 */ + 9442 "00000111" // /* MW 2 */ + 9443 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9444 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9445 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 27 first +.delay_slot + 9446 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9447 "00001111" // /* MW 3 */ + 9448 "01100001" // /* MW 2 */ + 9449 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 186 13 first +.delay_slot + 9450 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9451 "10100011" // /* MW 5 */ + 9452 "00001100" // /* MW 4 */ + 9453 "11110000" // /* MW 3 */ + 9454 "00101100" // /* MW 2 */ + 9455 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 188 16 first +.delay_slot + 9456 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9457 "00000000" // /* MW 15 */ + 9458 "00000000" // /* MW 14 */ + 9459 "01111000" // /* MW 13 */ + 9460 "10100101" // /* MW 12 */ + 9461 "00000001" // /* MW 11 */ + 9462 "00000000" // /* MW 10 */ + 9463 "00000000" // /* MW 9 */ + 9464 "10000000" // /* MW 8 */ + 9465 "00010001" // /* MW 7 */ + 9466 "00000110" // /* MW 6 */ + 9467 "00100001" // /* MW 5 */ + 9468 "00000000" // /* MW 4 */ + 9469 "11110000" // /* MW 3 */ + 9470 "00101100" // /* MW 2 */ + 9471 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 193 47 +.src_ref 7 "superkernels.cpp" 195 2 + 9472 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9473 "00000000" // /* MW 15 */ + 9474 "00000000" // /* MW 14 */ + 9475 "00010000" // /* MW 13 */ + 9476 "00101010" // /* MW 12 */ + 9477 "10110010" // /* MW 11 */ + 9478 "11110000" // /* MW 10 */ + 9479 "00000001" // /* MW 9 */ + 9480 "00000000" // /* MW 8 */ + 9481 "10001011" // /* MW 7 */ + 9482 "10000000" // /* MW 6 */ + 9483 "00100010" // /* MW 5 */ + 9484 "00000000" // /* MW 4 */ + 9485 "11110000" // /* MW 3 */ + 9486 "00101100" // /* MW 2 */ + 9487 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 9488 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 9489 "00000000" // /* MW 7 */ + 9490 "11000011" // /* MW 6 */ + 9491 "10110011" // /* MW 5 */ + 9492 "00000011" // /* MW 4 */ + 9493 "01100000" // /* MW 3 */ + 9494 "10010001" // /* MW 2 */ + 9495 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 9496 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9497 "00010000" // /* MW 9 */ + 9498 "00100000" // /* MW 8 */ + 9499 "00110010" // /* MW 7 */ + 9500 "11110000" // /* MW 6 */ + 9501 "00000001" // /* MW 5 */ + 9502 "00000000" // /* MW 4 */ + 9503 "11010000" // /* MW 3 */ + 9504 "11101110" // /* MW 2 */ + 9505 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 9506 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9507 "00010110" // /* MW 3 */ + 9508 "11111110" // /* MW 2 */ + 9509 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 9510 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9511 "00110110" // /* MW 3 */ + 9512 "11111110" // /* MW 2 */ + 9513 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 9514 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9515 "01010110" // /* MW 3 */ + 9516 "01000110" // /* MW 2 */ + 9517 "00000111" // /* MW 1 */ + 9518 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9519 "00000000" // /* MW 1 */ + 9520 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9521 "00000000" // /* MW 1 */ + 9522 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9523 "00000000" // /* MW 1 */ + 9524 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9525 "00000000" // /* MW 1 */ + 9526 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9527 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 9528 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9529 "00000010" // /* MW 3 */ + 9530 "01100001" // /* MW 2 */ + 9531 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 9532 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9533 "00010001" // /* MW 3 */ + 9534 "00000110" // /* MW 2 */ + 9535 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 9536 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9537 "11111101" // /* MW 3 */ + 9538 "11100000" // /* MW 2 */ + 9539 "00010111" // /* MW 1 */ + 9540 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9541 "00000000" // /* MW 1 */ + 9542 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9543 "00000000" // /* MW 1 */ + 9544 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9545 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 9546 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9547 "00001000" // /* MW 3 */ + 9548 "10010011" // /* MW 2 */ + 9549 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 + 9550 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9551 "10000001" // /* MW 5 */ + 9552 "10101101" // /* MW 4 */ + 9553 "10100111" // /* MW 3 */ + 9554 "00000000" // /* MW 2 */ + 9555 "00000100" // /* MW 1 */ + 9556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9557 "00000000" // /* MW 1 */ + 9558 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9559 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first + 9560 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9561 "00110110" // /* MW 3 */ + 9562 "00000110" // /* MW 2 */ + 9563 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 9564 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9565 "10000001" // /* MW 5 */ + 9566 "11011101" // /* MW 4 */ + 9567 "11011100" // /* MW 3 */ + 9568 "11001010" // /* MW 2 */ + 9569 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 47 first + 9570 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9571 "01110110" // /* MW 3 */ + 9572 "00000110" // /* MW 2 */ + 9573 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 9574 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9575 "10011110" // /* MW 3 */ + 9576 "01011100" // /* MW 2 */ + 9577 "00000111" // /* MW 1 */ + 9578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9579 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 195 2 first +.no_stack_arguments + 9580 "00000100" // JL #9008 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9008 delay_slots=5 */ + 9581 "00000001" // /* MW 5 */ + 9582 "00000000" // /* MW 4 */ + 9583 "10011000" // /* MW 3 */ + 9584 "00010001" // /* MW 2 */ + 9585 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9587 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 first +.delay_slot + 9588 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9589 "00000111" // /* MW 3 */ + 9590 "01100010" // /* MW 2 */ + 9591 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 192 2 +.delay_slot + 9592 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9593 "00110001" // /* MW 3 */ + 9594 "00000110" // /* MW 2 */ + 9595 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 first +.delay_slot + 9596 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9597 "00001101" // /* MW 3 */ + 9598 "11100001" // /* MW 2 */ + 9599 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 193 45 +.delay_slot + 9600 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 9601 "00000000" // /* MW 15 */ + 9602 "00000000" // /* MW 14 */ + 9603 "10101000" // /* MW 13 */ + 9604 "10100000" // /* MW 12 */ + 9605 "00110100" // /* MW 11 */ + 9606 "00000000" // /* MW 10 */ + 9607 "00000000" // /* MW 9 */ + 9608 "00000000" // /* MW 8 */ + 9609 "01011011" // /* MW 7 */ + 9610 "00000001" // /* MW 6 */ + 9611 "00100000" // /* MW 5 */ + 9612 "00000000" // /* MW 4 */ + 9613 "11110000" // /* MW 3 */ + 9614 "00101100" // /* MW 2 */ + 9615 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 +.src_ref 7 "superkernels.cpp" 198 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 9616 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9617 "00010000" // /* MW 9 */ + 9618 "00100000" // /* MW 8 */ + 9619 "00110010" // /* MW 7 */ + 9620 "11110011" // /* MW 6 */ + 9621 "00000001" // /* MW 5 */ + 9622 "00000000" // /* MW 4 */ + 9623 "11010000" // /* MW 3 */ + 9624 "11000110" // /* MW 2 */ + 9625 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 9626 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9627 "00000101" // /* MW 3 */ + 9628 "00100000" // /* MW 2 */ + 9629 "00010000" // /* MW 1 */ + 9630 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9631 "00000000" // /* MW 1 */ + 9632 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9633 "00000000" // /* MW 1 */ + 9634 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9635 "00000000" // /* MW 1 */ + 9636 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9637 "00000000" // /* MW 1 */ + 9638 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9639 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 9640 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9641 "00001000" // /* MW 3 */ + 9642 "01010001" // /* MW 2 */ + 9643 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 9644 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9645 "00010000" // /* MW 9 */ + 9646 "00101110" // /* MW 8 */ + 9647 "00110010" // /* MW 7 */ + 9648 "11110001" // /* MW 6 */ + 9649 "00000001" // /* MW 5 */ + 9650 "00000000" // /* MW 4 */ + 9651 "11010000" // /* MW 3 */ + 9652 "11001110" // /* MW 2 */ + 9653 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 first + 9654 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9655 "00110110" // /* MW 3 */ + 9656 "00000110" // /* MW 2 */ + 9657 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 19 + 9658 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9659 "01010110" // /* MW 3 */ + 9660 "00000110" // /* MW 2 */ + 9661 "00000010" // /* MW 1 */ + 9662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9663 "00000000" // /* MW 1 */ + 9664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9665 "00000000" // /* MW 1 */ + 9666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9667 "00000000" // /* MW 1 */ + 9668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 9670 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9671 "00110001" // /* MW 3 */ + 9672 "00100001" // /* MW 2 */ + 9673 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 9674 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9675 "00010001" // /* MW 3 */ + 9676 "11100110" // /* MW 2 */ + 9677 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 16 first + 9678 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9679 "00101000" // /* MW 3 */ + 9680 "01100001" // /* MW 2 */ + 9681 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 197 6 + 9682 "10000100" // JNZ r16, #9712 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=9712 delay_slots=5 */ + 9683 "00000001" // /* MW 5 */ + 9684 "01000000" // /* MW 4 */ + 9685 "11111000" // /* MW 3 */ + 9686 "00010010" // /* MW 2 */ + 9687 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9693 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9694 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9695 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9696 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9697 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 + 9698 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9699 "00000001" // /* MW 3 */ + 9700 "00100000" // /* MW 2 */ + 9701 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 198 14 first + 9702 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9703 "00000000" // /* MW 9 */ + 9704 "00000000" // /* MW 8 */ + 9705 "00000000" // /* MW 7 */ + 9706 "10000000" // /* MW 6 */ + 9707 "00010001" // /* MW 5 */ + 9708 "00000110" // /* MW 4 */ + 9709 "11110110" // /* MW 3 */ + 9710 "00101100" // /* MW 2 */ + 9711 "00000000" // /* MW 1 */ +.label TGT_F_Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 200 + 9712 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9713 "00111001" // /* MW 3 */ + 9714 "11110100" // /* MW 2 */ + 9715 "00000111" // /* MW 1 */ + 9716 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9717 "00011001" // /* MW 3 */ + 9718 "11111011" // /* MW 2 */ + 9719 "00000111" // /* MW 1 */ + 9720 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9721 "00000000" // /* MW 1 */ + 9722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9723 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 9724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9725 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 9726 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9727 "11110001" // /* MW 3 */ + 9728 "11111101" // /* MW 2 */ + 9729 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9731 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 9732 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9733 "00000000" // /* MW 3 */ + 9734 "00101000" // /* MW 2 */ + 9735 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9736 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9737 "10100000" // /* MW 3 */ + 9738 "01100111" // /* MW 2 */ + 9739 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 200 +.delay_slot + 9740 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9741 "00000001" // /* MW 5 */ + 9742 "00000000" // /* MW 4 */ + 9743 "00000000" // /* MW 3 */ + 9744 "11111000" // /* MW 2 */ + 9745 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9747 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9748 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9749 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z18superkernel_clip1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 9751 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 9760 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9761 "01011000" // /* MW 9 */ + 9762 "00000000" // /* MW 8 */ + 9763 "00001000" // /* MW 7 */ + 9764 "00001011" // /* MW 6 */ + 9765 "00100000" // /* MW 5 */ + 9766 "00001000" // /* MW 4 */ + 9767 "11010000" // /* MW 3 */ + 9768 "10000101" // /* MW 2 */ + 9769 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 9770 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9771 "00000001" // /* MW 3 */ + 9772 "10000000" // /* MW 2 */ + 9773 "00010111" // /* MW 1 */ + 9774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9775 "00000000" // /* MW 1 */ + 9776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9777 "00000000" // /* MW 1 */ + 9778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9779 "00000000" // /* MW 1 */ + 9780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9781 "00000000" // /* MW 1 */ + 9782 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9783 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 9784 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9785 "00101001" // /* MW 3 */ + 9786 "00011100" // /* MW 2 */ + 9787 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 9788 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9789 "00101110" // /* MW 3 */ + 9790 "00011100" // /* MW 2 */ + 9791 "00000001" // /* MW 1 */ + 9792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9793 "00000000" // /* MW 1 */ + 9794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9795 "00000000" // /* MW 1 */ + 9796 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9797 "00000000" // /* MW 1 */ + 9798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9799 "00000000" // /* MW 1 */ + 9800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9801 "00000000" // /* MW 1 */ + 9802 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9803 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 9804 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9805 "00101001" // /* MW 3 */ + 9806 "00011100" // /* MW 2 */ + 9807 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 9808 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9809 "00101110" // /* MW 3 */ + 9810 "00000100" // /* MW 2 */ + 9811 "00000001" // /* MW 1 */ + 9812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9813 "00000000" // /* MW 1 */ + 9814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9815 "00000000" // /* MW 1 */ + 9816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9817 "00000000" // /* MW 1 */ + 9818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9819 "00000000" // /* MW 1 */ + 9820 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9821 "00000000" // /* MW 1 */ + 9822 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9823 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 9824 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9825 "00101001" // /* MW 3 */ + 9826 "00011100" // /* MW 2 */ + 9827 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 9828 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9829 "01110110" // /* MW 3 */ + 9830 "00010100" // /* MW 2 */ + 9831 "00000001" // /* MW 1 */ + 9832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9833 "00000000" // /* MW 1 */ + 9834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9835 "00000000" // /* MW 1 */ + 9836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9837 "00000000" // /* MW 1 */ + 9838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9839 "00000000" // /* MW 1 */ + 9840 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9841 "00000000" // /* MW 1 */ + 9842 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9843 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9844 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9845 "01110001" // /* MW 3 */ + 9846 "01001100" // /* MW 2 */ + 9847 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9848 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9849 "00010111" // /* MW 3 */ + 9850 "00000100" // /* MW 2 */ + 9851 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9852 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9853 "00000000" // /* MW 3 */ + 9854 "00101000" // /* MW 2 */ + 9855 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9856 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9857 "00000000" // /* MW 5 */ + 9858 "10111110" // /* MW 4 */ + 9859 "11110000" // /* MW 3 */ + 9860 "00000000" // /* MW 2 */ + 9861 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9862 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9863 "00010100" // /* MW 3 */ + 9864 "11000010" // /* MW 2 */ + 9865 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9866 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9867 "00100111" // /* MW 3 */ + 9868 "01110110" // /* MW 2 */ + 9869 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 9870 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9871 "10000010" // /* MW 3 */ + 9872 "00000001" // /* MW 2 */ + 9873 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9874 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE21shared_setup_backboneER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9875 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 9888 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9889 "00000001" // /* MW 5 */ + 9890 "00000000" // /* MW 4 */ + 9891 "00000000" // /* MW 3 */ + 9892 "00001000" // /* MW 2 */ + 9893 "00000000" // /* MW 1 */ + 9894 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9895 "00111101" // /* MW 3 */ + 9896 "11111000" // /* MW 2 */ + 9897 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 9898 "00000100" // JL #9760 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9760 delay_slots=5 */ + 9899 "00000001" // /* MW 5 */ + 9900 "00000000" // /* MW 4 */ + 9901 "00010000" // /* MW 3 */ + 9902 "00010011" // /* MW 2 */ + 9903 "00000000" // /* MW 1 */ +.delay_slot + 9904 "10011000" // ST p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9905 "10011101" // /* MW 3 */ + 9906 "11111111" // /* MW 2 */ + 9907 "00001111" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot + 9908 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9909 "11000000" // /* MW 3 */ + 9910 "01100000" // /* MW 2 */ + 9911 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9912 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9913 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9914 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9915 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 9916 "10011000" // NOPA /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9917 "01100111" // /* MW 3 */ + 9918 "00000001" // /* MW 2 */ + 9919 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.return_address + 9920 "00011000" // LDA lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9921 "00111001" // /* MW 3 */ + 9922 "11111000" // /* MW 2 */ + 9923 "00000111" // /* MW 1 */ + 9924 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9925 "00000000" // /* MW 1 */ + 9926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9927 "00000000" // /* MW 1 */ + 9928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9929 "00000000" // /* MW 1 */ + 9930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9931 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 9932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9933 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 9934 "00011000" // LDA p7, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9935 "10011001" // /* MW 3 */ + 9936 "11111111" // /* MW 2 */ + 9937 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9938 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 9939 "00000000" // /* MW 3 */ + 9940 "00101000" // /* MW 2 */ + 9941 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9942 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9943 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9945 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9947 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 193 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9948 "00011000" // MOVX r16, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9949 "00001001" // /* MW 3 */ + 9950 "00100000" // /* MW 2 */ + 9951 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "mul_impl.h" 193 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 9952 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 9953 "01110001" // /* MW 9 */ + 9954 "00000000" // /* MW 8 */ + 9955 "00000000" // /* MW 7 */ + 9956 "00000000" // /* MW 6 */ + 9957 "11111110" // /* MW 5 */ + 9958 "00111111" // /* MW 4 */ + 9959 "00110000" // /* MW 3 */ + 9960 "11000010" // /* MW 2 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 9961 "11101000" // /* MW 1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_begin0 +.function shared_run_backbone _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE +.src_ref 3 "elementwise_binary_shared.h" 107 first +.src_ref 3 "elementwise_binary_shared.h" 119 37 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.function_start + 9968 "11111000" // MOV r0, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9969 "11000000" // /* MW 3 */ + 9970 "00010110" // /* MW 2 */ + 9971 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first + 9972 "00011000" // ADD.NC p3, r0, #14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9973 "00000111" // /* MW 3 */ + 9974 "01100000" // /* MW 2 */ + 9975 "00011011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 122 22 first + 9976 "10011000" // LDA.s16 r2, [p3], #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9977 "01010010" // /* MW 3 */ + 9978 "00011100" // /* MW 2 */ + 9979 "00000011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 15 first + 9980 "10011000" // LDA r4, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9981 "10010110" // /* MW 3 */ + 9982 "00000100" // /* MW 2 */ + 9983 "00000011" // /* MW 1 */ + 9984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9985 "00000000" // /* MW 1 */ + 9986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9987 "00000000" // /* MW 1 */ + 9988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9989 "00000000" // /* MW 1 */ + 9990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 9991 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 9992 "00011000" // MOVX r3, #2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 9993 "00001001" // /* MW 3 */ + 9994 "00000110" // /* MW 2 */ + 9995 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 107 + 9996 "11000100" // PADDXM [sp], #128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 9997 "00000001" // /* MW 5 */ + 9998 "00000000" // /* MW 4 */ + 9999 "00000000" // /* MW 3 */ + 10000 "00010000" // /* MW 2 */ + 10001 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 124 26 + 10002 "10011000" // LTU r3, r3, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10003 "01001100" // /* MW 3 */ + 10004 "11000110" // /* MW 2 */ + 10005 "00010000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 +.src_ref 3 "elementwise_binary_shared.h" 124 8 + 10006 "10111010" // MOVA r1, #0; JNZ r3, #10160 /* MW 10 */ /* control_operation: words=10 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10160 delay_slots=5 */ + 10007 "01100000" // /* MW 9 */ + 10008 "00000000" // /* MW 8 */ + 10009 "00010000" // /* MW 7 */ + 10010 "11110110" // /* MW 6 */ + 10011 "00000100" // /* MW 5 */ + 10012 "00000110" // /* MW 4 */ + 10013 "00000000" // /* MW 3 */ + 10014 "00000001" // /* MW 2 */ + 10015 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 65 25 first +.delay_slot + 10016 "11111000" // VBCST.16 x0, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10017 "01110010" // /* MW 3 */ + 10018 "00000101" // /* MW 2 */ + 10019 "00011000" // /* MW 1 */ +.delay_slot + 10020 "11111000" // MOV r1, p7 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10021 "11000000" // /* MW 3 */ + 10022 "01011110" // /* MW 2 */ + 10023 "00011000" // /* MW 1 */ +.delay_slot + 10024 "11111000" // MOV p7, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10025 "11100000" // /* MW 3 */ + 10026 "01100101" // /* MW 2 */ + 10027 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10028 "11110100" // PADDB [p7], #-64; MOV p5, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10029 "10000001" // /* MW 5 */ + 10030 "11011101" // /* MW 4 */ + 10031 "00001010" // /* MW 3 */ + 10032 "11110010" // /* MW 2 */ + 10033 "11111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 119 37 first +.delay_slot + 10034 "00011000" // VST x0, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10035 "00010011" // /* MW 3 */ + 10036 "00000100" // /* MW 2 */ + 10037 "00001111" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 126 34 +.src_ref 3 "elementwise_binary_shared.h" 131 19 +.src_ref 3 "elementwise_binary_shared.h" 131 19 + 10038 "10111010" // MOVA dj0, #12; MOVS p4, r0; VBCST.16 x0, r2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10039 "01110010" // /* MW 9 */ + 10040 "10111001" // /* MW 8 */ + 10041 "00000100" // /* MW 7 */ + 10042 "00000000" // /* MW 6 */ + 10043 "00001011" // /* MW 5 */ + 10044 "10000000" // /* MW 4 */ + 10045 "10000100" // /* MW 3 */ + 10046 "10000010" // /* MW 2 */ + 10047 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 126 34 first +.src_ref 3 "elementwise_binary_shared.h" 131 19 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 10048 "01010100" // LDA.u8 r0, [p4, dj0]; MOV m2, #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10049 "00000001" // /* MW 5 */ + 10050 "00000001" // /* MW 4 */ + 10051 "01010100" // /* MW 3 */ + 10052 "00000001" // /* MW 2 */ + 10053 "10000000" // /* MW 1 */ + 10054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10055 "00000000" // /* MW 1 */ + 10056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10057 "00000000" // /* MW 1 */ + 10058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10059 "00000000" // /* MW 1 */ + 10060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10061 "00000000" // /* MW 1 */ + 10062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10063 "00000000" // /* MW 1 */ + 10064 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10065 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 131 12 +.src_ref 3 "elementwise_binary_shared.h" 131 35 + 10066 "10000100" // JNZ r0, #10112 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10112 delay_slots=5 */ + 10067 "00000001" // /* MW 5 */ + 10068 "01000000" // /* MW 4 */ + 10069 "11000000" // /* MW 3 */ + 10070 "00010011" // /* MW 2 */ + 10071 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 10072 "10111000" // MOV m0, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10073 "00000000" // /* MW 3 */ + 10074 "00000000" // /* MW 2 */ + 10075 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10076 "01000100" // MOVXM p4, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10077 "11001000" // /* MW 5 */ + 10078 "11001000" // /* MW 4 */ + 10079 "11001000" // /* MW 3 */ + 10080 "00000111" // /* MW 2 */ + 10081 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10083 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10085 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10086 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10087 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 10088 "10111010" // MOVA m1, #0; J #10128 /* MW 10 */ /* control_operation: words=10 jump unconditional cycles_taken=1 direct absolute target_address=10128 delay_slots=5 */ + 10089 "00100000" // /* MW 9 */ + 10090 "00000000" // /* MW 8 */ + 10091 "00000000" // /* MW 7 */ + 10092 "11110010" // /* MW 6 */ + 10093 "00000100" // /* MW 5 */ + 10094 "00000000" // /* MW 4 */ + 10095 "10000000" // /* MW 3 */ + 10096 "00000100" // /* MW 2 */ + 10097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10105 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.delay_slot + 10106 "00001100" // NOPA; VST x0, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10107 "00100110" // /* MW 5 */ + 10108 "00001000" // /* MW 4 */ + 10109 "11110000" // /* MW 3 */ + 10110 "00101100" // /* MW 2 */ + 10111 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_144 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 + 10112 "10111000" // MOV m1, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10113 "10000000" // /* MW 3 */ + 10114 "00000000" // /* MW 2 */ + 10115 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 3 "elementwise_binary_shared.h" 171 16 + 10116 "11110110" // NOPA; NOPB; VST x0, [p1]; MOV m2, #0 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10117 "01010000" // /* MW 11 */ + 10118 "00000000" // /* MW 10 */ + 10119 "00000000" // /* MW 9 */ + 10120 "00000001" // /* MW 8 */ + 10121 "00010011" // /* MW 7 */ + 10122 "00000100" // /* MW 6 */ + 10123 "00100001" // /* MW 5 */ + 10124 "00000000" // /* MW 4 */ + 10125 "11110000" // /* MW 3 */ + 10126 "00101100" // /* MW 2 */ + 10127 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_160 + 10128 "10000100" // J #10288 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10288 delay_slots=5 */ + 10129 "00000000" // /* MW 5 */ + 10130 "00000000" // /* MW 4 */ + 10131 "00011000" // /* MW 3 */ + 10132 "00010100" // /* MW 2 */ + 10133 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.delay_slot + 10134 "00000010" // MOVS p0, p7; MOV p7, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10135 "01110000" // /* MW 7 */ + 10136 "01100000" // /* MW 6 */ + 10137 "10110000" // /* MW 5 */ + 10138 "00000011" // /* MW 4 */ + 10139 "01100000" // /* MW 3 */ + 10140 "10010001" // /* MW 2 */ + 10141 "00010011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10143 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10148 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10149 "10000001" // /* MW 11 */ + 10150 "10101101" // /* MW 10 */ + 10151 "00000000" // /* MW 9 */ + 10152 "00000000" // /* MW 8 */ + 10153 "00000000" // /* MW 7 */ + 10154 "00000000" // /* MW 6 */ + 10155 "00100000" // /* MW 5 */ + 10156 "00000000" // /* MW 4 */ + 10157 "11110000" // /* MW 3 */ + 10158 "00101100" // /* MW 2 */ + 10159 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_192 +.src_ref 3 "elementwise_binary_shared.h" 150 97 + 10160 "00011000" // MOVX r2, #3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10161 "00001101" // /* MW 3 */ + 10162 "00000100" // /* MW 2 */ + 10163 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 97 first + 10164 "10011000" // EQ r2, r2, r4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10165 "01000111" // /* MW 3 */ + 10166 "10000100" // /* MW 2 */ + 10167 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10168 "10000100" // JNZ r2, #10208 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10208 delay_slots=5 */ + 10169 "00000001" // /* MW 5 */ + 10170 "01000000" // /* MW 4 */ + 10171 "11110000" // /* MW 3 */ + 10172 "00010011" // /* MW 2 */ + 10173 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.delay_slot + 10174 "01000100" // MOVXM r0, #1065353216 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10175 "00000000" // /* MW 5 */ + 10176 "00100000" // /* MW 4 */ + 10177 "00000000" // /* MW 3 */ + 10178 "10000000" // /* MW 2 */ + 10179 "00111111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.delay_slot + 10180 "01000100" // MOVXM p4, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10181 "11001000" // /* MW 5 */ + 10182 "11001000" // /* MW 4 */ + 10183 "11001000" // /* MW 3 */ + 10184 "00000111" // /* MW 2 */ + 10185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10187 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10189 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10192 "11100001" // NOPA; NOPB; NOPS; MOVXM r0, #-1082130432; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10193 "00000000" // /* MW 15 */ + 10194 "00000000" // /* MW 14 */ + 10195 "00010000" // /* MW 13 */ + 10196 "00000000" // /* MW 12 */ + 10197 "00001000" // /* MW 11 */ + 10198 "00000000" // /* MW 10 */ + 10199 "11100000" // /* MW 9 */ + 10200 "00101111" // /* MW 8 */ + 10201 "01011011" // /* MW 7 */ + 10202 "00000001" // /* MW 6 */ + 10203 "00100000" // /* MW 5 */ + 10204 "00000000" // /* MW 4 */ + 10205 "11110000" // /* MW 3 */ + 10206 "00101100" // /* MW 2 */ + 10207 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_240 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10208 "10111010" // LDA.s8 r0, [p4]; MOVX vaddSign0, #1; MOV dj0, #-66 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10209 "01011000" // /* MW 9 */ + 10210 "10111110" // /* MW 8 */ + 10211 "01000111" // /* MW 7 */ + 10212 "00000000" // /* MW 6 */ + 10213 "11010010" // /* MW 5 */ + 10214 "00000010" // /* MW 4 */ + 10215 "01010000" // /* MW 3 */ + 10216 "10000000" // /* MW 2 */ + 10217 "10000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "accum.hpp" 946 89 +.src_ref 3 "elementwise_binary_shared.h" 173 18 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10218 "10111000" // MOV m0, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10219 "10000000" // /* MW 3 */ + 10220 "00000000" // /* MW 2 */ + 10221 "00011000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 169 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10222 "10111000" // MOV m1, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10223 "00000000" // /* MW 3 */ + 10224 "00000000" // /* MW 2 */ + 10225 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 171 16 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10226 "10111000" // MOV m2, #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10227 "10000000" // /* MW 3 */ + 10228 "00000000" // /* MW 2 */ + 10229 "00011010" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10231 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10232 "01111000" // VINSERT.32 x0, x0, #0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10233 "00010001" // /* MW 3 */ + 10234 "00000000" // /* MW 2 */ + 10235 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10236 "11010100" // ST.s16 r0, [p5, dj0]; VMOV bmll1, x0 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10237 "00100101" // /* MW 5 */ + 10238 "00000001" // /* MW 4 */ + 10239 "11100010" // /* MW 3 */ + 10240 "00000010" // /* MW 2 */ + 10241 "10100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10242 "00011000" // MOVX crRnd, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10243 "10000000" // /* MW 3 */ + 10244 "00111010" // /* MW 2 */ + 10245 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10246 "00011000" // VCONV.bf16.fp32 wl0, bmll1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10247 "10010110" // /* MW 3 */ + 10248 "01000000" // /* MW 2 */ + 10249 "00001000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10251 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10252 "10111000" // VEXTRACT.16 r0, x0, #0, vaddSign0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10253 "00000001" // /* MW 3 */ + 10254 "00000001" // /* MW 2 */ + 10255 "00011000" // /* MW 1 */ + 10256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10257 "00000000" // /* MW 1 */ + 10258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10259 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 150 78 + 10260 "10011000" // LDA.s16 r0, [p5, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10261 "00010010" // /* MW 3 */ + 10262 "00000000" // /* MW 2 */ + 10263 "00000101" // /* MW 1 */ + 10264 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10265 "00000000" // /* MW 1 */ + 10266 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10267 "00000000" // /* MW 1 */ + 10268 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10269 "00000000" // /* MW 1 */ + 10270 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10271 "00000000" // /* MW 1 */ + 10272 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10273 "00000000" // /* MW 1 */ + 10274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10275 "00000000" // /* MW 1 */ +.src_ref 4 "broadcast.hpp" 56 25 first + 10276 "11111000" // VBCST.16 x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10277 "01110010" // /* MW 3 */ + 10278 "00000001" // /* MW 2 */ + 10279 "00011000" // /* MW 1 */ + 10280 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10281 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first + 10282 "00001100" // NOPA; VST x0, [sp, #-64] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10283 "01100110" // /* MW 5 */ + 10284 "11111000" // /* MW 4 */ + 10285 "11111111" // /* MW 3 */ + 10286 "00101100" // /* MW 2 */ + 10287 "00000000" // /* MW 1 */ +.label TGT_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 166 4 first +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first + 10288 "10110110" // LDA r2, [p3, #-16]; VLDB x1, [p7], m1; MOVXM ls, #10400 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10289 "00010000" // /* MW 11 */ + 10290 "01010000" // /* MW 10 */ + 10291 "01111100" // /* MW 9 */ + 10292 "00001000" // /* MW 8 */ + 10293 "00000000" // /* MW 7 */ + 10294 "00000000" // /* MW 6 */ + 10295 "11101000" // /* MW 5 */ + 10296 "01010000" // /* MW 4 */ + 10297 "11011110" // /* MW 3 */ + 10298 "10001010" // /* MW 2 */ + 10299 "01111000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 166 31 +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 10300 "10110110" // MOVA r3, #-5; VLDB x0, [p1], m2; MOVXM le, #10448 /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10301 "00010000" // /* MW 11 */ + 10302 "01101000" // /* MW 10 */ + 10303 "10111100" // /* MW 9 */ + 10304 "00001001" // /* MW 8 */ + 10305 "00000000" // /* MW 7 */ + 10306 "00000000" // /* MW 6 */ + 10307 "01101000" // /* MW 5 */ + 10308 "10010000" // /* MW 4 */ + 10309 "00000010" // /* MW 3 */ + 10310 "01100011" // /* MW 2 */ + 10311 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 177 44 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10312 "00010010" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;VLDB x1, [p7], m1; MOVX r0, #60 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10313 "11110001" // /* MW 7 */ + 10314 "00000000" // /* MW 6 */ + 10315 "11101000" // /* MW 5 */ + 10316 "01010000" // /* MW 4 */ + 10317 "01111110" // /* MW 3 */ + 10318 "00000101" // /* MW 2 */ + 10319 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10320 "00111100" // LDA.s8 r4, [p4]; VLDB x0, [p1], m2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10321 "01101000" // /* MW 5 */ + 10322 "10010000" // /* MW 4 */ + 10323 "01010010" // /* MW 3 */ + 10324 "10010000" // /* MW 2 */ + 10325 "10000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10327 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10328 "10011000" // VLDA.CONV.fp32.bf16 cml0, [p0], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10329 "00101011" // /* MW 3 */ + 10330 "00001000" // /* MW 2 */ + 10331 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10332 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10333 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 31 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10334 "10011000" // LSHL r2, r2, r3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10335 "00111101" // /* MW 3 */ + 10336 "10000100" // /* MW 2 */ + 10337 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 166 4 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10338 "01100010" // ADD.NC lc, r2, #-3; VMAC.f dm1, dm0, x1, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10339 "00000001" // /* MW 7 */ + 10340 "00000010" // /* MW 6 */ + 10341 "00000001" // /* MW 5 */ + 10342 "10000110" // /* MW 4 */ + 10343 "01111110" // /* MW 3 */ + 10344 "01110001" // /* MW 2 */ + 10345 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.aggressive_scheduled_block_id 2 +.noswbrkpt + 10346 "00111100" // VLDA x0, [p1], m2; VLDB x1, [p7], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10347 "11101000" // /* MW 5 */ + 10348 "01010000" // /* MW 4 */ + 10349 "01111110" // /* MW 3 */ + 10350 "00000011" // /* MW 2 */ + 10351 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10352 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; NOPS; MOVX crRnd, r4; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10353 "00000000" // /* MW 15 */ + 10354 "00000000" // /* MW 14 */ + 10355 "01111000" // /* MW 13 */ + 10356 "10100101" // /* MW 12 */ + 10357 "00000001" // /* MW 11 */ + 10358 "00000000" // /* MW 10 */ + 10359 "11010100" // /* MW 9 */ + 10360 "00001001" // /* MW 8 */ + 10361 "01011011" // /* MW 7 */ + 10362 "00000001" // /* MW 6 */ + 10363 "00100000" // /* MW 5 */ + 10364 "00000000" // /* MW 4 */ + 10365 "01110000" // /* MW 3 */ + 10366 "00000101" // /* MW 2 */ + 10367 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10368 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10369 "00000000" // /* MW 15 */ + 10370 "00000000" // /* MW 14 */ + 10371 "01111000" // /* MW 13 */ + 10372 "10100101" // /* MW 12 */ + 10373 "00000001" // /* MW 11 */ + 10374 "00000000" // /* MW 10 */ + 10375 "00000000" // /* MW 9 */ + 10376 "00000000" // /* MW 8 */ + 10377 "01011011" // /* MW 7 */ + 10378 "00000001" // /* MW 6 */ + 10379 "00100000" // /* MW 5 */ + 10380 "00000000" // /* MW 4 */ + 10381 "11110000" // /* MW 3 */ + 10382 "00101100" // /* MW 2 */ + 10383 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10384 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10385 "00010000" // /* MW 15 */ + 10386 "00001000" // /* MW 14 */ + 10387 "01111000" // /* MW 13 */ + 10388 "10100101" // /* MW 12 */ + 10389 "00000001" // /* MW 11 */ + 10390 "00000000" // /* MW 10 */ + 10391 "00000000" // /* MW 9 */ + 10392 "00000000" // /* MW 8 */ + 10393 "01011011" // /* MW 7 */ + 10394 "00000001" // /* MW 6 */ + 10395 "00100000" // /* MW 5 */ + 10396 "00000000" // /* MW 4 */ + 10397 "11110000" // /* MW 3 */ + 10398 "00101100" // /* MW 2 */ + 10399 "00000000" // /* MW 1 */ +.label ZLS_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_432 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary_shared.h" 169 16 first +.src_ref 3 "elementwise_binary_shared.h" 171 16 first +.begin_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 10400 "11100001" // VLDA x0, [p1], m2; VLDB x1, [p7], m1; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10401 "00000000" // /* MW 15 */ + 10402 "00000000" // /* MW 14 */ + 10403 "01111000" // /* MW 13 */ + 10404 "10100101" // /* MW 12 */ + 10405 "00000001" // /* MW 11 */ + 10406 "00000000" // /* MW 10 */ + 10407 "00000000" // /* MW 9 */ + 10408 "00000000" // /* MW 8 */ + 10409 "01011011" // /* MW 7 */ + 10410 "00000001" // /* MW 6 */ + 10411 "11101000" // /* MW 5 */ + 10412 "01010000" // /* MW 4 */ + 10413 "01111110" // /* MW 3 */ + 10414 "00000011" // /* MW 2 */ + 10415 "00101001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 946 89 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 173 18 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10416 "11100001" // VLDA.CONV.fp32.bf16 cml0, [p0], m0;NOPB; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10417 "00000000" // /* MW 15 */ + 10418 "00000000" // /* MW 14 */ + 10419 "01111000" // /* MW 13 */ + 10420 "10100101" // /* MW 12 */ + 10421 "00000001" // /* MW 11 */ + 10422 "00000000" // /* MW 10 */ + 10423 "00000000" // /* MW 9 */ + 10424 "00000000" // /* MW 8 */ + 10425 "10100011" // /* MW 7 */ + 10426 "00011100" // /* MW 6 */ + 10427 "00100010" // /* MW 5 */ + 10428 "00000000" // /* MW 4 */ + 10429 "01110000" // /* MW 3 */ + 10430 "00000101" // /* MW 2 */ + 10431 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10432 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10433 "00000000" // /* MW 15 */ + 10434 "00000000" // /* MW 14 */ + 10435 "01111000" // /* MW 13 */ + 10436 "10100101" // /* MW 12 */ + 10437 "00000001" // /* MW 11 */ + 10438 "00000000" // /* MW 10 */ + 10439 "00000000" // /* MW 9 */ + 10440 "00000000" // /* MW 8 */ + 10441 "01011011" // /* MW 7 */ + 10442 "00000001" // /* MW 6 */ + 10443 "00100000" // /* MW 5 */ + 10444 "00000000" // /* MW 4 */ + 10445 "11110000" // /* MW 3 */ + 10446 "00101100" // /* MW 2 */ + 10447 "00000000" // /* MW 1 */ +.label ZLE_F_ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE_480 +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.end_of_loop +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 10448 "00001011" // NOPA; NOPB; NOPS; NOPX; NOPM; VMAC.f dm1, dm0, x1, x0, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10449 "00010000" // /* MW 15 */ + 10450 "00001000" // /* MW 14 */ + 10451 "01111000" // /* MW 13 */ + 10452 "10100101" // /* MW 12 */ + 10453 "00000001" // /* MW 11 */ + 10454 "00000000" // /* MW 10 */ + 10455 "00000000" // /* MW 9 */ + 10456 "00000000" // /* MW 8 */ + 10457 "01011011" // /* MW 7 */ + 10458 "00000001" // /* MW 6 */ + 10459 "00100000" // /* MW 5 */ + 10460 "00000000" // /* MW 4 */ + 10461 "11110000" // /* MW 3 */ + 10462 "00101100" // /* MW 2 */ + 10463 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 10464 "11000100" // PADDXM [sp], #-128 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10465 "00000001" // /* MW 5 */ + 10466 "00000000" // /* MW 4 */ + 10467 "00000000" // /* MW 3 */ + 10468 "11110000" // /* MW 2 */ + 10469 "11111111" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10470 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10471 "10100011" // /* MW 3 */ + 10472 "00011100" // /* MW 2 */ + 10473 "00001010" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 10474 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10475 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 177 44 first +.aggressive_scheduled_block_id 3 +.noswbrkpt + 10476 "01001000" // VMAC.f dm1, dm0, x1, x0, r0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10477 "00000001" // /* MW 3 */ + 10478 "00000010" // /* MW 2 */ + 10479 "00000001" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10480 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10481 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 187 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 10482 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10483 "00000000" // /* MW 3 */ + 10484 "00101000" // /* MW 2 */ + 10485 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary_shared.h" 185 18 first +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10486 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10487 "10100011" // /* MW 3 */ + 10488 "00011100" // /* MW 2 */ + 10489 "00001010" // /* MW 1 */ +.delay_slot + 10490 "11111000" // MOV p7, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10491 "10100000" // /* MW 3 */ + 10492 "01100000" // /* MW 2 */ + 10493 "00011111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10494 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10495 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary_shared.h" 185 18 +.delay_slot + 10496 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10497 "10100011" // /* MW 3 */ + 10498 "00011100" // /* MW 2 */ + 10499 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10500 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE__end +.label __ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE___func_end0 + 10501 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.function run _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E +.src_ref 4 "vector.hpp" 538 13 +.src_ref 3 "elementwise_binary_shared.h" 237 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.function_start + 10512 "10111010" // MOVA dj0, #12; MOVS p3, p2; MOV dc0, lr /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10513 "01110010" // /* MW 9 */ + 10514 "11110000" // /* MW 8 */ + 10515 "01100000" // /* MW 7 */ + 10516 "00000000" // /* MW 6 */ + 10517 "10001011" // /* MW 5 */ + 10518 "10001000" // /* MW 4 */ + 10519 "10000011" // /* MW 3 */ + 10520 "10000010" // /* MW 2 */ + 10521 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 first +.src_ref 3 "elementwise_binary_shared.h" 244 19 first +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 247 12 + 10522 "11010100" // LDA.u8 r0, [p2, dj0]; MOV p2, p1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10523 "10000001" // /* MW 5 */ + 10524 "11000101" // /* MW 4 */ + 10525 "01010100" // /* MW 3 */ + 10526 "00000001" // /* MW 2 */ + 10527 "01000000" // /* MW 1 */ + 10528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10529 "00000000" // /* MW 1 */ + 10530 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10531 "00000000" // /* MW 1 */ + 10532 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10533 "00000000" // /* MW 1 */ + 10534 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10535 "00000000" // /* MW 1 */ + 10536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10537 "00000000" // /* MW 1 */ + 10538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10539 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 244 12 +.src_ref 3 "elementwise_binary_shared.h" 244 35 + 10540 "10000100" // JZ r0, #10608 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10608 delay_slots=5 */ + 10541 "00000001" // /* MW 5 */ + 10542 "00000000" // /* MW 4 */ + 10543 "10111000" // /* MW 3 */ + 10544 "00010100" // /* MW 2 */ + 10545 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 237 +.delay_slot + 10546 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10547 "00000001" // /* MW 5 */ + 10548 "00000000" // /* MW 4 */ + 10549 "00000000" // /* MW 3 */ + 10550 "00001000" // /* MW 2 */ + 10551 "00000000" // /* MW 1 */ +.delay_slot + 10552 "11111000" // MOV r1, sp /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10553 "11100000" // /* MW 3 */ + 10554 "01010101" // /* MW 2 */ + 10555 "00011000" // /* MW 1 */ +.delay_slot + 10556 "00011000" // ADD.NC p1, r1, #-64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10557 "11100000" // /* MW 3 */ + 10558 "01100000" // /* MW 2 */ + 10559 "00011001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 538 13 +.src_ref 4 "vector_native_types.hpp" 374 137 first +.delay_slot + 10560 "00011000" // VST sfh, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10561 "00101011" // /* MW 3 */ + 10562 "00000111" // /* MW 2 */ + 10563 "00001001" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10564 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10565 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 247 12 first +.no_stack_arguments + 10566 "00000100" // JL #9968 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 10567 "00000001" // /* MW 5 */ + 10568 "00000000" // /* MW 4 */ + 10569 "01111000" // /* MW 3 */ + 10570 "00010011" // /* MW 2 */ + 10571 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10572 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10573 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10574 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10575 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10576 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10577 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10578 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10579 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10580 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10581 "10000001" // /* MW 11 */ + 10582 "10101101" // /* MW 10 */ + 10583 "00000000" // /* MW 9 */ + 10584 "00000000" // /* MW 8 */ + 10585 "00000000" // /* MW 7 */ + 10586 "00000000" // /* MW 6 */ + 10587 "00100000" // /* MW 5 */ + 10588 "00000000" // /* MW 4 */ + 10589 "11110000" // /* MW 3 */ + 10590 "00101100" // /* MW 2 */ + 10591 "00000000" // /* MW 1 */ +.return_address + 10592 "10000100" // J #10640 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10640 delay_slots=5 */ + 10593 "00000000" // /* MW 5 */ + 10594 "00000000" // /* MW 4 */ + 10595 "11001000" // /* MW 3 */ + 10596 "00010100" // /* MW 2 */ + 10597 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10598 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10599 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10600 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10601 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10602 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10603 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10604 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10605 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10606 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10607 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_96 +.src_ref 3 "elementwise_binary_shared.h" 245 12 first +.no_stack_arguments + 10608 "00000100" // JL #9968 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 10609 "00000001" // /* MW 5 */ + 10610 "00000000" // /* MW 4 */ + 10611 "01111000" // /* MW 3 */ + 10612 "00010011" // /* MW 2 */ + 10613 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.src_ref 3 "elementwise_binary_shared.h" 245 12 +.delay_slot + 10614 "00000010" // MOVS p0, p1; MOV p1, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10615 "01110000" // /* MW 7 */ + 10616 "01100000" // /* MW 6 */ + 10617 "10110000" // /* MW 5 */ + 10618 "00000000" // /* MW 4 */ + 10619 "01100000" // /* MW 3 */ + 10620 "10010001" // /* MW 2 */ + 10621 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10622 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10625 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10627 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10628 "00110110" // NOPA; NOPB; NOPS; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 10629 "10000001" // /* MW 11 */ + 10630 "10101101" // /* MW 10 */ + 10631 "00000000" // /* MW 9 */ + 10632 "00000000" // /* MW 8 */ + 10633 "00000000" // /* MW 7 */ + 10634 "00000000" // /* MW 6 */ + 10635 "00100000" // /* MW 5 */ + 10636 "00000000" // /* MW 4 */ + 10637 "11110000" // /* MW 3 */ + 10638 "00101100" // /* MW 2 */ + 10639 "00000000" // /* MW 1 */ +.label TGT_F_ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E_128 +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.return_address + 10640 "11111000" // MOV lr, dc0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10641 "10000000" // /* MW 3 */ + 10642 "01110001" // /* MW 2 */ + 10643 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 first + 10644 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 10645 "00000000" // /* MW 3 */ + 10646 "00101000" // /* MW 2 */ + 10647 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 250 4 +.delay_slot + 10648 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10649 "00000001" // /* MW 5 */ + 10650 "00000000" // /* MW 4 */ + 10651 "00000000" // /* MW 3 */ + 10652 "11111000" // /* MW 2 */ + 10653 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10654 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10655 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10656 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10657 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10658 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10659 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10660 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E__end +.label __ZN25elementwise_binary_sharedI8bfloat1626mul_impl_broadcasting_attrIS0_E15shared_params_tIS0_EL5act_t0EE3runEPS0_S7_R27elementwise_binary_params_tIS4_E___func_end0 + 10661 "00000000" // /* MW 1 */ +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_begin0 +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.function superkernel_mul1d_attribute_broadcasting _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE +.src_ref 7 "superkernels.cpp" 205 first +.src_ref 7 "superkernels.cpp" 210 6 +.function_start + 10672 "01000100" // MOVXM p3, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10673 "10000000" // /* MW 5 */ + 10674 "11001000" // /* MW 4 */ + 10675 "11000110" // /* MW 3 */ + 10676 "00000111" // /* MW 2 */ + 10677 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 first + 10678 "11010100" // LDA r16, [p3]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10679 "11000001" // /* MW 5 */ + 10680 "10110101" // /* MW 4 */ + 10681 "11011000" // /* MW 3 */ + 10682 "11000010" // /* MW 2 */ + 10683 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 205 + 10684 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10685 "00000001" // /* MW 5 */ + 10686 "00000000" // /* MW 4 */ + 10687 "00000000" // /* MW 3 */ + 10688 "00001000" // /* MW 2 */ + 10689 "00000000" // /* MW 1 */ + 10690 "00000010" // ST p6, [sp, #-8]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10691 "01110000" // /* MW 7 */ + 10692 "11010000" // /* MW 6 */ + 10693 "00001011" // /* MW 5 */ + 10694 "00000000" // /* MW 4 */ + 10695 "10110000" // /* MW 3 */ + 10696 "01100011" // /* MW 2 */ + 10697 "11111111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 + 10698 "00111010" // ST r0, [sp, #-4]; MOVXM p6, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10699 "00010001" // /* MW 9 */ + 10700 "00101000" // /* MW 8 */ + 10701 "00110010" // /* MW 7 */ + 10702 "11110011" // /* MW 6 */ + 10703 "00000001" // /* MW 5 */ + 10704 "00000000" // /* MW 4 */ + 10705 "10110000" // /* MW 3 */ + 10706 "10000010" // /* MW 2 */ + 10707 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 10708 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10709 "11000000" // /* MW 3 */ + 10710 "11010100" // /* MW 2 */ + 10711 "00011011" // /* MW 1 */ + 10712 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10713 "00000000" // /* MW 1 */ + 10714 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10715 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 210 6 +.src_ref 7 "superkernels.cpp" 210 16 + 10716 "10000100" // JNZ r16, #10880 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=10880 delay_slots=5 */ + 10717 "00000001" // /* MW 5 */ + 10718 "01000000" // /* MW 4 */ + 10719 "01000000" // /* MW 3 */ + 10720 "00010101" // /* MW 2 */ + 10721 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 22 first +.delay_slot + 10722 "00011000" // EXTEND.u8 r17, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10723 "10010000" // /* MW 3 */ + 10724 "01100010" // /* MW 2 */ + 10725 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 30 +.delay_slot + 10726 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10727 "11111011" // /* MW 3 */ + 10728 "01100011" // /* MW 2 */ + 10729 "00010100" // /* MW 1 */ +.delay_slot + 10730 "10011000" // ST lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10731 "00111101" // /* MW 3 */ + 10732 "11110100" // /* MW 2 */ + 10733 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 207 11 +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 10734 "00000010" // ST r17, [p6]; MOV p6, p0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10735 "01110000" // /* MW 7 */ + 10736 "01100000" // /* MW 6 */ + 10737 "00110000" // /* MW 5 */ + 10738 "00000011" // /* MW 4 */ + 10739 "00110000" // /* MW 3 */ + 10740 "11000110" // /* MW 2 */ + 10741 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 +.src_ref 7 "superkernels.cpp" 224 2 +.delay_slot + 10742 "01000100" // MOVXM p0, #509248 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10743 "10000000" // /* MW 5 */ + 10744 "11001010" // /* MW 4 */ + 10745 "11000000" // /* MW 3 */ + 10746 "00000111" // /* MW 2 */ + 10747 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 10748 "01000100" // MOVXM p2, #509028 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10749 "11001000" // /* MW 5 */ + 10750 "11001000" // /* MW 4 */ + 10751 "11000100" // /* MW 3 */ + 10752 "00000111" // /* MW 2 */ + 10753 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 10754 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10755 "00010000" // /* MW 9 */ + 10756 "00110000" // /* MW 8 */ + 10757 "00110010" // /* MW 7 */ + 10758 "11110001" // /* MW 6 */ + 10759 "00000001" // /* MW 5 */ + 10760 "00000000" // /* MW 4 */ + 10761 "11100000" // /* MW 3 */ + 10762 "11000000" // /* MW 2 */ + 10763 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10765 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 213 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 10766 "00000100" // JL #9888 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=9888 delay_slots=5 */ + 10767 "00000001" // /* MW 5 */ + 10768 "00000000" // /* MW 4 */ + 10769 "01010000" // /* MW 3 */ + 10770 "00010011" // /* MW 2 */ + 10771 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10772 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10773 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 10774 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10775 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 10776 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10777 "00110001" // /* MW 3 */ + 10778 "00100000" // /* MW 2 */ + 10779 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 10780 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10781 "00000101" // /* MW 3 */ + 10782 "00100000" // /* MW 2 */ + 10783 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 10784 "11100001" // NOPA; NOPB; ST r16, [p2]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10785 "00000000" // /* MW 15 */ + 10786 "00000000" // /* MW 14 */ + 10787 "01111000" // /* MW 13 */ + 10788 "10100101" // /* MW 12 */ + 10789 "00000001" // /* MW 11 */ + 10790 "00000000" // /* MW 10 */ + 10791 "00000000" // /* MW 9 */ + 10792 "10000000" // /* MW 8 */ + 10793 "00010001" // /* MW 7 */ + 10794 "00000110" // /* MW 6 */ + 10795 "00100010" // /* MW 5 */ + 10796 "00000000" // /* MW 4 */ + 10797 "11110000" // /* MW 3 */ + 10798 "00101100" // /* MW 2 */ + 10799 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 +.return_address + 10800 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10801 "10100000" // /* MW 5 */ + 10802 "11001000" // /* MW 4 */ + 10803 "11000100" // /* MW 3 */ + 10804 "00000111" // /* MW 2 */ + 10805 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 18 first +.src_ref 7 "superkernels.cpp" 217 65 + 10806 "10111010" // LDA r16, [p2]; MOVXM p2, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10807 "00010000" // /* MW 9 */ + 10808 "10100000" // /* MW 8 */ + 10809 "00110010" // /* MW 7 */ + 10810 "11110001" // /* MW 6 */ + 10811 "00000001" // /* MW 5 */ + 10812 "00000000" // /* MW 4 */ + 10813 "11010000" // /* MW 3 */ + 10814 "11000010" // /* MW 2 */ + 10815 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 +.src_ref 7 "superkernels.cpp" 217 65 +.src_ref 7 "superkernels.cpp" 224 2 + 10816 "10111010" // LDA r17, [p2]; MOVXM p2, #509248 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10817 "00010000" // /* MW 9 */ + 10818 "10100000" // /* MW 8 */ + 10819 "00110010" // /* MW 7 */ + 10820 "11110001" // /* MW 6 */ + 10821 "00000001" // /* MW 5 */ + 10822 "00000000" // /* MW 4 */ + 10823 "11010000" // /* MW 3 */ + 10824 "11000110" // /* MW 2 */ + 10825 "01000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 51 first +.src_ref 7 "superkernels.cpp" 217 16 +.src_ref 7 "superkernels.cpp" 222 47 + 10826 "10111010" // LDA.u16 r18, [p2, #10]; MOVXM p1, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10827 "00010000" // /* MW 9 */ + 10828 "00101010" // /* MW 8 */ + 10829 "10110010" // /* MW 7 */ + 10830 "11110000" // /* MW 6 */ + 10831 "00000001" // /* MW 5 */ + 10832 "00000000" // /* MW 4 */ + 10833 "01010000" // /* MW 3 */ + 10834 "11001011" // /* MW 2 */ + 10835 "01001010" // /* MW 1 */ + 10836 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10837 "00000000" // /* MW 1 */ + 10838 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10839 "00000000" // /* MW 1 */ + 10840 "10000100" // J #10896 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=10896 delay_slots=5 */ + 10841 "00000000" // /* MW 5 */ + 10842 "00000000" // /* MW 4 */ + 10843 "01001000" // /* MW 3 */ + 10844 "00010101" // /* MW 2 */ + 10845 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 +.delay_slot + 10846 "01000100" // MOVXM p0, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10847 "10111000" // /* MW 5 */ + 10848 "11001000" // /* MW 4 */ + 10849 "11000000" // /* MW 3 */ + 10850 "00000111" // /* MW 2 */ + 10851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10853 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 27 first +.delay_slot + 10854 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10855 "00001111" // /* MW 3 */ + 10856 "01100001" // /* MW 2 */ + 10857 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 215 13 first +.delay_slot + 10858 "00001100" // NOPA; ST r18, [p0] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10859 "10100011" // /* MW 5 */ + 10860 "00001100" // /* MW 4 */ + 10861 "11110000" // /* MW 3 */ + 10862 "00101100" // /* MW 2 */ + 10863 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 217 16 first +.delay_slot + 10864 "11100001" // NOPA; NOPB; ST r16, [p1]; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10865 "00000000" // /* MW 15 */ + 10866 "00000000" // /* MW 14 */ + 10867 "01111000" // /* MW 13 */ + 10868 "10100101" // /* MW 12 */ + 10869 "00000001" // /* MW 11 */ + 10870 "00000000" // /* MW 10 */ + 10871 "00000000" // /* MW 9 */ + 10872 "10000000" // /* MW 8 */ + 10873 "00010001" // /* MW 7 */ + 10874 "00000110" // /* MW 6 */ + 10875 "00100001" // /* MW 5 */ + 10876 "00000000" // /* MW 4 */ + 10877 "11110000" // /* MW 3 */ + 10878 "00101100" // /* MW 2 */ + 10879 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_208 +.src_ref 7 "superkernels.cpp" 222 47 +.src_ref 7 "superkernels.cpp" 224 2 + 10880 "11100001" // NOPA; NOPB; MOVS p2, p0; MOVXM p1, #509012; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 10881 "00000000" // /* MW 15 */ + 10882 "00000000" // /* MW 14 */ + 10883 "00010000" // /* MW 13 */ + 10884 "00101010" // /* MW 12 */ + 10885 "10110010" // /* MW 11 */ + 10886 "11110000" // /* MW 10 */ + 10887 "00000001" // /* MW 9 */ + 10888 "00000000" // /* MW 8 */ + 10889 "10001011" // /* MW 7 */ + 10890 "10000000" // /* MW 6 */ + 10891 "00100010" // /* MW 5 */ + 10892 "00000000" // /* MW 4 */ + 10893 "11110000" // /* MW 3 */ + 10894 "00101100" // /* MW 2 */ + 10895 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_224 +.src_ref 1 "io_buffer_main.h" 218 49 first + 10896 "00000010" // MOVS p3, p7; ADD.NC p7, r15, #12 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 10897 "00000000" // /* MW 7 */ + 10898 "11000011" // /* MW 6 */ + 10899 "10110011" // /* MW 5 */ + 10900 "00000011" // /* MW 4 */ + 10901 "01100000" // /* MW 3 */ + 10902 "10010001" // /* MW 2 */ + 10903 "01110011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 10904 "10111010" // LDA r27, [p7], #-4; MOVXM p0, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 10905 "00010000" // /* MW 9 */ + 10906 "00100000" // /* MW 8 */ + 10907 "00110010" // /* MW 7 */ + 10908 "11110000" // /* MW 6 */ + 10909 "00000001" // /* MW 5 */ + 10910 "00000000" // /* MW 4 */ + 10911 "11010000" // /* MW 3 */ + 10912 "11101110" // /* MW 2 */ + 10913 "11111111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 10914 "10011000" // LDA r16, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10915 "00010110" // /* MW 3 */ + 10916 "11111110" // /* MW 2 */ + 10917 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 10918 "10011000" // LDA r17, [p7], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10919 "00110110" // /* MW 3 */ + 10920 "11111110" // /* MW 2 */ + 10921 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 10922 "10011000" // LDA r18, [p7, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10923 "01010110" // /* MW 3 */ + 10924 "01000110" // /* MW 2 */ + 10925 "00000111" // /* MW 1 */ + 10926 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10927 "00000000" // /* MW 1 */ + 10928 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10929 "00000000" // /* MW 1 */ + 10930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10931 "00000000" // /* MW 1 */ + 10932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10933 "00000000" // /* MW 1 */ + 10934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10935 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 10936 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10937 "00000010" // /* MW 3 */ + 10938 "01100001" // /* MW 2 */ + 10939 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 10940 "10011000" // ST r16, [p7] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10941 "00010001" // /* MW 3 */ + 10942 "00000110" // /* MW 2 */ + 10943 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 + 10944 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10945 "11111101" // /* MW 3 */ + 10946 "11100000" // /* MW 2 */ + 10947 "00010111" // /* MW 1 */ + 10948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10949 "00000000" // /* MW 1 */ + 10950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10951 "00000000" // /* MW 1 */ + 10952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10953 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 10954 "00011000" // ACQ r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10955 "00001000" // /* MW 3 */ + 10956 "10010011" // /* MW 2 */ + 10957 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 + 10958 "11100100" // MOVX r16, #1; MOV r15, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10959 "10000001" // /* MW 5 */ + 10960 "10101101" // /* MW 4 */ + 10961 "10100111" // /* MW 3 */ + 10962 "00000000" // /* MW 2 */ + 10963 "00000100" // /* MW 1 */ + 10964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10965 "00000000" // /* MW 1 */ + 10966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10967 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first + 10968 "10011000" // LDA r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10969 "00110110" // /* MW 3 */ + 10970 "00000110" // /* MW 2 */ + 10971 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.src_ref 1 "io_buffer_main.h" 324 51 + 10972 "11010100" // LDA r18, [p6]; MOV p6, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 10973 "10000001" // /* MW 5 */ + 10974 "11011101" // /* MW 4 */ + 10975 "11011100" // /* MW 3 */ + 10976 "11001010" // /* MW 2 */ + 10977 "11000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 47 first + 10978 "10011000" // LDA r19, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10979 "01110110" // /* MW 3 */ + 10980 "00000110" // /* MW 2 */ + 10981 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 10982 "10011000" // LDA p1, [p7], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10983 "10011110" // /* MW 3 */ + 10984 "01011100" // /* MW 2 */ + 10985 "00000111" // /* MW 1 */ + 10986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10987 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 224 2 first +.no_stack_arguments + 10988 "00000100" // JL #10512 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=10512 delay_slots=5 */ + 10989 "00000001" // /* MW 5 */ + 10990 "00000000" // /* MW 4 */ + 10991 "10001000" // /* MW 3 */ + 10992 "00010100" // /* MW 2 */ + 10993 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 10994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 10995 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 first +.delay_slot + 10996 "00011000" // ADD r17, r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 10997 "00000111" // /* MW 3 */ + 10998 "01100010" // /* MW 2 */ + 10999 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 221 2 +.delay_slot + 11000 "10011000" // ST r17, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11001 "00110001" // /* MW 3 */ + 11002 "00000110" // /* MW 2 */ + 11003 "00001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 first +.delay_slot + 11004 "10011000" // LSHL r16, r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11005 "00001101" // /* MW 3 */ + 11006 "11100001" // /* MW 2 */ + 11007 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 222 45 +.delay_slot + 11008 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r18, r16; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11009 "00000000" // /* MW 15 */ + 11010 "00000000" // /* MW 14 */ + 11011 "10101000" // /* MW 13 */ + 11012 "10100000" // /* MW 12 */ + 11013 "00110100" // /* MW 11 */ + 11014 "00000000" // /* MW 10 */ + 11015 "00000000" // /* MW 9 */ + 11016 "00000000" // /* MW 8 */ + 11017 "01011011" // /* MW 7 */ + 11018 "00000001" // /* MW 6 */ + 11019 "00100000" // /* MW 5 */ + 11020 "00000000" // /* MW 4 */ + 11021 "11110000" // /* MW 3 */ + 11022 "00101100" // /* MW 2 */ + 11023 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 +.src_ref 7 "superkernels.cpp" 227 14 +.src_ref 1 "io_buffer_main.h" 324 51 first +.return_address + 11024 "10111010" // LDA r17, [p6, #20]; MOVXM p6, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11025 "00010000" // /* MW 9 */ + 11026 "00100000" // /* MW 8 */ + 11027 "00110010" // /* MW 7 */ + 11028 "11110011" // /* MW 6 */ + 11029 "00000001" // /* MW 5 */ + 11030 "00000000" // /* MW 4 */ + 11031 "11010000" // /* MW 3 */ + 11032 "11000110" // /* MW 2 */ + 11033 "11001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 425 8 + 11034 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11035 "00000101" // /* MW 3 */ + 11036 "00100000" // /* MW 2 */ + 11037 "00010000" // /* MW 1 */ + 11038 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11039 "00000000" // /* MW 1 */ + 11040 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11041 "00000000" // /* MW 1 */ + 11042 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11043 "00000000" // /* MW 1 */ + 11044 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11045 "00000000" // /* MW 1 */ + 11046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11047 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11048 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11049 "00001000" // /* MW 3 */ + 11050 "01010001" // /* MW 2 */ + 11051 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 +.src_ref 1 "io_buffer_main.h" 327 40 first + 11052 "10111010" // LDA r19, [p7, #-8]; MOVXM p2, #509020 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11053 "00010000" // /* MW 9 */ + 11054 "00101110" // /* MW 8 */ + 11055 "00110010" // /* MW 7 */ + 11056 "11110001" // /* MW 6 */ + 11057 "00000001" // /* MW 5 */ + 11058 "00000000" // /* MW 4 */ + 11059 "11010000" // /* MW 3 */ + 11060 "11001110" // /* MW 2 */ + 11061 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 first + 11062 "10011000" // LDA r17, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11063 "00110110" // /* MW 3 */ + 11064 "00000110" // /* MW 2 */ + 11065 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 19 + 11066 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11067 "01010110" // /* MW 3 */ + 11068 "00000110" // /* MW 2 */ + 11069 "00000010" // /* MW 1 */ + 11070 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11071 "00000000" // /* MW 1 */ + 11072 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11073 "00000000" // /* MW 1 */ + 11074 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11075 "00000000" // /* MW 1 */ + 11076 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11077 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 11078 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11079 "00110001" // /* MW 3 */ + 11080 "00100001" // /* MW 2 */ + 11081 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 11082 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11083 "00010001" // /* MW 3 */ + 11084 "11100110" // /* MW 2 */ + 11085 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 16 first + 11086 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11087 "00101000" // /* MW 3 */ + 11088 "01100001" // /* MW 2 */ + 11089 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 226 6 + 11090 "10000100" // JNZ r16, #11120 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11120 delay_slots=5 */ + 11091 "00000001" // /* MW 5 */ + 11092 "01000000" // /* MW 4 */ + 11093 "10111000" // /* MW 3 */ + 11094 "00010101" // /* MW 2 */ + 11095 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11097 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11099 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11101 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11103 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11105 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 + 11106 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11107 "00000001" // /* MW 3 */ + 11108 "00100000" // /* MW 2 */ + 11109 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 227 14 first + 11110 "01111010" // NOPA; ST r16, [p6]; NOPX /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11111 "00000000" // /* MW 9 */ + 11112 "00000000" // /* MW 8 */ + 11113 "00000000" // /* MW 7 */ + 11114 "10000000" // /* MW 6 */ + 11115 "00010001" // /* MW 5 */ + 11116 "00000110" // /* MW 4 */ + 11117 "11110110" // /* MW 3 */ + 11118 "00101100" // /* MW 2 */ + 11119 "00000000" // /* MW 1 */ +.label TGT_F_Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE_448 +.src_ref 7 "superkernels.cpp" 229 + 11120 "00011000" // LDA lr, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11121 "00111001" // /* MW 3 */ + 11122 "11110100" // /* MW 2 */ + 11123 "00000111" // /* MW 1 */ + 11124 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11125 "00011001" // /* MW 3 */ + 11126 "11111011" // /* MW 2 */ + 11127 "00000111" // /* MW 1 */ + 11128 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11129 "00000000" // /* MW 1 */ + 11130 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11131 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11132 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11133 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11134 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11135 "11110001" // /* MW 3 */ + 11136 "11111101" // /* MW 2 */ + 11137 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11138 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11139 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 first +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11140 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11141 "00000000" // /* MW 3 */ + 11142 "00101000" // /* MW 2 */ + 11143 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11144 "11111000" // MOV p7, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11145 "10100000" // /* MW 3 */ + 11146 "01100111" // /* MW 2 */ + 11147 "00011111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 229 +.delay_slot + 11148 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11149 "00000001" // /* MW 5 */ + 11150 "00000000" // /* MW 4 */ + 11151 "00000000" // /* MW 3 */ + 11152 "11111000" // /* MW 2 */ + 11153 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11154 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11155 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11157 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE__end +.label __Z40superkernel_mul1d_attribute_broadcastingRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE___func_end0 + 11159 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.function shared_setup_backbone _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 205 first +.src_ref 3 "elementwise_binary_shared.h" 211 24 first +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.function_start + 11168 "10111010" // LDA el0, [p1], #4; MOVX r2, #256; MOV r24, #0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11169 "01011000" // /* MW 9 */ + 11170 "00000000" // /* MW 8 */ + 11171 "00001000" // /* MW 7 */ + 11172 "00001011" // /* MW 6 */ + 11173 "00100000" // /* MW 5 */ + 11174 "00001000" // /* MW 4 */ + 11175 "11010000" // /* MW 3 */ + 11176 "10000101" // /* MW 2 */ + 11177 "00100011" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 + 11178 "00011000" // MOVX r0, #-128 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11179 "00000001" // /* MW 3 */ + 11180 "10000000" // /* MW 2 */ + 11181 "00010111" // /* MW 1 */ + 11182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11183 "00000000" // /* MW 1 */ + 11184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11185 "00000000" // /* MW 1 */ + 11186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11187 "00000000" // /* MW 1 */ + 11188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11189 "00000000" // /* MW 1 */ + 11190 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11191 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 211 22 first + 11192 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11193 "00101001" // /* MW 3 */ + 11194 "00011100" // /* MW 2 */ + 11195 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 24 first + 11196 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11197 "00101110" // /* MW 3 */ + 11198 "00011100" // /* MW 2 */ + 11199 "00000001" // /* MW 1 */ + 11200 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11201 "00000000" // /* MW 1 */ + 11202 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11203 "00000000" // /* MW 1 */ + 11204 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11205 "00000000" // /* MW 1 */ + 11206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11207 "00000000" // /* MW 1 */ + 11208 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11209 "00000000" // /* MW 1 */ + 11210 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11211 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 212 22 + 11212 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11213 "00101001" // /* MW 3 */ + 11214 "00011100" // /* MW 2 */ + 11215 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 24 first + 11216 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11217 "00101110" // /* MW 3 */ + 11218 "00000100" // /* MW 2 */ + 11219 "00000001" // /* MW 1 */ + 11220 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11221 "00000000" // /* MW 1 */ + 11222 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11223 "00000000" // /* MW 1 */ + 11224 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11225 "00000000" // /* MW 1 */ + 11226 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11227 "00000000" // /* MW 1 */ + 11228 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11229 "00000000" // /* MW 1 */ + 11230 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11231 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 213 22 + 11232 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11233 "00101001" // /* MW 3 */ + 11234 "00011100" // /* MW 2 */ + 11235 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 24 first + 11236 "10011000" // LDA r3, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11237 "01110110" // /* MW 3 */ + 11238 "00010100" // /* MW 2 */ + 11239 "00000001" // /* MW 1 */ + 11240 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11241 "00000000" // /* MW 1 */ + 11242 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11243 "00000000" // /* MW 1 */ + 11244 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11245 "00000000" // /* MW 1 */ + 11246 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11247 "00000000" // /* MW 1 */ + 11248 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11249 "00000000" // /* MW 1 */ + 11250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11251 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 214 22 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11252 "10011000" // ST r3, [p0], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11253 "01110001" // /* MW 3 */ + 11254 "01001100" // /* MW 2 */ + 11255 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 34 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11256 "00011000" // ST.s16 r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11257 "00010111" // /* MW 3 */ + 11258 "00000100" // /* MW 2 */ + 11259 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 217 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11260 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11261 "00000000" // /* MW 3 */ + 11262 "00101000" // /* MW 2 */ + 11263 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11264 "01000100" // MOVXM r1, #65280 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11265 "00000000" // /* MW 5 */ + 11266 "10111110" // /* MW 4 */ + 11267 "11110000" // /* MW 3 */ + 11268 "00000000" // /* MW 2 */ + 11269 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11270 "10011000" // AND r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11271 "00010100" // /* MW 3 */ + 11272 "11000010" // /* MW 2 */ + 11273 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 48 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11274 "10011000" // EQ r27, r1, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11275 "00100111" // /* MW 3 */ + 11276 "01110110" // /* MW 2 */ + 11277 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 216 36 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11278 "00011000" // SEL.EQZ r0, r0, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11279 "10000010" // /* MW 3 */ + 11280 "00000001" // /* MW 2 */ + 11281 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11282 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11283 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.function setup _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv +.src_ref 3 "elementwise_binary_shared.h" 219 +.src_ref 3 "elementwise_binary_shared.h" 219 first +.function_start + 11296 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11297 "00000001" // /* MW 5 */ + 11298 "00000000" // /* MW 4 */ + 11299 "00000000" // /* MW 3 */ + 11300 "00001000" // /* MW 2 */ + 11301 "00000000" // /* MW 1 */ + 11302 "10011000" // ST lr, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11303 "00111101" // /* MW 3 */ + 11304 "11111000" // /* MW 2 */ + 11305 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 first +.no_stack_arguments + 11306 "00000100" // JL #11168 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11168 delay_slots=5 */ + 11307 "00000001" // /* MW 5 */ + 11308 "00000000" // /* MW 4 */ + 11309 "11010000" // /* MW 3 */ + 11310 "00010101" // /* MW 2 */ + 11311 "00000000" // /* MW 1 */ +.delay_slot + 11312 "11111000" // MOV r0, r15 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11313 "10100000" // /* MW 3 */ + 11314 "00010111" // /* MW 2 */ + 11315 "00011000" // /* MW 1 */ +.delay_slot + 11316 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11317 "00010101" // /* MW 3 */ + 11318 "11111100" // /* MW 2 */ + 11319 "00001111" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.delay_slot + 11320 "11111000" // MOV r15, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11321 "11000000" // /* MW 3 */ + 11322 "11010000" // /* MW 2 */ + 11323 "00011011" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11324 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11325 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11326 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11327 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 220 8 +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.src_ref 8 "add_impl.h" 146 29 +.return_address + 11328 "10111010" // LDA lr, [sp, #-8]; MOVX r16, #3; ADD.NC p0, r15, #16 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11329 "00001000" // /* MW 9 */ + 11330 "11000100" // /* MW 8 */ + 11331 "00110011" // /* MW 7 */ + 11332 "01101000" // /* MW 6 */ + 11333 "00000000" // /* MW 5 */ + 11334 "00000001" // /* MW 4 */ + 11335 "00100000" // /* MW 3 */ + 11336 "00000111" // /* MW 2 */ + 11337 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 +.src_ref 8 "add_impl.h" 147 37 +.src_ref 8 "add_impl.h" 147 39 + 11338 "10111010" // MOVA dj0, #15; MOVX r24, #0; MOV m0, #-3 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11339 "01011000" // /* MW 9 */ + 11340 "11111101" // /* MW 8 */ + 11341 "00000111" // /* MW 7 */ + 11342 "00001000" // /* MW 6 */ + 11343 "10000000" // /* MW 5 */ + 11344 "00000001" // /* MW 4 */ + 11345 "10000000" // /* MW 3 */ + 11346 "11100010" // /* MW 2 */ + 11347 "00000001" // /* MW 1 */ +.src_ref 8 "add_impl.h" 146 29 first +.src_ref 8 "add_impl.h" 147 39 + 11348 "01111010" // LDA r15, [sp, #-4]; ST r16, [p0], m0; MOVX r16, #-128 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11349 "00000001" // /* MW 9 */ + 11350 "10100000" // /* MW 8 */ + 11351 "00000111" // /* MW 7 */ + 11352 "10000000" // /* MW 6 */ + 11353 "00010001" // /* MW 5 */ + 11354 "00001010" // /* MW 4 */ + 11355 "00100000" // /* MW 3 */ + 11356 "10111110" // /* MW 2 */ + 11357 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 50 first + 11358 "10011000" // LDA.u8 r18, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11359 "01001010" // /* MW 3 */ + 11360 "00000110" // /* MW 2 */ + 11361 "00000000" // /* MW 1 */ + 11362 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11363 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11364 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11365 "00000000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 37 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11366 "00011000" // ST.s16 r16, [p0, dj0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11367 "00010111" // /* MW 3 */ + 11368 "00000010" // /* MW 2 */ + 11369 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11370 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 11371 "00000000" // /* MW 3 */ + 11372 "00101000" // /* MW 2 */ + 11373 "00010000" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11374 "00011000" // MOVX r17, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11375 "00000101" // /* MW 3 */ + 11376 "00100010" // /* MW 2 */ + 11377 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary_shared.h" 222 4 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11378 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11379 "00000001" // /* MW 5 */ + 11380 "00000000" // /* MW 4 */ + 11381 "00000000" // /* MW 3 */ + 11382 "11111000" // /* MW 2 */ + 11383 "11111111" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 54 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11384 "10011000" // EQ r27, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11385 "00100111" // /* MW 3 */ + 11386 "01110111" // /* MW 2 */ + 11387 "00010100" // /* MW 1 */ +.src_ref 8 "add_impl.h" 147 39 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11388 "00011000" // SEL.EQZ r16, r16, r24, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11389 "10000010" // /* MW 3 */ + 11390 "00100001" // /* MW 2 */ + 11391 "00010100" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11392 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv___func_end0 + 11393 "00000000" // /* MW 1 */ +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_begin0 +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.function run _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E +.src_ref 3 "elementwise_binary_shared.h" 227 first +.src_ref 3 "elementwise_binary_shared.h" 232 8 first +.tail_call +.function_start + 11408 "10000100" // J #9968 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9968 delay_slots=5 */ + 11409 "00000000" // /* MW 5 */ + 11410 "00000000" // /* MW 4 */ + 11411 "01111000" // /* MW 3 */ + 11412 "00010011" // /* MW 2 */ + 11413 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11414 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11415 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11416 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11417 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11418 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11419 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11420 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11421 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11422 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E__end +.label __ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E___func_end0 + 11423 "00000000" // /* MW 1 */ +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_add1d _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 332 first +.src_ref 7 "superkernels.cpp" 337 6 +.function_start + 11424 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11425 "10000000" // /* MW 5 */ + 11426 "11001000" // /* MW 4 */ + 11427 "11001000" // /* MW 3 */ + 11428 "00000111" // /* MW 2 */ + 11429 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 337 6 first + 11430 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11431 "11000001" // /* MW 5 */ + 11432 "10110101" // /* MW 4 */ + 11433 "11011000" // /* MW 3 */ + 11434 "11000010" // /* MW 2 */ + 11435 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 332 + 11436 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11437 "00000001" // /* MW 5 */ + 11438 "00000000" // /* MW 4 */ + 11439 "00000000" // /* MW 3 */ + 11440 "00001000" // /* MW 2 */ + 11441 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 334 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 11442 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11443 "01111001" // /* MW 9 */ + 11444 "01100000" // /* MW 8 */ + 11445 "11001010" // /* MW 7 */ + 11446 "10000001" // /* MW 6 */ + 11447 "00010100" // /* MW 5 */ + 11448 "00100011" // /* MW 4 */ + 11449 "10110000" // /* MW 3 */ + 11450 "00111010" // /* MW 2 */ + 11451 "11111111" // /* MW 1 */ + 11452 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11453 "01110000" // /* MW 7 */ + 11454 "11010000" // /* MW 6 */ + 11455 "00001011" // /* MW 5 */ + 11456 "00000000" // /* MW 4 */ + 11457 "10110000" // /* MW 3 */ + 11458 "10000011" // /* MW 2 */ + 11459 "11111101" // /* MW 1 */ + 11460 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11461 "00010101" // /* MW 3 */ + 11462 "11111100" // /* MW 2 */ + 11463 "00001111" // /* MW 1 */ + 11464 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11465 "00111101" // /* MW 3 */ + 11466 "11110000" // /* MW 2 */ + 11467 "00001111" // /* MW 1 */ + 11468 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11469 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 337 6 first +.src_ref 7 "superkernels.cpp" 337 16 first + 11470 "10000100" // JNZ r16, #11616 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11616 delay_slots=5 */ + 11471 "00000001" // /* MW 5 */ + 11472 "01000000" // /* MW 4 */ + 11473 "10110000" // /* MW 3 */ + 11474 "00010110" // /* MW 2 */ + 11475 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 334 30 first +.delay_slot + 11476 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11477 "11111011" // /* MW 3 */ + 11478 "01100011" // /* MW 2 */ + 11479 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 334 11 +.delay_slot + 11480 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11481 "10100000" // /* MW 5 */ + 11482 "11001000" // /* MW 4 */ + 11483 "11000100" // /* MW 3 */ + 11484 "00000111" // /* MW 2 */ + 11485 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 334 11 +.delay_slot + 11486 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 11487 "01110000" // /* MW 7 */ + 11488 "01100000" // /* MW 6 */ + 11489 "00110111" // /* MW 5 */ + 11490 "00000001" // /* MW 4 */ + 11491 "00110000" // /* MW 3 */ + 11492 "11000110" // /* MW 2 */ + 11493 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 11494 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11495 "11000000" // /* MW 3 */ + 11496 "11010110" // /* MW 2 */ + 11497 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 340 4 +.src_ref 7 "superkernels.cpp" 342 28 +.src_ref 7 "superkernels.cpp" 344 42 +.src_ref 7 "superkernels.cpp" 356 2 +.delay_slot + 11498 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509312 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11499 "00010001" // /* MW 9 */ + 11500 "11000000" // /* MW 8 */ + 11501 "10110010" // /* MW 7 */ + 11502 "11110011" // /* MW 6 */ + 11503 "00000001" // /* MW 5 */ + 11504 "00000000" // /* MW 4 */ + 11505 "10110000" // /* MW 3 */ + 11506 "10100011" // /* MW 2 */ + 11507 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 340 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 11508 "00111010" // MOVS p0, p7; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11509 "00010001" // /* MW 9 */ + 11510 "00110010" // /* MW 8 */ + 11511 "00110010" // /* MW 7 */ + 11512 "11110001" // /* MW 6 */ + 11513 "00000001" // /* MW 5 */ + 11514 "00000000" // /* MW 4 */ + 11515 "01100000" // /* MW 3 */ + 11516 "10010001" // /* MW 2 */ + 11517 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 11518 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11519 "00010000" // /* MW 9 */ + 11520 "00110000" // /* MW 8 */ + 11521 "00110010" // /* MW 7 */ + 11522 "11110001" // /* MW 6 */ + 11523 "00000001" // /* MW 5 */ + 11524 "00000000" // /* MW 4 */ + 11525 "11100000" // /* MW 3 */ + 11526 "11000000" // /* MW 2 */ + 11527 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11528 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11529 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 340 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11530 "00000100" // JL #11296 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11296 delay_slots=5 */ + 11531 "00000001" // /* MW 5 */ + 11532 "00000000" // /* MW 4 */ + 11533 "00010000" // /* MW 3 */ + 11534 "00010110" // /* MW 2 */ + 11535 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11536 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11537 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 11538 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11539 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11540 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11541 "00110001" // /* MW 3 */ + 11542 "00100000" // /* MW 2 */ + 11543 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 11544 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11545 "00000101" // /* MW 3 */ + 11546 "00100000" // /* MW 2 */ + 11547 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 11548 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11549 "00010001" // /* MW 3 */ + 11550 "00000110" // /* MW 2 */ + 11551 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 344 18 +.src_ref 7 "superkernels.cpp" 344 42 first +.return_address + 11552 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11553 "00010000" // /* MW 9 */ + 11554 "00101000" // /* MW 8 */ + 11555 "10110010" // /* MW 7 */ + 11556 "11110000" // /* MW 6 */ + 11557 "00000001" // /* MW 5 */ + 11558 "00000000" // /* MW 4 */ + 11559 "11010000" // /* MW 3 */ + 11560 "11000010" // /* MW 2 */ + 11561 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 344 16 +.src_ref 7 "superkernels.cpp" 344 18 +.src_ref 7 "superkernels.cpp" 353 48 + 11562 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11563 "00010000" // /* MW 9 */ + 11564 "00101010" // /* MW 8 */ + 11565 "10110010" // /* MW 7 */ + 11566 "11110001" // /* MW 6 */ + 11567 "00000001" // /* MW 5 */ + 11568 "00000000" // /* MW 4 */ + 11569 "11010000" // /* MW 3 */ + 11570 "11000110" // /* MW 2 */ + 11571 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 342 28 first +.src_ref 7 "superkernels.cpp" 345 16 +.src_ref 7 "superkernels.cpp" 354 48 + 11572 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11573 "00010000" // /* MW 9 */ + 11574 "00101100" // /* MW 8 */ + 11575 "10110010" // /* MW 7 */ + 11576 "11110000" // /* MW 6 */ + 11577 "00000001" // /* MW 5 */ + 11578 "00000000" // /* MW 4 */ + 11579 "01010000" // /* MW 3 */ + 11580 "11001011" // /* MW 2 */ + 11581 "11101010" // /* MW 1 */ + 11582 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11583 "00000000" // /* MW 1 */ + 11584 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11585 "00000000" // /* MW 1 */ + 11586 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11587 "00000000" // /* MW 1 */ + 11588 "10000100" // J #11632 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=11632 delay_slots=5 */ + 11589 "00000000" // /* MW 5 */ + 11590 "00000000" // /* MW 4 */ + 11591 "10111000" // /* MW 3 */ + 11592 "00010110" // /* MW 2 */ + 11593 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 342 13 +.delay_slot + 11594 "01000100" // MOVXM p2, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11595 "10111000" // /* MW 5 */ + 11596 "11001000" // /* MW 4 */ + 11597 "11000100" // /* MW 3 */ + 11598 "00000111" // /* MW 2 */ + 11599 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 344 27 first +.delay_slot + 11600 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11601 "00001111" // /* MW 3 */ + 11602 "01100001" // /* MW 2 */ + 11603 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 342 13 first +.delay_slot + 11604 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11605 "01010001" // /* MW 3 */ + 11606 "00000110" // /* MW 2 */ + 11607 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 344 16 first +.delay_slot + 11608 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11609 "00010001" // /* MW 3 */ + 11610 "00000110" // /* MW 2 */ + 11611 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 345 16 first +.delay_slot + 11612 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11613 "00010001" // /* MW 3 */ + 11614 "00000110" // /* MW 2 */ + 11615 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 353 48 + 11616 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11617 "10101000" // /* MW 5 */ + 11618 "11001000" // /* MW 4 */ + 11619 "11000110" // /* MW 3 */ + 11620 "00000111" // /* MW 2 */ + 11621 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 354 48 + 11622 "10111010" // NOPA; MOVXM p1, #509016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11623 "00010000" // /* MW 9 */ + 11624 "00101100" // /* MW 8 */ + 11625 "10110010" // /* MW 7 */ + 11626 "11110000" // /* MW 6 */ + 11627 "00000001" // /* MW 5 */ + 11628 "00000000" // /* MW 4 */ + 11629 "11110000" // /* MW 3 */ + 11630 "00101100" // /* MW 2 */ + 11631 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 11632 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11633 "10000110" // /* MW 3 */ + 11634 "01100111" // /* MW 2 */ + 11635 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 348 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 11636 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11637 "00010000" // /* MW 9 */ + 11638 "00100000" // /* MW 8 */ + 11639 "00110010" // /* MW 7 */ + 11640 "11110001" // /* MW 6 */ + 11641 "00000001" // /* MW 5 */ + 11642 "00000000" // /* MW 4 */ + 11643 "11010000" // /* MW 3 */ + 11644 "11101110" // /* MW 2 */ + 11645 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11646 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11647 "00010110" // /* MW 3 */ + 11648 "11111110" // /* MW 2 */ + 11649 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11650 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11651 "00110110" // /* MW 3 */ + 11652 "11111110" // /* MW 2 */ + 11653 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 348 2 first + 11654 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11655 "01010110" // /* MW 3 */ + 11656 "00000110" // /* MW 2 */ + 11657 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 11658 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11659 "01110110" // /* MW 3 */ + 11660 "01000110" // /* MW 2 */ + 11661 "00000000" // /* MW 1 */ + 11662 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11663 "00000000" // /* MW 1 */ + 11664 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11665 "00000000" // /* MW 1 */ + 11666 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11667 "00000000" // /* MW 1 */ + 11668 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11669 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11670 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11671 "00000010" // /* MW 3 */ + 11672 "01100001" // /* MW 2 */ + 11673 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 348 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 11674 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11675 "00001110" // /* MW 5 */ + 11676 "01000000" // /* MW 4 */ + 11677 "00111001" // /* MW 3 */ + 11678 "11000010" // /* MW 2 */ + 11679 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 348 2 + 11680 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11681 "00010001" // /* MW 3 */ + 11682 "00000110" // /* MW 2 */ + 11683 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 11684 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11685 "11111101" // /* MW 3 */ + 11686 "11100000" // /* MW 2 */ + 11687 "00010111" // /* MW 1 */ + 11688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11689 "00000000" // /* MW 1 */ + 11690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11691 "00000000" // /* MW 1 */ + 11692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11693 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 11694 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11695 "00001000" // /* MW 3 */ + 11696 "11010011" // /* MW 2 */ + 11697 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 11698 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11699 "00000110" // /* MW 3 */ + 11700 "01100111" // /* MW 2 */ + 11701 "00011010" // /* MW 1 */ + 11702 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11703 "00000000" // /* MW 1 */ + 11704 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11705 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 11706 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11707 "01110110" // /* MW 3 */ + 11708 "11111111" // /* MW 2 */ + 11709 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 11710 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11711 "00110110" // /* MW 3 */ + 11712 "11111110" // /* MW 2 */ + 11713 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 11714 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11715 "01010110" // /* MW 3 */ + 11716 "11111110" // /* MW 2 */ + 11717 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 11718 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11719 "01110110" // /* MW 3 */ + 11720 "01010110" // /* MW 2 */ + 11721 "00000010" // /* MW 1 */ + 11722 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11723 "00000000" // /* MW 1 */ + 11724 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11725 "00000000" // /* MW 1 */ + 11726 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11727 "00000000" // /* MW 1 */ + 11728 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11729 "00000000" // /* MW 1 */ + 11730 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11731 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 11732 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11733 "00010010" // /* MW 3 */ + 11734 "10100011" // /* MW 2 */ + 11735 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 11736 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11737 "00110001" // /* MW 3 */ + 11738 "00000110" // /* MW 2 */ + 11739 "00001010" // /* MW 1 */ + 11740 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11741 "00000000" // /* MW 1 */ + 11742 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11743 "00000000" // /* MW 1 */ + 11744 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11745 "00000000" // /* MW 1 */ + 11746 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11747 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 11748 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11749 "00001000" // /* MW 3 */ + 11750 "11010011" // /* MW 2 */ + 11751 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 353 46 +.src_ref 7 "superkernels.cpp" 354 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 11752 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11753 "01111001" // /* MW 9 */ + 11754 "01100000" // /* MW 8 */ + 11755 "11001110" // /* MW 7 */ + 11756 "00101001" // /* MW 6 */ + 11757 "00000000" // /* MW 5 */ + 11758 "00000001" // /* MW 4 */ + 11759 "01100000" // /* MW 3 */ + 11760 "00010001" // /* MW 2 */ + 11761 "11010001" // /* MW 1 */ + 11762 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11763 "00000000" // /* MW 1 */ + 11764 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11765 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 11766 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11767 "00011001" // /* MW 3 */ + 11768 "11101110" // /* MW 2 */ + 11769 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 353 48 first + 11770 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11771 "00111011" // /* MW 5 */ + 11772 "11011000" // /* MW 4 */ + 11773 "11011111" // /* MW 3 */ + 11774 "11000110" // /* MW 2 */ + 11775 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 354 48 first +.src_ref 7 "superkernels.cpp" 356 2 + 11776 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11777 "10000001" // /* MW 5 */ + 11778 "11011101" // /* MW 4 */ + 11779 "11010110" // /* MW 3 */ + 11780 "11010010" // /* MW 2 */ + 11781 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 11782 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11783 "01010110" // /* MW 3 */ + 11784 "01001110" // /* MW 2 */ + 11785 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 11786 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11787 "00011110" // /* MW 3 */ + 11788 "01011101" // /* MW 2 */ + 11789 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11790 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11791 "11000000" // /* MW 3 */ + 11792 "01100000" // /* MW 2 */ + 11793 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11794 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11795 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11796 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11797 "01110110" // /* MW 3 */ + 11798 "00000110" // /* MW 2 */ + 11799 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 11800 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11801 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 356 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 11802 "00000100" // JL #11408 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=11408 delay_slots=5 */ + 11803 "00000001" // /* MW 5 */ + 11804 "00000000" // /* MW 4 */ + 11805 "01001000" // /* MW 3 */ + 11806 "00010110" // /* MW 2 */ + 11807 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 11808 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11809 "11000000" // /* MW 3 */ + 11810 "11010100" // /* MW 2 */ + 11811 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 353 46 first +.delay_slot + 11812 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11813 "00001101" // /* MW 3 */ + 11814 "01100011" // /* MW 2 */ + 11815 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 354 46 first +.delay_slot + 11816 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11817 "00001101" // /* MW 3 */ + 11818 "00100001" // /* MW 2 */ + 11819 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 354 46 +.delay_slot + 11820 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11821 "01000001" // /* MW 3 */ + 11822 "01101001" // /* MW 2 */ + 11823 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 353 46 first +.delay_slot + 11824 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 11825 "00000000" // /* MW 15 */ + 11826 "00000000" // /* MW 14 */ + 11827 "10101000" // /* MW 13 */ + 11828 "11100010" // /* MW 12 */ + 11829 "00110100" // /* MW 11 */ + 11830 "00000000" // /* MW 10 */ + 11831 "00000000" // /* MW 9 */ + 11832 "00000000" // /* MW 8 */ + 11833 "01011011" // /* MW 7 */ + 11834 "00000001" // /* MW 6 */ + 11835 "00100000" // /* MW 5 */ + 11836 "00000000" // /* MW 4 */ + 11837 "11110000" // /* MW 3 */ + 11838 "00101100" // /* MW 2 */ + 11839 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 11840 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11841 "01111000" // /* MW 9 */ + 11842 "11010000" // /* MW 8 */ + 11843 "10110011" // /* MW 7 */ + 11844 "00101000" // /* MW 6 */ + 11845 "00000000" // /* MW 5 */ + 11846 "00000001" // /* MW 4 */ + 11847 "11010000" // /* MW 3 */ + 11848 "11000110" // /* MW 2 */ + 11849 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 19 + 11850 "01000100" // MOVXM p6, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11851 "10111000" // /* MW 5 */ + 11852 "11001000" // /* MW 4 */ + 11853 "11001100" // /* MW 3 */ + 11854 "00000111" // /* MW 2 */ + 11855 "00000000" // /* MW 1 */ + 11856 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11857 "00000000" // /* MW 1 */ + 11858 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11859 "00000000" // /* MW 1 */ + 11860 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11861 "00000000" // /* MW 1 */ + 11862 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11863 "00000000" // /* MW 1 */ + 11864 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11865 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11866 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11867 "00001000" // /* MW 3 */ + 11868 "01010001" // /* MW 2 */ + 11869 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 11870 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11871 "00110110" // /* MW 3 */ + 11872 "11110110" // /* MW 2 */ + 11873 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 11874 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11875 "00011001" // /* MW 3 */ + 11876 "11101101" // /* MW 2 */ + 11877 "00000111" // /* MW 1 */ + 11878 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11879 "00000000" // /* MW 1 */ + 11880 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11881 "00000000" // /* MW 1 */ + 11882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11883 "00000000" // /* MW 1 */ + 11884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11885 "00000000" // /* MW 1 */ + 11886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11887 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 11888 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11889 "00010001" // /* MW 3 */ + 11890 "00100011" // /* MW 2 */ + 11891 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 11892 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 11893 "01100011" // /* MW 5 */ + 11894 "11101100" // /* MW 4 */ + 11895 "11010011" // /* MW 3 */ + 11896 "11000110" // /* MW 2 */ + 11897 "01001010" // /* MW 1 */ + 11898 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11899 "00000000" // /* MW 1 */ + 11900 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11901 "00000000" // /* MW 1 */ + 11902 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11903 "00000000" // /* MW 1 */ + 11904 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11905 "00000000" // /* MW 1 */ + 11906 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11907 "00000000" // /* MW 1 */ + 11908 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11909 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 11910 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11911 "00001000" // /* MW 3 */ + 11912 "01010001" // /* MW 2 */ + 11913 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 6 +.src_ref 7 "superkernels.cpp" 361 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 11914 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 11915 "00010000" // /* MW 9 */ + 11916 "00100000" // /* MW 8 */ + 11917 "10110010" // /* MW 7 */ + 11918 "11110000" // /* MW 6 */ + 11919 "00000001" // /* MW 5 */ + 11920 "00000000" // /* MW 4 */ + 11921 "11010000" // /* MW 3 */ + 11922 "11001110" // /* MW 2 */ + 11923 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 19 first + 11924 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11925 "01010110" // /* MW 3 */ + 11926 "00000110" // /* MW 2 */ + 11927 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 6 + 11928 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11929 "00110110" // /* MW 3 */ + 11930 "00000110" // /* MW 2 */ + 11931 "00000001" // /* MW 1 */ + 11932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11933 "00000000" // /* MW 1 */ + 11934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11935 "00000000" // /* MW 1 */ + 11936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11937 "00000000" // /* MW 1 */ + 11938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11939 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 11940 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11941 "00110001" // /* MW 3 */ + 11942 "00100001" // /* MW 2 */ + 11943 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 11944 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11945 "00010001" // /* MW 3 */ + 11946 "11100110" // /* MW 2 */ + 11947 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 16 first + 11948 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11949 "00101000" // /* MW 3 */ + 11950 "01100001" // /* MW 2 */ + 11951 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 360 6 + 11952 "10000100" // JNZ r16, #11984 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=11984 delay_slots=5 */ + 11953 "00000001" // /* MW 5 */ + 11954 "01000000" // /* MW 4 */ + 11955 "01101000" // /* MW 3 */ + 11956 "00010111" // /* MW 2 */ + 11957 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11958 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11959 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11960 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11961 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11962 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11963 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11964 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11965 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 11966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11967 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 361 14 + 11968 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11969 "00000001" // /* MW 3 */ + 11970 "00100000" // /* MW 2 */ + 11971 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 361 14 first + 11972 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 11973 "11000001" // /* MW 11 */ + 11974 "00001000" // /* MW 10 */ + 11975 "10000011" // /* MW 9 */ + 11976 "00000000" // /* MW 8 */ + 11977 "00000000" // /* MW 7 */ + 11978 "00000000" // /* MW 6 */ + 11979 "00100000" // /* MW 5 */ + 11980 "00000000" // /* MW 4 */ + 11981 "11110000" // /* MW 3 */ + 11982 "00101100" // /* MW 2 */ + 11983 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 363 + 11984 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11985 "00111001" // /* MW 3 */ + 11986 "11110000" // /* MW 2 */ + 11987 "00000111" // /* MW 1 */ + 11988 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11989 "11110001" // /* MW 3 */ + 11990 "11111101" // /* MW 2 */ + 11991 "00000111" // /* MW 1 */ + 11992 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11993 "10011001" // /* MW 3 */ + 11994 "11110111" // /* MW 2 */ + 11995 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 11996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 11997 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 11998 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 11999 "11010001" // /* MW 3 */ + 12000 "11111001" // /* MW 2 */ + 12001 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12002 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12003 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12004 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12005 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 363 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 12006 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12007 "00000000" // /* MW 3 */ + 12008 "00101000" // /* MW 2 */ + 12009 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12010 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12011 "00001011" // /* MW 3 */ + 12012 "10001110" // /* MW 2 */ + 12013 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 363 +.delay_slot + 12014 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12015 "00000001" // /* MW 5 */ + 12016 "00000000" // /* MW 4 */ + 12017 "00000000" // /* MW 3 */ + 12018 "11111000" // /* MW 2 */ + 12019 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_add1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 12025 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 141 first +.src_ref 3 "elementwise_binary.h" 142 23 +.src_ref 3 "elementwise_binary.h" 144 4 first +.function_start + 12032 "01100100" // RET lr; MOV r0, #64 /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 12033 "00000001" // /* MW 5 */ + 12034 "00100001" // /* MW 4 */ + 12035 "00000000" // /* MW 3 */ + 12036 "00000000" // /* MW 2 */ + 12037 "00000101" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 12038 "11111000" // MOV r1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12039 "11000000" // /* MW 3 */ + 12040 "01010000" // /* MW 2 */ + 12041 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 141 +.delay_slot + 12042 "00011000" // ADD.NC p0, r1, #32 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12043 "10010000" // /* MW 3 */ + 12044 "01100000" // /* MW 2 */ + 12045 "00011000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 first +.delay_slot + 12046 "10011000" // ST r0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12047 "00010001" // /* MW 3 */ + 12048 "00000100" // /* MW 2 */ + 12049 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 142 23 +.delay_slot + 12050 "10011000" // ST r0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12051 "00010001" // /* MW 3 */ + 12052 "00010100" // /* MW 2 */ + 12053 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E___func_end0 + 12055 "00000000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.function setup _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv +.src_ref 3 "elementwise_binary.h" 130 first +.src_ref 3 "elementwise_binary.h" 133 24 first +.function_start + 12064 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12065 "00101110" // /* MW 3 */ + 12066 "00011100" // /* MW 2 */ + 12067 "00000001" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 130 + 12068 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12069 "00000001" // /* MW 5 */ + 12070 "00000000" // /* MW 4 */ + 12071 "00000000" // /* MW 3 */ + 12072 "00001000" // /* MW 2 */ + 12073 "00000000" // /* MW 1 */ + 12074 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12075 "00111101" // /* MW 3 */ + 12076 "11111100" // /* MW 2 */ + 12077 "00001111" // /* MW 1 */ + 12078 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12079 "00000000" // /* MW 1 */ + 12080 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12081 "00000000" // /* MW 1 */ + 12082 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12083 "00000000" // /* MW 1 */ + 12084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12085 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 133 22 first + 12086 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12087 "00101001" // /* MW 3 */ + 12088 "00011100" // /* MW 2 */ + 12089 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 24 first + 12090 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12091 "00101110" // /* MW 3 */ + 12092 "00011100" // /* MW 2 */ + 12093 "00000001" // /* MW 1 */ + 12094 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12095 "00000000" // /* MW 1 */ + 12096 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12097 "00000000" // /* MW 1 */ + 12098 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12099 "00000000" // /* MW 1 */ + 12100 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12101 "00000000" // /* MW 1 */ + 12102 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12103 "00000000" // /* MW 1 */ + 12104 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12105 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 134 22 + 12106 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12107 "00101001" // /* MW 3 */ + 12108 "00011100" // /* MW 2 */ + 12109 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 24 first + 12110 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12111 "00101110" // /* MW 3 */ + 12112 "00000100" // /* MW 2 */ + 12113 "00000001" // /* MW 1 */ + 12114 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12115 "00000000" // /* MW 1 */ + 12116 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12117 "00000000" // /* MW 1 */ + 12118 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12119 "00000000" // /* MW 1 */ + 12120 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12121 "00000000" // /* MW 1 */ + 12122 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12123 "00000000" // /* MW 1 */ + 12124 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12125 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 135 22 + 12126 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12127 "00101001" // /* MW 3 */ + 12128 "00011100" // /* MW 2 */ + 12129 "00001000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 24 first + 12130 "10011000" // LDA el0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12131 "00101110" // /* MW 3 */ + 12132 "00010100" // /* MW 2 */ + 12133 "00000001" // /* MW 1 */ + 12134 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12135 "00000000" // /* MW 1 */ + 12136 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12137 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 137 8 first +.no_stack_arguments + 12138 "00000100" // JL #12032 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12032 delay_slots=5 */ + 12139 "00000001" // /* MW 5 */ + 12140 "00000000" // /* MW 4 */ + 12141 "10000000" // /* MW 3 */ + 12142 "00010111" // /* MW 2 */ + 12143 "00000000" // /* MW 1 */ +.delay_slot + 12144 "10011000" // ST p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12145 "10011101" // /* MW 3 */ + 12146 "11111011" // /* MW 2 */ + 12147 "00001111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12149 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12151 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 136 22 first +.delay_slot + 12152 "10011000" // ST el0, [p0], #-12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12153 "00101001" // /* MW 3 */ + 12154 "11011100" // /* MW 2 */ + 12155 "00001000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot + 12156 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12157 "11000000" // /* MW 3 */ + 12158 "01100000" // /* MW 2 */ + 12159 "00011111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.return_address + 12160 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12161 "00111001" // /* MW 3 */ + 12162 "11111100" // /* MW 2 */ + 12163 "00000111" // /* MW 1 */ + 12164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12165 "00000000" // /* MW 1 */ + 12166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12167 "00000000" // /* MW 1 */ + 12168 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12169 "00000000" // /* MW 1 */ + 12170 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12171 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12172 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12173 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12174 "00011000" // LDA p7, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12175 "10011001" // /* MW 3 */ + 12176 "11111011" // /* MW 2 */ + 12177 "00000111" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12178 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 12179 "00000000" // /* MW 3 */ + 12180 "00101000" // /* MW 2 */ + 12181 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12183 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12185 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12187 "00000000" // /* MW 1 */ +.src_ref 8 "mul_impl.h" 134 25 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12188 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12189 "00000001" // /* MW 3 */ + 12190 "00100000" // /* MW 2 */ + 12191 "00010000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 139 4 +.src_ref 8 "mul_impl.h" 134 25 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12192 "00111010" // ST r16, [p7, #16]; PADDXM [sp], #-64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12193 "01110001" // /* MW 9 */ + 12194 "00000000" // /* MW 8 */ + 12195 "00000000" // /* MW 7 */ + 12196 "00000000" // /* MW 6 */ + 12197 "11111110" // /* MW 5 */ + 12198 "00111111" // /* MW 4 */ + 12199 "00110000" // /* MW 3 */ + 12200 "11000010" // /* MW 2 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv___func_end0 + 12201 "11101000" // /* MW 1 */ +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_begin0 +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.function run _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E +.src_ref 3 "elementwise_binary.h" 149 first +.src_ref 3 "elementwise_binary.h" 156 37 +.src_ref 3 "elementwise_binary.h" 168 8 first +.function_start + 12208 "10111010" // MOVA m0, #32; MOVXM ls, #12384 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12209 "00010000" // /* MW 9 */ + 12210 "00110000" // /* MW 8 */ + 12211 "01111000" // /* MW 7 */ + 12212 "00001100" // /* MW 6 */ + 12213 "00000000" // /* MW 5 */ + 12214 "00000000" // /* MW 4 */ + 12215 "10000000" // /* MW 3 */ + 12216 "00000000" // /* MW 2 */ + 12217 "00000100" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 37 first +.src_ref 3 "elementwise_binary.h" 168 8 first + 12218 "10111010" // LDA r3, [p3], m0; MOVXM le, #12400 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12219 "00010000" // /* MW 9 */ + 12220 "00111000" // /* MW 8 */ + 12221 "10111000" // /* MW 7 */ + 12222 "00001101" // /* MW 6 */ + 12223 "00000000" // /* MW 5 */ + 12224 "00000000" // /* MW 4 */ + 12225 "11010000" // /* MW 3 */ + 12226 "00001110" // /* MW 2 */ + 12227 "01100001" // /* MW 1 */ +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 12228 "10111010" // LDA m1, [p3]; MOVX r1, #-6; MOV r0, #828 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12229 "01011000" // /* MW 9 */ + 12230 "00111100" // /* MW 8 */ + 12231 "00001011" // /* MW 7 */ + 12232 "01001000" // /* MW 6 */ + 12233 "00010111" // /* MW 5 */ + 12234 "00111110" // /* MW 4 */ + 12235 "11010000" // /* MW 3 */ + 12236 "10010000" // /* MW 2 */ + 12237 "01100000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 +.src_ref 3 "elementwise_binary.h" 156 78 + 12238 "10111010" // LDA m0, [p3, #4]; MOVXM p4, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12239 "00010000" // /* MW 9 */ + 12240 "00110010" // /* MW 8 */ + 12241 "00110010" // /* MW 7 */ + 12242 "11110010" // /* MW 6 */ + 12243 "00000001" // /* MW 5 */ + 12244 "00000000" // /* MW 4 */ + 12245 "11010000" // /* MW 3 */ + 12246 "10000000" // /* MW 2 */ + 12247 "01100010" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 12248 "10011000" // LDA.s8 r2, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12249 "01000010" // /* MW 3 */ + 12250 "00000100" // /* MW 2 */ + 12251 "00000100" // /* MW 1 */ + 12252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12253 "00000000" // /* MW 1 */ + 12254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12255 "00000000" // /* MW 1 */ + 12256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12257 "00000000" // /* MW 1 */ +.src_ref 3 "elementwise_binary.h" 156 78 + 12258 "10011000" // LSHL r1, r3, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12259 "00011101" // /* MW 3 */ + 12260 "11000010" // /* MW 2 */ + 12261 "00010000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 168 8 +.src_ref 3 "elementwise_binary.h" 187 20 first + 12262 "00110100" // VLDB x1, [p0], m1; ADD.NC lc, r1, #-7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12263 "11111001" // /* MW 5 */ + 12264 "11100001" // /* MW 4 */ + 12265 "10001010" // /* MW 3 */ + 12266 "00001110" // /* MW 2 */ + 12267 "00000101" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12268 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12269 "01101000" // /* MW 5 */ + 12270 "01010000" // /* MW 4 */ + 12271 "01110000" // /* MW 3 */ + 12272 "00010011" // /* MW 2 */ + 12273 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 195 20 +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12274 "00010010" // VLDA x3, [p1], m0; VLDB x1, [p0], m1; MOVX crRnd, r2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12275 "10000000" // /* MW 7 */ + 12276 "10111010" // /* MW 6 */ + 12277 "11101000" // /* MW 5 */ + 12278 "01010000" // /* MW 4 */ + 12279 "01110000" // /* MW 3 */ + 12280 "00011011" // /* MW 2 */ + 12281 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12282 "00111100" // VLDA x2, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12283 "01101000" // /* MW 5 */ + 12284 "01010000" // /* MW 4 */ + 12285 "01110000" // /* MW 3 */ + 12286 "00010011" // /* MW 2 */ + 12287 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12288 "00111100" // VLDA x3, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12289 "11101000" // /* MW 5 */ + 12290 "01010000" // /* MW 4 */ + 12291 "01110000" // /* MW 3 */ + 12292 "00011011" // /* MW 2 */ + 12293 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12294 "10011000" // VLDA x2, [p1], m0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12295 "10011011" // /* MW 3 */ + 12296 "00001000" // /* MW 2 */ + 12297 "00000001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12298 "00111100" // VLDA x3, [p1], m0; VLDB x0, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12299 "01101000" // /* MW 5 */ + 12300 "01010000" // /* MW 4 */ + 12301 "01110000" // /* MW 3 */ + 12302 "00011011" // /* MW 2 */ + 12303 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12304 "00111100" // VLDA x2, [p1], m0; VLDB x1, [p0], m1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12305 "11101000" // /* MW 5 */ + 12306 "01010000" // /* MW 4 */ + 12307 "01110000" // /* MW 3 */ + 12308 "00010011" // /* MW 2 */ + 12309 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12310 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12311 "01000001" // /* MW 9 */ + 12312 "11100010" // /* MW 8 */ + 12313 "00000000" // /* MW 7 */ + 12314 "00011101" // /* MW 6 */ + 12315 "00110100" // /* MW 5 */ + 12316 "00101000" // /* MW 4 */ + 12317 "01110000" // /* MW 3 */ + 12318 "00011011" // /* MW 2 */ + 12319 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12320 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12321 "01100001" // /* MW 9 */ + 12322 "11100000" // /* MW 8 */ + 12323 "00000001" // /* MW 7 */ + 12324 "00011101" // /* MW 6 */ + 12325 "01110100" // /* MW 5 */ + 12326 "00101000" // /* MW 4 */ + 12327 "01110000" // /* MW 3 */ + 12328 "00010011" // /* MW 2 */ + 12329 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12330 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12331 "01000001" // /* MW 9 */ + 12332 "11100010" // /* MW 8 */ + 12333 "00000000" // /* MW 7 */ + 12334 "00011101" // /* MW 6 */ + 12335 "00110100" // /* MW 5 */ + 12336 "00101000" // /* MW 4 */ + 12337 "01110000" // /* MW 3 */ + 12338 "00011011" // /* MW 2 */ + 12339 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12340 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12341 "01100001" // /* MW 9 */ + 12342 "11100000" // /* MW 8 */ + 12343 "00000001" // /* MW 7 */ + 12344 "00011101" // /* MW 6 */ + 12345 "01110100" // /* MW 5 */ + 12346 "00101000" // /* MW 4 */ + 12347 "01110000" // /* MW 3 */ + 12348 "00010011" // /* MW 2 */ + 12349 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12350 "01001010" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VMUL.f dm0, x1, x2, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12351 "01000001" // /* MW 9 */ + 12352 "11100010" // /* MW 8 */ + 12353 "00000000" // /* MW 7 */ + 12354 "00011101" // /* MW 6 */ + 12355 "00110100" // /* MW 5 */ + 12356 "00101000" // /* MW 4 */ + 12357 "01110000" // /* MW 3 */ + 12358 "00011011" // /* MW 2 */ + 12359 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12360 "01001010" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VMUL.f dm1, x0, x3, r0 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12361 "01100001" // /* MW 9 */ + 12362 "11100000" // /* MW 8 */ + 12363 "00000001" // /* MW 7 */ + 12364 "00011101" // /* MW 6 */ + 12365 "01110100" // /* MW 5 */ + 12366 "00101000" // /* MW 4 */ + 12367 "01110000" // /* MW 3 */ + 12368 "00010011" // /* MW 2 */ + 12369 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12370 "01101110" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; VMUL.f dm0, x1, x2, r0 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 12371 "01000001" // /* MW 13 */ + 12372 "11100010" // /* MW 12 */ + 12373 "00000000" // /* MW 11 */ + 12374 "10001100" // /* MW 10 */ + 12375 "01110000" // /* MW 9 */ + 12376 "00001000" // /* MW 8 */ + 12377 "00000000" // /* MW 7 */ + 12378 "00000000" // /* MW 6 */ + 12379 "01101000" // /* MW 5 */ + 12380 "01010000" // /* MW 4 */ + 12381 "01110000" // /* MW 3 */ + 12382 "00011011" // /* MW 2 */ + 12383 "00100001" // /* MW 1 */ +.label ZLS_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_176 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 187 20 first +.src_ref 3 "elementwise_binary.h" 189 20 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.begin_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 12384 "00001011" // VLDA x2, [p1], m0; VLDB x1, [p0], m1; VST.CONV.bf16.fp32 cml1, [p2], #64;NOPX; NOPM; VMUL.f dm1, x0, x3, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12385 "00000011" // /* MW 15 */ + 12386 "00001111" // /* MW 14 */ + 12387 "01111000" // /* MW 13 */ + 12388 "10100101" // /* MW 12 */ + 12389 "00000001" // /* MW 11 */ + 12390 "00000000" // /* MW 10 */ + 12391 "00000000" // /* MW 9 */ + 12392 "00000000" // /* MW 8 */ + 12393 "10100011" // /* MW 7 */ + 12394 "00011100" // /* MW 6 */ + 12395 "11101010" // /* MW 5 */ + 12396 "01010000" // /* MW 4 */ + 12397 "01110000" // /* MW 3 */ + 12398 "00010011" // /* MW 2 */ + 12399 "00100001" // /* MW 1 */ +.label ZLE_F_ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E_192 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 211 20 first +.src_ref 3 "elementwise_binary.h" 213 20 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12400 "00001011" // VLDA x3, [p1], m0; VLDB x0, [p0], m1; VST.CONV.bf16.fp32 cml0, [p2], #64;NOPX; NOPM; VMUL.f dm0, x1, x2, r0 /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12401 "00010010" // /* MW 15 */ + 12402 "00000111" // /* MW 14 */ + 12403 "01111000" // /* MW 13 */ + 12404 "10100101" // /* MW 12 */ + 12405 "00000001" // /* MW 11 */ + 12406 "00000000" // /* MW 10 */ + 12407 "00000000" // /* MW 9 */ + 12408 "00000000" // /* MW 8 */ + 12409 "00100011" // /* MW 7 */ + 12410 "00011100" // /* MW 6 */ + 12411 "01101010" // /* MW 5 */ + 12412 "01010000" // /* MW 4 */ + 12413 "01110000" // /* MW 3 */ + 12414 "00011011" // /* MW 2 */ + 12415 "00100001" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 12416 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12417 "01100001" // /* MW 7 */ + 12418 "11100000" // /* MW 6 */ + 12419 "00000001" // /* MW 5 */ + 12420 "00000010" // /* MW 4 */ + 12421 "01100000" // /* MW 3 */ + 12422 "10010100" // /* MW 2 */ + 12423 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12424 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12425 "01000001" // /* MW 7 */ + 12426 "11100010" // /* MW 6 */ + 12427 "00000000" // /* MW 5 */ + 12428 "00000010" // /* MW 4 */ + 12429 "01100000" // /* MW 3 */ + 12430 "10000100" // /* MW 2 */ + 12431 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12432 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12433 "01100001" // /* MW 7 */ + 12434 "11100000" // /* MW 6 */ + 12435 "00000001" // /* MW 5 */ + 12436 "00000010" // /* MW 4 */ + 12437 "01100000" // /* MW 3 */ + 12438 "10010100" // /* MW 2 */ + 12439 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12440 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12441 "01000001" // /* MW 7 */ + 12442 "11100010" // /* MW 6 */ + 12443 "00000000" // /* MW 5 */ + 12444 "00000010" // /* MW 4 */ + 12445 "01100000" // /* MW 3 */ + 12446 "10000100" // /* MW 2 */ + 12447 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12448 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12449 "01100001" // /* MW 7 */ + 12450 "11100000" // /* MW 6 */ + 12451 "00000001" // /* MW 5 */ + 12452 "00000010" // /* MW 4 */ + 12453 "01100000" // /* MW 3 */ + 12454 "10010100" // /* MW 2 */ + 12455 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 5 "mul_acc32_fp.hpp" 36 105 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12456 "01100010" // VST.CONV.bf16.fp32 cml0, [p2], #64; VMUL.f dm0, x1, x2, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12457 "01000001" // /* MW 7 */ + 12458 "11100010" // /* MW 6 */ + 12459 "00000000" // /* MW 5 */ + 12460 "00000010" // /* MW 4 */ + 12461 "01100000" // /* MW 3 */ + 12462 "10000100" // /* MW 2 */ + 12463 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 5 "mul_acc32_fp.hpp" 36 105 +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12464 "01100010" // VST.CONV.bf16.fp32 cml1, [p2], #64; VMUL.f dm1, x0, x3, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12465 "01100001" // /* MW 7 */ + 12466 "11100000" // /* MW 6 */ + 12467 "00000001" // /* MW 5 */ + 12468 "00000010" // /* MW 4 */ + 12469 "01100000" // /* MW 3 */ + 12470 "10010100" // /* MW 2 */ + 12471 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12472 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12473 "00100011" // /* MW 3 */ + 12474 "00011100" // /* MW 2 */ + 12475 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 172 4 first +.src_ref 3 "elementwise_binary.h" 195 20 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12476 "01011100" // VST.CONV.bf16.fp32 cml1, [p2], #64;RET lr /* MW 6 */ /* control_operation: words=6 rts unconditional cycles_taken=1 delay_slots=5 */ + 12477 "00000000" // /* MW 5 */ + 12478 "01010000" // /* MW 4 */ + 12479 "01100000" // /* MW 3 */ + 12480 "10010100" // /* MW 2 */ + 12481 "01000011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12482 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12483 "00100011" // /* MW 3 */ + 12484 "00011100" // /* MW 2 */ + 12485 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12486 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12487 "10100011" // /* MW 3 */ + 12488 "00011100" // /* MW 2 */ + 12489 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 5 "accum.hpp" 1110 102 first +.src_ref 3 "elementwise_binary.h" 218 20 first +.delay_slot + 12490 "00011000" // VST.CONV.bf16.fp32 cml0, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12491 "00100011" // /* MW 3 */ + 12492 "00011100" // /* MW 2 */ + 12493 "00001010" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 5 "accum.hpp" 1110 102 +.src_ref 3 "elementwise_binary.h" 195 20 first +.delay_slot + 12494 "00011000" // VST.CONV.bf16.fp32 cml1, [p2], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12495 "10100011" // /* MW 3 */ + 12496 "00011100" // /* MW 2 */ + 12497 "00001010" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 12498 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E__end +.label __ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E___func_end0 + 12499 "00000000" // /* MW 1 */ +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_begin0 +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.function superkernel_mul1d _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE +.src_ref 7 "superkernels.cpp" 369 first +.src_ref 7 "superkernels.cpp" 374 6 +.function_start + 12512 "01000100" // MOVXM p4, #508992 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12513 "10000000" // /* MW 5 */ + 12514 "11001000" // /* MW 4 */ + 12515 "11001000" // /* MW 3 */ + 12516 "00000111" // /* MW 2 */ + 12517 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first + 12518 "11010100" // LDA r16, [p4]; MOV r17, CORE_ID /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12519 "11000001" // /* MW 5 */ + 12520 "10110101" // /* MW 4 */ + 12521 "11011000" // /* MW 3 */ + 12522 "11000010" // /* MW 2 */ + 12523 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 369 + 12524 "11000100" // PADDXM [sp], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12525 "00000001" // /* MW 5 */ + 12526 "00000000" // /* MW 4 */ + 12527 "00000000" // /* MW 3 */ + 12528 "00001000" // /* MW 2 */ + 12529 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 22 first +.src_ref 1 "io_buffer_main.h" 218 49 + 12530 "00111010" // ST r14, [sp, #-8]; EXTEND.u8 r17, r17; MOV r14, p2 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12531 "01111001" // /* MW 9 */ + 12532 "01100000" // /* MW 8 */ + 12533 "11001010" // /* MW 7 */ + 12534 "10000001" // /* MW 6 */ + 12535 "00010100" // /* MW 5 */ + 12536 "00100011" // /* MW 4 */ + 12537 "10110000" // /* MW 3 */ + 12538 "00111010" // /* MW 2 */ + 12539 "11111111" // /* MW 1 */ + 12540 "00000010" // ST p0, [sp, #-20]; MOV r0, r15 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12541 "01110000" // /* MW 7 */ + 12542 "11010000" // /* MW 6 */ + 12543 "00001011" // /* MW 5 */ + 12544 "00000000" // /* MW 4 */ + 12545 "10110000" // /* MW 3 */ + 12546 "10000011" // /* MW 2 */ + 12547 "11111101" // /* MW 1 */ + 12548 "10011000" // ST r0, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12549 "00010101" // /* MW 3 */ + 12550 "11111100" // /* MW 2 */ + 12551 "00001111" // /* MW 1 */ + 12552 "10011000" // ST lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12553 "00111101" // /* MW 3 */ + 12554 "11110000" // /* MW 2 */ + 12555 "00001111" // /* MW 1 */ + 12556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12557 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 374 6 first +.src_ref 7 "superkernels.cpp" 374 16 first + 12558 "10000100" // JNZ r16, #12704 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=12704 delay_slots=5 */ + 12559 "00000001" // /* MW 5 */ + 12560 "01000000" // /* MW 4 */ + 12561 "11010000" // /* MW 3 */ + 12562 "00011000" // /* MW 2 */ + 12563 "10000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 30 first +.delay_slot + 12564 "00011000" // ADD r17, r17, #-2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12565 "11111011" // /* MW 3 */ + 12566 "01100011" // /* MW 2 */ + 12567 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 12568 "01000100" // MOVXM p2, #509008 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12569 "10100000" // /* MW 5 */ + 12570 "11001000" // /* MW 4 */ + 12571 "11000100" // /* MW 3 */ + 12572 "00000111" // /* MW 2 */ + 12573 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 371 11 +.delay_slot + 12574 "00000010" // ST r17, [p2]; MOV p2, p7 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 12575 "01110000" // /* MW 7 */ + 12576 "01100000" // /* MW 6 */ + 12577 "00110111" // /* MW 5 */ + 12578 "00000001" // /* MW 4 */ + 12579 "00110000" // /* MW 3 */ + 12580 "11000110" // /* MW 2 */ + 12581 "01000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 +.delay_slot + 12582 "11111000" // MOV r15, p3 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12583 "11000000" // /* MW 3 */ + 12584 "11010110" // /* MW 2 */ + 12585 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 7 "superkernels.cpp" 379 28 +.src_ref 7 "superkernels.cpp" 381 42 +.src_ref 7 "superkernels.cpp" 393 2 +.delay_slot + 12586 "00111010" // ST p2, [sp, #-12]; MOVXM p7, #509376 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12587 "00010001" // /* MW 9 */ + 12588 "11100000" // /* MW 8 */ + 12589 "10110010" // /* MW 7 */ + 12590 "11110011" // /* MW 6 */ + 12591 "00000001" // /* MW 5 */ + 12592 "00000000" // /* MW 4 */ + 12593 "10110000" // /* MW 3 */ + 12594 "10100011" // /* MW 2 */ + 12595 "11111110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 +.src_ref 4 "tile.hpp" 86 8 +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 12596 "00111010" // MOVS p0, p7; MOVXM p2, #509028 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12597 "00010001" // /* MW 9 */ + 12598 "00110010" // /* MW 8 */ + 12599 "00110010" // /* MW 7 */ + 12600 "11110001" // /* MW 6 */ + 12601 "00000001" // /* MW 5 */ + 12602 "00000000" // /* MW 4 */ + 12603 "01100000" // /* MW 3 */ + 12604 "10010001" // /* MW 2 */ + 12605 "00010011" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.src_ref 4 "tile.hpp" 86 8 first +.aggressive_scheduled_block_id 1 +.noswbrkpt + 12606 "10111010" // ST.s8 r16, [p2]; MOVXM p2, #509024 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12607 "00010000" // /* MW 9 */ + 12608 "00110000" // /* MW 8 */ + 12609 "00110010" // /* MW 7 */ + 12610 "11110001" // /* MW 6 */ + 12611 "00000001" // /* MW 5 */ + 12612 "00000000" // /* MW 4 */ + 12613 "11100000" // /* MW 3 */ + 12614 "11000000" // /* MW 2 */ + 12615 "01000000" // /* MW 1 */ +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12616 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12617 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 377 4 first +.aggressive_scheduled_block_id 1 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12618 "00000100" // JL #12064 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12064 delay_slots=5 */ + 12619 "00000001" // /* MW 5 */ + 12620 "00000000" // /* MW 4 */ + 12621 "10010000" // /* MW 3 */ + 12622 "00010111" // /* MW 2 */ + 12623 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12624 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12625 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 12626 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12627 "00000000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 86 8 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12628 "00011000" // MOVX r16, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12629 "00110001" // /* MW 3 */ + 12630 "00100000" // /* MW 2 */ + 12631 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 +.delay_slot + 12632 "00011000" // MOVX r16, #1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12633 "00000101" // /* MW 3 */ + 12634 "00100000" // /* MW 2 */ + 12635 "00010000" // /* MW 1 */ +.src_ref 4 "tile.hpp" 74 8 first +.delay_slot + 12636 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12637 "00010001" // /* MW 3 */ + 12638 "00000110" // /* MW 2 */ + 12639 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 381 42 first +.return_address + 12640 "10111010" // LDA r16, [p7]; MOVXM p1, #509008 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12641 "00010000" // /* MW 9 */ + 12642 "00101000" // /* MW 8 */ + 12643 "10110010" // /* MW 7 */ + 12644 "11110000" // /* MW 6 */ + 12645 "00000001" // /* MW 5 */ + 12646 "00000000" // /* MW 4 */ + 12647 "11010000" // /* MW 3 */ + 12648 "11000010" // /* MW 2 */ + 12649 "11100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 +.src_ref 7 "superkernels.cpp" 381 18 +.src_ref 7 "superkernels.cpp" 390 48 + 12650 "10111010" // LDA r17, [p1]; MOVXM p3, #509012 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12651 "00010000" // /* MW 9 */ + 12652 "00101010" // /* MW 8 */ + 12653 "10110010" // /* MW 7 */ + 12654 "11110001" // /* MW 6 */ + 12655 "00000001" // /* MW 5 */ + 12656 "00000000" // /* MW 4 */ + 12657 "11010000" // /* MW 3 */ + 12658 "11000110" // /* MW 2 */ + 12659 "00100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 28 first +.src_ref 7 "superkernels.cpp" 382 16 +.src_ref 7 "superkernels.cpp" 391 48 + 12660 "10111010" // LDA.u16 r18, [p7, #10]; MOVXM p1, #509016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12661 "00010000" // /* MW 9 */ + 12662 "00101100" // /* MW 8 */ + 12663 "10110010" // /* MW 7 */ + 12664 "11110000" // /* MW 6 */ + 12665 "00000001" // /* MW 5 */ + 12666 "00000000" // /* MW 4 */ + 12667 "01010000" // /* MW 3 */ + 12668 "11001011" // /* MW 2 */ + 12669 "11101010" // /* MW 1 */ + 12670 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12671 "00000000" // /* MW 1 */ + 12672 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12673 "00000000" // /* MW 1 */ + 12674 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12675 "00000000" // /* MW 1 */ + 12676 "10000100" // J #12720 /* MW 6 */ /* control_operation: words=6 jump unconditional cycles_taken=1 direct absolute target_address=12720 delay_slots=5 */ + 12677 "00000000" // /* MW 5 */ + 12678 "00000000" // /* MW 4 */ + 12679 "11011000" // /* MW 3 */ + 12680 "00011000" // /* MW 2 */ + 12681 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 +.delay_slot + 12682 "01000100" // MOVXM p2, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12683 "10111000" // /* MW 5 */ + 12684 "11001000" // /* MW 4 */ + 12685 "11000100" // /* MW 3 */ + 12686 "00000111" // /* MW 2 */ + 12687 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 27 first +.delay_slot + 12688 "10011000" // MUL r16, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12689 "00001111" // /* MW 3 */ + 12690 "01100001" // /* MW 2 */ + 12691 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 379 13 first +.delay_slot + 12692 "10011000" // ST r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12693 "01010001" // /* MW 3 */ + 12694 "00000110" // /* MW 2 */ + 12695 "00001010" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 381 16 first +.delay_slot + 12696 "10011000" // ST r16, [p3] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12697 "00010001" // /* MW 3 */ + 12698 "00000110" // /* MW 2 */ + 12699 "00001011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 382 16 first +.delay_slot + 12700 "10011000" // ST r16, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12701 "00010001" // /* MW 3 */ + 12702 "00000110" // /* MW 2 */ + 12703 "00001001" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_192 +.src_ref 7 "superkernels.cpp" 390 48 + 12704 "01000100" // MOVXM p3, #509012 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12705 "10101000" // /* MW 5 */ + 12706 "11001000" // /* MW 4 */ + 12707 "11000110" // /* MW 3 */ + 12708 "00000111" // /* MW 2 */ + 12709 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 + 12710 "10111010" // NOPA; MOVXM p1, #509016 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12711 "00010000" // /* MW 9 */ + 12712 "00101100" // /* MW 8 */ + 12713 "10110010" // /* MW 7 */ + 12714 "11110000" // /* MW 6 */ + 12715 "00000001" // /* MW 5 */ + 12716 "00000000" // /* MW 4 */ + 12717 "11110000" // /* MW 3 */ + 12718 "00101100" // /* MW 2 */ + 12719 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_208 +.src_ref 1 "io_buffer_main.h" 218 49 first + 12720 "00011000" // ADD.NC p0, r15, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12721 "10000110" // /* MW 3 */ + 12722 "01100111" // /* MW 2 */ + 12723 "00011000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 +.src_ref 1 "io_buffer_main.h" 218 49 + 12724 "10111010" // LDA r27, [p0], #-4; MOVXM p2, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12725 "00010000" // /* MW 9 */ + 12726 "00100000" // /* MW 8 */ + 12727 "00110010" // /* MW 7 */ + 12728 "11110001" // /* MW 6 */ + 12729 "00000001" // /* MW 5 */ + 12730 "00000000" // /* MW 4 */ + 12731 "11010000" // /* MW 3 */ + 12732 "11101110" // /* MW 2 */ + 12733 "00011111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 12734 "10011000" // LDA r16, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12735 "00010110" // /* MW 3 */ + 12736 "11111110" // /* MW 2 */ + 12737 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 12738 "10011000" // LDA r17, [p0], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12739 "00110110" // /* MW 3 */ + 12740 "11111110" // /* MW 2 */ + 12741 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first + 12742 "10011000" // LDA r18, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12743 "01010110" // /* MW 3 */ + 12744 "00000110" // /* MW 2 */ + 12745 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 28 first + 12746 "10011000" // LDA r19, [p0, #16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12747 "01110110" // /* MW 3 */ + 12748 "01000110" // /* MW 2 */ + 12749 "00000000" // /* MW 1 */ + 12750 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12751 "00000000" // /* MW 1 */ + 12752 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12753 "00000000" // /* MW 1 */ + 12754 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12755 "00000000" // /* MW 1 */ + 12756 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12757 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 12758 "00011000" // SEL.EQZ r16, r17, r16, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12759 "00000010" // /* MW 3 */ + 12760 "01100001" // /* MW 2 */ + 12761 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 first +.src_ref 1 "io_buffer_main.h" 218 20 + 12762 "01011100" // ST r16, [p0]; ADD r16, r18, #1 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12763 "00001110" // /* MW 5 */ + 12764 "01000000" // /* MW 4 */ + 12765 "00111001" // /* MW 3 */ + 12766 "11000010" // /* MW 2 */ + 12767 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 385 2 + 12768 "10011000" // ST r16, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12769 "00010001" // /* MW 3 */ + 12770 "00000110" // /* MW 2 */ + 12771 "00001010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 +.src_ref 1 "io_buffer_main.h" 395 8 + 12772 "00011000" // MOVX r16, #-1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12773 "11111101" // /* MW 3 */ + 12774 "11100000" // /* MW 2 */ + 12775 "00010111" // /* MW 1 */ + 12776 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12777 "00000000" // /* MW 1 */ + 12778 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12779 "00000000" // /* MW 1 */ + 12780 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12781 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12782 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12783 "00001000" // /* MW 3 */ + 12784 "11010011" // /* MW 2 */ + 12785 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 first + 12786 "00011000" // ADD.NC p2, r14, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12787 "00000110" // /* MW 3 */ + 12788 "01100111" // /* MW 2 */ + 12789 "00011010" // /* MW 1 */ + 12790 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12791 "00000000" // /* MW 1 */ + 12792 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12793 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 49 + 12794 "10011000" // LDA r27, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12795 "01110110" // /* MW 3 */ + 12796 "11111111" // /* MW 2 */ + 12797 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 64 + 12798 "10011000" // LDA r17, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12799 "00110110" // /* MW 3 */ + 12800 "11111110" // /* MW 2 */ + 12801 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 80 + 12802 "10011000" // LDA r18, [p2], #-4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12803 "01010110" // /* MW 3 */ + 12804 "11111110" // /* MW 2 */ + 12805 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 235 47 first + 12806 "10011000" // LDA r19, [p2, #20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12807 "01110110" // /* MW 3 */ + 12808 "01010110" // /* MW 2 */ + 12809 "00000010" // /* MW 1 */ + 12810 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12811 "00000000" // /* MW 1 */ + 12812 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12813 "00000000" // /* MW 1 */ + 12814 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12815 "00000000" // /* MW 1 */ + 12816 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12817 "00000000" // /* MW 1 */ + 12818 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12819 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 36 first +.src_ref 1 "io_buffer_main.h" 218 43 first + 12820 "00011000" // SEL.EQZ r17, r18, r17, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12821 "00010010" // /* MW 3 */ + 12822 "10100011" // /* MW 2 */ + 12823 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 218 20 + 12824 "10011000" // ST r17, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12825 "00110001" // /* MW 3 */ + 12826 "00000110" // /* MW 2 */ + 12827 "00001010" // /* MW 1 */ + 12828 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12829 "00000000" // /* MW 1 */ + 12830 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12831 "00000000" // /* MW 1 */ + 12832 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12833 "00000000" // /* MW 1 */ + 12834 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12835 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 395 8 first + 12836 "00011000" // ACQ r19, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12837 "00001000" // /* MW 3 */ + 12838 "11010011" // /* MW 2 */ + 12839 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 +.src_ref 7 "superkernels.cpp" 391 46 +.src_ref 1 "io_buffer_main.h" 324 32 + 12840 "00111010" // MOVS p6, p2; MOVX r16, #1; MOV r14, p6 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12841 "01111001" // /* MW 9 */ + 12842 "01100000" // /* MW 8 */ + 12843 "11001110" // /* MW 7 */ + 12844 "00101001" // /* MW 6 */ + 12845 "00000000" // /* MW 5 */ + 12846 "00000001" // /* MW 4 */ + 12847 "01100000" // /* MW 3 */ + 12848 "00010001" // /* MW 2 */ + 12849 "11010001" // /* MW 1 */ + 12850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12851 "00000000" // /* MW 1 */ + 12852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12853 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 12854 "00011000" // LDA p4, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12855 "00011001" // /* MW 3 */ + 12856 "11101110" // /* MW 2 */ + 12857 "00000111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 48 first + 12858 "00001100" // LDA r17, [p3]; ST p0, [sp, #-20] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12859 "00111011" // /* MW 5 */ + 12860 "11011000" // /* MW 4 */ + 12861 "11011111" // /* MW 3 */ + 12862 "11000110" // /* MW 2 */ + 12863 "01100000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 48 first +.src_ref 7 "superkernels.cpp" 393 2 + 12864 "11010100" // LDA r20, [p1]; MOV p3, p7 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12865 "10000001" // /* MW 5 */ + 12866 "11011101" // /* MW 4 */ + 12867 "11010110" // /* MW 3 */ + 12868 "11010010" // /* MW 2 */ + 12869 "00100000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id first + 12870 "10011000" // LDA r18, [p2], #16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12871 "01010110" // /* MW 3 */ + 12872 "01001110" // /* MW 2 */ + 12873 "00000010" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.noswbrkpt + 12874 "10011000" // LDA p2, [p0], #20 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12875 "00011110" // /* MW 3 */ + 12876 "01011101" // /* MW 2 */ + 12877 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12878 "11111000" // MOV p7, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12879 "11000000" // /* MW 3 */ + 12880 "01100000" // /* MW 2 */ + 12881 "00011111" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12883 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12884 "10011000" // LDA r19, [p4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12885 "01110110" // /* MW 3 */ + 12886 "00000110" // /* MW 2 */ + 12887 "00000100" // /* MW 1 */ +.aggressive_scheduled_block_id 2 +.nohwbrkpt +.noswbrkpt + 12888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12889 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 393 2 first +.aggressive_scheduled_block_id 2 +.no_stack_arguments +.nohwbrkpt +.noswbrkpt + 12890 "00000100" // JL #12208 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=12208 delay_slots=5 */ + 12891 "00000001" // /* MW 5 */ + 12892 "00000000" // /* MW 4 */ + 12893 "11011000" // /* MW 3 */ + 12894 "00010111" // /* MW 2 */ + 12895 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 40 +.delay_slot +.aggressive_scheduled_block_id 2 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 12896 "11111000" // MOV r15, p2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12897 "11000000" // /* MW 3 */ + 12898 "11010100" // /* MW 2 */ + 12899 "00011011" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12900 "10011000" // LSHL r17, r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12901 "00001101" // /* MW 3 */ + 12902 "01100011" // /* MW 2 */ + 12903 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 first +.delay_slot + 12904 "10011000" // LSHL r16, r20, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12905 "00001101" // /* MW 3 */ + 12906 "00100001" // /* MW 2 */ + 12907 "00010101" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 391 46 +.delay_slot + 12908 "01011000" // ADD.NC p1, r18, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12909 "01000001" // /* MW 3 */ + 12910 "01101001" // /* MW 2 */ + 12911 "00011001" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 390 46 first +.delay_slot + 12912 "11100001" // NOPA; NOPB; NOPS; NOPX; ADD.NC p0, r19, r17; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 12913 "00000000" // /* MW 15 */ + 12914 "00000000" // /* MW 14 */ + 12915 "10101000" // /* MW 13 */ + 12916 "11100010" // /* MW 12 */ + 12917 "00110100" // /* MW 11 */ + 12918 "00000000" // /* MW 10 */ + 12919 "00000000" // /* MW 9 */ + 12920 "00000000" // /* MW 8 */ + 12921 "01011011" // /* MW 7 */ + 12922 "00000001" // /* MW 6 */ + 12923 "00100000" // /* MW 5 */ + 12924 "00000000" // /* MW 4 */ + 12925 "11110000" // /* MW 3 */ + 12926 "00101100" // /* MW 2 */ + 12927 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 32 first +.src_ref 1 "io_buffer_main.h" 327 28 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 32 +.src_ref 1 "io_buffer_main.h" 327 40 +.src_ref 1 "io_buffer_main.h" 425 8 +.src_ref 1 "io_buffer_main.h" 425 8 +.return_address + 12928 "10111010" // LDA r17, [p6, #16]; MOVX r16, #1; MOV p1, r15 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 12929 "01111000" // /* MW 9 */ + 12930 "11010000" // /* MW 8 */ + 12931 "10110011" // /* MW 7 */ + 12932 "00101000" // /* MW 6 */ + 12933 "00000000" // /* MW 5 */ + 12934 "00000001" // /* MW 4 */ + 12935 "11010000" // /* MW 3 */ + 12936 "11000110" // /* MW 2 */ + 12937 "11001000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 + 12938 "01000100" // MOVXM p6, #509020 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12939 "10111000" // /* MW 5 */ + 12940 "11001000" // /* MW 4 */ + 12941 "11001100" // /* MW 3 */ + 12942 "00000111" // /* MW 2 */ + 12943 "00000000" // /* MW 1 */ + 12944 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12945 "00000000" // /* MW 1 */ + 12946 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12947 "00000000" // /* MW 1 */ + 12948 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12949 "00000000" // /* MW 1 */ + 12950 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12951 "00000000" // /* MW 1 */ + 12952 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12953 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12954 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12955 "00001000" // /* MW 3 */ + 12956 "01010001" // /* MW 2 */ + 12957 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 40 first + 12958 "10011000" // LDA r17, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12959 "00110110" // /* MW 3 */ + 12960 "11110110" // /* MW 2 */ + 12961 "00000001" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 + 12962 "00011000" // LDA p2, [sp, #-20] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12963 "00011001" // /* MW 3 */ + 12964 "11101101" // /* MW 2 */ + 12965 "00000111" // /* MW 1 */ + 12966 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12967 "00000000" // /* MW 1 */ + 12968 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12969 "00000000" // /* MW 1 */ + 12970 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12971 "00000000" // /* MW 1 */ + 12972 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12973 "00000000" // /* MW 1 */ + 12974 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12975 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 + 12976 "10011000" // SUB r17, r16, r17 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12977 "00010001" // /* MW 3 */ + 12978 "00100011" // /* MW 2 */ + 12979 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 324 51 first +.src_ref 1 "io_buffer_main.h" 327 28 + 12980 "00001100" // LDA r17, [p2, #20]; ST r17, [p1, #-4] /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 12981 "01100011" // /* MW 5 */ + 12982 "11101100" // /* MW 4 */ + 12983 "11010011" // /* MW 3 */ + 12984 "11000110" // /* MW 2 */ + 12985 "01001010" // /* MW 1 */ + 12986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12987 "00000000" // /* MW 1 */ + 12988 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12989 "00000000" // /* MW 1 */ + 12990 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12991 "00000000" // /* MW 1 */ + 12992 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12993 "00000000" // /* MW 1 */ + 12994 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12995 "00000000" // /* MW 1 */ + 12996 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 12997 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 425 8 first + 12998 "00011000" // REL r17, r16 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 12999 "00001000" // /* MW 3 */ + 13000 "01010001" // /* MW 2 */ + 13001 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 +.src_ref 7 "superkernels.cpp" 398 14 +.src_ref 1 "io_buffer_main.h" 327 40 first + 13002 "10111010" // LDA r19, [p7, #-8]; MOVXM p1, #508992 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13003 "00010000" // /* MW 9 */ + 13004 "00100000" // /* MW 8 */ + 13005 "10110010" // /* MW 7 */ + 13006 "11110000" // /* MW 6 */ + 13007 "00000001" // /* MW 5 */ + 13008 "00000000" // /* MW 4 */ + 13009 "11010000" // /* MW 3 */ + 13010 "11001110" // /* MW 2 */ + 13011 "11111100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 19 first + 13012 "10011000" // LDA r18, [p6] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13013 "01010110" // /* MW 3 */ + 13014 "00000110" // /* MW 2 */ + 13015 "00000110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 13016 "10011000" // LDA r17, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13017 "00110110" // /* MW 3 */ + 13018 "00000110" // /* MW 2 */ + 13019 "00000001" // /* MW 1 */ + 13020 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13021 "00000000" // /* MW 1 */ + 13022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13023 "00000000" // /* MW 1 */ + 13024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13025 "00000000" // /* MW 1 */ + 13026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13027 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 32 first + 13028 "10011000" // SUB r16, r16, r19 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13029 "00110001" // /* MW 3 */ + 13030 "00100001" // /* MW 2 */ + 13031 "00010100" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 327 28 + 13032 "10011000" // ST r16, [p7, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13033 "00010001" // /* MW 3 */ + 13034 "11100110" // /* MW 2 */ + 13035 "00001111" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 16 first + 13036 "10011000" // NE r16, r17, r18 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13037 "00101000" // /* MW 3 */ + 13038 "01100001" // /* MW 2 */ + 13039 "00010100" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 397 6 + 13040 "10000100" // JNZ r16, #13072 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13072 delay_slots=5 */ + 13041 "00000001" // /* MW 5 */ + 13042 "01000000" // /* MW 4 */ + 13043 "10001000" // /* MW 3 */ + 13044 "00011001" // /* MW 2 */ + 13045 "10000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13046 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13047 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13048 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13049 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13050 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13051 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13052 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13055 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 + 13056 "00011000" // MOVX r16, #0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13057 "00000001" // /* MW 3 */ + 13058 "00100000" // /* MW 2 */ + 13059 "00010000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 398 14 first + 13060 "00110110" // NOPA; NOPB; ST r16, [p1]; NOPX /* MW 12 */ /* control_operation: words=12 cycles_taken=1 */ + 13061 "11000001" // /* MW 11 */ + 13062 "00001000" // /* MW 10 */ + 13063 "10000011" // /* MW 9 */ + 13064 "00000000" // /* MW 8 */ + 13065 "00000000" // /* MW 7 */ + 13066 "00000000" // /* MW 6 */ + 13067 "00100000" // /* MW 5 */ + 13068 "00000000" // /* MW 4 */ + 13069 "11110000" // /* MW 3 */ + 13070 "00101100" // /* MW 2 */ + 13071 "00000000" // /* MW 1 */ +.label TGT_F_Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE_560 +.src_ref 7 "superkernels.cpp" 400 + 13072 "00011000" // LDA lr, [sp, #-16] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13073 "00111001" // /* MW 3 */ + 13074 "11110000" // /* MW 2 */ + 13075 "00000111" // /* MW 1 */ + 13076 "00011000" // LDA r15, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13077 "11110001" // /* MW 3 */ + 13078 "11111101" // /* MW 2 */ + 13079 "00000111" // /* MW 1 */ + 13080 "00011000" // LDA p7, [sp, #-12] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13081 "10011001" // /* MW 3 */ + 13082 "11110111" // /* MW 2 */ + 13083 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id first + 13084 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13085 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.noswbrkpt + 13086 "00011000" // LDA r14, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13087 "11010001" // /* MW 3 */ + 13088 "11111001" // /* MW 2 */ + 13089 "00000111" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13090 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13091 "00000000" // /* MW 1 */ +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13092 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13093 "00000000" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 first +.aggressive_scheduled_block_id 3 +.nohwbrkpt +.noswbrkpt + 13094 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13095 "00000000" // /* MW 3 */ + 13096 "00101000" // /* MW 2 */ + 13097 "00010000" // /* MW 1 */ +.delay_slot +.aggressive_scheduled_block_id 3 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13098 "00011000" // MOVS p6, r14 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13099 "00001011" // /* MW 3 */ + 13100 "10001110" // /* MW 2 */ + 13101 "00001110" // /* MW 1 */ +.src_ref 7 "superkernels.cpp" 400 +.delay_slot + 13102 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13103 "00000001" // /* MW 5 */ + 13104 "00000000" // /* MW 4 */ + 13105 "00000000" // /* MW 3 */ + 13106 "11111000" // /* MW 2 */ + 13107 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13108 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13109 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13110 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13111 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13112 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE__end +.label __Z17superkernel_mul1dRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEERA16_KjRNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERNS0_IS1_NS2_3outESK_EE___func_end0 + 13113 "00000000" // /* MW 1 */ +.label __Z13_b881_wrapperPPv___func_begin0 +.label _Z13_b881_wrapperPPv +.function _b881_wrapper _Z13_b881_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 21 first +.src_ref 0 "0_0_reloadable0.cc" 23 79 +.function_start + 13120 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13121 "11000000" // /* MW 3 */ + 13122 "01100000" // /* MW 2 */ + 13123 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 23 79 first + 13124 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13125 "00011110" // /* MW 3 */ + 13126 "00101100" // /* MW 2 */ + 13127 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 25 81 first + 13128 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13129 "00011110" // /* MW 3 */ + 13130 "11110101" // /* MW 2 */ + 13131 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 24 47 first + 13132 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13133 "10011110" // /* MW 3 */ + 13134 "00000100" // /* MW 2 */ + 13135 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 22 4 first +.tail_call + 13136 "10000100" // J #10672 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=10672 delay_slots=5 */ + 13137 "00000000" // /* MW 5 */ + 13138 "00000000" // /* MW 4 */ + 13139 "11011000" // /* MW 3 */ + 13140 "00010100" // /* MW 2 */ + 13141 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13142 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13143 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13144 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13145 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13146 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13147 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13148 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13149 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13150 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b881_wrapperPPv__end +.label __Z13_b881_wrapperPPv___func_end0 + 13151 "00000000" // /* MW 1 */ +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj___func_begin0 +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj +.function setup_transposeshuffle_params _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj +.src_ref 8 "transposeshuffle_params.h" 93 first +.src_ref 8 "transposeshuffle_params.h" 102 18 first +.function_start + 13152 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13153 "00101110" // /* MW 3 */ + 13154 "00011100" // /* MW 2 */ + 13155 "00000001" // /* MW 1 */ + 13156 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13157 "00000000" // /* MW 1 */ + 13158 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13159 "00000000" // /* MW 1 */ + 13160 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13161 "00000000" // /* MW 1 */ + 13162 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13163 "00000000" // /* MW 1 */ + 13164 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13165 "00000000" // /* MW 1 */ + 13166 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13167 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 first + 13168 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13169 "00101001" // /* MW 3 */ + 13170 "00011100" // /* MW 2 */ + 13171 "00001000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 18 + 13172 "10011000" // LDA el0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13173 "00101110" // /* MW 3 */ + 13174 "00011100" // /* MW 2 */ + 13175 "00000001" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 18 + 13176 "10011000" // LDA eh0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13177 "00001110" // /* MW 3 */ + 13178 "00011100" // /* MW 2 */ + 13179 "00000001" // /* MW 1 */ + 13180 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13181 "00000000" // /* MW 1 */ + 13182 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13183 "00000000" // /* MW 1 */ + 13184 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13185 "00000000" // /* MW 1 */ + 13186 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13187 "00000000" // /* MW 1 */ + 13188 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13189 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 + 13190 "10011000" // ST el0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13191 "00101001" // /* MW 3 */ + 13192 "00011100" // /* MW 2 */ + 13193 "00001000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 + 13194 "10011000" // ST eh0, [p0], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13195 "00001001" // /* MW 3 */ + 13196 "00011100" // /* MW 2 */ + 13197 "00001000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 18 + 13198 "10011000" // LDA el0, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13199 "00101110" // /* MW 3 */ + 13200 "00000100" // /* MW 2 */ + 13201 "00000001" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 18 + 13202 "10011000" // LDA eh0, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13203 "00001110" // /* MW 3 */ + 13204 "00010100" // /* MW 2 */ + 13205 "00000001" // /* MW 1 */ + 13206 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13207 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 111 first + 13208 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13209 "00000000" // /* MW 3 */ + 13210 "00101000" // /* MW 2 */ + 13211 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13212 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13213 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13214 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13215 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13216 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13217 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 first +.delay_slot + 13218 "10011000" // ST el0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13219 "00101001" // /* MW 3 */ + 13220 "00000100" // /* MW 2 */ + 13221 "00001000" // /* MW 1 */ +.src_ref 8 "transposeshuffle_params.h" 102 16 +.delay_slot + 13222 "10011000" // ST eh0, [p0, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13223 "00001001" // /* MW 3 */ + 13224 "00010100" // /* MW 2 */ +.label _Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj__end +.label __Z29setup_transposeshuffle_paramsI8bfloat16EvR23transposeshuffle_paramsRA5_Kj___func_end0 + 13225 "00001000" // /* MW 1 */ +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params___func_begin0 +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params +.function transposeshuffle _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params +.src_ref 8 "transposeshuffle.h" 71 first +.src_ref 8 "transposeshuffle.h" 78 8 +.src_ref 8 "transposeshuffle.h" 78 14 +.src_ref 8 "transposeshuffle.h" 78 23 +.function_start + 13232 "11100100" // MOVX r1, #22; MOV r2, p2 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13233 "10000001" // /* MW 5 */ + 13234 "00101001" // /* MW 4 */ + 13235 "00100001" // /* MW 3 */ + 13236 "01001011" // /* MW 2 */ + 13237 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 78 14 first + 13238 "00011000" // ADD.NC p2, r2, #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13239 "00000110" // /* MW 3 */ + 13240 "01100001" // /* MW 2 */ + 13241 "00011010" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 81 26 first + 13242 "10011000" // LDA r2, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13243 "01010110" // /* MW 3 */ + 13244 "00010100" // /* MW 2 */ + 13245 "00000010" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 78 14 first + 13246 "10011000" // LDA r27, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13247 "01110110" // /* MW 3 */ + 13248 "00000111" // /* MW 2 */ + 13249 "00000010" // /* MW 1 */ + 13250 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13251 "00000000" // /* MW 1 */ + 13252 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13253 "00000000" // /* MW 1 */ + 13254 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13255 "00000000" // /* MW 1 */ + 13256 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13257 "00000000" // /* MW 1 */ + 13258 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13259 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 81 4 first +.src_ref 8 "transposeshuffle.h" 81 19 first + 13260 "10000100" // JZ r2, #13680 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13680 delay_slots=5 */ + 13261 "00000001" // /* MW 5 */ + 13262 "00000000" // /* MW 4 */ + 13263 "10111000" // /* MW 3 */ + 13264 "00011010" // /* MW 2 */ + 13265 "00010000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 78 8 +.src_ref 8 "transposeshuffle.h" 78 23 +.delay_slot + 13266 "00011000" // MOVX r0, #29 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13267 "01110101" // /* MW 3 */ + 13268 "00000000" // /* MW 2 */ + 13269 "00010000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 78 8 first +.src_ref 8 "transposeshuffle.h" 78 23 first +.delay_slot + 13270 "00011000" // SEL.EQZ r0, r1, r0, r27 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13271 "00000010" // /* MW 3 */ + 13272 "01000000" // /* MW 2 */ + 13273 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13274 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13275 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13276 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13277 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13278 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13279 "00000000" // /* MW 1 */ + 13280 "00011000" // MOVX r1, #10 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13281 "00101001" // /* MW 3 */ + 13282 "00000010" // /* MW 2 */ + 13283 "00010000" // /* MW 1 */ + 13284 "10011000" // LTU r1, r2, r1 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13285 "00011100" // /* MW 3 */ + 13286 "10000010" // /* MW 2 */ + 13287 "00010000" // /* MW 1 */ + 13288 "10000100" // JNZ r1, #13536 /* MW 6 */ /* control_operation: words=6 jump conditional cycles_taken=1 cycles_not_taken=0 direct absolute target_address=13536 delay_slots=5 */ + 13289 "00000001" // /* MW 5 */ + 13290 "01000000" // /* MW 4 */ + 13291 "01110000" // /* MW 3 */ + 13292 "00011010" // /* MW 2 */ + 13293 "00001000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13294 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13295 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13296 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13297 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13298 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13299 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13300 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13301 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13302 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13303 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 8 "transposeshuffle.h" 81 4 first +.src_ref 8 "transposeshuffle.h" 83 46 first +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id first + 13304 "00111010" // VLDB x0, [p0], #64; MOVXM ls, #13440 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13305 "00010000" // /* MW 9 */ + 13306 "01000000" // /* MW 8 */ + 13307 "01111010" // /* MW 7 */ + 13308 "00001100" // /* MW 6 */ + 13309 "00000000" // /* MW 5 */ + 13310 "00000000" // /* MW 4 */ + 13311 "01101000" // /* MW 3 */ + 13312 "00111000" // /* MW 2 */ + 13313 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 81 4 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.noswbrkpt + 13314 "01111110" // NOPA; VLDB x0, [p0], #64; NOPS; MOVXM le, #13440 /* MW 14 */ /* control_operation: words=14 cycles_taken=1 */ + 13315 "01100000" // /* MW 13 */ + 13316 "00101011" // /* MW 12 */ + 13317 "00000000" // /* MW 11 */ + 13318 "00000010" // /* MW 10 */ + 13319 "01001000" // /* MW 9 */ + 13320 "10110111" // /* MW 8 */ + 13321 "00000001" // /* MW 7 */ + 13322 "00000000" // /* MW 6 */ + 13323 "01101000" // /* MW 5 */ + 13324 "00111000" // /* MW 4 */ + 13325 "11110000" // /* MW 3 */ + 13326 "00101100" // /* MW 2 */ + 13327 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 81 4 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13328 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; ADD.NC lc, r2, #-9; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13329 "00000000" // /* MW 15 */ + 13330 "00000000" // /* MW 14 */ + 13331 "11001000" // /* MW 13 */ + 13332 "10111101" // /* MW 12 */ + 13333 "10111000" // /* MW 11 */ + 13334 "00000010" // /* MW 10 */ + 13335 "00000000" // /* MW 9 */ + 13336 "00000000" // /* MW 8 */ + 13337 "01011011" // /* MW 7 */ + 13338 "00000001" // /* MW 6 */ + 13339 "01101000" // /* MW 5 */ + 13340 "00111000" // /* MW 4 */ + 13341 "11110000" // /* MW 3 */ + 13342 "00101100" // /* MW 2 */ + 13343 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13344 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13345 "00000000" // /* MW 15 */ + 13346 "00000000" // /* MW 14 */ + 13347 "01111000" // /* MW 13 */ + 13348 "10100101" // /* MW 12 */ + 13349 "00000001" // /* MW 11 */ + 13350 "00000000" // /* MW 10 */ + 13351 "00000000" // /* MW 9 */ + 13352 "00000000" // /* MW 8 */ + 13353 "01011011" // /* MW 7 */ + 13354 "00000001" // /* MW 6 */ + 13355 "01101000" // /* MW 5 */ + 13356 "00111000" // /* MW 4 */ + 13357 "11110000" // /* MW 3 */ + 13358 "00101100" // /* MW 2 */ + 13359 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13360 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13361 "00000000" // /* MW 15 */ + 13362 "00000000" // /* MW 14 */ + 13363 "01111000" // /* MW 13 */ + 13364 "10100101" // /* MW 12 */ + 13365 "00000001" // /* MW 11 */ + 13366 "00000000" // /* MW 10 */ + 13367 "00000000" // /* MW 9 */ + 13368 "00000000" // /* MW 8 */ + 13369 "01011011" // /* MW 7 */ + 13370 "00000001" // /* MW 6 */ + 13371 "01101000" // /* MW 5 */ + 13372 "00111000" // /* MW 4 */ + 13373 "11110000" // /* MW 3 */ + 13374 "00101100" // /* MW 2 */ + 13375 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13376 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13377 "00000000" // /* MW 15 */ + 13378 "00000000" // /* MW 14 */ + 13379 "01111000" // /* MW 13 */ + 13380 "10100101" // /* MW 12 */ + 13381 "00000001" // /* MW 11 */ + 13382 "00000000" // /* MW 10 */ + 13383 "00000000" // /* MW 9 */ + 13384 "00000000" // /* MW 8 */ + 13385 "01011011" // /* MW 7 */ + 13386 "00000001" // /* MW 6 */ + 13387 "01101000" // /* MW 5 */ + 13388 "00111000" // /* MW 4 */ + 13389 "11110000" // /* MW 3 */ + 13390 "00101100" // /* MW 2 */ + 13391 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13392 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13393 "00000000" // /* MW 15 */ + 13394 "00000000" // /* MW 14 */ + 13395 "01111000" // /* MW 13 */ + 13396 "10100101" // /* MW 12 */ + 13397 "00000001" // /* MW 11 */ + 13398 "00000000" // /* MW 10 */ + 13399 "00000000" // /* MW 9 */ + 13400 "00000000" // /* MW 8 */ + 13401 "01011011" // /* MW 7 */ + 13402 "00000001" // /* MW 6 */ + 13403 "01101000" // /* MW 5 */ + 13404 "00111000" // /* MW 4 */ + 13405 "11110000" // /* MW 3 */ + 13406 "00101100" // /* MW 2 */ + 13407 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 8 "transposeshuffle.h" 83 46 +.src_ref 8 "transposeshuffle.h" 84 13 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13408 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13409 "00000000" // /* MW 15 */ + 13410 "00000000" // /* MW 14 */ + 13411 "11101000" // /* MW 13 */ + 13412 "00000000" // /* MW 12 */ + 13413 "00000000" // /* MW 11 */ + 13414 "00000000" // /* MW 10 */ + 13415 "00000000" // /* MW 9 */ + 13416 "00000000" // /* MW 8 */ + 13417 "01011011" // /* MW 7 */ + 13418 "00000001" // /* MW 6 */ + 13419 "01101000" // /* MW 5 */ + 13420 "00111000" // /* MW 4 */ + 13421 "11110000" // /* MW 3 */ + 13422 "00101100" // /* MW 2 */ + 13423 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 8 "transposeshuffle.h" 83 46 first +.src_ref 8 "transposeshuffle.h" 84 13 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13424 "11100001" // NOPA; VLDB x0, [p0], #64; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13425 "00000000" // /* MW 15 */ + 13426 "00000000" // /* MW 14 */ + 13427 "11101000" // /* MW 13 */ + 13428 "00000000" // /* MW 12 */ + 13429 "00000000" // /* MW 11 */ + 13430 "00000000" // /* MW 10 */ + 13431 "00000000" // /* MW 9 */ + 13432 "00000000" // /* MW 8 */ + 13433 "01011011" // /* MW 7 */ + 13434 "00000001" // /* MW 6 */ + 13435 "01101000" // /* MW 5 */ + 13436 "00111000" // /* MW 4 */ + 13437 "11110000" // /* MW 3 */ + 13438 "00101100" // /* MW 2 */ + 13439 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_208 +.src_ref 4 "vector.hpp" 1139 17 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 8 "transposeshuffle.h" 83 46 +.src_ref 8 "transposeshuffle.h" 84 13 first +.src_ref 8 "transposeshuffle.h" 85 46 first +.begin_of_loop +.end_of_loop +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 1 + 13440 "11100001" // NOPA; VLDB x0, [p0], #64; VST bmll0, [p1], #64; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13441 "00000000" // /* MW 15 */ + 13442 "00000000" // /* MW 14 */ + 13443 "11101000" // /* MW 13 */ + 13444 "00000000" // /* MW 12 */ + 13445 "00000000" // /* MW 11 */ + 13446 "00000000" // /* MW 10 */ + 13447 "00000000" // /* MW 9 */ + 13448 "10000000" // /* MW 8 */ + 13449 "00000110" // /* MW 7 */ + 13450 "00011100" // /* MW 6 */ + 13451 "01101001" // /* MW 5 */ + 13452 "00111000" // /* MW 4 */ + 13453 "11110000" // /* MW 3 */ + 13454 "00101100" // /* MW 2 */ + 13455 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt +.loop_nesting 0 + 13456 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13457 "11100000" // /* MW 7 */ + 13458 "00000000" // /* MW 6 */ + 13459 "00000000" // /* MW 5 */ + 13460 "00000000" // /* MW 4 */ + 13461 "11010000" // /* MW 3 */ + 13462 "10000000" // /* MW 2 */ + 13463 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13464 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13465 "11100000" // /* MW 7 */ + 13466 "00000000" // /* MW 6 */ + 13467 "00000000" // /* MW 5 */ + 13468 "00000000" // /* MW 4 */ + 13469 "11010000" // /* MW 3 */ + 13470 "10000000" // /* MW 2 */ + 13471 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13472 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13473 "11100000" // /* MW 7 */ + 13474 "00000000" // /* MW 6 */ + 13475 "00000000" // /* MW 5 */ + 13476 "00000000" // /* MW 4 */ + 13477 "11010000" // /* MW 3 */ + 13478 "10000000" // /* MW 2 */ + 13479 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.src_ref 8 "transposeshuffle.h" 88 first +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13480 "00111010" // VST bmll0, [p1], #64; RET lr; VSHUFFLE bmll0, x0, x0, r0 /* MW 10 */ /* control_operation: words=10 rts unconditional cycles_taken=1 delay_slots=5 */ + 13481 "11101001" // /* MW 9 */ + 13482 "00000000" // /* MW 8 */ + 13483 "00000000" // /* MW 7 */ + 13484 "00000000" // /* MW 6 */ + 13485 "01000000" // /* MW 5 */ + 13486 "00000001" // /* MW 4 */ + 13487 "11010000" // /* MW 3 */ + 13488 "10000000" // /* MW 2 */ + 13489 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 8 "transposeshuffle.h" 84 13 first +.src_ref 8 "transposeshuffle.h" 85 46 first +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13490 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13491 "11100000" // /* MW 7 */ + 13492 "00000000" // /* MW 6 */ + 13493 "00000000" // /* MW 5 */ + 13494 "00000000" // /* MW 4 */ + 13495 "11010000" // /* MW 3 */ + 13496 "10000000" // /* MW 2 */ + 13497 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13498 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13499 "11100000" // /* MW 7 */ + 13500 "00000000" // /* MW 6 */ + 13501 "00000000" // /* MW 5 */ + 13502 "00000000" // /* MW 4 */ + 13503 "11010000" // /* MW 3 */ + 13504 "10000000" // /* MW 2 */ + 13505 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 84 13 +.src_ref 8 "transposeshuffle.h" 85 46 +.delay_slot +.aggressive_scheduled_block_id 1 +.nohwbrkpt +.noswbrkpt + 13506 "00000010" // VST bmll0, [p1], #64; VSHUFFLE bmll0, x0, x0, r0 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13507 "11100000" // /* MW 7 */ + 13508 "00000000" // /* MW 6 */ + 13509 "00000000" // /* MW 5 */ + 13510 "00000000" // /* MW 4 */ + 13511 "11010000" // /* MW 3 */ + 13512 "10000000" // /* MW 2 */ + 13513 "00100011" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 85 46 +.delay_slot +.aggressive_scheduled_block_id 1 +.aggressive_scheduled_block_id last +.nohwbrkpt +.noswbrkpt + 13514 "00001100" // NOPA; VST bmll0, [p1], #64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13515 "00001101" // /* MW 5 */ + 13516 "00111000" // /* MW 4 */ + 13517 "11110010" // /* MW 3 */ + 13518 "00101100" // /* MW 2 */ + 13519 "00000000" // /* MW 1 */ +.src_ref 4 "vector.hpp" 1159 33 +.src_ref 8 "transposeshuffle.h" 85 46 +.delay_slot + 13520 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13521 "00000000" // /* MW 15 */ + 13522 "00000000" // /* MW 14 */ + 13523 "01111000" // /* MW 13 */ + 13524 "10100101" // /* MW 12 */ + 13525 "00000001" // /* MW 11 */ + 13526 "00000000" // /* MW 10 */ + 13527 "00000000" // /* MW 9 */ + 13528 "10000000" // /* MW 8 */ + 13529 "00000110" // /* MW 7 */ + 13530 "00011100" // /* MW 6 */ + 13531 "00100001" // /* MW 5 */ + 13532 "00000000" // /* MW 4 */ + 13533 "11110000" // /* MW 3 */ + 13534 "00101100" // /* MW 2 */ + 13535 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_304 +.src_ref 8 "transposeshuffle.h" 81 4 first + 13536 "11111000" // MOV lc, r2 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13537 "00100000" // /* MW 3 */ + 13538 "01110001" // /* MW 2 */ + 13539 "00011101" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 81 4 + 13540 "01000100" // MOVXM ls, #13552 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13541 "11100000" // /* MW 5 */ + 13542 "11101001" // /* MW 4 */ + 13543 "00110001" // /* MW 3 */ + 13544 "00000000" // /* MW 2 */ + 13545 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 81 4 + 13546 "01000100" // MOVXM le, #13664 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13547 "11000000" // /* MW 5 */ + 13548 "11101010" // /* MW 4 */ + 13549 "00110110" // /* MW 3 */ + 13550 "00000000" // /* MW 2 */ + 13551 "00000000" // /* MW 1 */ +.label ZLS_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_320 +.src_ref 4 "vector.hpp" 1139 17 first +.src_ref 8 "transposeshuffle.h" 83 46 first +.begin_of_loop +.loop_nesting 1 + 13552 "00011000" // VLDB x0, [p0], #64 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13553 "00110100" // /* MW 3 */ + 13554 "00011100" // /* MW 2 */ + 13555 "00111000" // /* MW 1 */ + 13556 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13557 "00000000" // /* MW 1 */ + 13558 "10111010" // NOPA; NOPB; NOPM /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13559 "01111110" // /* MW 9 */ + 13560 "10100101" // /* MW 8 */ + 13561 "00000001" // /* MW 7 */ + 13562 "00000000" // /* MW 6 */ + 13563 "00010000" // /* MW 5 */ + 13564 "00000000" // /* MW 4 */ + 13565 "11110000" // /* MW 3 */ + 13566 "00101100" // /* MW 2 */ + 13567 "00000000" // /* MW 1 */ + 13568 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13569 "00000000" // /* MW 15 */ + 13570 "00000000" // /* MW 14 */ + 13571 "01111000" // /* MW 13 */ + 13572 "10100101" // /* MW 12 */ + 13573 "00000001" // /* MW 11 */ + 13574 "00000000" // /* MW 10 */ + 13575 "00000000" // /* MW 9 */ + 13576 "00000000" // /* MW 8 */ + 13577 "01011011" // /* MW 7 */ + 13578 "00000001" // /* MW 6 */ + 13579 "00100000" // /* MW 5 */ + 13580 "00000000" // /* MW 4 */ + 13581 "11110000" // /* MW 3 */ + 13582 "00101100" // /* MW 2 */ + 13583 "00000000" // /* MW 1 */ + 13584 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13585 "00000000" // /* MW 15 */ + 13586 "00000000" // /* MW 14 */ + 13587 "01111000" // /* MW 13 */ + 13588 "10100101" // /* MW 12 */ + 13589 "00000001" // /* MW 11 */ + 13590 "00000000" // /* MW 10 */ + 13591 "00000000" // /* MW 9 */ + 13592 "00000000" // /* MW 8 */ + 13593 "01011011" // /* MW 7 */ + 13594 "00000001" // /* MW 6 */ + 13595 "00100000" // /* MW 5 */ + 13596 "00000000" // /* MW 4 */ + 13597 "11110000" // /* MW 3 */ + 13598 "00101100" // /* MW 2 */ + 13599 "00000000" // /* MW 1 */ + 13600 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13601 "00000000" // /* MW 15 */ + 13602 "00000000" // /* MW 14 */ + 13603 "01111000" // /* MW 13 */ + 13604 "10100101" // /* MW 12 */ + 13605 "00000001" // /* MW 11 */ + 13606 "00000000" // /* MW 10 */ + 13607 "00000000" // /* MW 9 */ + 13608 "00000000" // /* MW 8 */ + 13609 "01011011" // /* MW 7 */ + 13610 "00000001" // /* MW 6 */ + 13611 "00100000" // /* MW 5 */ + 13612 "00000000" // /* MW 4 */ + 13613 "11110000" // /* MW 3 */ + 13614 "00101100" // /* MW 2 */ + 13615 "00000000" // /* MW 1 */ + 13616 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13617 "00000000" // /* MW 15 */ + 13618 "00000000" // /* MW 14 */ + 13619 "01111000" // /* MW 13 */ + 13620 "10100101" // /* MW 12 */ + 13621 "00000001" // /* MW 11 */ + 13622 "00000000" // /* MW 10 */ + 13623 "00000000" // /* MW 9 */ + 13624 "00000000" // /* MW 8 */ + 13625 "01011011" // /* MW 7 */ + 13626 "00000001" // /* MW 6 */ + 13627 "00100000" // /* MW 5 */ + 13628 "00000000" // /* MW 4 */ + 13629 "11110000" // /* MW 3 */ + 13630 "00101100" // /* MW 2 */ + 13631 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 84 13 first + 13632 "11100001" // NOPA; NOPB; NOPS; NOPX; VSHUFFLE bmll0, x0, x0, r0; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13633 "00000000" // /* MW 15 */ + 13634 "00000000" // /* MW 14 */ + 13635 "11101000" // /* MW 13 */ + 13636 "00000000" // /* MW 12 */ + 13637 "00000000" // /* MW 11 */ + 13638 "00000000" // /* MW 10 */ + 13639 "00000000" // /* MW 9 */ + 13640 "00000000" // /* MW 8 */ + 13641 "01011011" // /* MW 7 */ + 13642 "00000001" // /* MW 6 */ + 13643 "00100000" // /* MW 5 */ + 13644 "00000000" // /* MW 4 */ + 13645 "11110000" // /* MW 3 */ + 13646 "00101100" // /* MW 2 */ + 13647 "00000000" // /* MW 1 */ + 13648 "11100001" // NOPA; NOPB; NOPS; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13649 "00000000" // /* MW 15 */ + 13650 "00000000" // /* MW 14 */ + 13651 "01111000" // /* MW 13 */ + 13652 "10100101" // /* MW 12 */ + 13653 "00000001" // /* MW 11 */ + 13654 "00000000" // /* MW 10 */ + 13655 "00000000" // /* MW 9 */ + 13656 "00000000" // /* MW 8 */ + 13657 "01011011" // /* MW 7 */ + 13658 "00000001" // /* MW 6 */ + 13659 "00100000" // /* MW 5 */ + 13660 "00000000" // /* MW 4 */ + 13661 "11110000" // /* MW 3 */ + 13662 "00101100" // /* MW 2 */ + 13663 "00000000" // /* MW 1 */ +.label ZLE_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_432 +.src_ref 4 "vector.hpp" 1159 33 first +.src_ref 8 "transposeshuffle.h" 85 46 first +.end_of_loop + 13664 "11100001" // NOPA; NOPB; VST bmll0, [p1], #64; NOPX; NOPM; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13665 "00000000" // /* MW 15 */ + 13666 "00000000" // /* MW 14 */ + 13667 "01111000" // /* MW 13 */ + 13668 "10100101" // /* MW 12 */ + 13669 "00000001" // /* MW 11 */ + 13670 "00000000" // /* MW 10 */ + 13671 "00000000" // /* MW 9 */ + 13672 "10000000" // /* MW 8 */ + 13673 "00000110" // /* MW 7 */ + 13674 "00011100" // /* MW 6 */ + 13675 "00100001" // /* MW 5 */ + 13676 "00000000" // /* MW 4 */ + 13677 "11110000" // /* MW 3 */ + 13678 "00101100" // /* MW 2 */ + 13679 "00000000" // /* MW 1 */ +.label TGT_F_Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params_448 +.src_ref 8 "transposeshuffle.h" 88 first +.loop_nesting 0 + 13680 "00011000" // RET lr /* MW 4 */ /* control_operation: words=4 rts unconditional cycles_taken=1 delay_slots=5 */ + 13681 "00000000" // /* MW 3 */ + 13682 "00101000" // /* MW 2 */ + 13683 "00010000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13684 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13685 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13686 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13687 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13688 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13689 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13690 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13691 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13692 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params__end +.label __Z16transposeshuffleI8bfloat16Qsr5mllib5utilsE11is_one_of_vIT_aS0_EEvPS1_S2_R23transposeshuffle_params___func_end0 + 13693 "00000000" // /* MW 1 */ +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj___func_begin0 +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj +.function transpose4d_adf_wrapper, adf::locking::sync, adf::addressing::linear, adf::margin<0U> >, adf::io_buffer_config, adf::locking::sync, adf::addressing::linear, adf::margin<0U> > > _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 9 "transpose4d_adf_wrapper.cpp" 78 +.src_ref 9 "transpose4d_adf_wrapper.cpp" 78 first +.function_start + 13696 "00111010" // MOVS p3, p1; PADDXM [sp], #64 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13697 "01110001" // /* MW 9 */ + 13698 "00000000" // /* MW 8 */ + 13699 "00000000" // /* MW 7 */ + 13700 "00000000" // /* MW 6 */ + 13701 "00000010" // /* MW 5 */ + 13702 "00000000" // /* MW 4 */ + 13703 "01100000" // /* MW 3 */ + 13704 "10010001" // /* MW 2 */ + 13705 "01110000" // /* MW 1 */ + 13706 "10011000" // ST lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13707 "00111101" // /* MW 3 */ + 13708 "11111100" // /* MW 2 */ + 13709 "00001111" // /* MW 1 */ +.src_ref 9 "transpose4d_adf_wrapper.cpp" 80 4 first +.no_stack_arguments + 13710 "00000100" // JL #13152 /* MW 6 */ /* control_operation: words=6 call unconditional cycles_taken=1 direct absolute target_address=13152 delay_slots=5 */ + 13711 "00000001" // /* MW 5 */ + 13712 "00000000" // /* MW 4 */ + 13713 "10110000" // /* MW 3 */ + 13714 "00011001" // /* MW 2 */ + 13715 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 9 "transpose4d_adf_wrapper.cpp" 80 4 +.delay_slot + 13716 "00000010" // MOVS p2, p0; MOV p1, p2 /* MW 8 */ /* control_operation: words=8 cycles_taken=1 */ + 13717 "01110000" // /* MW 7 */ + 13718 "01100000" // /* MW 6 */ + 13719 "10110010" // /* MW 5 */ + 13720 "00000000" // /* MW 4 */ + 13721 "01100000" // /* MW 3 */ + 13722 "00010001" // /* MW 2 */ + 13723 "01010000" // /* MW 1 */ +.src_ref 9 "transpose4d_adf_wrapper.cpp" 80 4 +.delay_slot + 13724 "00111010" // ST p7, [sp, #-12]; MOVXM p0, #509120 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13725 "00010001" // /* MW 9 */ + 13726 "01100000" // /* MW 8 */ + 13727 "00110010" // /* MW 7 */ + 13728 "11110000" // /* MW 6 */ + 13729 "00000001" // /* MW 5 */ + 13730 "00000000" // /* MW 4 */ + 13731 "10110000" // /* MW 3 */ + 13732 "11110011" // /* MW 2 */ + 13733 "11111110" // /* MW 1 */ +.delay_slot + 13734 "10011000" // ST p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13735 "00011101" // /* MW 3 */ + 13736 "11111011" // /* MW 2 */ + 13737 "00001111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 13738 "11010100" // NOPA; MOV p6, p3 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13739 "10000001" // /* MW 5 */ + 13740 "11001101" // /* MW 4 */ + 13741 "11111100" // /* MW 3 */ + 13742 "00101100" // /* MW 2 */ + 13743 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.delay_slot + 13744 "11100001" // NOPA; NOPB; NOPS; NOPX; MOV p7, p2; NOPV /* MW 16 */ /* control_operation: words=16 cycles_taken=1 */ + 13745 "00000000" // /* MW 15 */ + 13746 "00000000" // /* MW 14 */ + 13747 "01111000" // /* MW 13 */ + 13748 "01100000" // /* MW 12 */ + 13749 "10110010" // /* MW 11 */ + 13750 "00000011" // /* MW 10 */ + 13751 "00000000" // /* MW 9 */ + 13752 "00000000" // /* MW 8 */ + 13753 "01011011" // /* MW 7 */ + 13754 "00000001" // /* MW 6 */ + 13755 "00100000" // /* MW 5 */ + 13756 "00000000" // /* MW 4 */ + 13757 "11110000" // /* MW 3 */ + 13758 "00101100" // /* MW 2 */ + 13759 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 +.src_ref 1 "io_buffer_main.h" 125 25 +.return_address + 13760 "10111010" // LDA p7, [sp, #-12]; MOVS p1, p6; MOV p0, p7 /* MW 10 */ /* control_operation: words=10 cycles_taken=1 */ + 13761 "01110010" // /* MW 9 */ + 13762 "01100000" // /* MW 8 */ + 13763 "00110111" // /* MW 7 */ + 13764 "00000000" // /* MW 6 */ + 13765 "10001011" // /* MW 5 */ + 13766 "10011000" // /* MW 4 */ + 13767 "00100001" // /* MW 3 */ + 13768 "11110011" // /* MW 2 */ + 13769 "11111110" // /* MW 1 */ + 13770 "00011000" // LDA p6, [sp, #-8] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13771 "00011001" // /* MW 3 */ + 13772 "11111011" // /* MW 2 */ + 13773 "00000111" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 99 11 + 13774 "00011000" // LDA lr, [sp, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13775 "00111001" // /* MW 3 */ + 13776 "11111100" // /* MW 2 */ + 13777 "00000111" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 first + 13778 "10011000" // LDA p0, [p0] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13779 "00011110" // /* MW 3 */ + 13780 "00000100" // /* MW 2 */ + 13781 "00000000" // /* MW 1 */ +.src_ref 1 "io_buffer_main.h" 125 25 + 13782 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13783 "10011110" // /* MW 3 */ + 13784 "00000100" // /* MW 2 */ + 13785 "00000001" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 99 11 first +.tail_call + 13786 "10000100" // J #13232 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13232 delay_slots=5 */ + 13787 "00000000" // /* MW 5 */ + 13788 "00000000" // /* MW 4 */ + 13789 "11011000" // /* MW 3 */ + 13790 "00011001" // /* MW 2 */ + 13791 "00000000" // /* MW 1 */ +.src_ref 9 "transpose4d_adf_wrapper.cpp" 82 first +.delay_slot + 13792 "11000100" // PADDXM [sp], #-64 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13793 "00000001" // /* MW 5 */ + 13794 "00000000" // /* MW 4 */ + 13795 "00000000" // /* MW 3 */ + 13796 "11111000" // /* MW 2 */ + 13797 "11111111" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13798 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13799 "00000000" // /* MW 1 */ +.src_ref 8 "transposeshuffle.h" 99 11 +.delay_slot + 13800 "01000100" // MOVXM p2, #509120 /* MW 6 */ /* control_operation: words=6 cycles_taken=1 */ + 13801 "10000000" // /* MW 5 */ + 13802 "11001001" // /* MW 4 */ + 13803 "11000100" // /* MW 3 */ + 13804 "00000111" // /* MW 2 */ + 13805 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13806 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13807 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13808 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj__end +.label __ZN12mllib_graphs23transpose4d_adf_wrapperI8bfloat16N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_EEvRNS2_9io_bufferIT_NS2_9direction2inET0_EERNSD_ISE_NSF_3outET1_EERA5_Kj___func_end0 + 13809 "00000000" // /* MW 1 */ +.label __Z13_b719_wrapperPPv___func_begin0 +.label _Z13_b719_wrapperPPv +.function _b719_wrapper _Z13_b719_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 29 first +.src_ref 0 "0_0_reloadable0.cc" 31 79 +.function_start + 13824 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13825 "11000000" // /* MW 3 */ + 13826 "01100000" // /* MW 2 */ + 13827 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 31 79 first + 13828 "10011000" // LDA p0, [p1], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13829 "00011110" // /* MW 3 */ + 13830 "00011100" // /* MW 2 */ + 13831 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 33 46 first + 13832 "10011000" // LDA p2, [p1, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13833 "00011110" // /* MW 3 */ + 13834 "00010101" // /* MW 2 */ + 13835 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 32 80 first + 13836 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13837 "10011110" // /* MW 3 */ + 13838 "00000100" // /* MW 2 */ + 13839 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 30 4 first +.tail_call + 13840 "10000100" // J #13696 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=13696 delay_slots=5 */ + 13841 "00000000" // /* MW 5 */ + 13842 "00000000" // /* MW 4 */ + 13843 "11000000" // /* MW 3 */ + 13844 "00011010" // /* MW 2 */ + 13845 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13846 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13847 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13848 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13849 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13850 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13851 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13852 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13853 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13854 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b719_wrapperPPv__end +.label __Z13_b719_wrapperPPv___func_end0 + 13855 "00000000" // /* MW 1 */ +.label __Z13_b886_wrapperPPv___func_begin0 +.label _Z13_b886_wrapperPPv +.function _b886_wrapper _Z13_b886_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 37 first +.src_ref 0 "0_0_reloadable0.cc" 39 79 +.function_start + 13856 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13857 "11000000" // /* MW 3 */ + 13858 "01100000" // /* MW 2 */ + 13859 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 39 79 first + 13860 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13861 "00011110" // /* MW 3 */ + 13862 "00111100" // /* MW 2 */ + 13863 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 40 47 first + 13864 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13865 "10011110" // /* MW 3 */ + 13866 "11101100" // /* MW 2 */ + 13867 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 42 81 first + 13868 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13869 "10011110" // /* MW 3 */ + 13870 "00010101" // /* MW 2 */ + 13871 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 41 80 first + 13872 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13873 "00011110" // /* MW 3 */ + 13874 "00000101" // /* MW 2 */ + 13875 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 38 4 first +.tail_call + 13876 "10000100" // J #11424 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=11424 delay_slots=5 */ + 13877 "00000000" // /* MW 5 */ + 13878 "00000000" // /* MW 4 */ + 13879 "01010000" // /* MW 3 */ + 13880 "00010110" // /* MW 2 */ + 13881 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13882 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13883 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13884 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13885 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13886 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13887 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13888 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13889 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13890 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b886_wrapperPPv__end +.label __Z13_b886_wrapperPPv___func_end0 + 13891 "00000000" // /* MW 1 */ +.label __Z13_b891_wrapperPPv___func_begin0 +.label _Z13_b891_wrapperPPv +.function _b891_wrapper _Z13_b891_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 46 first +.src_ref 0 "0_0_reloadable0.cc" 48 79 +.function_start + 13904 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13905 "11000000" // /* MW 3 */ + 13906 "01100000" // /* MW 2 */ + 13907 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 48 79 first + 13908 "10011000" // LDA p0, [p2], #12 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13909 "00011110" // /* MW 3 */ + 13910 "00111100" // /* MW 2 */ + 13911 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 49 47 first + 13912 "10011000" // LDA p1, [p2], #-8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13913 "10011110" // /* MW 3 */ + 13914 "11101100" // /* MW 2 */ + 13915 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 51 81 first + 13916 "10011000" // LDA p3, [p2, #4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13917 "10011110" // /* MW 3 */ + 13918 "00010101" // /* MW 2 */ + 13919 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 50 80 first + 13920 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13921 "00011110" // /* MW 3 */ + 13922 "00000101" // /* MW 2 */ + 13923 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 47 4 first +.tail_call + 13924 "10000100" // J #12512 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=12512 delay_slots=5 */ + 13925 "00000000" // /* MW 5 */ + 13926 "00000000" // /* MW 4 */ + 13927 "01110000" // /* MW 3 */ + 13928 "00011000" // /* MW 2 */ + 13929 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13930 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13931 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13932 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13933 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13934 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13935 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13936 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13937 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13938 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b891_wrapperPPv__end +.label __Z13_b891_wrapperPPv___func_end0 + 13939 "00000000" // /* MW 1 */ +.label __Z13_b896_wrapperPPv___func_begin0 +.label _Z13_b896_wrapperPPv +.function _b896_wrapper _Z13_b896_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 55 first +.src_ref 0 "0_0_reloadable0.cc" 57 79 +.function_start + 13952 "11111000" // MOV p2, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13953 "11000000" // /* MW 3 */ + 13954 "01100000" // /* MW 2 */ + 13955 "00011010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 57 79 first + 13956 "10011000" // LDA p0, [p2], #4 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13957 "00011110" // /* MW 3 */ + 13958 "00011100" // /* MW 2 */ + 13959 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 58 79 first + 13960 "10011000" // LDA p1, [p2], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13961 "10011110" // /* MW 3 */ + 13962 "00101100" // /* MW 2 */ + 13963 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 60 81 first + 13964 "10011000" // LDA p3, [p2, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13965 "10011110" // /* MW 3 */ + 13966 "11110101" // /* MW 2 */ + 13967 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 59 47 first + 13968 "10011000" // LDA p2, [p2] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 13969 "00011110" // /* MW 3 */ + 13970 "00000101" // /* MW 2 */ + 13971 "00000010" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 56 4 first +.tail_call + 13972 "10000100" // J #7040 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=7040 delay_slots=5 */ + 13973 "00000000" // /* MW 5 */ + 13974 "00000000" // /* MW 4 */ + 13975 "11000000" // /* MW 3 */ + 13976 "00001101" // /* MW 2 */ + 13977 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13978 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13979 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13980 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13981 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13982 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13983 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13984 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 13985 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 13986 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b896_wrapperPPv__end +.label __Z13_b896_wrapperPPv___func_end0 + 13987 "00000000" // /* MW 1 */ +.label __Z13_b901_wrapperPPv___func_begin0 +.label _Z13_b901_wrapperPPv +.function _b901_wrapper _Z13_b901_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 64 first +.src_ref 0 "0_0_reloadable0.cc" 66 79 +.function_start + 14000 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14001 "11000000" // /* MW 3 */ + 14002 "01100000" // /* MW 2 */ + 14003 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 66 79 first + 14004 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14005 "00011110" // /* MW 3 */ + 14006 "00101100" // /* MW 2 */ + 14007 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 68 81 first + 14008 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14009 "00011110" // /* MW 3 */ + 14010 "11110101" // /* MW 2 */ + 14011 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 67 47 first + 14012 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14013 "10011110" // /* MW 3 */ + 14014 "00000100" // /* MW 2 */ + 14015 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 65 4 first +.tail_call + 14016 "10000100" // J #8400 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=8400 delay_slots=5 */ + 14017 "00000000" // /* MW 5 */ + 14018 "00000000" // /* MW 4 */ + 14019 "01101000" // /* MW 3 */ + 14020 "00010000" // /* MW 2 */ + 14021 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14022 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14023 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14024 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14025 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14026 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14027 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14028 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14029 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14030 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b901_wrapperPPv__end +.label __Z13_b901_wrapperPPv___func_end0 + 14031 "00000000" // /* MW 1 */ +.label __Z13_b906_wrapperPPv___func_begin0 +.label _Z13_b906_wrapperPPv +.function _b906_wrapper _Z13_b906_wrapperPPv +.src_ref 0 "0_0_reloadable0.cc" 72 first +.src_ref 0 "0_0_reloadable0.cc" 74 79 +.function_start + 14032 "11111000" // MOV p1, p0 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14033 "11000000" // /* MW 3 */ + 14034 "01100000" // /* MW 2 */ + 14035 "00011001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 74 79 first + 14036 "10011000" // LDA p0, [p1], #8 /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14037 "00011110" // /* MW 3 */ + 14038 "00101100" // /* MW 2 */ + 14039 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 76 81 first + 14040 "10011000" // LDA p2, [p1, #-4] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14041 "00011110" // /* MW 3 */ + 14042 "11110101" // /* MW 2 */ + 14043 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 75 47 first + 14044 "10011000" // LDA p1, [p1] /* MW 4 */ /* control_operation: words=4 cycles_taken=1 */ + 14045 "10011110" // /* MW 3 */ + 14046 "00000100" // /* MW 2 */ + 14047 "00000001" // /* MW 1 */ +.src_ref 0 "0_0_reloadable0.cc" 73 4 first +.tail_call + 14048 "10000100" // J #9264 /* MW 6 */ /* control_operation: words=6 jump tail_call unconditional cycles_taken=1 direct absolute target_address=9264 delay_slots=5 */ + 14049 "00000000" // /* MW 5 */ + 14050 "00000000" // /* MW 4 */ + 14051 "00011000" // /* MW 3 */ + 14052 "00010010" // /* MW 2 */ + 14053 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14054 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14055 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14056 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14057 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14058 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14059 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14060 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ + 14061 "00000000" // /* MW 1 */ +.delay_slot +.swstall delay_slot + 14062 "00000000" // NOPX /* MW 2 */ /* control_operation: words=2 cycles_taken=1 */ +.label _Z13_b906_wrapperPPv__end +.label __Z13_b906_wrapperPPv___func_end0 + 14063 "00000000" // /* MW 1 */ +.dir 0 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src" +.dir 1 "/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer" +.dir 2 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/conv" +.dir 3 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common" +.dir 4 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2" +.dir 5 "/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p" +.dir 6 "/usr/local/lib/python3.10/dist-packages/data/aie2p/lib" +.dir 7 "/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend" +.dir 8 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc" +.dir 9 "/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf" diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.txt b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.txt new file mode 100644 index 0000000000000000000000000000000000000000..6fd868aa52fc4ffc5b73d8aeb4253a6c648fc757 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/Release/0_3_reloadable0.txt @@ -0,0 +1,4622 @@ +Contents of the .debug_line section: + +sigmoid_carf_templated_lut.h: +File name Line number Starting address View Stmt + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 205 0x2620 x +elementwise_binary_shared.h 211 0x2620 1 x +elementwise_binary_shared.h 216 0x2620 2 +elementwise_binary_shared.h 216 0x2620 3 +elementwise_binary_shared.h 216 0x262a +elementwise_binary_shared.h 211 0x2638 x +elementwise_binary_shared.h 212 0x263c x +elementwise_binary_shared.h 212 0x264c +elementwise_binary_shared.h 213 0x2650 x +elementwise_binary_shared.h 213 0x2660 +elementwise_binary_shared.h 214 0x2664 x +elementwise_binary_shared.h 214 0x2674 +elementwise_binary_shared.h 216 0x2678 x +elementwise_binary_shared.h 217 0x267c x +elementwise_binary_shared.h 216 0x2680 +elementwise_binary_shared.h 216 0x2686 x +elementwise_binary_shared.h 216 0x268a +elementwise_binary_shared.h 216 0x268e +elementwise_binary_shared.h 107 0x26f0 x +elementwise_binary_shared.h 119 0x26f0 1 +elementwise_binary_shared.h 126 0x26f0 2 +elementwise_binary_shared.h 131 0x26f0 3 +elementwise_binary_shared.h 119 0x26f4 x +elementwise_binary_shared.h 122 0x26f8 x +elementwise_binary_shared.h 124 0x26fc x +elementwise_binary_shared.h 124 0x2708 +elementwise_binary_shared.h 107 0x270c +elementwise_binary_shared.h 124 0x2712 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 65 0x2716 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 124 0x2716 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 65 0x2720 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 150 0x272c +elementwise_binary_shared.h 119 0x2732 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x2736 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 126 0x2736 1 +elementwise_binary_shared.h 126 0x2736 2 +elementwise_binary_shared.h 131 0x2736 3 +elementwise_binary_shared.h 131 0x2736 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2740 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 126 0x2740 1 x +elementwise_binary_shared.h 131 0x2740 2 x +elementwise_binary_shared.h 171 0x2740 3 +elementwise_binary_shared.h 131 0x2752 +elementwise_binary_shared.h 131 0x2752 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2758 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2758 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x2758 2 +elementwise_binary_shared.h 166 0x275c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2768 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x2768 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x277a x +vector.hpp 1139 0x2780 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x2780 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2784 +vector.hpp 1159 0x2784 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 171 0x2784 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2796 +vector.hpp 1139 0x2796 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2796 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x2796 3 +elementwise_binary_shared.h 173 0x2796 4 +elementwise_binary_shared.h 150 0x27b0 +elementwise_binary_shared.h 150 0x27b4 x +elementwise_binary_shared.h 150 0x27b8 +elementwise_binary_shared.h 150 0x27be +elementwise_binary_shared.h 150 0x27c4 +elementwise_binary_shared.h 166 0x27c4 1 +elementwise_binary_shared.h 150 0x27d0 +elementwise_binary_shared.h 150 0x27e0 +elementwise_binary_shared.h 150 0x27e0 1 +elementwise_binary_shared.h 150 0x27e0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27ea + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x27ea 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x27ea 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27ee + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x27ee 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x27f2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 171 0x27f2 1 +elementwise_binary_shared.h 150 0x27f8 +elementwise_binary_shared.h 150 0x27fc +elementwise_binary_shared.h 150 0x27fc 1 +elementwise_binary_shared.h 150 0x2802 +elementwise_binary_shared.h 150 0x2806 +elementwise_binary_shared.h 150 0x280c +elementwise_binary_shared.h 150 0x2814 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x2824 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x282a x +vector.hpp 1139 0x2830 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x2830 1 x +elementwise_binary_shared.h 166 0x2830 2 x +elementwise_binary_shared.h 169 0x2830 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x283c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x283c 1 +elementwise_binary_shared.h 166 0x283c 2 +elementwise_binary_shared.h 171 0x283c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2848 x +vector.hpp 1139 0x2848 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2848 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x2848 3 x +elementwise_binary_shared.h 173 0x2848 4 x +elementwise_binary_shared.h 177 0x2848 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2850 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 166 0x2850 1 x +elementwise_binary_shared.h 171 0x2850 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2858 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2858 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x2858 2 x +elementwise_binary_shared.h 166 0x285e x +elementwise_binary_shared.h 166 0x2862 +elementwise_binary_shared.h 177 0x2862 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x286a x +vector.hpp 1139 0x286a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x286a 2 x +elementwise_binary_shared.h 171 0x286a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2870 +vector.hpp 1159 0x2870 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2870 2 x +accum.hpp 1110 0x2870 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x2870 4 x +elementwise_binary_shared.h 185 0x2870 5 +elementwise_binary_shared.h 177 0x2890 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x28a0 x +vector.hpp 1139 0x28a0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 169 0x28a0 2 x +elementwise_binary_shared.h 171 0x28a0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x28b0 +vector.hpp 1159 0x28b0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x28b0 2 x +accum.hpp 1110 0x28b0 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 173 0x28b0 4 x +elementwise_binary_shared.h 185 0x28b0 5 x +elementwise_binary_shared.h 177 0x28d0 x +elementwise_binary_shared.h 187 0x28e0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x28e6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x28e6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x28e6 2 x +elementwise_binary_shared.h 177 0x28ec x +elementwise_binary_shared.h 187 0x28f2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x28f6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x28f6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x28f6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2900 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2900 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 185 0x2900 2 +elementwise_binary_shared.h 205 0x2ba0 x +elementwise_binary_shared.h 211 0x2ba0 1 x +elementwise_binary_shared.h 216 0x2ba0 2 +elementwise_binary_shared.h 216 0x2ba0 3 +elementwise_binary_shared.h 216 0x2baa +elementwise_binary_shared.h 211 0x2bb8 x +elementwise_binary_shared.h 212 0x2bbc x +elementwise_binary_shared.h 212 0x2bcc +elementwise_binary_shared.h 213 0x2bd0 x +elementwise_binary_shared.h 213 0x2be0 +elementwise_binary_shared.h 214 0x2be4 x +elementwise_binary_shared.h 214 0x2bf4 +elementwise_binary_shared.h 216 0x2bf8 x +elementwise_binary_shared.h 217 0x2bfc x +elementwise_binary_shared.h 216 0x2c00 +elementwise_binary_shared.h 216 0x2c06 x +elementwise_binary_shared.h 216 0x2c0a +elementwise_binary_shared.h 216 0x2c0e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 432 0xb60 x +conv2d_bf16_params.h 438 0xb60 1 x +conv2d_bf16_params.h 452 0xb60 2 +conv2d_bf16_params.h 453 0xb60 3 +conv2d_bf16_params.h 458 0xb60 4 +conv2d_bf16_params.h 470 0xb60 5 +conv2d_bf16_params.h 438 0xb6a +conv2d_bf16_params.h 438 0xb6a 1 x +conv2d_bf16_params.h 452 0xb6a 2 +conv2d_bf16_params.h 462 0xb6a 3 +conv2d_bf16_params.h 432 0xb74 +conv2d_bf16_params.h 444 0xb74 1 +conv2d_bf16_params.h 453 0xb7e +conv2d_bf16_params.h 458 0xb7e 1 +conv2d_bf16_params.h 458 0xb7e 2 +conv2d_bf16_params.h 444 0xb8a +conv2d_bf16_params.h 470 0xb8a 1 +conv2d_bf16_params.h 477 0xb8a 2 +conv2d_bf16_params.h 557 0xb8a 3 +conv2d_bf16_params.h 452 0xb96 +conv2d_bf16_params.h 458 0xb96 1 +conv2d_bf16_params.h 462 0xb96 2 +conv2d_bf16_params.h 438 0xb9e +conv2d_bf16_params.h 438 0xba2 +conv2d_bf16_params.h 438 0xba6 +conv2d_bf16_params.h 438 0xbaa +conv2d_bf16_params.h 438 0xbb8 +conv2d_bf16_params.h 438 0xbbc +conv2d_bf16_params.h 438 0xbc0 +conv2d_bf16_params.h 438 0xbc4 +conv2d_bf16_params.h 438 0xbd2 +conv2d_bf16_params.h 438 0xbd6 +conv2d_bf16_params.h 438 0xbda +conv2d_bf16_params.h 438 0xbde +conv2d_bf16_params.h 438 0xbec +conv2d_bf16_params.h 438 0xbf0 +conv2d_bf16_params.h 444 0xbf4 x +conv2d_bf16_params.h 447 0xbf8 x +conv2d_bf16_params.h 448 0xbfc x +conv2d_bf16_params.h 452 0xc00 x +conv2d_bf16_params.h 453 0xc04 x +conv2d_bf16_params.h 458 0xc08 x +conv2d_bf16_params.h 444 0xc0e x +conv2d_bf16_params.h 458 0xc12 x +conv2d_bf16_params.h 462 0xc12 1 x +conv2d_bf16_params.h 462 0xc18 +conv2d_bf16_params.h 452 0xc1c x +conv2d_bf16_params.h 452 0xc20 +conv2d_bf16_params.h 462 0xc20 1 x +conv2d_bf16_params.h 557 0xc20 2 +conv2d_bf16_params.h 462 0xc26 +conv2d_bf16_params.h 458 0xc2a x +conv2d_bf16_params.h 458 0xc2e +conv2d_bf16_params.h 458 0xc32 +conv2d_bf16_params.h 477 0xc32 1 +conv2d_bf16_params.h 557 0xc32 2 x +conv2d_bf16_params.h 458 0xc38 x +conv2d_bf16_params.h 458 0xc3e +conv2d_bf16_params.h 477 0xc3e 1 x +conv2d_bf16_params.h 458 0xc44 x +conv2d_bf16_params.h 444 0xc48 x +conv2d_bf16_params.h 462 0xc4c x +conv2d_bf16_params.h 470 0xc50 x +conv2d_bf16_params.h 470 0xc54 +conv2d_bf16_params.h 477 0xc54 1 x +conv2d_bf16_params.h 477 0xc58 +conv2d_bf16_params.h 491 0xc68 +conv2d_bf16_params.h 492 0xc68 1 +conv2d_bf16_params.h 495 0xc68 2 +conv2d_bf16_params.h 502 0xc68 3 +conv2d_bf16_params.h 533 0xc68 4 +conv2d_bf16_params.h 539 0xc68 5 +conv2d_bf16_params.h 557 0xc68 6 +conv2d_bf16_params.h 621 0xc68 7 +conv2d_bf16_params.h 645 0xc68 8 +conv2d_bf16_params.h 709 0xc68 9 +conv2d_bf16_params.h 477 0xc72 +conv2d_bf16_params.h 481 0xc72 1 +conv2d_bf16_params.h 500 0xc72 2 +conv2d_bf16_params.h 506 0xc72 3 +conv2d_bf16_params.h 507 0xc72 4 +conv2d_bf16_params.h 524 0xc72 5 +conv2d_bf16_params.h 539 0xc72 6 +conv2d_bf16_params.h 655 0xc72 7 +conv2d_bf16_params.h 477 0xc7c +conv2d_bf16_params.h 504 0xc7c 1 +conv2d_bf16_params.h 510 0xc7c 2 +conv2d_bf16_params.h 520 0xc7c 3 +conv2d_bf16_params.h 700 0xc7c 4 +conv2d_bf16_params.h 477 0xc82 +conv2d_bf16_params.h 539 0xc82 1 +conv2d_bf16_params.h 578 0xc82 2 +conv2d_bf16_params.h 642 0xc82 3 +conv2d_bf16_params.h 529 0xc86 +conv2d_bf16_params.h 642 0xc86 1 +conv2d_bf16_params.h 642 0xc86 2 +conv2d_bf16_params.h 655 0xc8a +conv2d_bf16_params.h 453 0xc90 +conv2d_bf16_params.h 453 0xc90 1 +conv2d_bf16_params.h 477 0xc90 2 +conv2d_bf16_params.h 504 0xc90 3 +conv2d_bf16_params.h 655 0xc90 4 +conv2d_bf16_params.h 453 0xc9c x +conv2d_bf16_params.h 477 0xc9c 1 +conv2d_bf16_params.h 481 0xc9c 2 +conv2d_bf16_params.h 500 0xc9c 3 +conv2d_bf16_params.h 506 0xc9c 4 +conv2d_bf16_params.h 507 0xc9c 5 +conv2d_bf16_params.h 524 0xc9c 6 +conv2d_bf16_params.h 539 0xc9c 7 +conv2d_bf16_params.h 491 0xca6 +conv2d_bf16_params.h 492 0xca6 1 +conv2d_bf16_params.h 495 0xca6 2 +conv2d_bf16_params.h 502 0xca6 3 +conv2d_bf16_params.h 510 0xca6 4 +conv2d_bf16_params.h 520 0xca6 5 +conv2d_bf16_params.h 533 0xca6 6 +conv2d_bf16_params.h 539 0xca6 7 +conv2d_bf16_params.h 557 0xca6 8 +conv2d_bf16_params.h 621 0xca6 9 +conv2d_bf16_params.h 645 0xca6 10 +conv2d_bf16_params.h 655 0xca6 11 +conv2d_bf16_params.h 700 0xca6 12 +conv2d_bf16_params.h 709 0xca6 13 +conv2d_bf16_params.h 477 0xcb0 +conv2d_bf16_params.h 529 0xcb0 1 +conv2d_bf16_params.h 539 0xcb0 2 +conv2d_bf16_params.h 578 0xcb0 3 +conv2d_bf16_params.h 642 0xcb0 4 +conv2d_bf16_params.h 642 0xcb0 5 +conv2d_bf16_params.h 642 0xcb0 6 +conv2d_bf16_params.h 477 0xcc0 x +conv2d_bf16_params.h 495 0xcc0 1 x +conv2d_bf16_params.h 495 0xcc0 2 +conv2d_bf16_params.h 682 0xcc0 3 +conv2d_bf16_params.h 477 0xcca +conv2d_bf16_params.h 481 0xcca 1 x +conv2d_bf16_params.h 495 0xcca 2 +conv2d_bf16_params.h 495 0xcca 3 +conv2d_bf16_params.h 477 0xcd4 x +conv2d_bf16_params.h 496 0xcd4 1 +conv2d_bf16_params.h 504 0xcd4 2 +conv2d_bf16_params.h 539 0xcd4 3 +conv2d_bf16_params.h 578 0xcd4 4 +conv2d_bf16_params.h 496 0xcde +conv2d_bf16_params.h 499 0xcde 1 +conv2d_bf16_params.h 504 0xcde 2 x +conv2d_bf16_params.h 509 0xcde 3 +conv2d_bf16_params.h 519 0xcde 4 +conv2d_bf16_params.h 700 0xcde 5 +conv2d_bf16_params.h 492 0xce8 x +conv2d_bf16_params.h 497 0xce8 1 +conv2d_bf16_params.h 509 0xce8 2 +conv2d_bf16_params.h 500 0xcf2 +conv2d_bf16_params.h 520 0xcf2 1 x +conv2d_bf16_params.h 502 0xcf8 +conv2d_bf16_params.h 520 0xcf8 1 +conv2d_bf16_params.h 502 0xd02 +conv2d_bf16_params.h 507 0xd02 1 x +conv2d_bf16_params.h 495 0xd08 x +conv2d_bf16_params.h 495 0xd0c +conv2d_bf16_params.h 495 0xd0c 1 +conv2d_bf16_params.h 610 0xd0c 2 +conv2d_bf16_params.h 709 0xd0c 3 +conv2d_bf16_params.h 507 0xd12 x +conv2d_bf16_params.h 495 0xd16 x +conv2d_bf16_params.h 495 0xd1a +conv2d_bf16_params.h 506 0xd1a 1 +conv2d_bf16_params.h 519 0xd1a 2 x +conv2d_bf16_params.h 496 0xd24 x +conv2d_bf16_params.h 504 0xd24 1 x +conv2d_bf16_params.h 522 0xd24 2 +conv2d_bf16_params.h 509 0xd2e x +conv2d_bf16_params.h 496 0xd34 x +conv2d_bf16_params.h 520 0xd34 1 x +conv2d_bf16_params.h 529 0xd34 2 +conv2d_bf16_params.h 497 0xd3e x +conv2d_bf16_params.h 509 0xd3e 1 x +conv2d_bf16_params.h 533 0xd3e 2 +conv2d_bf16_params.h 539 0xd48 x +conv2d_bf16_params.h 499 0xd4c x +conv2d_bf16_params.h 499 0xd50 +conv2d_bf16_params.h 529 0xd54 x +conv2d_bf16_params.h 507 0xd58 x +conv2d_bf16_params.h 511 0xd58 1 +conv2d_bf16_params.h 491 0xd5e x +conv2d_bf16_params.h 507 0xd5e 1 +conv2d_bf16_params.h 500 0xd68 x +conv2d_bf16_params.h 511 0xd68 1 x +conv2d_bf16_params.h 500 0xd6e +conv2d_bf16_params.h 534 0xd6e 1 +conv2d_bf16_params.h 502 0xd76 x +conv2d_bf16_params.h 509 0xd76 1 x +conv2d_bf16_params.h 642 0xd76 2 +conv2d_bf16_params.h 510 0xd82 x +conv2d_bf16_params.h 506 0xd86 x +conv2d_bf16_params.h 527 0xd8a x +conv2d_bf16_params.h 502 0xd94 x +conv2d_bf16_params.h 502 0xd98 +conv2d_bf16_params.h 506 0xd9c x +conv2d_bf16_params.h 506 0xdac +conv2d_bf16_params.h 506 0xdb0 +conv2d_bf16_params.h 510 0xdb4 x +conv2d_bf16_params.h 510 0xdb8 +conv2d_bf16_params.h 510 0xdbe +conv2d_bf16_params.h 510 0xdc2 +conv2d_bf16_params.h 510 0xdc8 +conv2d_bf16_params.h 539 0xdc8 1 +conv2d_bf16_params.h 642 0xdc8 2 +conv2d_bf16_params.h 511 0xdce x +conv2d_bf16_params.h 524 0xdce 1 +conv2d_bf16_params.h 539 0xdce 2 +conv2d_bf16_params.h 512 0xdd4 x +conv2d_bf16_params.h 524 0xdd4 1 x +conv2d_bf16_params.h 524 0xdda +conv2d_bf16_params.h 524 0xdde +conv2d_bf16_params.h 520 0xde2 x +conv2d_bf16_params.h 511 0xde6 x +conv2d_bf16_params.h 522 0xde6 1 x +conv2d_bf16_params.h 524 0xdec x +conv2d_bf16_params.h 529 0xdec 1 x +conv2d_bf16_params.h 539 0xdec 2 x +conv2d_bf16_params.h 534 0xdf6 +conv2d_bf16_params.h 539 0xdf6 1 +conv2d_bf16_params.h 527 0xdfc x +conv2d_bf16_params.h 533 0xdfc 1 x +conv2d_bf16_params.h 529 0xe0a x +conv2d_bf16_params.h 533 0xe0a 1 +conv2d_bf16_params.h 539 0xe10 x +conv2d_bf16_params.h 529 0xe16 x +conv2d_bf16_params.h 529 0xe16 1 +conv2d_bf16_params.h 529 0xe1c +conv2d_bf16_params.h 534 0xe20 x +conv2d_bf16_params.h 534 0xe24 +conv2d_bf16_params.h 539 0xe24 1 x +conv2d_bf16_params.h 555 0xe24 2 +conv2d_bf16_params.h 559 0xe24 3 +conv2d_bf16_params.h 700 0xe24 4 +conv2d_bf16_params.h 669 0xe2e +conv2d_bf16_params.h 700 0xe2e 1 +conv2d_bf16_params.h 539 0xe32 +conv2d_bf16_params.h 539 0xe42 +conv2d_bf16_params.h 539 0xe52 +conv2d_bf16_params.h 539 0xe52 1 +conv2d_bf16_params.h 539 0xe52 2 +conv2d_bf16_params.h 539 0xe52 3 +conv2d_bf16_params.h 539 0xe5c +conv2d_bf16_params.h 539 0xe60 +conv2d_bf16_params.h 539 0xe64 +conv2d_bf16_params.h 539 0xe64 1 +conv2d_bf16_params.h 539 0xe6a +conv2d_bf16_params.h 539 0xe6e +conv2d_bf16_params.h 539 0xe72 +conv2d_bf16_params.h 669 0xe72 1 +conv2d_bf16_params.h 539 0xe78 +conv2d_bf16_params.h 539 0xe7c +conv2d_bf16_params.h 539 0xe80 +conv2d_bf16_params.h 539 0xe84 +conv2d_bf16_params.h 555 0xe88 x +conv2d_bf16_params.h 642 0xe90 +conv2d_bf16_params.h 669 0xe90 1 +conv2d_bf16_params.h 669 0xe90 2 +conv2d_bf16_params.h 669 0xe9a x +conv2d_bf16_params.h 497 0xe9e x +conv2d_bf16_params.h 641 0xe9e 1 x +conv2d_bf16_params.h 645 0xe9e 2 +conv2d_bf16_params.h 559 0xea8 x +conv2d_bf16_params.h 640 0xea8 1 +conv2d_bf16_params.h 642 0xea8 2 +conv2d_bf16_params.h 642 0xea8 3 +conv2d_bf16_params.h 642 0xeb2 x +conv2d_bf16_params.h 578 0xeb6 x +conv2d_bf16_params.h 640 0xeba x +conv2d_bf16_params.h 557 0xebe +conv2d_bf16_params.h 645 0xebe 1 +conv2d_bf16_params.h 641 0xec8 x +conv2d_bf16_params.h 642 0xec8 1 x +conv2d_bf16_params.h 642 0xece +conv2d_bf16_params.h 642 0xece 1 +conv2d_bf16_params.h 558 0xed2 x +conv2d_bf16_params.h 645 0xed2 1 +conv2d_bf16_params.h 540 0xed8 +conv2d_bf16_params.h 645 0xed8 1 x +conv2d_bf16_params.h 540 0xede x +conv2d_bf16_params.h 557 0xede 1 +conv2d_bf16_params.h 642 0xee4 x +conv2d_bf16_params.h 557 0xee8 x +conv2d_bf16_params.h 655 0xee8 1 +conv2d_bf16_params.h 558 0xeee +conv2d_bf16_params.h 655 0xeee 1 x +conv2d_bf16_params.h 558 0xef4 x +conv2d_bf16_params.h 540 0xef8 x +conv2d_bf16_params.h 655 0xef8 1 +conv2d_bf16_params.h 655 0xef8 2 +conv2d_bf16_params.h 679 0xef8 3 +conv2d_bf16_params.h 655 0xf02 x +conv2d_bf16_params.h 558 0xf06 x +conv2d_bf16_params.h 655 0xf06 1 +conv2d_bf16_params.h 655 0xf06 2 +conv2d_bf16_params.h 679 0xf06 3 +conv2d_bf16_params.h 655 0xf10 x +conv2d_bf16_params.h 126 0xf14 x +conv2d_bf16_params.h 559 0xf14 1 x +conv2d_bf16_params.h 669 0xf1a x +conv2d_bf16_params.h 700 0xf1a 1 +conv2d_bf16_params.h 558 0xf20 x +conv2d_bf16_params.h 700 0xf26 x +conv2d_bf16_params.h 578 0xf2a x +conv2d_bf16_params.h 559 0xf2e x +conv2d_bf16_params.h 578 0xf32 x +conv2d_bf16_params.h 610 0xf36 x +conv2d_bf16_params.h 611 0xf36 1 +conv2d_bf16_params.h 621 0xf36 2 +conv2d_bf16_params.h 621 0xf36 3 +conv2d_bf16_params.h 629 0xf36 4 +conv2d_bf16_params.h 621 0xf42 +conv2d_bf16_params.h 621 0xf42 1 x +conv2d_bf16_params.h 645 0xf42 2 +conv2d_bf16_params.h 649 0xf42 3 +conv2d_bf16_params.h 645 0xf48 +conv2d_bf16_params.h 554 0xf4e x +conv2d_bf16_params.h 645 0xf4e 1 x +conv2d_bf16_params.h 554 0xf58 +conv2d_bf16_params.h 555 0xf58 1 +conv2d_bf16_params.h 555 0xf58 2 x +conv2d_bf16_params.h 645 0xf58 3 +conv2d_bf16_params.h 555 0xf64 +conv2d_bf16_params.h 621 0xf64 1 +conv2d_bf16_params.h 621 0xf64 2 x +conv2d_bf16_params.h 645 0xf64 3 +conv2d_bf16_params.h 558 0xf6e x +conv2d_bf16_params.h 559 0xf6e 1 +conv2d_bf16_params.h 621 0xf6e 2 +conv2d_bf16_params.h 621 0xf6e 3 +conv2d_bf16_params.h 645 0xf6e 4 +conv2d_bf16_params.h 559 0xf7a x +conv2d_bf16_params.h 621 0xf7a 1 x +conv2d_bf16_params.h 645 0xf7a 2 x +conv2d_bf16_params.h 610 0xf80 x +conv2d_bf16_params.h 621 0xf80 1 +conv2d_bf16_params.h 655 0xf80 2 +conv2d_bf16_params.h 679 0xf80 3 +conv2d_bf16_params.h 621 0xf8c +conv2d_bf16_params.h 649 0xf8c 1 +conv2d_bf16_params.h 655 0xf8c 2 x +conv2d_bf16_params.h 661 0xf8c 3 +conv2d_bf16_params.h 127 0xf96 x +conv2d_bf16_params.h 127 0xf96 1 x +conv2d_bf16_params.h 621 0xf96 2 +conv2d_bf16_params.h 649 0xf96 3 +conv2d_bf16_params.h 655 0xf96 4 +conv2d_bf16_params.h 679 0xf96 5 +conv2d_bf16_params.h 710 0xf96 6 +conv2d_bf16_params.h 710 0xf96 7 +conv2d_bf16_params.h 655 0xfa0 x +conv2d_bf16_params.h 679 0xfa0 1 x +conv2d_bf16_params.h 621 0xfa6 x +conv2d_bf16_params.h 649 0xfa6 1 x +conv2d_bf16_params.h 655 0xfa6 2 +conv2d_bf16_params.h 655 0xfa6 3 +conv2d_bf16_params.h 700 0xfa6 4 +conv2d_bf16_params.h 700 0xfa6 5 +conv2d_bf16_params.h 655 0xfb0 x +conv2d_bf16_params.h 700 0xfb0 1 x +conv2d_bf16_params.h 629 0xfb4 x +conv2d_bf16_params.h 611 0xfb8 x +conv2d_bf16_params.h 643 0xfc6 x +conv2d_bf16_params.h 664 0xfca +conv2d_bf16_params.h 621 0xfd0 x +conv2d_bf16_params.h 629 0xfd0 1 +conv2d_bf16_params.h 684 0xfd0 2 x +conv2d_bf16_params.h 629 0xfda x +conv2d_bf16_params.h 127 0xfe0 x +conv2d_bf16_params.h 644 0xfe0 1 +conv2d_bf16_params.h 700 0xfe0 2 x +conv2d_bf16_params.h 705 0xfe0 3 +conv2d_bf16_params.h 705 0xfe0 4 +conv2d_bf16_params.h 645 0xfea x +conv2d_bf16_params.h 700 0xfea 1 +conv2d_bf16_params.h 700 0xfea 2 +conv2d_bf16_params.h 705 0xfea 3 +conv2d_bf16_params.h 644 0xff4 +conv2d_bf16_params.h 649 0xff4 1 x +conv2d_bf16_params.h 674 0xff4 2 +conv2d_bf16_params.h 644 0xffe x +conv2d_bf16_params.h 662 0xffe 1 +conv2d_bf16_params.h 664 0xffe 2 x +conv2d_bf16_params.h 127 0x1008 x +conv2d_bf16_params.h 663 0x1008 1 x +conv2d_bf16_params.h 664 0x1008 2 +conv2d_bf16_params.h 126 0x100e x +conv2d_bf16_params.h 664 0x100e 1 x +conv2d_bf16_params.h 126 0x1014 +conv2d_bf16_params.h 664 0x1014 1 +conv2d_bf16_params.h 127 0x101a x +conv2d_bf16_params.h 127 0x101a 1 x +conv2d_bf16_params.h 664 0x101a 2 +conv2d_bf16_params.h 664 0x101a 3 +conv2d_bf16_params.h 675 0x101a 4 +conv2d_bf16_params.h 696 0x101a 5 +conv2d_bf16_params.h 644 0x1024 x +conv2d_bf16_params.h 664 0x1024 1 x +conv2d_bf16_params.h 705 0x1024 2 +conv2d_bf16_params.h 664 0x102e +conv2d_bf16_params.h 705 0x102e 1 x +conv2d_bf16_params.h 705 0x102e 2 x +conv2d_bf16_params.h 127 0x1034 +conv2d_bf16_params.h 674 0x1034 1 x +conv2d_bf16_params.h 675 0x1034 2 x +conv2d_bf16_params.h 682 0x1034 3 +conv2d_bf16_params.h 718 0x1034 4 +conv2d_bf16_params.h 720 0x1034 5 +conv2d_bf16_params.h 127 0x103e x +conv2d_bf16_params.h 642 0x103e 1 +conv2d_bf16_params.h 675 0x103e 2 +conv2d_bf16_params.h 675 0x1048 x +conv2d_bf16_params.h 707 0x1048 1 x +conv2d_bf16_params.h 642 0x104e +conv2d_bf16_params.h 674 0x104e 1 x +conv2d_bf16_params.h 675 0x104e 2 +conv2d_bf16_params.h 642 0x1058 x +conv2d_bf16_params.h 655 0x1058 1 +conv2d_bf16_params.h 655 0x1058 2 +conv2d_bf16_params.h 675 0x1058 3 x +conv2d_bf16_params.h 679 0x1058 4 +conv2d_bf16_params.h 679 0x1058 5 +conv2d_bf16_params.h 655 0x1064 x +conv2d_bf16_params.h 679 0x1064 1 x +conv2d_bf16_params.h 713 0x1064 2 +conv2d_bf16_params.h 691 0x106a x +conv2d_bf16_params.h 675 0x106e +conv2d_bf16_params.h 675 0x106e 1 x +conv2d_bf16_params.h 709 0x106e 2 x +conv2d_bf16_params.h 675 0x1078 +conv2d_bf16_params.h 706 0x1078 1 x +conv2d_bf16_params.h 706 0x1078 2 +conv2d_bf16_params.h 709 0x1078 3 +conv2d_bf16_params.h 682 0x1084 x +conv2d_bf16_params.h 706 0x1084 1 +conv2d_bf16_params.h 126 0x108a x +conv2d_bf16_params.h 696 0x108a 1 x +conv2d_bf16_params.h 127 0x1090 x +conv2d_bf16_params.h 127 0x1090 1 x +conv2d_bf16_params.h 696 0x1090 2 +conv2d_bf16_params.h 696 0x1096 x +conv2d_bf16_params.h 713 0x1096 1 x +conv2d_bf16_params.h 696 0x109c +conv2d_bf16_params.h 706 0x109c 1 +conv2d_bf16_params.h 706 0x109c 2 x +conv2d_bf16_params.h 706 0x10a6 +conv2d_bf16_params.h 696 0x10aa x +conv2d_bf16_params.h 707 0x10aa 1 x +conv2d_bf16_params.h 696 0x10b0 +conv2d_bf16_params.h 709 0x10b0 1 x +conv2d_bf16_params.h 696 0x10b6 x +conv2d_bf16_params.h 709 0x10b6 1 +conv2d_bf16_params.h 707 0x10c0 x +conv2d_bf16_params.h 708 0x10c0 1 +conv2d_bf16_params.h 710 0x10c0 2 x +conv2d_bf16_params.h 710 0x10c0 3 x +conv2d_bf16_params.h 708 0x10cc x +conv2d_bf16_params.h 713 0x10cc 1 x +conv2d_bf16_params.h 709 0x10d6 x +conv2d_bf16_params.h 800 0x10d6 1 x +conv2d_bf16_params.h 710 0x10dc x +conv2d_bf16_params.h 718 0x10e4 x +conv2d_bf16_params.h 718 0x10e8 +conv2d_bf16_params.h 720 0x10ec x +conv2d_bf16_params.h 800 0x10ec 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x1100 +utils.h 531 0x1100 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 689 0x1100 2 x +conv2d_bf16.h 698 0x1100 3 +conv2d_bf16.h 704 0x1100 4 +conv2d_bf16.h 707 0x1100 5 +conv2d_bf16.h 707 0x1100 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x110c +utils.h 526 0x110c 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 698 0x110c 2 x +conv2d_bf16.h 704 0x110c 3 x +conv2d_bf16.h 707 0x110c 4 +conv2d_bf16.h 707 0x110c 5 +conv2d_bf16.h 698 0x111a +conv2d_bf16.h 702 0x111a 1 +conv2d_bf16.h 698 0x1124 +conv2d_bf16.h 702 0x1124 1 x +conv2d_bf16.h 699 0x112e x +conv2d_bf16.h 702 0x112e 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1138 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 702 0x1138 1 x +conv2d_bf16.h 702 0x113e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1146 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1146 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x114c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 704 0x1150 x +conv2d_bf16.h 702 0x1154 x +conv2d_bf16.h 705 0x1154 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x115a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 707 0x115a 1 +conv2d_bf16.h 707 0x115a 2 +conv2d_bf16.h 704 0x1160 x +conv2d_bf16.h 705 0x1166 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1170 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x1170 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x1170 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x1180 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x1180 1 x +conv2d_bf16.h 704 0x1190 x +conv2d_bf16.h 705 0x11a0 x +conv2d_bf16.h 707 0x11a0 1 x +conv2d_bf16.h 707 0x11a0 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x11b0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 526 0x11b0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x11b0 2 +conv2d_bf16.h 708 0x11b0 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x11c0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x11c0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x11c0 2 x +conv2d_bf16.h 707 0x11d2 x +conv2d_bf16.h 707 0x11d2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x11d6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x11d6 1 x +conv2d_bf16.h 708 0x11d6 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x11de + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x11de 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x11e2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x11e6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 705 0x11e6 1 x +conv2d_bf16.h 707 0x11e6 2 x +conv2d_bf16.h 707 0x11e6 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 350 0x11ee + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 706 0x11ee 1 x +conv2d_bf16.h 708 0x11ee 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x11f6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 707 0x11fa x +conv2d_bf16.h 707 0x11fa 1 x +conv2d_bf16.h 723 0x11fa 2 x +conv2d_bf16.h 708 0x1200 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/common/utils.h: +utils.h 531 0x1204 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x1210 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1836 0x1210 1 +conv2d_bf16.h 1836 0x1210 2 x +conv2d_bf16.h 1836 0x1210 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 240 0x1210 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1836 0x121e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 241 0x121e 1 +conv2d_bf16_params.h 242 0x121e 2 +conv2d_bf16_params.h 250 0x121e 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 866 0x122a +conv2d_bf16.h 876 0x122a 1 +conv2d_bf16.h 876 0x122a 2 +conv2d_bf16.h 881 0x122a 3 +conv2d_bf16.h 1836 0x122a 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 242 0x122a 5 +conv2d_bf16_params.h 242 0x122a 6 +conv2d_bf16_params.h 242 0x122a 7 +conv2d_bf16_params.h 242 0x122a 8 +conv2d_bf16_params.h 242 0x122a 9 +conv2d_bf16_params.h 243 0x122a 10 +conv2d_bf16_params.h 245 0x122a 11 +conv2d_bf16_params.h 250 0x122a 12 +conv2d_bf16_params.h 250 0x122a 13 +conv2d_bf16_params.h 240 0x1236 +conv2d_bf16_params.h 240 0x1236 1 x +conv2d_bf16_params.h 242 0x1242 +conv2d_bf16_params.h 245 0x1242 1 +conv2d_bf16_params.h 242 0x124e +conv2d_bf16_params.h 244 0x124e 1 +conv2d_bf16_params.h 244 0x124e 2 +conv2d_bf16_params.h 249 0x124e 3 +conv2d_bf16_params.h 243 0x125a +conv2d_bf16_params.h 244 0x125a 1 +conv2d_bf16_params.h 250 0x125a 2 +conv2d_bf16_params.h 244 0x1266 +conv2d_bf16_params.h 240 0x1274 +conv2d_bf16_params.h 240 0x1278 +conv2d_bf16_params.h 241 0x1278 1 x +conv2d_bf16_params.h 242 0x127e x +conv2d_bf16_params.h 242 0x127e 1 x +conv2d_bf16_params.h 245 0x1284 x +conv2d_bf16_params.h 242 0x1292 x +conv2d_bf16_params.h 242 0x1296 +conv2d_bf16_params.h 242 0x129a +conv2d_bf16_params.h 241 0x129e x +conv2d_bf16_params.h 242 0x129e 1 +conv2d_bf16_params.h 242 0x12a4 x +conv2d_bf16_params.h 242 0x12a8 +conv2d_bf16_params.h 242 0x12ac +conv2d_bf16_params.h 242 0x12b0 +conv2d_bf16_params.h 242 0x12b0 1 +conv2d_bf16_params.h 242 0x12b6 +conv2d_bf16_params.h 243 0x12ba x +conv2d_bf16_params.h 242 0x12be x +conv2d_bf16_params.h 243 0x12be 1 +conv2d_bf16_params.h 244 0x12c4 x +conv2d_bf16_params.h 245 0x12c4 1 x +conv2d_bf16_params.h 244 0x12d6 +conv2d_bf16_params.h 244 0x12d6 1 +conv2d_bf16_params.h 245 0x12dc +conv2d_bf16_params.h 244 0x12e2 +conv2d_bf16_params.h 244 0x12e6 +conv2d_bf16_params.h 244 0x12ea +conv2d_bf16_params.h 244 0x12ee +conv2d_bf16_params.h 244 0x12f2 +conv2d_bf16_params.h 245 0x12f6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 876 0x1308 +conv2d_bf16.h 876 0x1308 1 +conv2d_bf16.h 1849 0x1316 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 250 0x1320 x +conv2d_bf16_params.h 250 0x1320 1 +conv2d_bf16_params.h 250 0x132c +conv2d_bf16_params.h 250 0x1330 +conv2d_bf16_params.h 250 0x1334 +conv2d_bf16_params.h 250 0x1338 +conv2d_bf16_params.h 250 0x1338 1 +conv2d_bf16_params.h 250 0x133e +conv2d_bf16_params.h 249 0x1342 x +conv2d_bf16_params.h 249 0x1346 +conv2d_bf16_params.h 250 0x134a x +conv2d_bf16_params.h 258 0x1350 x +conv2d_bf16_params.h 259 0x1368 +conv2d_bf16_params.h 259 0x136e x +conv2d_bf16_params.h 259 0x1372 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1841 0x1380 x +conv2d_bf16.h 1849 0x1380 1 +conv2d_bf16.h 1849 0x1380 2 x +conv2d_bf16.h 876 0x138a +conv2d_bf16.h 881 0x138a 1 +conv2d_bf16.h 1841 0x138a 2 +conv2d_bf16.h 1842 0x138a 3 +conv2d_bf16.h 1842 0x138a 4 +conv2d_bf16.h 1842 0x138a 5 +conv2d_bf16.h 1845 0x138a 6 +conv2d_bf16.h 1849 0x138a 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x138a 8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1842 0x1394 x +conv2d_bf16.h 1842 0x1394 1 +conv2d_bf16.h 1849 0x1394 2 +conv2d_bf16.h 862 0x13a0 +conv2d_bf16.h 1842 0x13a0 1 +conv2d_bf16.h 1845 0x13a0 2 +conv2d_bf16.h 1845 0x13ac x +conv2d_bf16.h 862 0x13b0 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x13b4 x +io_buffer_main.h 125 0x13b8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1841 0x13b8 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x13be x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1842 0x13c2 x +conv2d_bf16.h 1845 0x13c8 x +conv2d_bf16.h 866 0x13cc x +conv2d_bf16.h 866 0x13d0 +conv2d_bf16.h 1842 0x13d6 x +conv2d_bf16.h 1842 0x13d6 1 x +conv2d_bf16.h 1842 0x13dc +conv2d_bf16.h 1845 0x13dc 1 x +conv2d_bf16.h 1841 0x13e2 x +conv2d_bf16.h 881 0x13ea +conv2d_bf16.h 885 0x13ea 1 +conv2d_bf16.h 1845 0x13ee x +conv2d_bf16.h 867 0x13f2 +conv2d_bf16.h 867 0x13f8 +conv2d_bf16.h 867 0x13f8 1 x +conv2d_bf16.h 867 0x1400 +conv2d_bf16.h 867 0x1406 +conv2d_bf16.h 867 0x1412 +conv2d_bf16.h 867 0x1412 1 +conv2d_bf16.h 867 0x1418 +conv2d_bf16.h 867 0x141c +conv2d_bf16.h 867 0x1422 +conv2d_bf16.h 867 0x142a +conv2d_bf16.h 881 0x1440 +conv2d_bf16.h 883 0x1440 1 +conv2d_bf16.h 884 0x1440 2 +conv2d_bf16.h 876 0x144c x +conv2d_bf16.h 876 0x144c 1 x +conv2d_bf16.h 881 0x144c 2 x +conv2d_bf16.h 883 0x144c 3 +conv2d_bf16.h 884 0x144c 4 +conv2d_bf16.h 885 0x1458 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x1458 1 +conv2d_bf16_params.h 243 0x1458 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 883 0x1462 x +conv2d_bf16.h 884 0x1468 x +conv2d_bf16.h 876 0x146e x +conv2d_bf16.h 876 0x1472 +conv2d_bf16.h 881 0x1476 x +conv2d_bf16.h 881 0x147a + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x147a 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 881 0x1480 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16_params.h: +conv2d_bf16_params.h 243 0x1480 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 885 0x1490 +conv2d_bf16.h 885 0x1494 x +conv2d_bf16.h 885 0x149e +conv2d_bf16.h 885 0x14a2 +conv2d_bf16.h 885 0x14a6 +conv2d_bf16.h 896 0x14b0 +conv2d_bf16.h 1115 0x14b0 1 +conv2d_bf16.h 1115 0x14b0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x14ba +vector.hpp 1152 0x14ba 1 +vector.hpp 1152 0x14ba 2 +vector.hpp 1152 0x14ba 3 +vector.hpp 1152 0x14ba 4 +vector.hpp 1152 0x14ba 5 +vector.hpp 1152 0x14ba 6 +vector.hpp 1152 0x14ba 7 +vector.hpp 1152 0x14ba 8 +vector.hpp 1152 0x14ba 9 +vector.hpp 1152 0x14ba 10 +vector.hpp 1152 0x14ba 11 +vector.hpp 1152 0x14ba 12 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x14ba 13 +accum.hpp 149 0x14ba 14 +accum.hpp 149 0x14ba 15 +accum.hpp 149 0x14ba 16 +accum.hpp 149 0x14ba 17 +accum.hpp 149 0x14ba 18 +accum.hpp 149 0x14ba 19 +accum.hpp 149 0x14ba 20 +accum.hpp 149 0x14ba 21 +accum.hpp 149 0x14ba 22 +accum.hpp 149 0x14ba 23 +accum.hpp 149 0x14ba 24 +accum.hpp 149 0x14ba 25 +accum.hpp 149 0x14ba 26 +accum.hpp 149 0x14ba 27 +accum.hpp 149 0x14ba 28 +accum.hpp 1110 0x14ba 29 +accum.hpp 1110 0x14ba 30 +accum.hpp 1110 0x14ba 31 +accum.hpp 1110 0x14ba 32 +accum.hpp 1110 0x14ba 33 +accum.hpp 1110 0x14ba 34 +accum.hpp 1110 0x14ba 35 +accum.hpp 1110 0x14ba 36 +accum.hpp 1110 0x14ba 37 +accum.hpp 1110 0x14ba 38 +accum.hpp 1110 0x14ba 39 +accum.hpp 1110 0x14ba 40 +accum.hpp 1110 0x14ba 41 +accum.hpp 1110 0x14ba 42 +accum.hpp 1110 0x14ba 43 +accum.hpp 1110 0x14ba 44 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 886 0x14ba 45 +conv2d_bf16.h 896 0x14ba 46 x +conv2d_bf16.h 1123 0x14ba 47 +conv2d_bf16.h 896 0x14c0 +conv2d_bf16.h 896 0x14c4 +conv2d_bf16.h 896 0x14c8 +conv2d_bf16.h 896 0x14cc +conv2d_bf16.h 896 0x14d0 +conv2d_bf16.h 896 0x14d4 +conv2d_bf16.h 897 0x14d8 x +conv2d_bf16.h 897 0x14dc +conv2d_bf16.h 897 0x14e0 +conv2d_bf16.h 897 0x14e4 +conv2d_bf16.h 897 0x14e8 +conv2d_bf16.h 897 0x14ec +conv2d_bf16.h 897 0x14f0 +conv2d_bf16.h 898 0x14f4 x +conv2d_bf16.h 898 0x14f8 +conv2d_bf16.h 898 0x14fc +conv2d_bf16.h 898 0x1500 +conv2d_bf16.h 898 0x1504 +conv2d_bf16.h 898 0x1508 +conv2d_bf16.h 1115 0x150c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1510 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 898 0x1514 x +conv2d_bf16.h 1115 0x1520 x +conv2d_bf16.h 1115 0x1524 +conv2d_bf16.h 886 0x152a +conv2d_bf16.h 886 0x1530 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1534 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x153c +conv2d_bf16.h 1123 0x153c 1 +conv2d_bf16.h 1123 0x153c 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1546 +aie_core.h 100 0x1546 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1546 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1546 3 +accum.hpp 946 0x1546 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x1546 5 +conv2d_bf16.h 1125 0x1546 6 +conv2d_bf16.h 1154 0x1546 7 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1550 +aie_core.h 100 0x1550 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1550 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1550 3 +accum.hpp 946 0x1550 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1123 0x1550 5 +conv2d_bf16.h 1125 0x1550 6 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x155a +aie_core.h 100 0x155a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x155a 2 +vector.hpp 1152 0x155a 3 +vector.hpp 1152 0x155a 4 +vector.hpp 1152 0x155a 5 +vector.hpp 1152 0x155a 6 +vector.hpp 1152 0x155a 7 +vector.hpp 1152 0x155a 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x155a 9 +accum.hpp 149 0x155a 10 +accum.hpp 149 0x155a 11 +accum.hpp 149 0x155a 12 +accum.hpp 149 0x155a 13 +accum.hpp 149 0x155a 14 +accum.hpp 149 0x155a 15 +accum.hpp 149 0x155a 16 +accum.hpp 578 0x155a 17 +accum.hpp 946 0x155a 18 +accum.hpp 1110 0x155a 19 +accum.hpp 1110 0x155a 20 +accum.hpp 1110 0x155a 21 +accum.hpp 1110 0x155a 22 +accum.hpp 1110 0x155a 23 +accum.hpp 1110 0x155a 24 +accum.hpp 1110 0x155a 25 +accum.hpp 1110 0x155a 26 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 746 0x155a 27 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1566 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x1566 1 +conv2d_bf16.h 1187 0x1566 2 +conv2d_bf16.h 1199 0x1566 3 +conv2d_bf16.h 1200 0x1566 4 +conv2d_bf16.h 1201 0x1566 5 +conv2d_bf16.h 1202 0x1566 6 +conv2d_bf16.h 1143 0x1572 +conv2d_bf16.h 1218 0x1572 1 +conv2d_bf16.h 749 0x157c +conv2d_bf16.h 750 0x157c 1 +conv2d_bf16.h 751 0x157c 2 +conv2d_bf16.h 752 0x157c 3 +conv2d_bf16.h 1123 0x157c 4 +conv2d_bf16.h 736 0x1586 +conv2d_bf16.h 738 0x1586 1 +conv2d_bf16.h 1123 0x1586 2 +conv2d_bf16.h 1873 0x1586 3 +conv2d_bf16.h 1125 0x1592 x +conv2d_bf16.h 1125 0x1596 +conv2d_bf16.h 1125 0x159a +conv2d_bf16.h 1149 0x159e x +conv2d_bf16.h 1154 0x15a2 x +conv2d_bf16.h 743 0x15a6 x +conv2d_bf16.h 745 0x15aa x +conv2d_bf16.h 746 0x15ae x +conv2d_bf16.h 1125 0x15ae 1 x +conv2d_bf16.h 1143 0x15b4 x +conv2d_bf16.h 1206 0x15b8 x +conv2d_bf16.h 1149 0x15bc +conv2d_bf16.h 1154 0x15c4 +conv2d_bf16.h 1125 0x15c8 x +conv2d_bf16.h 1149 0x15cc x +conv2d_bf16.h 1154 0x15d0 x +conv2d_bf16.h 1287 0x15d6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15e0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15e0 1 x +accum.hpp 946 0x15e0 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x15e0 3 +conv2d_bf16.h 738 0x15e0 4 +conv2d_bf16.h 1147 0x15e0 5 x +conv2d_bf16.h 1187 0x15e0 6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15ec + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15ec 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15ec 2 +accum.hpp 946 0x15ec 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x15ec 4 x +conv2d_bf16.h 738 0x15ec 5 x +conv2d_bf16.h 1188 0x15ec 6 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x15f8 +aie_core.h 100 0x15f8 1 +aie_core.h 100 0x15f8 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x15f8 3 +vector.hpp 1139 0x15f8 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x15f8 5 +accum.hpp 578 0x15f8 6 +accum.hpp 946 0x15f8 7 +accum.hpp 946 0x15f8 8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 737 0x15f8 9 x +conv2d_bf16.h 742 0x15f8 10 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1604 +aie_core.h 100 0x1604 1 +aie_core.h 100 0x1604 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1604 3 +vector.hpp 1139 0x1604 4 +vector.hpp 1139 0x1604 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1604 6 +accum.hpp 578 0x1604 7 +accum.hpp 578 0x1604 8 x +accum.hpp 946 0x1604 9 +accum.hpp 946 0x1604 10 +accum.hpp 946 0x1604 11 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x1604 12 x +conv2d_bf16.h 1149 0x1604 13 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1610 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1610 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1610 2 +accum.hpp 946 0x1610 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1610 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x1610 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x161a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x161a 1 x +accum.hpp 946 0x161a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x161a 3 x +conv2d_bf16.h 1152 0x161a 4 x +conv2d_bf16.h 1206 0x161a 5 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1626 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1626 1 +accum.hpp 946 0x1626 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 737 0x1626 3 x +conv2d_bf16.h 1154 0x1626 4 x +conv2d_bf16.h 1206 0x1626 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1632 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1632 1 x +accum.hpp 946 0x1632 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x1632 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1638 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1638 1 +accum.hpp 946 0x1638 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1638 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1157 0x1638 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x163e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x163e 1 x +accum.hpp 946 0x163e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x163e 3 x +conv2d_bf16.h 1159 0x163e 4 x +conv2d_bf16.h 737 0x1644 x +conv2d_bf16.h 738 0x1644 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x164a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x164a 1 x +accum.hpp 946 0x164a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 740 0x164a 3 x +conv2d_bf16.h 1192 0x164a 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1650 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1650 1 +accum.hpp 946 0x1650 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1650 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x1650 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x165a +vector.hpp 1139 0x165a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x165a 2 +accum.hpp 578 0x165a 3 x +accum.hpp 946 0x165a 4 +accum.hpp 946 0x165a 5 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x165a 6 x +conv2d_bf16.h 746 0x165a 7 x +conv2d_bf16.h 1162 0x165a 8 +conv2d_bf16.h 737 0x1666 x +conv2d_bf16.h 742 0x1666 1 x +conv2d_bf16.h 749 0x1666 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1670 x +aie_core.h 143 0x1670 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1670 2 x +vector.hpp 1152 0x1670 3 +vector.hpp 1152 0x1670 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1670 5 x +accum.hpp 946 0x1670 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 750 0x1670 7 x +conv2d_bf16.h 1286 0x1670 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x167e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x167e 1 +vector.hpp 1139 0x167e 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x167e 3 +accum.hpp 578 0x167e 4 +accum.hpp 946 0x167e 5 +accum.hpp 946 0x167e 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x167e 7 x +conv2d_bf16.h 751 0x167e 8 x +conv2d_bf16.h 1162 0x167e 9 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x168c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x168c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x168c 2 +accum.hpp 946 0x168c 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x168c 4 x +conv2d_bf16.h 746 0x168c 5 x +conv2d_bf16.h 1199 0x168c 6 x +conv2d_bf16.h 738 0x169a x +conv2d_bf16.h 1200 0x169a 1 x +conv2d_bf16.h 742 0x16a2 x +conv2d_bf16.h 1201 0x16a2 1 x +conv2d_bf16.h 743 0x16aa x +conv2d_bf16.h 752 0x16aa 1 x +conv2d_bf16.h 738 0x16b2 x +conv2d_bf16.h 740 0x16b2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x16b8 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x16bc x +conv2d_bf16.h 742 0x16bc 1 x +conv2d_bf16.h 1202 0x16bc 2 x +conv2d_bf16.h 1206 0x16bc 3 x +conv2d_bf16.h 737 0x16c8 x +conv2d_bf16.h 743 0x16c8 1 x +conv2d_bf16.h 749 0x16c8 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x16d2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x16d2 1 x +conv2d_bf16.h 740 0x16d2 2 x +conv2d_bf16.h 751 0x16d2 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x16e0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x16e0 1 x +conv2d_bf16.h 750 0x16e0 2 x +conv2d_bf16.h 736 0x16f0 x +conv2d_bf16.h 742 0x16f0 1 x +conv2d_bf16.h 746 0x16f0 2 x +conv2d_bf16.h 752 0x16f0 3 x +conv2d_bf16.h 737 0x1700 x +conv2d_bf16.h 743 0x1700 1 x +conv2d_bf16.h 749 0x1700 2 x +conv2d_bf16.h 738 0x1710 x +conv2d_bf16.h 740 0x1710 1 x +conv2d_bf16.h 751 0x1710 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1720 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x1720 1 x +conv2d_bf16.h 750 0x1720 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1730 +aie_core.h 100 0x1730 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1730 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1730 3 +accum.hpp 946 0x1730 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 742 0x1730 5 x +conv2d_bf16.h 746 0x1730 6 x +conv2d_bf16.h 752 0x1730 7 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x173e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x173e 1 +vector.hpp 1152 0x173e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 743 0x173e 3 x +conv2d_bf16.h 749 0x173e 4 x +conv2d_bf16.h 1286 0x173e 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x174c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x174c 1 +vector.hpp 1152 0x174c 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x174c 3 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1756 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 745 0x1756 1 x +conv2d_bf16.h 750 0x1756 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1760 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 746 0x1760 1 x +conv2d_bf16.h 752 0x1760 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x176a +vector.hpp 1152 0x176a 1 +vector.hpp 1152 0x176a 2 +vector.hpp 1152 0x176a 3 +vector.hpp 1152 0x176a 4 +vector.hpp 1152 0x176a 5 +vector.hpp 1152 0x176a 6 +vector.hpp 1152 0x176a 7 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 749 0x176a 8 x +conv2d_bf16.h 1285 0x176a 9 x +conv2d_bf16.h 1286 0x176a 10 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1776 +aie_core.h 100 0x1776 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1776 2 +vector.hpp 1152 0x1776 3 +vector.hpp 1152 0x1776 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1776 5 +accum.hpp 946 0x1776 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x1776 7 x +conv2d_bf16.h 746 0x1780 x +conv2d_bf16.h 750 0x1780 1 x +conv2d_bf16.h 745 0x1788 x +conv2d_bf16.h 752 0x1788 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1790 +aie_core.h 143 0x1794 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 750 0x1794 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x179c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 749 0x179c 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x17a4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 752 0x17a4 1 x +conv2d_bf16.h 1286 0x17a4 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x17ae + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x17ae 1 +vector.hpp 1152 0x17ae 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 751 0x17ae 3 x +conv2d_bf16.h 1286 0x17ae 4 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x17ba + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x17ba 1 +vector.hpp 1152 0x17ba 2 +vector.hpp 1152 0x17ba 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x17ba 4 +accum.hpp 946 0x17ba 5 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x17c2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1287 0x17c2 1 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x17ca + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x17ca 1 x +accum.hpp 1110 0x17ca 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x17d2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x17d2 1 +accum.hpp 1110 0x17d2 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x17da x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 736 0x17da 1 +conv2d_bf16.h 1287 0x17da 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x17e4 x +accum.hpp 1110 0x17e4 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1287 0x17e4 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x17ec +accum.hpp 1110 0x17ec 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1218 0x17ec 2 x +conv2d_bf16.h 1287 0x17ec 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x17f8 x +accum.hpp 1110 0x17f8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 738 0x17f8 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1800 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1800 1 +accum.hpp 1110 0x1800 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1808 +vector.hpp 1152 0x1808 1 +vector.hpp 1152 0x1808 2 +vector.hpp 1152 0x1808 3 +vector.hpp 1152 0x1808 4 +vector.hpp 1152 0x1808 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1808 6 +accum.hpp 1110 0x1808 7 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1812 +vector.hpp 1152 0x1812 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1812 2 x +accum.hpp 1110 0x1812 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1286 0x1812 4 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x181a +aie_core.h 143 0x181a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x181a 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x181a 3 +accum.hpp 946 0x181a 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1187 0x181a 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1822 x +max_min.hpp 20 0x1826 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x182a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x182a 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1832 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1832 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x183a x +vector.hpp 1152 0x1844 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1844 1 x +max_min.hpp 20 0x184c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1850 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1850 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1858 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1858 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1860 x +vector.hpp 1152 0x1870 +vector.hpp 1152 0x1874 +vector.hpp 1152 0x1878 +vector.hpp 1152 0x187c +vector.hpp 1152 0x1880 +vector.hpp 1152 0x1884 +vector.hpp 1152 0x1888 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x1890 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1890 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1143 0x1890 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x189c + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x189c 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x189c 2 +accum.hpp 946 0x189c 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 100 0x18a0 +aie_core.h 100 0x18a4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x18a4 1 +vector.hpp 1152 0x18a4 2 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x18aa +aie_core.h 143 0x18c0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x18c0 1 +conv2d_bf16.h 1364 0x18c0 2 +conv2d_bf16.h 1364 0x18c0 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x18cc +aie_core.h 143 0x18cc 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x18cc 2 +vector.hpp 1152 0x18cc 3 +vector.hpp 1152 0x18cc 4 +vector.hpp 1152 0x18cc 5 +vector.hpp 1152 0x18cc 6 +vector.hpp 1152 0x18cc 7 +vector.hpp 1152 0x18cc 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x18cc 9 +accum.hpp 149 0x18cc 10 +accum.hpp 149 0x18cc 11 +accum.hpp 149 0x18cc 12 +accum.hpp 149 0x18cc 13 +accum.hpp 149 0x18cc 14 +accum.hpp 149 0x18cc 15 +accum.hpp 149 0x18cc 16 +accum.hpp 1110 0x18cc 17 +accum.hpp 1110 0x18cc 18 +accum.hpp 1110 0x18cc 19 +accum.hpp 1110 0x18cc 20 +accum.hpp 1110 0x18cc 21 +accum.hpp 1110 0x18cc 22 +accum.hpp 1110 0x18cc 23 +accum.hpp 1110 0x18cc 24 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x18cc 25 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x18d8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 807 0x18d8 1 +conv2d_bf16.h 808 0x18d8 2 +conv2d_bf16.h 809 0x18d8 3 +conv2d_bf16.h 810 0x18d8 4 +conv2d_bf16.h 1436 0x18d8 5 +conv2d_bf16.h 1437 0x18d8 6 +conv2d_bf16.h 1438 0x18d8 7 +conv2d_bf16.h 1439 0x18d8 8 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x18e2 +aie_core.h 143 0x18e2 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 802 0x18e2 2 +conv2d_bf16.h 1428 0x18e2 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x18ee +aie_core.h 143 0x18ee 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x18ee 2 +conv2d_bf16.h 794 0x18ee 3 + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x18fa + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 794 0x18fa 1 +conv2d_bf16.h 1455 0x18fa 2 +conv2d_bf16.h 1337 0x1904 +conv2d_bf16.h 1364 0x190e x +conv2d_bf16.h 1873 0x190e 1 +conv2d_bf16.h 1364 0x1914 +conv2d_bf16.h 1369 0x1918 x +conv2d_bf16.h 799 0x191c x +conv2d_bf16.h 801 0x1920 x +conv2d_bf16.h 802 0x1924 x +conv2d_bf16.h 1337 0x1928 x +conv2d_bf16.h 1443 0x192c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1932 +vector.hpp 1152 0x1932 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x1932 2 +conv2d_bf16.h 1364 0x1936 +conv2d_bf16.h 1518 0x1936 1 +conv2d_bf16.h 1364 0x193a +conv2d_bf16.h 1364 0x193e x +conv2d_bf16.h 1369 0x1942 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1948 +vector.hpp 1152 0x1948 1 +vector.hpp 1139 0x1950 +vector.hpp 1139 0x1950 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1950 2 +accum.hpp 578 0x1950 3 +accum.hpp 578 0x1950 4 x +accum.hpp 946 0x1950 5 +accum.hpp 946 0x1950 6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x1950 7 x +conv2d_bf16.h 1362 0x1950 8 x +conv2d_bf16.h 1429 0x1950 9 +conv2d_bf16.h 1443 0x1950 10 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x195e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x195e 1 +accum.hpp 946 0x195e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 793 0x195e 3 x +conv2d_bf16.h 1364 0x195e 4 x +conv2d_bf16.h 1443 0x195e 5 +conv2d_bf16.h 794 0x196a x +conv2d_bf16.h 795 0x196a 1 x +conv2d_bf16.h 1428 0x196a 2 x +conv2d_bf16.h 1443 0x196a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1976 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1976 1 +accum.hpp 578 0x1976 2 +accum.hpp 946 0x1976 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 796 0x1976 4 x +conv2d_bf16.h 799 0x1976 5 x +conv2d_bf16.h 1429 0x1976 6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1980 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1980 1 x +accum.hpp 946 0x1980 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 797 0x1980 3 x +conv2d_bf16.h 1367 0x1980 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1986 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1986 1 +accum.hpp 946 0x1986 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1986 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1369 0x1986 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x198c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x198c 1 x +accum.hpp 946 0x198c 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 792 0x198c 3 x +conv2d_bf16.h 1372 0x198c 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1992 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1992 1 +accum.hpp 946 0x1992 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 793 0x1992 3 x +conv2d_bf16.h 1374 0x1992 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1998 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x1998 1 x +accum.hpp 946 0x1998 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 795 0x1998 3 x +conv2d_bf16.h 1377 0x1998 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x199e + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x199e 1 +accum.hpp 946 0x199e 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 796 0x199e 3 x +conv2d_bf16.h 1379 0x199e 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x19a4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x19a4 1 x +accum.hpp 946 0x19a4 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 797 0x19a4 3 x +conv2d_bf16.h 1429 0x19a4 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x19aa + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 578 0x19aa 1 +accum.hpp 946 0x19aa 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x19aa 3 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 801 0x19aa 4 x +conv2d_bf16.h 1429 0x19aa 5 +conv2d_bf16.h 792 0x19b4 x +conv2d_bf16.h 794 0x19b4 1 x +conv2d_bf16.h 802 0x19b4 2 x +conv2d_bf16.h 793 0x19be x +conv2d_bf16.h 799 0x19be 1 x +conv2d_bf16.h 803 0x19be 2 x +conv2d_bf16.h 807 0x19be 3 x +conv2d_bf16.h 794 0x19ca x +conv2d_bf16.h 804 0x19ca 1 x +conv2d_bf16.h 808 0x19ca 2 x +conv2d_bf16.h 809 0x19d4 x +conv2d_bf16.h 810 0x19d8 x +conv2d_bf16.h 795 0x19dc x +conv2d_bf16.h 802 0x19dc 1 x +conv2d_bf16.h 1437 0x19dc 2 x +conv2d_bf16.h 796 0x19e6 x +conv2d_bf16.h 1436 0x19e6 1 x +conv2d_bf16.h 797 0x19ee x +conv2d_bf16.h 1438 0x19ee 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x19f6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 799 0x19f6 1 x +conv2d_bf16.h 1439 0x19f6 2 x +conv2d_bf16.h 792 0x1a00 x +conv2d_bf16.h 801 0x1a00 1 x +conv2d_bf16.h 793 0x1a06 x +conv2d_bf16.h 804 0x1a06 1 x +conv2d_bf16.h 808 0x1a06 2 x +conv2d_bf16.h 795 0x1a10 x +conv2d_bf16.h 803 0x1a10 1 x +conv2d_bf16.h 807 0x1a10 2 x +conv2d_bf16.h 796 0x1a1a x +conv2d_bf16.h 810 0x1a1a 1 x +conv2d_bf16.h 794 0x1a22 x +conv2d_bf16.h 797 0x1a22 1 x +conv2d_bf16.h 809 0x1a22 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/kernel_helpers.h: +kernel_helpers.h 978 0x1a30 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 799 0x1a30 1 x +conv2d_bf16.h 802 0x1a30 2 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1a40 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a40 1 +vector.hpp 1152 0x1a40 2 +vector.hpp 1152 0x1a40 3 +vector.hpp 1152 0x1a40 4 +vector.hpp 1152 0x1a40 5 +vector.hpp 1152 0x1a40 6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 801 0x1a40 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a4c +vector.hpp 1152 0x1a4c 1 +vector.hpp 1152 0x1a4c 2 +vector.hpp 1152 0x1a4c 3 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 804 0x1a4c 4 x +conv2d_bf16.h 808 0x1a4c 5 x +conv2d_bf16.h 1517 0x1a4c 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1a58 +vector.hpp 1152 0x1a58 1 +vector.hpp 1152 0x1a58 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 803 0x1a58 3 x +conv2d_bf16.h 807 0x1a58 4 x +conv2d_bf16.h 1518 0x1a58 5 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1a64 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 810 0x1a64 1 x + +/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/aie_core.h: +aie_core.h 143 0x1a6c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 809 0x1a6c 1 x +conv2d_bf16.h 1428 0x1a6c 2 +conv2d_bf16.h 801 0x1a76 x +conv2d_bf16.h 802 0x1a7a x +conv2d_bf16.h 803 0x1a7e x +conv2d_bf16.h 807 0x1a7e 1 x +conv2d_bf16.h 804 0x1a86 x +conv2d_bf16.h 808 0x1a86 1 x +conv2d_bf16.h 809 0x1a8e x +conv2d_bf16.h 810 0x1a92 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1a9a x +accum.hpp 1110 0x1a9a 1 x +accum.hpp 149 0x1a9e +accum.hpp 1110 0x1a9e 1 +accum.hpp 149 0x1aa2 +accum.hpp 1110 0x1aa2 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1455 0x1aa2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 149 0x1aac x +accum.hpp 1110 0x1aac 1 x +accum.hpp 149 0x1ab0 +accum.hpp 1110 0x1ab0 1 +accum.hpp 149 0x1ab4 +accum.hpp 1110 0x1ab4 1 +accum.hpp 149 0x1ab8 +accum.hpp 1110 0x1ab8 1 +accum.hpp 149 0x1abc +accum.hpp 1110 0x1abc 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1ac0 x +max_min.hpp 20 0x1ac4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1ac8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1ac8 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1ad0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1ad0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1ad8 x +vector.hpp 1152 0x1ae2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1ae2 1 x +max_min.hpp 20 0x1aea + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1aee x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1aee 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1af6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x1af6 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1152 0x1b00 x +vector.hpp 1152 0x1b10 +vector.hpp 1152 0x1b14 +vector.hpp 1152 0x1b18 +vector.hpp 1152 0x1b1c +vector.hpp 1152 0x1b20 +vector.hpp 1152 0x1b24 +vector.hpp 1152 0x1b28 +vector.hpp 1152 0x1b30 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/conv/conv2d_bf16.h: +conv2d_bf16.h 1337 0x1b30 1 x +conv2d_bf16.h 1873 0x1b68 x +conv2d_bf16.h 1873 0x1b6c + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 74 0x1b80 x +superkernels.cpp 79 0x1b80 1 +superkernels.cpp 81 0x1b80 2 +superkernels.cpp 79 0x1b8a x +superkernels.cpp 81 0x1b8a 1 +superkernels.cpp 74 0x1b94 +superkernels.cpp 79 0x1ba6 +superkernels.cpp 79 0x1ba6 1 +superkernels.cpp 81 0x1bbc +superkernels.cpp 113 0x1bc2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1bc2 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1bcc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1bcc 1 +tile.hpp 86 0x1bcc 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1bdc x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x1be4 +tile.hpp 74 0x1be8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 113 0x1bec + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 74 0x1bec 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 81 0x1bf4 +superkernels.cpp 81 0x1c00 +superkernels.cpp 87 0x1c04 +superkernels.cpp 87 0x1c04 1 x +superkernels.cpp 88 0x1c0e x +superkernels.cpp 89 0x1c0e 1 +superkernels.cpp 88 0x1c18 +superkernels.cpp 88 0x1c1e +superkernels.cpp 87 0x1c26 x +superkernels.cpp 113 0x1c26 1 +superkernels.cpp 88 0x1c2e x +superkernels.cpp 88 0x1c34 +superkernels.cpp 89 0x1c3a x +superkernels.cpp 89 0x1c40 +superkernels.cpp 113 0x1c40 1 +superkernels.cpp 106 0x1c50 +superkernels.cpp 113 0x1c50 1 +superkernels.cpp 117 0x1c50 2 +superkernels.cpp 136 0x1c50 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x1c50 4 +io_buffer_main.h 324 0x1c50 5 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 106 0x1c5a x +superkernels.cpp 108 0x1c5a 1 +superkernels.cpp 107 0x1c64 +superkernels.cpp 108 0x1c64 1 x +superkernels.cpp 139 0x1c64 2 +superkernels.cpp 140 0x1c64 3 +superkernels.cpp 107 0x1c6e x +superkernels.cpp 110 0x1c7a x +superkernels.cpp 110 0x1c7a 1 x +superkernels.cpp 108 0x1c80 x +superkernels.cpp 107 0x1c84 x +superkernels.cpp 108 0x1c84 1 +superkernels.cpp 106 0x1c8a x +superkernels.cpp 106 0x1c8e +superkernels.cpp 107 0x1c92 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x1c96 x +io_buffer_main.h 218 0x1c9a +io_buffer_main.h 218 0x1c9e +io_buffer_main.h 218 0x1ca2 +io_buffer_main.h 235 0x1ca8 x +io_buffer_main.h 218 0x1cb4 x +io_buffer_main.h 218 0x1cb4 1 x +io_buffer_main.h 218 0x1cb8 +io_buffer_main.h 395 0x1cbc +io_buffer_main.h 395 0x1cc6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 113 0x1cd0 x +superkernels.cpp 113 0x1cd6 +superkernels.cpp 113 0x1ce2 +superkernels.cpp 117 0x1cf0 x +superkernels.cpp 117 0x1cf0 1 +superkernels.cpp 117 0x1cfa +superkernels.cpp 117 0x1d0c +superkernels.cpp 117 0x1d10 +superkernels.cpp 136 0x1d16 +superkernels.cpp 140 0x1d16 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x1d22 x +io_buffer_main.h 327 0x1d22 1 +io_buffer_main.h 425 0x1d22 2 +io_buffer_main.h 324 0x1d28 +io_buffer_main.h 425 0x1d38 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 136 0x1d3c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x1d3c 1 x +io_buffer_main.h 327 0x1d4e +io_buffer_main.h 327 0x1d52 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 139 0x1d60 x +superkernels.cpp 139 0x1d60 1 +superkernels.cpp 139 0x1d6a +superkernels.cpp 142 0x1d72 +superkernels.cpp 139 0x1d7e +superkernels.cpp 139 0x1d82 +superkernels.cpp 140 0x1d94 x +superkernels.cpp 142 0x1da4 x +superkernels.cpp 142 0x1da8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 141 0x1dc0 x +elementwise_binary.h 142 0x1dc0 1 +elementwise_binary.h 144 0x1dc0 2 x +elementwise_binary.h 141 0x1dc6 +elementwise_binary.h 141 0x1dca +elementwise_binary.h 142 0x1dce x +elementwise_binary.h 142 0x1dd2 +elementwise_binary.h 130 0x1de0 x +elementwise_binary.h 133 0x1de0 1 x +elementwise_binary.h 130 0x1de4 +elementwise_binary.h 133 0x1df8 x +elementwise_binary.h 134 0x1dfc x +elementwise_binary.h 134 0x1e0c +elementwise_binary.h 135 0x1e10 x +elementwise_binary.h 135 0x1e20 +elementwise_binary.h 136 0x1e24 x +elementwise_binary.h 137 0x1e2c x +elementwise_binary.h 136 0x1e38 x +elementwise_binary.h 137 0x1e3c +elementwise_binary.h 137 0x1e40 +elementwise_binary.h 139 0x1e40 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 146 0x1e40 2 +add_impl.h 146 0x1e4a +add_impl.h 147 0x1e4a 1 +add_impl.h 147 0x1e4a 2 +add_impl.h 146 0x1e54 x +add_impl.h 147 0x1e54 1 +add_impl.h 147 0x1e5e x +add_impl.h 147 0x1e66 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x1e6a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x1e6e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x1e72 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x1e78 x +add_impl.h 147 0x1e7c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 81 0x1e90 +elementwise_binary_broadcasting.h 81 0x1e90 1 x +elementwise_binary_broadcasting.h 82 0x1e90 2 +elementwise_binary_broadcasting.h 82 0x1e90 3 +elementwise_binary_broadcasting.h 83 0x1e90 4 +elementwise_binary_broadcasting.h 81 0x1e9a +elementwise_binary_broadcasting.h 82 0x1e9a 1 +elementwise_binary_broadcasting.h 82 0x1ea0 x +elementwise_binary_broadcasting.h 84 0x1eae x +elementwise_binary_broadcasting.h 82 0x1eb2 x +elementwise_binary_broadcasting.h 83 0x1eb6 x +elementwise_binary_broadcasting.h 82 0x1eba x +elementwise_binary_broadcasting.h 83 0x1eba 1 +elementwise_binary_broadcasting.h 82 0x1ec0 +elementwise_binary_broadcasting.h 82 0x1ec4 +elementwise_binary_broadcasting.h 76 0x1ed0 +elementwise_binary_broadcasting.h 76 0x1ed0 1 x +elementwise_binary_broadcasting.h 77 0x1eda x +elementwise_binary_broadcasting.h 78 0x1ee4 +elementwise_binary_broadcasting.h 78 0x1ef4 +elementwise_binary_broadcasting.h 78 0x1ef8 x +elementwise_binary_broadcasting.h 78 0x1efe +elementwise_binary_broadcasting.h 79 0x1f02 x +elementwise_binary_broadcasting.h 89 0x1f10 x +elementwise_binary_broadcasting.h 96 0x1f10 1 x +elementwise_binary_broadcasting.h 102 0x1f10 2 +elementwise_binary_broadcasting.h 102 0x1f16 x +elementwise_binary_broadcasting.h 117 0x1f16 1 +elementwise_binary_broadcasting.h 102 0x1f28 +elementwise_binary_broadcasting.h 102 0x1f28 1 +elementwise_binary_broadcasting.h 96 0x1f2e +elementwise_binary_broadcasting.h 96 0x1f32 x +elementwise_binary_broadcasting.h 103 0x1f3c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1f50 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f56 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 106 0x1f60 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../aie2/broadcast.hpp: +broadcast.hpp 56 0x1f70 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x1f76 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1f80 +add_accum.hpp 19 0x1f80 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 117 0x1f80 2 x +elementwise_binary_broadcasting.h 117 0x1f80 3 x +elementwise_binary_broadcasting.h 117 0x1f8a +elementwise_binary_broadcasting.h 117 0x1f8a 1 +elementwise_binary_broadcasting.h 117 0x1f94 +elementwise_binary_broadcasting.h 117 0x1f9a +elementwise_binary_broadcasting.h 117 0x1fa0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fa8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fa8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1fa8 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fac + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fac 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1fac 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fb0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fb0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1fb0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fb4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fb4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1fb4 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fb8 x +vector.hpp 1159 0x1fb8 1 +vector.hpp 1159 0x1fb8 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fb8 3 x +accum.hpp 1110 0x1fb8 4 +accum.hpp 1110 0x1fb8 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1fb8 6 x +elementwise_binary.h 195 0x1fb8 7 +elementwise_binary.h 218 0x1fb8 8 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fbe + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fbe 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1fbe 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1fbe 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fc6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fc6 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1fc6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fca + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fca 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1fca 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1fca 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fd2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fd2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1fd2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fd6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fd6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1fd6 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1fd6 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fde x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fde 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x1fde 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1fe2 +vector.hpp 1159 0x1fe2 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1fe2 2 +accum.hpp 1110 0x1fe2 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1fe2 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x1fe2 5 x +elementwise_binary.h 218 0x1fe2 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ff0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1ff0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x1ff0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x1ff4 +vector.hpp 1159 0x1ff4 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x1ff4 2 +accum.hpp 1110 0x1ff4 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x1ff4 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x1ff4 5 x +elementwise_binary.h 195 0x1ff4 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2000 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2000 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x2000 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2010 +vector.hpp 1159 0x2010 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 946 0x2010 2 +accum.hpp 1110 0x2010 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x2010 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 213 0x2010 5 x +elementwise_binary.h 218 0x2010 6 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2022 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2022 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x2022 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2022 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x202c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x202c 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2p/add_accum.hpp: +add_accum.hpp 19 0x202c 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x202c 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2036 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2036 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2036 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +elementwise_binary_broadcasting.h 121 0x2036 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x203e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x203e 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x203e 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2044 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2044 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x2044 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 82 0x2050 +ise_binary_attribute_broadcasting.h 82 0x2050 1 x +ise_binary_attribute_broadcasting.h 90 0x2056 +ise_binary_attribute_broadcasting.h 90 0x205e x +ise_binary_attribute_broadcasting.h 117 0x205e 1 +ise_binary_attribute_broadcasting.h 92 0x2066 x +ise_binary_attribute_broadcasting.h 92 0x2066 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x2076 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0x2076 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_attribute_broadcasting.h: +ise_binary_attribute_broadcasting.h 117 0x2082 x +ise_binary_attribute_broadcasting.h 92 0x2088 +ise_binary_attribute_broadcasting.h 92 0x208e x +ise_binary_attribute_broadcasting.h 92 0x2092 +ise_binary_attribute_broadcasting.h 117 0x2092 1 +ise_binary_attribute_broadcasting.h 117 0x2098 +ise_binary_attribute_broadcasting.h 118 0x20a0 +ise_binary_attribute_broadcasting.h 118 0x20b0 x +ise_binary_attribute_broadcasting.h 118 0x20b4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 147 0x20d0 x +superkernels.cpp 152 0x20d0 1 +superkernels.cpp 152 0x20d6 x +superkernels.cpp 147 0x20dc +superkernels.cpp 149 0x20ea + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x20f4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 152 0x20fc +superkernels.cpp 152 0x20fc 1 +superkernels.cpp 149 0x2102 x +superkernels.cpp 149 0x2106 +superkernels.cpp 149 0x210e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x210e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 155 0x2116 +superkernels.cpp 166 0x2116 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x211c +tile.hpp 74 0x2122 +tile.hpp 86 0x2122 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 155 0x212e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2138 +tile.hpp 74 0x213c +tile.hpp 74 0x2140 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 159 0x2150 +superkernels.cpp 159 0x2156 x +superkernels.cpp 159 0x2156 1 +superkernels.cpp 157 0x2160 +superkernels.cpp 159 0x2160 1 +superkernels.cpp 166 0x2160 2 +superkernels.cpp 157 0x216a x +superkernels.cpp 159 0x216a 1 +superkernels.cpp 164 0x216a 2 +superkernels.cpp 157 0x217e +superkernels.cpp 159 0x2186 x +superkernels.cpp 157 0x218a x +superkernels.cpp 159 0x2190 x +superkernels.cpp 164 0x21a0 +superkernels.cpp 166 0x21a0 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x21b0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 163 0x21b8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x21b8 1 +io_buffer_main.h 218 0x21c2 +io_buffer_main.h 218 0x21c6 +io_buffer_main.h 235 0x21ca x +io_buffer_main.h 218 0x21d8 x +io_buffer_main.h 218 0x21d8 1 x +io_buffer_main.h 218 0x21dc +io_buffer_main.h 395 0x21e0 +io_buffer_main.h 395 0x21ea x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0x21ee +superkernels.cpp 163 0x21f8 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x21fc x +io_buffer_main.h 324 0x21fc 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 164 0x2202 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2206 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 166 0x220c x +superkernels.cpp 163 0x2214 x +superkernels.cpp 163 0x2218 +superkernels.cpp 164 0x221c x +superkernels.cpp 164 0x2220 +superkernels.cpp 168 0x2230 +superkernels.cpp 169 0x2230 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2230 2 x +io_buffer_main.h 327 0x223a +io_buffer_main.h 425 0x223a 1 +io_buffer_main.h 425 0x2248 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x224c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x224c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x2256 x +superkernels.cpp 168 0x225a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2266 x +io_buffer_main.h 327 0x226a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 168 0x226e x +superkernels.cpp 168 0x2272 +superkernels.cpp 169 0x2282 +superkernels.cpp 169 0x2286 x +superkernels.cpp 171 0x2290 +superkernels.cpp 171 0x22a4 x +superkernels.cpp 171 0x22ac + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 124 0x22c0 x +elementwise_unary.h 126 0x22c0 1 x +elementwise_unary.h 126 0x22d0 x +elementwise_unary.h 127 0x22d4 x +elementwise_unary.h 127 0x22e4 +elementwise_unary.h 128 0x22e8 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 113 0x22ec x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 128 0x22fa x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 113 0x22fe x +clip_impl.h 114 0x230e x +clip_impl.h 114 0x2312 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 130 0x2316 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2330 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 136 0x2330 1 x +elementwise_unary.h 142 0x2330 2 +elementwise_unary.h 154 0x2330 3 x +elementwise_unary.h 171 0x2330 4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x233c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x233c 1 +elementwise_unary.h 154 0x233c 2 x +elementwise_unary.h 190 0x233c 3 x +elementwise_unary.h 136 0x2348 +elementwise_unary.h 136 0x234c x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 103 0x2350 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x2354 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2358 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 171 0x2358 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/clip_impl.h: +clip_impl.h 104 0x2358 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2364 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x2364 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x236c x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x236c 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 142 0x236c 2 x +elementwise_unary.h 171 0x236c 3 x +elementwise_unary.h 154 0x2376 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x237e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2382 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2382 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x2382 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x2390 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x2390 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x2390 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x23a0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x23b0 x +vector.hpp 1159 0x23b0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x23b0 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 171 0x23b0 3 x +elementwise_unary.h 176 0x23b0 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x23c0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x23c0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 190 0x23c0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x23d0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x23d0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x23d0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x23e0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x23f0 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x23f0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x23f0 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x23f8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x23fc x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x23fc 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x23fc 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2404 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 158 0x2404 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x240a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 21 0x240a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x240a 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/max_min.hpp: +max_min.hpp 20 0x2412 x +max_min.hpp 21 0x2416 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x241a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 195 0x241a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x241e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_unary.h: +elementwise_unary.h 176 0x241e 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 176 0x2430 x +superkernels.cpp 181 0x2430 1 +superkernels.cpp 181 0x2436 x +superkernels.cpp 176 0x243c +superkernels.cpp 178 0x244a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2454 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 181 0x245c +superkernels.cpp 181 0x245c 1 +superkernels.cpp 178 0x2462 x +superkernels.cpp 178 0x2466 +superkernels.cpp 178 0x246e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x246e 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 184 0x2476 +superkernels.cpp 195 0x2476 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x247c +tile.hpp 74 0x2482 +tile.hpp 86 0x2482 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 184 0x248e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2498 +tile.hpp 74 0x249c +tile.hpp 74 0x24a0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 188 0x24b0 +superkernels.cpp 188 0x24b6 x +superkernels.cpp 188 0x24b6 1 +superkernels.cpp 186 0x24c0 +superkernels.cpp 188 0x24c0 1 +superkernels.cpp 195 0x24c0 2 +superkernels.cpp 186 0x24ca x +superkernels.cpp 188 0x24ca 1 +superkernels.cpp 193 0x24ca 2 +superkernels.cpp 186 0x24de +superkernels.cpp 188 0x24e6 x +superkernels.cpp 186 0x24ea x +superkernels.cpp 188 0x24f0 x +superkernels.cpp 193 0x2500 +superkernels.cpp 195 0x2500 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2510 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 192 0x2518 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2518 1 +io_buffer_main.h 218 0x2522 +io_buffer_main.h 218 0x2526 +io_buffer_main.h 235 0x252a x +io_buffer_main.h 218 0x2538 x +io_buffer_main.h 218 0x2538 1 x +io_buffer_main.h 218 0x253c +io_buffer_main.h 395 0x2540 +io_buffer_main.h 395 0x254a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 193 0x254e +superkernels.cpp 192 0x2558 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x255c x +io_buffer_main.h 324 0x255c 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 193 0x2562 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2566 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 195 0x256c x +superkernels.cpp 192 0x2574 x +superkernels.cpp 192 0x2578 +superkernels.cpp 193 0x257c x +superkernels.cpp 193 0x2580 +superkernels.cpp 197 0x2590 +superkernels.cpp 198 0x2590 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2590 2 x +io_buffer_main.h 327 0x259a +io_buffer_main.h 425 0x259a 1 +io_buffer_main.h 425 0x25a8 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x25ac + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x25ac 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x25b6 x +superkernels.cpp 197 0x25ba + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x25c6 x +io_buffer_main.h 327 0x25ca + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 197 0x25ce x +superkernels.cpp 197 0x25d2 +superkernels.cpp 198 0x25e2 +superkernels.cpp 198 0x25e6 x +superkernels.cpp 200 0x25f0 +superkernels.cpp 200 0x2604 x +superkernels.cpp 200 0x260c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 219 0x26a0 +elementwise_binary_shared.h 219 0x26a0 1 x +elementwise_binary_shared.h 220 0x26aa x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x26b4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x26c0 +elementwise_binary_shared.h 222 0x26d2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x26dc + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x26e0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 193 0x26e0 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x2910 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 237 0x2910 1 x +elementwise_binary_shared.h 244 0x2910 2 +elementwise_binary_shared.h 245 0x2910 3 +elementwise_binary_shared.h 247 0x2910 4 +elementwise_binary_shared.h 250 0x2910 5 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x291a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 244 0x291a 1 x +elementwise_binary_shared.h 245 0x291a 2 +elementwise_binary_shared.h 247 0x291a 3 +elementwise_binary_shared.h 244 0x292c +elementwise_binary_shared.h 244 0x292c 1 +elementwise_binary_shared.h 237 0x2932 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 538 0x2940 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector_native_types.hpp: +vector_native_types.hpp 374 0x2940 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 247 0x2946 x +elementwise_binary_shared.h 245 0x2970 x +elementwise_binary_shared.h 245 0x2976 +elementwise_binary_shared.h 245 0x2976 1 +elementwise_binary_shared.h 250 0x2990 +elementwise_binary_shared.h 250 0x2994 x +elementwise_binary_shared.h 250 0x2998 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 205 0x29b0 x +superkernels.cpp 210 0x29b0 1 +superkernels.cpp 210 0x29b6 x +superkernels.cpp 205 0x29bc +superkernels.cpp 207 0x29ca + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x29d4 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 210 0x29dc +superkernels.cpp 210 0x29dc 1 +superkernels.cpp 207 0x29e2 x +superkernels.cpp 207 0x29e6 +superkernels.cpp 207 0x29ee + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x29ee 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 213 0x29f6 +superkernels.cpp 224 0x29f6 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x29fc +tile.hpp 74 0x2a02 +tile.hpp 86 0x2a02 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 213 0x2a0e x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2a18 +tile.hpp 74 0x2a1c +tile.hpp 74 0x2a20 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 217 0x2a30 +superkernels.cpp 217 0x2a36 x +superkernels.cpp 217 0x2a36 1 +superkernels.cpp 215 0x2a40 +superkernels.cpp 217 0x2a40 1 +superkernels.cpp 224 0x2a40 2 +superkernels.cpp 215 0x2a4a x +superkernels.cpp 217 0x2a4a 1 +superkernels.cpp 222 0x2a4a 2 +superkernels.cpp 215 0x2a5e +superkernels.cpp 217 0x2a66 x +superkernels.cpp 215 0x2a6a x +superkernels.cpp 217 0x2a70 x +superkernels.cpp 222 0x2a80 +superkernels.cpp 224 0x2a80 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2a90 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 221 0x2a98 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2a98 1 +io_buffer_main.h 218 0x2aa2 +io_buffer_main.h 218 0x2aa6 +io_buffer_main.h 235 0x2aaa x +io_buffer_main.h 218 0x2ab8 x +io_buffer_main.h 218 0x2ab8 1 x +io_buffer_main.h 218 0x2abc +io_buffer_main.h 395 0x2ac0 +io_buffer_main.h 395 0x2aca x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x2ace +superkernels.cpp 221 0x2ad8 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2adc x +io_buffer_main.h 324 0x2adc 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 222 0x2ae2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2ae6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 224 0x2aec x +superkernels.cpp 221 0x2af4 x +superkernels.cpp 221 0x2af8 +superkernels.cpp 222 0x2afc x +superkernels.cpp 222 0x2b00 +superkernels.cpp 226 0x2b10 +superkernels.cpp 227 0x2b10 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2b10 2 x +io_buffer_main.h 327 0x2b1a +io_buffer_main.h 425 0x2b1a 1 +io_buffer_main.h 425 0x2b28 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2b2c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2b2c 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2b36 x +superkernels.cpp 226 0x2b3a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2b46 x +io_buffer_main.h 327 0x2b4a + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 226 0x2b4e x +superkernels.cpp 226 0x2b52 +superkernels.cpp 227 0x2b62 +superkernels.cpp 227 0x2b66 x +superkernels.cpp 229 0x2b70 +superkernels.cpp 229 0x2b84 x +superkernels.cpp 229 0x2b8c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 219 0x2c20 +elementwise_binary_shared.h 219 0x2c20 1 x +elementwise_binary_shared.h 220 0x2c2a x +elementwise_binary_shared.h 220 0x2c38 +elementwise_binary_shared.h 220 0x2c40 +elementwise_binary_shared.h 222 0x2c40 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 146 0x2c40 2 +add_impl.h 146 0x2c4a +add_impl.h 147 0x2c4a 1 +add_impl.h 147 0x2c4a 2 +add_impl.h 146 0x2c54 x +add_impl.h 147 0x2c54 1 +add_impl.h 147 0x2c5e x +add_impl.h 147 0x2c66 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2c6a x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x2c6e + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 222 0x2c72 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/add_impl.h: +add_impl.h 147 0x2c78 x +add_impl.h 147 0x2c7c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_shared.h: +elementwise_binary_shared.h 227 0x2c90 x +elementwise_binary_shared.h 232 0x2c90 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 332 0x2ca0 x +superkernels.cpp 337 0x2ca0 1 +superkernels.cpp 337 0x2ca6 x +superkernels.cpp 332 0x2cac +superkernels.cpp 334 0x2cb2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2cb2 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 337 0x2cce x +superkernels.cpp 337 0x2cce 1 x +superkernels.cpp 334 0x2cd4 x +superkernels.cpp 334 0x2cd8 +superkernels.cpp 334 0x2cde + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2ce6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 340 0x2cea +superkernels.cpp 342 0x2cea 1 +superkernels.cpp 344 0x2cea 2 +superkernels.cpp 356 0x2cea 3 +superkernels.cpp 340 0x2cf4 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2cf4 1 +tile.hpp 74 0x2cfe +tile.hpp 86 0x2cfe 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 340 0x2d0a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x2d14 +tile.hpp 74 0x2d18 +tile.hpp 74 0x2d1c x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 344 0x2d20 +superkernels.cpp 344 0x2d20 1 x +superkernels.cpp 344 0x2d2a +superkernels.cpp 344 0x2d2a 1 +superkernels.cpp 353 0x2d2a 2 +superkernels.cpp 342 0x2d34 x +superkernels.cpp 345 0x2d34 1 +superkernels.cpp 354 0x2d34 2 +superkernels.cpp 342 0x2d4a +superkernels.cpp 344 0x2d50 x +superkernels.cpp 342 0x2d54 x +superkernels.cpp 344 0x2d58 x +superkernels.cpp 345 0x2d5c x +superkernels.cpp 353 0x2d60 +superkernels.cpp 354 0x2d66 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2d70 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 348 0x2d74 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2d74 1 +io_buffer_main.h 218 0x2d7e +io_buffer_main.h 218 0x2d82 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 348 0x2d86 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 235 0x2d8a x +io_buffer_main.h 218 0x2d96 x +io_buffer_main.h 218 0x2d96 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 348 0x2d9a x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x2d9a 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 348 0x2da0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 395 0x2da4 +io_buffer_main.h 395 0x2da4 1 +io_buffer_main.h 395 0x2dae x +io_buffer_main.h 218 0x2db2 x +io_buffer_main.h 218 0x2dba +io_buffer_main.h 218 0x2dbe +io_buffer_main.h 218 0x2dc2 +io_buffer_main.h 235 0x2dc6 x +io_buffer_main.h 218 0x2dd4 x +io_buffer_main.h 218 0x2dd4 1 x +io_buffer_main.h 218 0x2dd8 +io_buffer_main.h 395 0x2de4 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 353 0x2de8 +superkernels.cpp 354 0x2de8 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2de8 2 +io_buffer_main.h 125 0x2df6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 353 0x2dfa x +superkernels.cpp 354 0x2e00 x +superkernels.cpp 356 0x2e00 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x2e06 x +io_buffer_main.h 125 0x2e0a +io_buffer_main.h 327 0x2e0e +io_buffer_main.h 327 0x2e0e 1 +io_buffer_main.h 125 0x2e14 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 356 0x2e1a x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2e20 +io_buffer_main.h 327 0x2e20 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 353 0x2e24 x +superkernels.cpp 354 0x2e28 x +superkernels.cpp 354 0x2e2c +superkernels.cpp 353 0x2e30 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x2e40 x +io_buffer_main.h 327 0x2e40 1 +io_buffer_main.h 327 0x2e40 2 +io_buffer_main.h 327 0x2e40 3 +io_buffer_main.h 327 0x2e40 4 +io_buffer_main.h 425 0x2e40 5 +io_buffer_main.h 425 0x2e40 6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 360 0x2e4a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 425 0x2e5a x +io_buffer_main.h 327 0x2e5e x +io_buffer_main.h 324 0x2e62 +io_buffer_main.h 327 0x2e70 +io_buffer_main.h 324 0x2e74 x +io_buffer_main.h 327 0x2e74 1 +io_buffer_main.h 425 0x2e86 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 360 0x2e8a +superkernels.cpp 361 0x2e8a 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2e8a 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 360 0x2e94 x +superkernels.cpp 360 0x2e98 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x2ea4 x +io_buffer_main.h 327 0x2ea8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 360 0x2eac x +superkernels.cpp 360 0x2eb0 +superkernels.cpp 361 0x2ec0 +superkernels.cpp 361 0x2ec4 x +superkernels.cpp 363 0x2ed0 +superkernels.cpp 363 0x2ee6 x +superkernels.cpp 363 0x2eee + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 141 0x2f00 x +elementwise_binary.h 142 0x2f00 1 +elementwise_binary.h 144 0x2f00 2 x +elementwise_binary.h 141 0x2f06 +elementwise_binary.h 141 0x2f0a +elementwise_binary.h 142 0x2f0e x +elementwise_binary.h 142 0x2f12 +elementwise_binary.h 130 0x2f20 x +elementwise_binary.h 133 0x2f20 1 x +elementwise_binary.h 130 0x2f24 +elementwise_binary.h 133 0x2f36 x +elementwise_binary.h 134 0x2f3a x +elementwise_binary.h 134 0x2f4a +elementwise_binary.h 135 0x2f4e x +elementwise_binary.h 135 0x2f5e +elementwise_binary.h 136 0x2f62 x +elementwise_binary.h 137 0x2f6a x +elementwise_binary.h 136 0x2f78 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2f7c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x2f80 +elementwise_binary.h 139 0x2f92 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2f9c + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 139 0x2fa0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/../../include/misc/mul_impl.h: +mul_impl.h 134 0x2fa0 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 149 0x2fb0 x +elementwise_binary.h 156 0x2fb0 1 +elementwise_binary.h 168 0x2fb0 2 x +elementwise_binary.h 156 0x2fba x +elementwise_binary.h 168 0x2fba 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x2fc4 +mul_acc32_fp.hpp 36 0x2fc4 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 156 0x2fc4 2 +elementwise_binary.h 156 0x2fc4 3 +elementwise_binary.h 156 0x2fce +elementwise_binary.h 156 0x2fce 1 +elementwise_binary.h 156 0x2fd8 +elementwise_binary.h 156 0x2fe2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2fe6 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 168 0x2fe6 1 +elementwise_binary.h 187 0x2fe6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2fec +vector.hpp 1139 0x2fec 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2fec 2 x +elementwise_binary.h 211 0x2fec 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ff2 x +vector.hpp 1139 0x2ff2 1 x +vector.hpp 1159 0x2ff2 2 +vector.hpp 1159 0x2ff2 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x2ff2 4 +accum.hpp 1110 0x2ff2 5 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x2ff2 6 x +elementwise_binary.h 195 0x2ff2 7 +elementwise_binary.h 213 0x2ff2 8 x +elementwise_binary.h 218 0x2ff2 9 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x2ffa +vector.hpp 1139 0x2ffa 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x2ffa 2 x +elementwise_binary.h 211 0x2ffa 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3000 x +vector.hpp 1139 0x3000 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x3000 2 x +elementwise_binary.h 213 0x3000 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3006 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 189 0x3006 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x300a x +vector.hpp 1139 0x300a 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x300a 2 x +elementwise_binary.h 213 0x300a 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3010 +vector.hpp 1139 0x3010 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x3010 2 x +elementwise_binary.h 189 0x3010 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3016 x +vector.hpp 1139 0x3016 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3016 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x3016 3 x +elementwise_binary.h 213 0x3016 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3020 +vector.hpp 1139 0x3020 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3020 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x3020 3 x +elementwise_binary.h 189 0x3020 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x302a x +vector.hpp 1139 0x302a 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x302a 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x302a 3 x +elementwise_binary.h 213 0x302a 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3034 +vector.hpp 1139 0x3034 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3034 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x3034 3 x +elementwise_binary.h 189 0x3034 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x303e x +vector.hpp 1139 0x303e 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x303e 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x303e 3 x +elementwise_binary.h 213 0x303e 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3048 +vector.hpp 1139 0x3048 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3048 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x3048 3 x +elementwise_binary.h 189 0x3048 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3052 x +vector.hpp 1139 0x3052 1 x +vector.hpp 1159 0x3052 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3052 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3052 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x3052 5 x +elementwise_binary.h 213 0x3052 6 x +elementwise_binary.h 218 0x3052 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3060 +vector.hpp 1139 0x3060 1 +vector.hpp 1159 0x3060 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3060 3 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3060 4 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 187 0x3060 5 x +elementwise_binary.h 189 0x3060 6 x +elementwise_binary.h 195 0x3060 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3070 x +vector.hpp 1139 0x3070 1 x +vector.hpp 1159 0x3070 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3070 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3070 4 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 211 0x3070 5 x +elementwise_binary.h 213 0x3070 6 x +elementwise_binary.h 218 0x3070 7 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3080 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3080 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3080 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x3080 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3088 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3088 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3088 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x3088 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3090 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3090 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3090 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x3090 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3098 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x3098 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x3098 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x3098 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x30a0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x30a0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x30a0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x30a0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x30a8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x30a8 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x30a8 2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x30a8 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x30b0 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x30b0 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/mul_acc32_fp.hpp: +mul_acc32_fp.hpp 36 0x30b0 2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x30b0 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x30b8 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x30b8 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x30b8 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x30bc + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x30bc 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 172 0x30bc 2 x +elementwise_binary.h 195 0x30bc 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x30c2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x30c2 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x30c2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x30c6 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x30c6 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x30c6 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x30ca x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x30ca 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 218 0x30ca 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x30ce + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2/../../detail/aie2p/accum.hpp: +accum.hpp 1110 0x30ce 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary.h: +elementwise_binary.h 195 0x30ce 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 369 0x30e0 x +superkernels.cpp 374 0x30e0 1 +superkernels.cpp 374 0x30e6 x +superkernels.cpp 369 0x30ec +superkernels.cpp 371 0x30f2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x30f2 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 374 0x310e x +superkernels.cpp 374 0x310e 1 x +superkernels.cpp 371 0x3114 x +superkernels.cpp 371 0x3118 +superkernels.cpp 371 0x311e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x3126 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 377 0x312a +superkernels.cpp 379 0x312a 1 +superkernels.cpp 381 0x312a 2 +superkernels.cpp 393 0x312a 3 +superkernels.cpp 377 0x3134 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x3134 1 +tile.hpp 74 0x313e +tile.hpp 86 0x313e 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 377 0x314a x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/aie2p/../aie2/tile.hpp: +tile.hpp 86 0x3154 +tile.hpp 74 0x3158 +tile.hpp 74 0x315c x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 381 0x3160 +superkernels.cpp 381 0x3160 1 x +superkernels.cpp 381 0x316a +superkernels.cpp 381 0x316a 1 +superkernels.cpp 390 0x316a 2 +superkernels.cpp 379 0x3174 x +superkernels.cpp 382 0x3174 1 +superkernels.cpp 391 0x3174 2 +superkernels.cpp 379 0x318a +superkernels.cpp 381 0x3190 x +superkernels.cpp 379 0x3194 x +superkernels.cpp 381 0x3198 x +superkernels.cpp 382 0x319c x +superkernels.cpp 390 0x31a0 +superkernels.cpp 391 0x31a6 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x31b0 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x31b4 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x31b4 1 +io_buffer_main.h 218 0x31be +io_buffer_main.h 218 0x31c2 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x31c6 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 235 0x31ca x +io_buffer_main.h 218 0x31d6 x +io_buffer_main.h 218 0x31d6 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x31da x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 218 0x31da 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 385 0x31e0 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 395 0x31e4 +io_buffer_main.h 395 0x31e4 1 +io_buffer_main.h 395 0x31ee x +io_buffer_main.h 218 0x31f2 x +io_buffer_main.h 218 0x31fa +io_buffer_main.h 218 0x31fe +io_buffer_main.h 218 0x3202 +io_buffer_main.h 235 0x3206 x +io_buffer_main.h 218 0x3214 x +io_buffer_main.h 218 0x3214 1 x +io_buffer_main.h 218 0x3218 +io_buffer_main.h 395 0x3224 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x3228 +superkernels.cpp 391 0x3228 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x3228 2 +io_buffer_main.h 125 0x3236 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x323a x +superkernels.cpp 391 0x3240 x +superkernels.cpp 393 0x3240 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x3246 x +io_buffer_main.h 125 0x324a +io_buffer_main.h 327 0x324e +io_buffer_main.h 327 0x324e 1 +io_buffer_main.h 125 0x3254 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 393 0x325a x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x3260 +io_buffer_main.h 327 0x3260 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 390 0x3264 x +superkernels.cpp 391 0x3268 x +superkernels.cpp 391 0x326c +superkernels.cpp 390 0x3270 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 324 0x3280 x +io_buffer_main.h 327 0x3280 1 +io_buffer_main.h 327 0x3280 2 +io_buffer_main.h 327 0x3280 3 +io_buffer_main.h 327 0x3280 4 +io_buffer_main.h 425 0x3280 5 +io_buffer_main.h 425 0x3280 6 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x328a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 425 0x329a x +io_buffer_main.h 327 0x329e x +io_buffer_main.h 324 0x32a2 +io_buffer_main.h 327 0x32b0 +io_buffer_main.h 324 0x32b4 x +io_buffer_main.h 327 0x32b4 1 +io_buffer_main.h 425 0x32c6 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x32ca +superkernels.cpp 398 0x32ca 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x32ca 2 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x32d4 x +superkernels.cpp 397 0x32d8 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 327 0x32e4 x +io_buffer_main.h 327 0x32e8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend/superkernels.cpp: +superkernels.cpp 397 0x32ec x +superkernels.cpp 397 0x32f0 +superkernels.cpp 398 0x3300 +superkernels.cpp 398 0x3304 x +superkernels.cpp 400 0x3310 +superkernels.cpp 400 0x3326 x +superkernels.cpp 400 0x332e +superkernels.cpp - 0x332f + + +CU: /usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common/elementwise_binary_broadcasting.h: +File name Line number Starting address View Stmt + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x3580 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 78 0x3580 1 +transpose4d_adf_wrapper.cpp 78 0x3580 2 x +transpose4d_adf_wrapper.cpp 80 0x358e x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x3594 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 80 0x3594 1 +transpose4d_adf_wrapper.cpp 80 0x359c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x35aa +io_buffer_main.h 125 0x35b0 +io_buffer_main.h 125 0x35c0 +io_buffer_main.h 125 0x35c0 1 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 99 0x35ce + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 125 0x35d2 x +io_buffer_main.h 125 0x35d6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 99 0x35da x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf/transpose4d_adf_wrapper.cpp: +transpose4d_adf_wrapper.cpp 82 0x35e0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 99 0x35e8 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 21 0x3340 x +0_0_reloadable0.cc 23 0x3340 1 +0_0_reloadable0.cc 23 0x3344 x +0_0_reloadable0.cc 25 0x3348 x +0_0_reloadable0.cc 24 0x334c x +0_0_reloadable0.cc 22 0x3350 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle_params.h: +transposeshuffle_params.h 93 0x3360 x +transposeshuffle_params.h 102 0x3360 1 x +transposeshuffle_params.h 102 0x3370 x +transposeshuffle_params.h 102 0x3374 +transposeshuffle_params.h 102 0x3378 +transposeshuffle_params.h 102 0x3386 +transposeshuffle_params.h 102 0x338a +transposeshuffle_params.h 102 0x338e +transposeshuffle_params.h 102 0x3392 +transposeshuffle_params.h 111 0x3398 x +transposeshuffle_params.h 102 0x33a2 x +transposeshuffle_params.h 102 0x33a6 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 71 0x33b0 x +transposeshuffle.h 78 0x33b0 1 +transposeshuffle.h 78 0x33b0 2 +transposeshuffle.h 78 0x33b0 3 +transposeshuffle.h 78 0x33b6 x +transposeshuffle.h 81 0x33ba x +transposeshuffle.h 78 0x33be x +transposeshuffle.h 81 0x33cc x +transposeshuffle.h 81 0x33cc 1 x +transposeshuffle.h 78 0x33d2 +transposeshuffle.h 78 0x33d2 1 +transposeshuffle.h 78 0x33d6 x +transposeshuffle.h 78 0x33d6 1 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x33f8 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 81 0x33f8 1 x +transposeshuffle.h 83 0x33f8 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3402 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 81 0x3402 1 +transposeshuffle.h 83 0x3402 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3410 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 81 0x3410 1 +transposeshuffle.h 83 0x3410 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3420 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 83 0x3420 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3430 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 83 0x3430 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3440 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 83 0x3440 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3450 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 83 0x3450 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3460 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 83 0x3460 1 +transposeshuffle.h 84 0x3460 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3470 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 83 0x3470 1 x +transposeshuffle.h 84 0x3470 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x3480 +vector.hpp 1159 0x3480 1 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 83 0x3480 2 +transposeshuffle.h 84 0x3480 3 x +transposeshuffle.h 85 0x3480 4 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3490 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 84 0x3490 1 +transposeshuffle.h 85 0x3490 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3498 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 84 0x3498 1 +transposeshuffle.h 85 0x3498 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34a0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 84 0x34a0 1 +transposeshuffle.h 85 0x34a0 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34a8 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 84 0x34a8 1 +transposeshuffle.h 85 0x34a8 2 +transposeshuffle.h 88 0x34a8 3 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34b2 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 84 0x34b2 1 x +transposeshuffle.h 85 0x34b2 2 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34ba + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 84 0x34ba 1 +transposeshuffle.h 85 0x34ba 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34c2 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 84 0x34c2 1 +transposeshuffle.h 85 0x34c2 2 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34ca + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 85 0x34ca 1 + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x34d0 + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 85 0x34d0 1 +transposeshuffle.h 81 0x34e0 x +transposeshuffle.h 81 0x34e4 +transposeshuffle.h 81 0x34ea + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1139 0x34f0 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 83 0x34f0 1 x +transposeshuffle.h 84 0x3540 x + +/usr/local/lib/python3.10/dist-packages/include/aie_api/detail/../detail/aie2/vector.hpp: +vector.hpp 1159 0x3560 x + +/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc/transposeshuffle.h: +transposeshuffle.h 85 0x3560 1 x +transposeshuffle.h 88 0x3570 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 29 0x3600 x +0_0_reloadable0.cc 31 0x3600 1 +0_0_reloadable0.cc 31 0x3604 x +0_0_reloadable0.cc 33 0x3608 x +0_0_reloadable0.cc 32 0x360c x +0_0_reloadable0.cc 30 0x3610 x +0_0_reloadable0.cc 37 0x3620 x +0_0_reloadable0.cc 39 0x3620 1 +0_0_reloadable0.cc 39 0x3624 x +0_0_reloadable0.cc 40 0x3628 x +0_0_reloadable0.cc 42 0x362c x +0_0_reloadable0.cc 41 0x3630 x +0_0_reloadable0.cc 38 0x3634 x +0_0_reloadable0.cc 46 0x3650 x +0_0_reloadable0.cc 48 0x3650 1 +0_0_reloadable0.cc 48 0x3654 x +0_0_reloadable0.cc 49 0x3658 x +0_0_reloadable0.cc 51 0x365c x +0_0_reloadable0.cc 50 0x3660 x +0_0_reloadable0.cc 47 0x3664 x +0_0_reloadable0.cc 55 0x3680 x +0_0_reloadable0.cc 57 0x3680 1 +0_0_reloadable0.cc 57 0x3684 x +0_0_reloadable0.cc 58 0x3688 x +0_0_reloadable0.cc 60 0x368c x +0_0_reloadable0.cc 59 0x3690 x +0_0_reloadable0.cc 56 0x3694 x +0_0_reloadable0.cc 64 0x36b0 x +0_0_reloadable0.cc 66 0x36b0 1 +0_0_reloadable0.cc 66 0x36b4 x +0_0_reloadable0.cc 68 0x36b8 x +0_0_reloadable0.cc 67 0x36bc x +0_0_reloadable0.cc 65 0x36c0 x +0_0_reloadable0.cc 72 0x36d0 x +0_0_reloadable0.cc 74 0x36d0 1 +0_0_reloadable0.cc 74 0x36d4 x +0_0_reloadable0.cc 76 0x36d8 x +0_0_reloadable0.cc 75 0x36dc x +0_0_reloadable0.cc 73 0x36e0 x +0_0_reloadable0.cc 92 0x930 x +0_0_reloadable0.cc 94 0x930 1 +0_0_reloadable0.cc 94 0x930 2 x +0_0_reloadable0.cc 92 0x936 +0_0_reloadable0.cc 99 0x940 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x940 1 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 96 0x948 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0x948 1 +io_buffer_compiler.h 590 0x958 x +io_buffer_compiler.h 590 0x95c +io_buffer_compiler.h 590 0x960 +io_buffer_compiler.h 590 0x964 +io_buffer_compiler.h 590 0x968 +io_buffer_compiler.h 195 0x978 x +io_buffer_compiler.h 195 0x978 1 x +io_buffer_compiler.h 194 0x97c x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x980 +io_buffer_main.h 410 0x980 1 +io_buffer_main.h 410 0x980 2 +io_buffer_main.h 410 0x98c x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 96 0x990 +0_0_reloadable0.cc 96 0x990 1 +0_0_reloadable0.cc 99 0x990 2 +0_0_reloadable0.cc 102 0x990 3 +0_0_reloadable0.cc 96 0x996 +0_0_reloadable0.cc 96 0x996 1 x +0_0_reloadable0.cc 96 0x99c +0_0_reloadable0.cc 96 0x99c 1 +0_0_reloadable0.cc 96 0x9a2 +0_0_reloadable0.cc 99 0x9a2 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x9ac + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0x9be x +io_buffer_compiler.h 590 0x9c2 +io_buffer_compiler.h 590 0x9c6 +io_buffer_compiler.h 590 0x9ca +io_buffer_compiler.h 590 0x9ce +io_buffer_compiler.h 195 0x9de x +io_buffer_compiler.h 195 0x9de 1 x +io_buffer_compiler.h 194 0x9e2 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0x9ee x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 99 0x9f2 x +0_0_reloadable0.cc 99 0x9f6 +0_0_reloadable0.cc 99 0x9f6 1 +0_0_reloadable0.cc 99 0x9fc +0_0_reloadable0.cc 99 0x9fc 1 +0_0_reloadable0.cc 99 0xa02 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 590 0xa14 x +io_buffer_compiler.h 590 0xa18 +io_buffer_compiler.h 590 0xa1c +io_buffer_compiler.h 590 0xa20 +io_buffer_compiler.h 590 0xa24 +io_buffer_compiler.h 195 0xa34 x +io_buffer_compiler.h 195 0xa34 1 x +io_buffer_compiler.h 194 0xa38 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 410 0xa44 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 102 0xa48 x +0_0_reloadable0.cc 102 0xa4c +0_0_reloadable0.cc 102 0xa50 +0_0_reloadable0.cc 102 0xa56 +0_0_reloadable0.cc 102 0xa68 +0_0_reloadable0.cc 105 0xa6c +0_0_reloadable0.cc 107 0xa6c 1 +0_0_reloadable0.cc 105 0xa80 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa80 1 +io_buffer_compiler.h 606 0xa80 2 +io_buffer_compiler.h 606 0xa80 3 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa80 4 +io_buffer_main.h 440 0xa80 5 +io_buffer_main.h 440 0xa80 6 +io_buffer_main.h 440 0xa86 + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 107 0xa8a + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xa8e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xa8e 1 +io_buffer_main.h 440 0xa92 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 605 0xa9a x +io_buffer_compiler.h 605 0xa9e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xaae x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xab2 +io_buffer_compiler.h 606 0xab2 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 107 0xab8 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xab8 1 + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xac8 x +io_buffer_compiler.h 605 0xacc x +io_buffer_compiler.h 606 0xacc 1 +io_buffer_compiler.h 605 0xad2 +io_buffer_compiler.h 606 0xad2 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xae2 +io_buffer_main.h 440 0xae6 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xaea +io_buffer_compiler.h 606 0xaea 1 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 110 0xaf0 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xafe x +io_buffer_compiler.h 605 0xb02 x +io_buffer_compiler.h 606 0xb02 1 +io_buffer_compiler.h 605 0xb08 +io_buffer_compiler.h 606 0xb08 1 x + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_main.h: +io_buffer_main.h 440 0xb1a x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 112 0xb1e + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xb22 x + +/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_0_reloadable0/src/0_0_reloadable0.cc: +0_0_reloadable0.cc 112 0xb36 x +0_0_reloadable0.cc 112 0xb3c + +/usr/local/lib/python3.10/dist-packages/include/adf/io_buffer/io_buffer_compiler.h: +io_buffer_compiler.h 606 0xb40 x +io_buffer_compiler.h 606 0xb46 +io_buffer_compiler.h 606 0xb4a +io_buffer_compiler.h 606 0xb4e +io_buffer_compiler.h - 0xb4f + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + +CU: No directory table +CU: Empty file name table + - 0x1 + + diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/scripts/0_3_reloadable0.bcf b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/scripts/0_3_reloadable0.bcf new file mode 100644 index 0000000000000000000000000000000000000000..ac2c44e2095fee61e0bb45bf67ea52ec6719ca60 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/scripts/0_3_reloadable0.bcf @@ -0,0 +1,16 @@ +_reserved DMb 0x0 0x40000 + +_reserved PM 0x0 0x930 //reserved for main elf + +_entry_point _Z13kernelWrapperPPvjjjj +_symbol _Z13kernelWrapperPPvjjjj 0x930 + +_reserved DMb 0x7b280 0x800 //reserved for lcp ping-pong buffers +_reserved DMb 0x7ba80 0x40 //reserved for sync buffer +_stack DM_stack 0x7bac0 0x940 //stack for core +_reserved DMb 0x7c400 0x40 //reserved for main elf heap +//space for synopsys compiler at 0x7c440 0x880//heap +_reserved DMb 0x40000 0x3b280 + +_reserved DMb 0x7ccc0 0x3340 +_reserved DMb 0x80000 0x80000 // And everything else the core can't see diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/scripts/0_3_reloadable0.prx b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/scripts/0_3_reloadable0.prx new file mode 100644 index 0000000000000000000000000000000000000000..e47a080053390a3ffebd2f581635ad6465805f53 --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/scripts/0_3_reloadable0.prx @@ -0,0 +1,13 @@ + + + \ No newline at end of file diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/src/0_3_reloadable0.cc b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/src/0_3_reloadable0.cc new file mode 100644 index 0000000000000000000000000000000000000000..e1d8e56064cb7607d7a6baab9056752be273a19e --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable0/src/0_3_reloadable0.cc @@ -0,0 +1,112 @@ +// Automatically generated processor driver using AIEngine tool-chain + +#include +#include +#include + + +// Declare Kernel functions and initializers +void superkernel_mul1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +#include "transpose4d_adf_wrapper.cpp" +void superkernel_add1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_mul1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict,adf::io_buffer, adf::locking::async>> &__restrict); +void conv2d_maxpool(adf::io_buffer>> &__restrict,adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_add1d_attribute_broadcasting(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); +void superkernel_clip1d(adf::io_buffer>> &__restrict,const unsigned int (&)[16],adf::io_buffer, adf::locking::async>> &__restrict); + +// Declare Kernel objects and external arrays + + +void _b881_wrapper(void* args[]) +{ + superkernel_mul1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b719_wrapper(void* args[]) +{ + mllib_graphs::transpose4d_adf_wrapper>, adf::io_buffer_config>>( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[2])); +} + +void _b886_wrapper(void* args[]) +{ + superkernel_add1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b891_wrapper(void* args[]) +{ + superkernel_mul1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast*>(args[2])); +} + +void _b896_wrapper(void* args[]) +{ + conv2d_maxpool( + *reinterpret_cast*>(args[0]), + *reinterpret_cast*>(args[1]), + *reinterpret_cast(args[3]), + *reinterpret_cast*>(args[2])); +} + +void _b901_wrapper(void* args[]) +{ + superkernel_add1d_attribute_broadcasting( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +void _b906_wrapper(void* args[]) +{ + superkernel_clip1d( + *reinterpret_cast*>(args[0]), + *reinterpret_cast(args[2]), + *reinterpret_cast*>(args[1])); +} + +using UniformKernelFunc = void (*)(void **); + +static UniformKernelFunc g_uniformKernelFuncs[7] = { + _b881_wrapper, + _b719_wrapper, + _b886_wrapper, + _b891_wrapper, + _b896_wrapper, + _b901_wrapper, + _b906_wrapper +}; + +__attribute__((always_inline)) void kernelWrapper(void* args[], uint32 kernelId, uint32 numSyncIn, uint32 numAsyncIn, uint32 numSyncOut) +{ + uint32 idx = 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->acquire(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->acquire(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; + + (*(g_uniformKernelFuncs[kernelId]))(args); + + idx = 0; + reinterpret_cast(args[idx])->release(numSyncIn > 0); + idx += (numSyncIn > 0) ? 1 : 0; + reinterpret_cast(args[idx])->release(numSyncIn > 1); + idx += (numSyncIn > 1) ? 1 : 0; + idx += numAsyncIn; + reinterpret_cast(args[idx])->release(numSyncOut > 0); + idx += (numSyncOut > 0) ? 1 : 0; +} diff --git a/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable1/Release/0_3_reloadable1.calltree b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable1/Release/0_3_reloadable1.calltree new file mode 100644 index 0000000000000000000000000000000000000000..2740421ab65204768a46d772ccda05699187bfaa --- /dev/null +++ b/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/Work/aie/0_3_reloadable1/Release/0_3_reloadable1.calltree @@ -0,0 +1,64 @@ + +// File generated by bridge version V-2024.06#84922c0d9f#241219, Fri Mar 21 03:45:22 2025 +// Copyright 2014-2024 Synopsys, Inc. All rights reserved. +// bridge -o../Release/0_0_reloadable1 ../Release/0_0_reloadable1.o -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/isg -g -I/usr/local/lib/python3.10/dist-packages/include -I/app/vaiml_1.3_examples/camo/./segmentation_1_4_0_fp32_combined/vaiml_par_0/0/backend -I/usr/local/lib/python3.10/site-packages/include/aie_api -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/include/common -I/usr/local/lib/python3.10/dist-packages/vitis_mllib -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L1/include/misc -I/usr/local/lib/python3.10/dist-packages/vitis_mllib/L2/src/ml_adf -I/app/vaiml_1.3_examples/camo/segmentation_1_4_0_fp32_combined/vaiml_par_0/0/aiecompiler/. -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/include -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libs/libcxx-9.0.0/include-lite -I/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/include -D__AIENGINE__ -D__AIE_ARCH__=21 -DDEPLOYMENT_ELF=1 -D__LOCK_FENCE_MODE__=0 -D__IO_BUFFER_FORCE_LIGHT_WEIGHT__ -DAIE_API_EMULATE_BFLOAT16_MMUL_WITH_BFP16=1 -DAIE_OPTION_SCALAR_FLOAT_ON_VECTOR -D__tct_tgt__=241219 -c0_0_reloadable1.bcf -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/softfloat/lib/Release -L/usr/local/lib/python3.10/dist-packages/data/aie2p/lib/runtime_cxx/libcxx-lite/lib/Release_LLVM -lme -lc -lm -lc++lite -lsoftfloat -S -export-locals -iconfig extra_memories.bcf -yTM -m -fC -fS -fH +m -T +work ../Release/chesswork1394 -pme + + +// Release: ipp V-2024.06-TGT-241219 + +_Z13kernelWrapperPPvjjjj + _Z13_b919_wrapperPPv (referenced text) + _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + _ZN12me_primitive10udiv_dstepEjjRjS0_ + _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + _Z13_b924_wrapperPPv (referenced text) + _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE + _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + _Z13_b896_wrapperPPv (referenced text) + _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh (*) + _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*) + + +Call tree stack and functions sizes: + +stack stack stack call func func function name + desc level level desc +----- ----- ----- ----- ----- ----- -------------------------------------------------------------- + 64 256 0 0 390 9538 _Z13kernelWrapperPPvjjjj + 0 192 1 1 36 2050 _Z13_b919_wrapperPPv + 128 192 1 2 478 2014 _Z22superkernel_conv2d_dwcRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 672 814 _ZL27setup_conv2d_dw_params_bf16PKjR21conv2d_dw_bf16_paramsh + 0 0 3 4 142 142 _ZN12me_primitive10udiv_dstepEjjRjS0_ + 0 0 2 3 722 722 _Z9conv2d_dwILh1E8bfloat16S0_S0_N3adf16io_buffer_configINS1_7extentsIJEEENS1_7locking4syncENS1_10addressing6linearENS1_6marginILj0EEEEESB_NS2_IS4_NS5_5asyncES8_SA_EEQsr3stdE9is_same_vIT0_S0_EEvRNS1_9io_bufferISE_NS1_9direction2inET3_EERNSF_IT1_SH_T4_EERNSF_IT2_NSG_3outET5_EER21conv2d_dw_bf16_params + 0 192 1 1 40 6494 _Z13_b924_wrapperPPv + 64 192 1 2 1126 6454 _Z26superkernel_conv_eltbinaryRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RNS0_IS1_S3_NS4_IS6_NS7_5asyncESA_SC_EEEERA17_KjRNS0_IS1_NS2_3outESH_EE + 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 64 64 2 3 138 162 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_EPKv + 0 0 3 4 24 24 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE5setupER27elementwise_binary_params_tIS4_E + 64 64 2 3 98 214 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE5setupER27elementwise_binary_params_tIS5_EPKv + 0 0 3 4 116 116 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE21shared_setup_backboneER27elementwise_binary_params_tIS5_EPKv + 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params + 0 0 3 4 270 270 _Z21convert_bf16_to_bfp16I8bfloat16Lb0EEvPT_PS0_RK13BfToBfpParams + 0 0 2 3 292 292 _ZN18elementwise_binaryIJ8bfloat168mul_implIS0_E15shared_params_tIS0_EEE3runEPS0_S6_S6_R27elementwise_binary_params_tIS4_E + 0 128 2 3 16 550 _ZN25elementwise_binary_sharedI8bfloat168add_implIS0_L5act_t0EE15shared_params_tIS0_ELS2_0EE3runEPS0_S7_S7_R27elementwise_binary_params_tIS5_E + 128 128 2 4 534 534 _ZL19shared_run_backboneI8bfloat16L5act_t0EEKvPT_S4_S4_R27elementwise_binary_params_tI15shared_params_tIS3_EE + 0 192 1 1 36 4714 _Z13_b896_wrapperPPv + 64 192 1 2 568 4678 _Z14conv2d_maxpoolRN3adf9io_bufferI8bfloat16NS_9direction2inENS_16io_buffer_configINS_7extentsIJEEENS_7locking4syncENS_10addressing6linearENS_6marginILj0EEEEEEESF_RA16_KjRNS0_IS1_NS2_3outENS4_IS6_NS7_5asyncESA_SC_EEEE + 64 64 2 3 1430 1430 _Z24setup_conv2d_bf16_paramsILb1ELb0EEvPKjR18conv2d_bf16_paramshh + 128 128 2 3 2410 2680 _Z11conv2d_bf16ILh1EL5act_t0E8bfloat16S1_S1_N3adf16io_buffer_configINS2_7extentsIJEEENS2_7locking4syncENS2_10addressing6linearENS2_6marginILj0EEEEESC_NS3_IS5_NS6_5asyncES9_SB_EELb0ELb0ELb1ELb0EEvRNS2_9io_bufferIT1_NS2_9direction2inET4_EERNSF_IT2_SI_T5_EERNSF_IT3_NSH_3outET6_EER18conv2d_bf16_params (*) + + +Maximum call level : 4 +Maximum stack level: 3 +Maximum stack size : 256